Merge drm/drm-next into drm-misc-next

Backmerging to pick up fixes from amdgpu.

Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
Thomas Zimmermann 2022-08-01 15:54:52 +02:00
commit 9cf26c8968
558 changed files with 177959 additions and 16678 deletions

View file

@ -75,7 +75,7 @@ we have a dedicated glossary for Display Core at
PSP
Platform Security Processor
RCL
RLC
RunList Controller
SDMA

View file

@ -63,3 +63,44 @@ gpu_metrics
.. kernel-doc:: drivers/gpu/drm/amd/pm/amdgpu_pm.c
:doc: gpu_metrics
GFXOFF
======
GFXOFF is a feature found in most recent GPUs that saves power at runtime. The
card's RLC (RunList Controller) firmware powers off the gfx engine
dynamically when there is no workload on gfx or compute pipes. GFXOFF is on by
default on supported GPUs.
Userspace can interact with GFXOFF through a debugfs interface:
``amdgpu_gfxoff``
-----------------
Use it to enable/disable GFXOFF, and to check if it's currently enabled/disabled::
$ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff
01
- Write 0 to disable it, or 1 to enable it.
- Reading 0 means it's disabled; 1 means it's enabled.
If it's enabled, the GPU is free to enter GFXOFF mode as needed. Disabled
means that it will never enter GFXOFF mode.
``amdgpu_gfxoff_status``
------------------------
Read it to check the current GFXOFF status of a GPU::
$ xxd -l1 -p /sys/kernel/debug/dri/0/amdgpu_gfxoff_status
02
- 0: GPU is in GFXOFF state, the gfx engine is powered down.
- 1: Transition out of GFXOFF state
- 2: Not in GFXOFF state
- 3: Transition into GFXOFF state
If GFXOFF is enabled, the value will transition between 0 and 3, settling at
0 whenever possible. When it's disabled, the value is always 2. Reading
returns ``-EINVAL`` if GFXOFF is not supported.
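For illustration, here is a minimal userspace sketch (assuming debugfs is
mounted at /sys/kernel/debug and the GPU is DRI device 0) that reads and
decodes the status byte, in the spirit of the ``xxd`` example above::

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>

  int main(void)
  {
      unsigned char buf[4] = {0};
      int fd = open("/sys/kernel/debug/dri/0/amdgpu_gfxoff_status", O_RDONLY);

      /* read() fails with EINVAL when GFXOFF is not supported */
      if (fd < 0 || read(fd, buf, sizeof(buf)) < 1) {
          perror("amdgpu_gfxoff_status");
          return 1;
      }
      close(fd);

      switch (buf[0]) {
      case 0: puts("in GFXOFF: gfx engine powered down"); break;
      case 1: puts("transitioning out of GFXOFF"); break;
      case 2: puts("not in GFXOFF"); break;
      case 3: puts("transitioning into GFXOFF"); break;
      default: printf("unknown status %u\n", buf[0]);
      }
      return 0;
  }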

View file

@ -0,0 +1,189 @@
/**
* struct __drm_i915_memory_region_info - Describes one region as known to the
* driver.
*
* Note this is using both struct drm_i915_query_item and struct drm_i915_query.
* For this new query we are adding the new query id DRM_I915_QUERY_MEMORY_REGIONS
* at &drm_i915_query_item.query_id.
*/
struct __drm_i915_memory_region_info {
/** @region: The class:instance pair encoding */
struct drm_i915_gem_memory_class_instance region;
/** @rsvd0: MBZ */
__u32 rsvd0;
/**
* @probed_size: Memory probed by the driver
*
* Note that it should not be possible to ever encounter a zero value
* here. Also note that no current region type will ever return -1 here,
* although that might be a possibility for future region types. The
* same applies to the other size fields.
*/
__u64 probed_size;
/**
* @unallocated_size: Estimate of memory remaining
*
* Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable accounting.
* Without this (or if this is an older kernel) the value here will
* always equal the @probed_size. Note this is only currently tracked
* for I915_MEMORY_CLASS_DEVICE regions (for other types the value here
* will always equal the @probed_size).
*/
__u64 unallocated_size;
union {
/** @rsvd1: MBZ */
__u64 rsvd1[8];
struct {
/**
* @probed_cpu_visible_size: Memory probed by the driver
* that is CPU accessible.
*
* This will always be <= @probed_size, and the
* remainder (if there is any) will not be CPU
* accessible.
*
* On systems without small BAR, the @probed_size will
* always equal the @probed_cpu_visible_size, since all
* of it will be CPU accessible.
*
* Note this is only tracked for
* I915_MEMORY_CLASS_DEVICE regions (for other types the
* value here will always equal the @probed_size).
*
* Note that if the value returned here is zero, then
* this must be an old kernel which lacks the relevant
* small-bar uAPI support (including
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS), but on
* such systems we should never actually end up with a
* small BAR configuration, assuming we are able to load
* the kernel module. Hence it should be safe to treat
* this the same as when @probed_cpu_visible_size ==
* @probed_size.
*/
__u64 probed_cpu_visible_size;
/**
* @unallocated_cpu_visible_size: Estimate of CPU
* visible memory remaining
*
* Note this is only tracked for
* I915_MEMORY_CLASS_DEVICE regions (for other types the
* value here will always equal the
* @probed_cpu_visible_size).
*
* Requires CAP_PERFMON or CAP_SYS_ADMIN to get reliable
* accounting. Without this the value here will always equal the
* @probed_cpu_visible_size.
*
* If this is an older kernel the value here will be
* zero, see also @probed_cpu_visible_size.
*/
__u64 unallocated_cpu_visible_size;
};
};
};
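/*
 * Usage illustration (not part of this RFC header): a minimal userspace
 * sketch of the usual two-pass DRM_IOCTL_I915_QUERY pattern. It assumes the
 * shipped i915_drm.h counterparts of the __-prefixed structs above
 * (struct drm_i915_memory_region_info / drm_i915_query_memory_regions).
 */
#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Two-pass query: the first ioctl (data_ptr == 0) reports the required
 * buffer size in item.length; the second fills the buffer. Returns NULL
 * on error; the caller frees the result.
 */
static struct drm_i915_query_memory_regions *query_regions(int drm_fd)
{
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_MEMORY_REGIONS,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (__u64)(uintptr_t)&item,
	};
	struct drm_i915_query_memory_regions *info;

	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query) || item.length <= 0)
		return NULL;

	info = calloc(1, item.length);
	if (!info)
		return NULL;
	item.data_ptr = (__u64)(uintptr_t)info;

	if (ioctl(drm_fd, DRM_IOCTL_I915_QUERY, &query)) {
		free(info);
		return NULL;
	}
	return info; /* inspect info->regions[0 .. info->num_regions - 1] */
}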
/**
* struct __drm_i915_gem_create_ext - Existing gem_create behaviour, with added
* extension support using struct i915_user_extension.
*
* Note that new buffer flags should be added here, at least for the stuff that
* is immutable. Previously we would have two ioctls, one to create the object
* with gem_create, and another to apply various parameters, however this
* creates some ambiguity for the params which are considered immutable. Also in
* general we're phasing out the various SET/GET ioctls.
*/
struct __drm_i915_gem_create_ext {
/**
* @size: Requested size for the object.
*
* The (page-aligned) allocated size for the object will be returned.
*
* Note that for some devices we might have further minimum
* page-size restrictions (larger than 4K), like for device local-memory.
* However in general the final size here should always reflect any
* rounding up, if for example using the I915_GEM_CREATE_EXT_MEMORY_REGIONS
* extension to place the object in device local-memory. The kernel will
* always select the largest minimum page-size for the set of possible
* placements as the value to use when rounding up the @size.
*/
__u64 size;
/**
* @handle: Returned handle for the object.
*
* Object handles are nonzero.
*/
__u32 handle;
/**
* @flags: Optional flags.
*
* Supported values:
*
* I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS - Signal to the kernel that
* the object will need to be accessed via the CPU.
*
* Only valid when placing objects in I915_MEMORY_CLASS_DEVICE, and only
* strictly required on configurations where some subset of the device
* memory is directly visible/mappable through the CPU (which we also
* call small BAR), like on some DG2+ systems. Note that this is quite
* undesirable, but due to various factors like the client CPU, BIOS etc
* it's something we can expect to see in the wild. See
* &__drm_i915_memory_region_info.probed_cpu_visible_size for how to
* determine if this system applies.
*
* Note that one of the placements MUST be I915_MEMORY_CLASS_SYSTEM, to
* ensure the kernel can always spill the allocation to system memory,
* if the object can't be allocated in the mappable part of
* I915_MEMORY_CLASS_DEVICE.
*
* Also note that since the kernel only supports flat-CCS on objects
* that can *only* be placed in I915_MEMORY_CLASS_DEVICE, we therefore
* don't support I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS together with
* flat-CCS.
*
* Without this hint, the kernel will assume that non-mappable
* I915_MEMORY_CLASS_DEVICE is preferred for this object. Note that the
* kernel can still migrate the object to the mappable part, as a last
* resort, if userspace ever CPU faults this object, but this might be
* expensive, and so ideally should be avoided.
*
* On older kernels which lack the relevant small-bar uAPI support (see
* also &__drm_i915_memory_region_info.probed_cpu_visible_size),
* usage of the flag will result in an error, but it should NEVER be
* possible to end up with a small BAR configuration, assuming we can
* also successfully load the i915 kernel module. In such cases the
* entire I915_MEMORY_CLASS_DEVICE region will be CPU accessible, and as
* such there are zero restrictions on where the object can be placed.
*/
#define I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS (1 << 0)
__u32 flags;
/**
* @extensions: The chain of extensions to apply to this object.
*
* This will be useful in the future when we need to support several
* different extensions, and we need to apply more than one when
* creating the object. See struct i915_user_extension.
*
* If we don't supply any extensions then we get the same old gem_create
* behaviour.
*
* For I915_GEM_CREATE_EXT_MEMORY_REGIONS usage see
* struct drm_i915_gem_create_ext_memory_regions.
*
* For I915_GEM_CREATE_EXT_PROTECTED_CONTENT usage see
* struct drm_i915_gem_create_ext_protected_content.
*/
#define I915_GEM_CREATE_EXT_MEMORY_REGIONS 0
#define I915_GEM_CREATE_EXT_PROTECTED_CONTENT 1
__u64 extensions;
};
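/*
 * Usage illustration (not part of this RFC header): a hedged sketch of
 * allocating a CPU-mappable object in device local-memory, assuming the
 * shipped i915_drm.h counterparts of the __-prefixed structs above.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Allocate a buffer that needs CPU access, with system memory as the
 * mandatory spill placement for small BAR configurations.
 */
static int create_mappable_lmem(int drm_fd, __u64 size, __u32 *handle)
{
	struct drm_i915_gem_memory_class_instance placements[] = {
		{ .memory_class = I915_MEMORY_CLASS_DEVICE, .memory_instance = 0 },
		{ .memory_class = I915_MEMORY_CLASS_SYSTEM, .memory_instance = 0 },
	};
	struct drm_i915_gem_create_ext_memory_regions regions = {
		.base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS },
		.num_regions = 2,
		.regions = (__u64)(uintptr_t)placements,
	};
	struct drm_i915_gem_create_ext create = {
		.size = size, /* kernel rounds up to the largest min page size */
		.flags = I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS,
		.extensions = (__u64)(uintptr_t)&regions,
	};

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
		return -1;

	*handle = create.handle;
	return 0;
}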

View file

@ -0,0 +1,47 @@
==========================
I915 Small BAR RFC Section
==========================
Starting from DG2 we will have resizable BAR support for device local-memory (i.e.
I915_MEMORY_CLASS_DEVICE), but in some cases the final BAR size might still be
smaller than the total probed_size. In such cases, only some subset of
I915_MEMORY_CLASS_DEVICE will be CPU accessible (for example the first 256M),
while the remainder is only accessible via the GPU.
I915_GEM_CREATE_EXT_FLAG_NEEDS_CPU_ACCESS flag
----------------------------------------------
New gem_create_ext flag to tell the kernel that a BO will require CPU access.
This becomes important when placing an object in I915_MEMORY_CLASS_DEVICE, where
underneath the device has a small BAR, meaning only some portion of it is CPU
accessible. Without this flag the kernel will assume that CPU access is not
required, and prioritize using the non-CPU visible portion of
I915_MEMORY_CLASS_DEVICE.
.. kernel-doc:: Documentation/gpu/rfc/i915_small_bar.h
:functions: __drm_i915_gem_create_ext
probed_cpu_visible_size attribute
---------------------------------
New struct __drm_i915_memory_region_info attribute which returns the total size
of the CPU accessible portion for the particular region. This should only be
applicable to I915_MEMORY_CLASS_DEVICE. We also report the
unallocated_cpu_visible_size, alongside the unallocated_size.
Vulkan will need this as part of creating a separate VkMemoryHeap with the
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT set, to represent the CPU visible portion,
where the total size of the heap needs to be known. It also wants to be able to
give a rough estimate of how much memory can potentially be allocated.
.. kernel-doc:: Documentation/gpu/rfc/i915_small_bar.h
:functions: __drm_i915_memory_region_info
Error Capture restrictions
--------------------------
With error capture we have two new restrictions:
1) Error capture is best effort on small BAR systems; if the pages are not
CPU accessible at the time of capture, then the kernel is free to skip
trying to capture them.
2) On discrete and newer integrated platforms we now reject error capture
on recoverable contexts. In the future the kernel may want to blit during
error capture, when for example something is not currently CPU accessible.

View file

@ -0,0 +1,291 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright © 2022 Intel Corporation
*/
/**
* DOC: I915_PARAM_VM_BIND_VERSION
*
* VM_BIND feature version supported.
* See typedef drm_i915_getparam_t param.
*
* Specifies the VM_BIND feature version supported.
* The following versions of VM_BIND have been defined:
*
* 0: No VM_BIND support.
*
* 1: In VM_UNBIND calls, the UMD must specify the exact mappings created
* previously with VM_BIND, the ioctl will not support unbinding multiple
* mappings or splitting them. Similarly, VM_BIND calls will not replace
* any existing mappings.
*
* 2: The restrictions on unbinding partial or multiple mappings are
* lifted. Similarly, binding will replace any mappings in the given range.
*
* See struct drm_i915_gem_vm_bind and struct drm_i915_gem_vm_unbind.
*/
#define I915_PARAM_VM_BIND_VERSION 57
/**
* DOC: I915_VM_CREATE_FLAGS_USE_VM_BIND
*
* Flag to opt-in for VM_BIND mode of binding during VM creation.
* See struct drm_i915_gem_vm_control flags.
*
* The older execbuf2 ioctl will not support VM_BIND mode of operation.
* For VM_BIND mode, we have a new execbuf3 ioctl which will not accept any
* execlist (See struct drm_i915_gem_execbuffer3 for more details).
*/
#define I915_VM_CREATE_FLAGS_USE_VM_BIND (1 << 0)
/* VM_BIND related ioctls */
#define DRM_I915_GEM_VM_BIND 0x3d
#define DRM_I915_GEM_VM_UNBIND 0x3e
#define DRM_I915_GEM_EXECBUFFER3 0x3f
#define DRM_IOCTL_I915_GEM_VM_BIND DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_BIND, struct drm_i915_gem_vm_bind)
#define DRM_IOCTL_I915_GEM_VM_UNBIND DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_VM_UNBIND, struct drm_i915_gem_vm_unbind)
#define DRM_IOCTL_I915_GEM_EXECBUFFER3 DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER3, struct drm_i915_gem_execbuffer3)
/**
* struct drm_i915_gem_timeline_fence - An input or output timeline fence.
*
* The operation will wait for input fence to signal.
*
* The returned output fence will be signaled after the completion of the
* operation.
*/
struct drm_i915_gem_timeline_fence {
/** @handle: User's handle for a drm_syncobj to wait on or signal. */
__u32 handle;
/**
* @flags: Supported flags are:
*
* I915_TIMELINE_FENCE_WAIT:
* Wait for the input fence before the operation.
*
* I915_TIMELINE_FENCE_SIGNAL:
* Return operation completion fence as output.
*/
__u32 flags;
#define I915_TIMELINE_FENCE_WAIT (1 << 0)
#define I915_TIMELINE_FENCE_SIGNAL (1 << 1)
#define __I915_TIMELINE_FENCE_UNKNOWN_FLAGS (-(I915_TIMELINE_FENCE_SIGNAL << 1))
/**
* @value: A point in the timeline.
* Value must be 0 for a binary drm_syncobj. A value of 0 for a
* timeline drm_syncobj is invalid as it turns a drm_syncobj into a
* binary one.
*/
__u64 value;
};
/**
* struct drm_i915_gem_vm_bind - VA to object mapping to bind.
*
* This structure is passed to VM_BIND ioctl and specifies the mapping of GPU
* virtual address (VA) range to the section of an object that should be bound
* in the device page table of the specified address space (VM).
* The VA range specified must be unique (i.e., not currently bound) and can
* be mapped to the whole object or to a section of the object (partial binding).
* Multiple VA mappings can be created to the same section of the object
* (aliasing).
*
* The @start, @offset and @length must be 4K page aligned. However the DG2
* and XEHPSDV have a 64K page size for device local memory and a compact
* page table. On those platforms, for binding device local-memory objects,
* the @start, @offset and @length must be 64K aligned. Also, UMDs should not
* mix the local memory 64K page and the system memory 4K page bindings in
* the same 2M range.
*
* Error code -EINVAL will be returned if @start, @offset and @length are not
* properly aligned. In version 1 (See I915_PARAM_VM_BIND_VERSION), error code
* -ENOSPC will be returned if the VA range specified can't be reserved.
*
* VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
* are not ordered. Furthermore, parts of the VM_BIND operation can be done
* asynchronously, if valid @fence is specified.
*/
struct drm_i915_gem_vm_bind {
/** @vm_id: VM (address space) id to bind */
__u32 vm_id;
/** @handle: Object handle */
__u32 handle;
/** @start: Virtual Address start to bind */
__u64 start;
/** @offset: Offset in object to bind */
__u64 offset;
/** @length: Length of mapping to bind */
__u64 length;
/**
* @flags: Supported flags are:
*
* I915_GEM_VM_BIND_CAPTURE:
* Capture this mapping in the dump upon GPU error.
*
* Note that @fence carries its own flags.
*/
__u64 flags;
#define I915_GEM_VM_BIND_CAPTURE (1 << 0)
/**
* @fence: Timeline fence for bind completion signaling.
*
* Timeline fence is of format struct drm_i915_gem_timeline_fence.
*
* It is an out fence, hence using I915_TIMELINE_FENCE_WAIT flag
* is invalid, and an error will be returned.
*
* If I915_TIMELINE_FENCE_SIGNAL flag is not set, then out fence
* is not requested and binding is completed synchronously.
*/
struct drm_i915_gem_timeline_fence fence;
/**
* @extensions: Zero-terminated chain of extensions.
*
* For future extensions. See struct i915_user_extension.
*/
__u64 extensions;
};
/**
* struct drm_i915_gem_vm_unbind - VA to object mapping to unbind.
*
* This structure is passed to VM_UNBIND ioctl and specifies the GPU virtual
* address (VA) range that should be unbound from the device page table of the
* specified address space (VM). VM_UNBIND will force unbind the specified
* range from the device page table without waiting for any GPU job to complete.
* It is the UMD's responsibility to ensure the mapping is no longer in use
* before calling VM_UNBIND.
*
* If the specified mapping is not found, the ioctl will simply return without
* any error.
*
* VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently
* are not ordered. Furthermore, parts of the VM_UNBIND operation can be done
* asynchronously, if valid @fence is specified.
*/
struct drm_i915_gem_vm_unbind {
/** @vm_id: VM (address space) id to unbind from */
__u32 vm_id;
/** @rsvd: Reserved, MBZ */
__u32 rsvd;
/** @start: Virtual Address start to unbind */
__u64 start;
/** @length: Length of mapping to unbind */
__u64 length;
/**
* @flags: Currently reserved, MBZ.
*
* Note that @fence carries its own flags.
*/
__u64 flags;
/**
* @fence: Timeline fence for unbind completion signaling.
*
* Timeline fence is of format struct drm_i915_gem_timeline_fence.
*
* It is an out fence, hence using I915_TIMELINE_FENCE_WAIT flag
* is invalid, and an error will be returned.
*
* If I915_TIMELINE_FENCE_SIGNAL flag is not set, then out fence
* is not requested and unbinding is completed synchronously.
*/
struct drm_i915_gem_timeline_fence fence;
/**
* @extensions: Zero-terminated chain of extensions.
*
* For future extensions. See struct i915_user_extension.
*/
__u64 extensions;
};
/**
* struct drm_i915_gem_execbuffer3 - Structure for DRM_I915_GEM_EXECBUFFER3
* ioctl.
*
* DRM_I915_GEM_EXECBUFFER3 ioctl only works in VM_BIND mode and VM_BIND mode
* only works with this ioctl for submission.
* See I915_VM_CREATE_FLAGS_USE_VM_BIND.
*/
struct drm_i915_gem_execbuffer3 {
/**
* @ctx_id: Context id
*
* Only contexts with user engine map are allowed.
*/
__u32 ctx_id;
/**
* @engine_idx: Engine index
*
* An index in the user engine map of the context specified by @ctx_id.
*/
__u32 engine_idx;
/**
* @batch_address: Batch gpu virtual address/es.
*
* For normal submission, it is the gpu virtual address of the batch
* buffer. For parallel submission, it is a pointer to an array of
* batch buffer gpu virtual addresses with array size equal to the
* number of (parallel) engines involved in that submission (See
* struct i915_context_engines_parallel_submit).
*/
__u64 batch_address;
/** @flags: Currently reserved, MBZ */
__u64 flags;
/** @rsvd1: Reserved, MBZ */
__u32 rsvd1;
/** @fence_count: Number of fences in @timeline_fences array. */
__u32 fence_count;
/**
* @timeline_fences: Pointer to an array of timeline fences.
*
* Timeline fences are of format struct drm_i915_gem_timeline_fence.
*/
__u64 timeline_fences;
/** @rsvd2: Reserved, MBZ */
__u64 rsvd2;
/**
* @extensions: Zero-terminated chain of extensions.
*
* For future extensions. See struct i915_user_extension.
*/
__u64 extensions;
};
/**
* struct drm_i915_gem_create_ext_vm_private - Extension to make the object
* private to the specified VM.
*
* See struct drm_i915_gem_create_ext.
*/
struct drm_i915_gem_create_ext_vm_private {
#define I915_GEM_CREATE_EXT_VM_PRIVATE 2
/** @base: Extension link. See struct i915_user_extension. */
struct i915_user_extension base;
/** @vm_id: Id of the VM to which the object is private */
__u32 vm_id;
};
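/*
 * Usage illustration (not part of this RFC header): a hypothetical sketch
 * of the proposed flow. The VM_BIND/UNBIND uAPI above is RFC-only and may
 * change; struct drm_i915_gem_vm_control and DRM_IOCTL_I915_GEM_VM_CREATE
 * are existing uAPI.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h> /* plus the RFC definitions above */

/* Create a VM in VM_BIND mode, bind one BO at a UMD-chosen VA, unbind it.
 * Both operations complete synchronously here because no
 * I915_TIMELINE_FENCE_SIGNAL out fence is requested.
 */
static int vm_bind_example(int fd, __u32 bo_handle)
{
	struct drm_i915_gem_vm_control ctl = {
		.flags = I915_VM_CREATE_FLAGS_USE_VM_BIND, /* opt in at creation */
	};
	struct drm_i915_gem_vm_bind bind;
	struct drm_i915_gem_vm_unbind unbind;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_VM_CREATE, &ctl))
		return -1;

	memset(&bind, 0, sizeof(bind));
	bind.vm_id = ctl.vm_id;
	bind.handle = bo_handle;
	bind.start = 0x100000;  /* GPU VA is managed entirely by the UMD */
	bind.offset = 0;        /* bind from the start of the BO */
	bind.length = 0x10000;  /* 64K aligned: safe for lmem on DG2/XEHPSDV */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_VM_BIND, &bind))
		return -1;

	memset(&unbind, 0, sizeof(unbind));
	unbind.vm_id = ctl.vm_id;
	unbind.start = bind.start;    /* version 1: must match the exact mapping */
	unbind.length = bind.length;
	return ioctl(fd, DRM_IOCTL_I915_GEM_VM_UNBIND, &unbind);
}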

View file

@ -0,0 +1,245 @@
==========================================
I915 VM_BIND feature design and use cases
==========================================
VM_BIND feature
================
DRM_I915_GEM_VM_BIND/UNBIND ioctls allow UMDs to bind/unbind GEM buffer
objects (BOs) or sections of a BO at specified GPU virtual addresses on a
specified address space (VM). These mappings (also referred to as persistent
mappings) will be persistent across multiple GPU submissions (execbuf calls)
issued by the UMD, without the user having to provide a list of all required
mappings during each submission (as required by the older execbuf mode).
The VM_BIND/UNBIND calls allow UMDs to request a timeline out fence for
signaling the completion of the bind/unbind operation.
The VM_BIND feature is advertised to the user via I915_PARAM_VM_BIND_VERSION.
The user has to opt in to the VM_BIND mode of binding for an address space (VM)
at VM creation time via the I915_VM_CREATE_FLAGS_USE_VM_BIND extension.
VM_BIND/UNBIND ioctl calls executed on different CPU threads concurrently are
not ordered. Furthermore, parts of the VM_BIND/UNBIND operations can be done
asynchronously, when a valid out fence is specified.
VM_BIND features include:
* Multiple Virtual Address (VA) mappings can map to the same physical pages
of an object (aliasing).
* VA mapping can map to a partial section of the BO (partial binding).
* Support capture of persistent mappings in the dump upon GPU error.
* Support for userptr gem objects (no special uapi is required for this).
TLB flush consideration
------------------------
The i915 driver flushes the TLB for each submission and when an object's
pages are released. The VM_BIND/UNBIND operation will not do any additional
TLB flush. Any VM_BIND mapping added will be in the working set for subsequent
submissions on that VM and will not be in the working set for currently running
batches (which would require additional TLB flushes, which is not supported).
Execbuf ioctl in VM_BIND mode
-------------------------------
A VM in VM_BIND mode will not support the older execbuf mode of binding.
The execbuf ioctl handling in VM_BIND mode differs significantly from the
older execbuf2 ioctl (see struct drm_i915_gem_execbuffer2).
Hence, a new execbuf3 ioctl has been added to support VM_BIND mode (see
struct drm_i915_gem_execbuffer3). The execbuf3 ioctl will not accept any
execlist; hence, there is no support for implicit sync. It is expected that
the work below will be able to support the requirements of object dependency
setting in all use cases:
"dma-buf: Add an API for exporting sync files"
(https://lwn.net/Articles/859290/)
The new execbuf3 ioctl only works in VM_BIND mode and the VM_BIND mode only
works with execbuf3 ioctl for submission. All BOs mapped on that VM (through
VM_BIND call) at the time of execbuf3 call are deemed required for that
submission.
The execbuf3 ioctl directly specifies the batch addresses instead of the
object handles used in the execbuf2 ioctl. The execbuf3 ioctl will also not
support many of the older features like in/out/submit fences, fence array,
default gem context and many more (see struct drm_i915_gem_execbuffer3).
In VM_BIND mode, VA allocation is completely managed by the user instead of
the i915 driver. Hence, VA assignment and eviction are not applicable in
VM_BIND mode. Also, for determining object activeness, VM_BIND mode will not
use the i915_vma active reference tracking. It will instead use the dma-resv
object for that (see `VM_BIND dma_resv usage`_).
So, a lot of the existing code supporting the execbuf2 ioctl, like relocations,
VA evictions, the vma lookup table, implicit sync, vma active reference
tracking etc., is not applicable to the execbuf3 ioctl. Hence, all
execbuf3-specific handling should be in a separate file, and only functionality
common to these ioctls should be shared code where possible.
VM_PRIVATE objects
-------------------
By default, BOs can be mapped on multiple VMs and can also be dma-buf
exported. Hence these BOs are referred to as Shared BOs.
During each execbuf submission, the request fence must be added to the
dma-resv fence list of all shared BOs mapped on the VM.
The VM_BIND feature introduces an optimization where the user can create a BO
which is private to a specified VM via the I915_GEM_CREATE_EXT_VM_PRIVATE flag
during BO creation. Unlike Shared BOs, these VM private BOs can only be mapped
on the VM they are private to and can't be dma-buf exported.
All private BOs of a VM share the dma-resv object. Hence during each execbuf
submission, they need only one dma-resv fence list updated. Thus, the fast
path (where required mappings are already bound) submission latency is O(1)
w.r.t the number of VM private BOs.
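As a usage illustration, here is a hypothetical sketch (the extension is
RFC-only and may change) of creating a VM-private BO by chaining
I915_GEM_CREATE_EXT_VM_PRIVATE into gem_create_ext::

  #include <stdint.h>
  #include <sys/ioctl.h>
  #include <drm/i915_drm.h> /* plus the RFC definitions */

  /* @vm_id must name a VM created with I915_VM_CREATE_FLAGS_USE_VM_BIND. */
  static int create_vm_private_bo(int fd, __u32 vm_id, __u32 *handle)
  {
      struct drm_i915_gem_create_ext_vm_private priv = {
          .base = { .name = I915_GEM_CREATE_EXT_VM_PRIVATE },
          .vm_id = vm_id,
      };
      struct drm_i915_gem_create_ext create = {
          .size = 4096,
          .extensions = (__u64)(uintptr_t)&priv,
      };

      if (ioctl(fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create))
          return -1;

      /* The BO can now only be mapped on vm_id and can't be dma-buf
       * exported; it shares the VM's common dma-resv object, keeping
       * fast-path execbuf fence updates O(1).
       */
      *handle = create.handle;
      return 0;
  }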
VM_BIND locking hierarchy
-------------------------
The locking design here supports the older (execlist based) execbuf mode, the
newer VM_BIND mode, the VM_BIND mode with GPU page faults and possible future
system allocator support (See `Shared Virtual Memory (SVM) support`_).
The older execbuf mode and the newer VM_BIND mode without page faults manage
residency of backing storage using dma_fence. The VM_BIND mode with page faults
and the system allocator support do not use any dma_fence at all.
VM_BIND locking order is as below.
1) Lock-A: A vm_bind mutex will protect vm_bind lists. This lock is taken in
vm_bind/vm_unbind ioctl calls, in the execbuf path and while releasing the
mapping.
In future, when GPU page faults are supported, we can potentially use a
rwsem instead, so that multiple page fault handlers can take the read side
lock to lookup the mapping and hence can run in parallel.
The older execbuf mode of binding does not need this lock.
2) Lock-B: The object's dma-resv lock will protect i915_vma state and needs to
be held while binding/unbinding a vma in the async worker and while updating
dma-resv fence list of an object. Note that private BOs of a VM will all
share a dma-resv object.
The future system allocator support will use the HMM prescribed locking
instead.
3) Lock-C: Spinlock/s to protect some of the VM's lists like the list of
invalidated vmas (due to eviction and userptr invalidation) etc.
When GPU page faults are supported, the execbuf path does not take any of these
locks. There we will simply smash the new batch buffer address into the ring and
then tell the scheduler to run that. The lock taking only happens from the page
fault handler, where we take lock-A in read mode, whichever lock-B we need to
find the backing storage (dma_resv lock for gem objects, and hmm/core mm for
system allocator) and some additional locks (lock-D) for taking care of page
table races. Page fault mode should not need to ever manipulate the vm lists,
so won't ever need lock-C.
VM_BIND LRU handling
---------------------
We need to ensure VM_BIND mapped objects are properly LRU tagged to avoid
performance degradation. We will also need support for bulk LRU movement of
VM_BIND objects to avoid additional latencies in execbuf path.
The page table pages are similar to VM_BIND mapped objects (See
`Evictable page table allocations`_) and are maintained per VM and need to
be pinned in memory when the VM is made active (i.e., upon an execbuf call with
that VM). So, bulk LRU movement of page table pages is also needed.
VM_BIND dma_resv usage
-----------------------
Fences need to be added to all VM_BIND mapped objects. During each execbuf
submission, they are added with DMA_RESV_USAGE_BOOKKEEP usage to prevent
over sync (See enum dma_resv_usage). One can override it with either
DMA_RESV_USAGE_READ or DMA_RESV_USAGE_WRITE usage during explicit object
dependency setting.
Note that DRM_I915_GEM_WAIT and DRM_I915_GEM_BUSY ioctls do not check for
DMA_RESV_USAGE_BOOKKEEP usage and hence should not be used for end of batch
check. Instead, the execbuf3 out fence should be used for end of batch check
(See struct drm_i915_gem_execbuffer3).
Also, in VM_BIND mode, use the dma-resv APIs for determining object activeness
(see dma_resv_test_signaled() and dma_resv_wait_timeout()) and do not use the
older i915_vma active reference tracking, which is deprecated. This should be
easier to get working with the current TTM backend.
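For instance, a minimal kernel-side sketch using the existing dma-resv API
(the helper names are illustrative; the caller is assumed to hold the BO's
dma-resv lock and to have reserved a fence slot with
dma_resv_reserve_fences())::

  #include <linux/dma-resv.h>

  /* Attach the execbuf request fence with BOOKKEEP usage so that
   * implicit sync never waits on it (preventing over sync).
   */
  static void vm_bind_add_request_fence(struct dma_resv *resv,
                                        struct dma_fence *fence)
  {
      dma_resv_add_fence(resv, fence, DMA_RESV_USAGE_BOOKKEEP);
  }

  /* Activeness check without i915_vma active reference tracking:
   * DMA_RESV_USAGE_BOOKKEEP matches all fences, including READ/WRITE.
   */
  static bool vm_bind_object_is_idle(struct dma_resv *resv)
  {
      return dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP);
  }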
Mesa use case
--------------
VM_BIND can potentially reduce the CPU overhead in Mesa (both Vulkan and Iris),
hence improving performance of CPU-bound applications. It also allows us to
implement Vulkan's Sparse Resources. With increasing GPU hardware performance,
reducing CPU overhead becomes more impactful.
Other VM_BIND use cases
========================
Long running Compute contexts
------------------------------
Usage of dma-fence expects that they complete in a reasonable amount of time.
Compute, on the other hand, can be long running. Hence it is appropriate for
compute to use user/memory fences (see `User/Memory Fence`_) and dma-fence
usage must be limited to in-kernel consumption only.
Where GPU page faults are not available, the kernel driver, upon buffer
invalidation, will initiate a suspend (preemption) of the long running context,
finish the invalidation, revalidate the BO and then resume the compute context.
This is done by having a per-context preempt fence which is enabled when
someone tries to wait on it and triggers the context preemption.
User/Memory Fence
~~~~~~~~~~~~~~~~~~
A user/memory fence is an <address, value> pair. To signal the user fence, the
specified value will be written at the specified virtual address and the
waiting process will be woken up. A user fence can be signaled either by the
GPU or by the kernel async worker (such as upon bind completion). The user can
wait on a user fence with a new user fence wait ioctl.
Here is some prior work on this:
https://patchwork.freedesktop.org/patch/349417/
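Conceptually, signaling writes the value and wakes the waiter. No uAPI exists
yet, so the sketch below only illustrates the <address, value> semantics; a
real waiter would sleep in the proposed wait ioctl instead of spinning::

  #include <stdatomic.h>
  #include <stdint.h>

  /* Signaler (GPU or kernel async worker): store the value. */
  static void user_fence_signal(_Atomic uint64_t *addr, uint64_t value)
  {
      atomic_store_explicit(addr, value, memory_order_release);
  }

  /* Waiter: poll until the fence value is reached (illustrative only;
   * the proposed ioctl would block instead of busy-waiting).
   */
  static void user_fence_wait(_Atomic uint64_t *addr, uint64_t value)
  {
      while (atomic_load_explicit(addr, memory_order_acquire) < value)
          ;
  }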
Low Latency Submission
~~~~~~~~~~~~~~~~~~~~~~~
Allows compute UMDs to directly submit GPU jobs instead of going through the
execbuf ioctl. This is made possible because VM_BIND is not synchronized
against execbuf. VM_BIND allows bind/unbind of the mappings required for the
directly submitted jobs.
Debugger
---------
With the debug event interface, a user-space process (the debugger) is able to
keep track of, and act upon, resources created by another process (the
debugged process) and attached to the GPU via the vm_bind interface.
GPU page faults
----------------
GPU page faults, when supported (in the future), will only be supported in the
VM_BIND mode. While both the older execbuf mode and the newer VM_BIND mode of
binding will require using dma-fence to ensure residency, the GPU page faults
mode, when supported, will not use any dma-fence, as residency is purely
managed by installing and removing/invalidating page table entries.
Page level hints settings
--------------------------
VM_BIND allows setting hints per mapping instead of per BO. Possible hints
include placement and atomicity. Sub-BO level placement hints will be even more
relevant with upcoming GPU on-demand page fault support.
Page level Cache/CLOS settings
-------------------------------
VM_BIND allows cache/CLOS settings per mapping instead of per BO.
Evictable page table allocations
---------------------------------
Make page table allocations evictable and manage them similarly to VM_BIND
mapped objects. Page table pages are similar to persistent mappings of a
VM (the differences being that page table pages will not have an i915_vma
structure and that, after swapping pages back in, the parent page link needs
to be updated).
Shared Virtual Memory (SVM) support
------------------------------------
The VM_BIND interface can be used to map system memory directly (without the
gem BO abstraction) using the HMM interface. SVM is only supported with GPU
page faults enabled.
VM_BIND UAPI
=============
.. kernel-doc:: Documentation/gpu/rfc/i915_vm_bind.h

View file

@ -23,3 +23,11 @@ host such documentation:
.. toctree::
i915_scheduler.rst
.. toctree::
i915_small_bar.rst
.. toctree::
i915_vm_bind.rst

View file

@ -4,7 +4,7 @@ config DRM_AMDGPU_SI
depends on DRM_AMDGPU
help
Choose this option if you want to enable experimental support
for SI asics.
for SI (Southern Islands) asics.
SI is already supported in radeon. Experimental support for SI
in amdgpu will be disabled by default and is still provided by
@ -16,7 +16,8 @@ config DRM_AMDGPU_CIK
bool "Enable amdgpu support for CIK parts"
depends on DRM_AMDGPU
help
Choose this option if you want to enable support for CIK asics.
Choose this option if you want to enable support for CIK (Sea
Islands) asics.
CIK is already supported in radeon. Support for CIK in amdgpu
will be disabled by default and is still provided by radeon.

View file

@ -93,7 +93,7 @@ amdgpu-y += \
# add UMC block
amdgpu-y += \
umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o
umc_v6_0.o umc_v6_1.o umc_v6_7.o umc_v8_7.o umc_v8_10.o
# add IH block
amdgpu-y += \

View file

@ -197,6 +197,7 @@ extern uint amdgpu_smu_memory_pool_size;
extern int amdgpu_smu_pptable_id;
extern uint amdgpu_dc_feature_mask;
extern uint amdgpu_dc_debug_mask;
extern uint amdgpu_dc_visual_confirm;
extern uint amdgpu_dm_abm_level;
extern int amdgpu_backlight;
extern struct amdgpu_mgpu_info mgpu_info;
@ -1011,7 +1012,6 @@ struct amdgpu_device {
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
/* enable runtime pm on the device */
bool runpm;
bool in_runpm;
bool has_pr3;
@ -1253,9 +1253,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
bool amdgpu_device_has_job_running(struct amdgpu_device *adev);
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job);
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
			      struct amdgpu_job *job,
			      struct amdgpu_reset_context *reset_context);
void amdgpu_device_pci_config_reset(struct amdgpu_device *adev);
int amdgpu_device_pci_reset(struct amdgpu_device *adev);
bool amdgpu_device_need_post(struct amdgpu_device *adev);

View file

@ -29,6 +29,8 @@
#include <linux/platform_device.h>
#include <sound/designware_i2s.h>
#include <sound/pcm.h>
#include <linux/acpi.h>
#include <linux/dmi.h>
#include "amdgpu.h"
#include "atom.h"
@ -36,6 +38,7 @@
#include "acp_gfx_if.h"
#define ST_JADEITE 1
#define ACP_TILE_ON_MASK 0x03
#define ACP_TILE_OFF_MASK 0x02
#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
@ -85,6 +88,8 @@
#define ACP_DEVS 4
#define ACP_SRC_ID 162
static unsigned long acp_machine_id;
enum {
ACP_TILE_P1 = 0,
ACP_TILE_P2,
@ -128,16 +133,14 @@ static int acp_poweroff(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) {
adev = apd->adev;
adev = apd->adev;
/* call smu to POWER GATE ACP block
* smu will
* 1. turn off the acp clock
* 2. power off the acp tiles
* 3. check and enter ulv state
*/
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
}
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true);
return 0;
}
@ -147,16 +150,14 @@ static int acp_poweron(struct generic_pm_domain *genpd)
struct amdgpu_device *adev;
apd = container_of(genpd, struct acp_pm_domain, gpd);
if (apd != NULL) {
adev = apd->adev;
adev = apd->adev;
/* call smu to UNGATE ACP block
* smu will
* 1. exit ulv
* 2. turn on acp clock
* 3. power on acp tiles
*/
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
}
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false);
return 0;
}
@ -184,6 +185,37 @@ static int acp_genpd_remove_device(struct device *dev, void *data)
return 0;
}
static int acp_quirk_cb(const struct dmi_system_id *id)
{
acp_machine_id = ST_JADEITE;
return 1;
}
static const struct dmi_system_id acp_quirk_table[] = {
{
.callback = acp_quirk_cb,
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AMD"),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "Jadeite"),
}
},
{
.callback = acp_quirk_cb,
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "IP3 Technology CO.,Ltd."),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN1D"),
},
},
{
.callback = acp_quirk_cb,
.matches = {
DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Standard"),
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "ASN10"),
},
},
{}
};
/**
* acp_hw_init - start and test ACP block
*
@ -193,7 +225,7 @@ static int acp_genpd_remove_device(struct device *dev, void *data)
static int acp_hw_init(void *handle)
{
int r;
uint64_t acp_base;
u64 acp_base;
u32 val = 0;
u32 count = 0;
struct i2s_platform_data *i2s_pdata = NULL;
@ -220,141 +252,210 @@ static int acp_hw_init(void *handle)
return -EINVAL;
acp_base = adev->rmmio_base;
adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
if (adev->acp.acp_genpd == NULL)
if (!adev->acp.acp_genpd)
return -ENOMEM;
adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
adev->acp.acp_genpd->gpd.power_on = acp_poweron;
adev->acp.acp_genpd->adev = adev;
pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
dmi_check_system(acp_quirk_table);
switch (acp_machine_id) {
case ST_JADEITE:
{
adev->acp.acp_cell = kcalloc(2, sizeof(struct mfd_cell),
GFP_KERNEL);
if (!adev->acp.acp_cell) {
r = -ENOMEM;
goto failure;
}
adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL);
adev->acp.acp_res = kcalloc(3, sizeof(struct resource), GFP_KERNEL);
if (!adev->acp.acp_res) {
r = -ENOMEM;
goto failure;
}
if (adev->acp.acp_cell == NULL) {
r = -ENOMEM;
goto failure;
}
i2s_pdata = kcalloc(1, sizeof(struct i2s_platform_data), GFP_KERNEL);
if (!i2s_pdata) {
r = -ENOMEM;
goto failure;
}
adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
if (adev->acp.acp_res == NULL) {
r = -ENOMEM;
goto failure;
}
i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
if (i2s_pdata == NULL) {
r = -ENOMEM;
goto failure;
}
switch (adev->asic_type) {
case CHIP_STONEY:
i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
break;
default:
i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
}
i2s_pdata[0].cap = DWC_I2S_PLAY;
i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
switch (adev->asic_type) {
case CHIP_STONEY:
i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
DW_I2S_QUIRK_COMP_PARAM1 |
DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
break;
default:
i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
DW_I2S_QUIRK_COMP_PARAM1;
}
DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
i2s_pdata[0].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
i2s_pdata[1].cap = DWC_I2S_RECORD;
i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
adev->acp.acp_res[0].name = "acp2x_dma";
adev->acp.acp_res[0].flags = IORESOURCE_MEM;
adev->acp.acp_res[0].start = acp_base;
adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
switch (adev->asic_type) {
case CHIP_STONEY:
i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
break;
default:
adev->acp.acp_res[1].name = "acp2x_dw_i2s_play_cap";
adev->acp.acp_res[1].flags = IORESOURCE_MEM;
adev->acp.acp_res[1].start = acp_base + ACP_I2S_CAP_REGS_START;
adev->acp.acp_res[1].end = acp_base + ACP_I2S_CAP_REGS_END;
adev->acp.acp_res[2].name = "acp2x_dma_irq";
adev->acp.acp_res[2].flags = IORESOURCE_IRQ;
adev->acp.acp_res[2].start = amdgpu_irq_create_mapping(adev, 162);
adev->acp.acp_res[2].end = adev->acp.acp_res[2].start;
adev->acp.acp_cell[0].name = "acp_audio_dma";
adev->acp.acp_cell[0].num_resources = 3;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
adev->acp.acp_cell[1].name = "designware-i2s";
adev->acp.acp_cell[1].num_resources = 1;
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, 2);
if (r)
goto failure;
r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
acp_genpd_add_device);
if (r)
goto failure;
break;
}
default:
adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell),
GFP_KERNEL);
i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
if (!adev->acp.acp_cell) {
r = -ENOMEM;
goto failure;
}
adev->acp.acp_res[0].name = "acp2x_dma";
adev->acp.acp_res[0].flags = IORESOURCE_MEM;
adev->acp.acp_res[0].start = acp_base;
adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
adev->acp.acp_res = kcalloc(5, sizeof(struct resource), GFP_KERNEL);
if (!adev->acp.acp_res) {
r = -ENOMEM;
goto failure;
}
adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
adev->acp.acp_res[1].flags = IORESOURCE_MEM;
adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
i2s_pdata = kcalloc(3, sizeof(struct i2s_platform_data), GFP_KERNEL);
if (!i2s_pdata) {
r = -ENOMEM;
goto failure;
}
adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
adev->acp.acp_res[2].flags = IORESOURCE_MEM;
adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
switch (adev->asic_type) {
case CHIP_STONEY:
i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
break;
default:
i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
}
i2s_pdata[0].cap = DWC_I2S_PLAY;
i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
switch (adev->asic_type) {
case CHIP_STONEY:
i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
DW_I2S_QUIRK_COMP_PARAM1 |
DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
break;
default:
i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
DW_I2S_QUIRK_COMP_PARAM1;
}
adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
adev->acp.acp_res[3].flags = IORESOURCE_MEM;
adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
i2s_pdata[1].cap = DWC_I2S_RECORD;
i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
adev->acp.acp_res[4].name = "acp2x_dma_irq";
adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
i2s_pdata[2].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
switch (adev->asic_type) {
case CHIP_STONEY:
i2s_pdata[2].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
break;
default:
break;
}
adev->acp.acp_cell[0].name = "acp_audio_dma";
adev->acp.acp_cell[0].num_resources = 5;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
i2s_pdata[2].cap = DWC_I2S_PLAY | DWC_I2S_RECORD;
i2s_pdata[2].snd_rates = SNDRV_PCM_RATE_8000_96000;
i2s_pdata[2].i2s_reg_comp1 = ACP_BT_COMP1_REG_OFFSET;
i2s_pdata[2].i2s_reg_comp2 = ACP_BT_COMP2_REG_OFFSET;
adev->acp.acp_cell[1].name = "designware-i2s";
adev->acp.acp_cell[1].num_resources = 1;
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
i2s_pdata[3].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
switch (adev->asic_type) {
case CHIP_STONEY:
i2s_pdata[3].quirks |= DW_I2S_QUIRK_16BIT_IDX_OVERRIDE;
break;
default:
break;
}
adev->acp.acp_res[0].name = "acp2x_dma";
adev->acp.acp_res[0].flags = IORESOURCE_MEM;
adev->acp.acp_res[0].start = acp_base;
adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
adev->acp.acp_cell[2].name = "designware-i2s";
adev->acp.acp_cell[2].num_resources = 1;
adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
adev->acp.acp_res[1].flags = IORESOURCE_MEM;
adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
adev->acp.acp_cell[3].name = "designware-i2s";
adev->acp.acp_cell[3].num_resources = 1;
adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
adev->acp.acp_res[2].flags = IORESOURCE_MEM;
adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
ACP_DEVS);
if (r)
goto failure;
adev->acp.acp_res[3].name = "acp2x_dw_bt_i2s_play_cap";
adev->acp.acp_res[3].flags = IORESOURCE_MEM;
adev->acp.acp_res[3].start = acp_base + ACP_BT_PLAY_REGS_START;
adev->acp.acp_res[3].end = acp_base + ACP_BT_PLAY_REGS_END;
r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
acp_genpd_add_device);
if (r)
goto failure;
adev->acp.acp_res[4].name = "acp2x_dma_irq";
adev->acp.acp_res[4].flags = IORESOURCE_IRQ;
adev->acp.acp_res[4].start = amdgpu_irq_create_mapping(adev, 162);
adev->acp.acp_res[4].end = adev->acp.acp_res[4].start;
adev->acp.acp_cell[0].name = "acp_audio_dma";
adev->acp.acp_cell[0].num_resources = 5;
adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
adev->acp.acp_cell[0].platform_data = &adev->asic_type;
adev->acp.acp_cell[0].pdata_size = sizeof(adev->asic_type);
adev->acp.acp_cell[1].name = "designware-i2s";
adev->acp.acp_cell[1].num_resources = 1;
adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_cell[2].name = "designware-i2s";
adev->acp.acp_cell[2].num_resources = 1;
adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
adev->acp.acp_cell[3].name = "designware-i2s";
adev->acp.acp_cell[3].num_resources = 1;
adev->acp.acp_cell[3].resources = &adev->acp.acp_res[3];
adev->acp.acp_cell[3].platform_data = &i2s_pdata[2];
adev->acp.acp_cell[3].pdata_size = sizeof(struct i2s_platform_data);
r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell, ACP_DEVS);
if (r)
goto failure;
r = device_for_each_child(adev->acp.parent, &adev->acp.acp_genpd->gpd,
acp_genpd_add_device);
if (r)
goto failure;
}
/* Assert Soft reset of ACP */
val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET);
@ -546,8 +647,7 @@ static const struct amd_ip_funcs acp_ip_funcs = {
.set_powergating_state = acp_set_powergating_state,
};
const struct amdgpu_ip_block_version acp_ip_block =
{
const struct amdgpu_ip_block_version acp_ip_block = {
.type = AMD_IP_BLOCK_TYPE_ACP,
.major = 2,
.minor = 2,

View file

@ -129,7 +129,14 @@ static void amdgpu_amdkfd_reset_work(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
kfd.reset_work);
amdgpu_device_gpu_recover(adev, NULL);
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)

View file

@ -115,21 +115,12 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size)
* compromise that should work in most cases without reserving too
* much memory for page tables unnecessarily (factor 16K, >> 14).
*/
#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM)
static size_t amdgpu_amdkfd_acc_size(uint64_t size)
{
size >>= PAGE_SHIFT;
size *= sizeof(dma_addr_t) + sizeof(void *);
return __roundup_pow_of_two(sizeof(struct amdgpu_bo)) +
__roundup_pow_of_two(sizeof(struct ttm_tt)) +
PAGE_ALIGN(size);
}
/**
* amdgpu_amdkfd_reserve_mem_limit() - Decrease available memory by size
* of buffer including any reserved for control structures
* of buffer.
*
* @adev: Device to which allocated BO belongs to
* @size: Size of buffer, in bytes, encapsulated by BO. This should be
@ -143,19 +134,16 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
{
uint64_t reserved_for_pt =
ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size);
size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
size_t system_mem_needed, ttm_mem_needed, vram_needed;
int ret = 0;
acc_size = amdgpu_amdkfd_acc_size(size);
system_mem_needed = 0;
ttm_mem_needed = 0;
vram_needed = 0;
if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size + size;
system_mem_needed = size;
ttm_mem_needed = size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
system_mem_needed = acc_size;
ttm_mem_needed = acc_size;
/*
* Conservatively round up the allocation requirement to 2 MB
* to avoid fragmentation caused by 4K allocations in the tail
@ -163,14 +151,10 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
*/
vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
system_mem_needed = acc_size + size;
ttm_mem_needed = acc_size;
} else if (alloc_flag &
(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
system_mem_needed = acc_size;
ttm_mem_needed = acc_size;
} else {
system_mem_needed = size;
} else if (!(alloc_flag &
(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
return -ENOMEM;
}
@ -208,28 +192,18 @@ release:
static void unreserve_mem_limit(struct amdgpu_device *adev,
uint64_t size, u32 alloc_flag)
{
size_t acc_size;
acc_size = amdgpu_amdkfd_acc_size(size);
spin_lock(&kfd_mem_limit.mem_limit_lock);
if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_GTT) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
kfd_mem_limit.ttm_mem_used -= (acc_size + size);
kfd_mem_limit.system_mem_used -= size;
kfd_mem_limit.ttm_mem_used -= size;
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
kfd_mem_limit.system_mem_used -= acc_size;
kfd_mem_limit.ttm_mem_used -= acc_size;
adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN);
} else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) {
kfd_mem_limit.system_mem_used -= (acc_size + size);
kfd_mem_limit.ttm_mem_used -= acc_size;
} else if (alloc_flag &
(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP)) {
kfd_mem_limit.system_mem_used -= acc_size;
kfd_mem_limit.ttm_mem_used -= acc_size;
} else {
kfd_mem_limit.system_mem_used -= size;
} else if (!(alloc_flag &
(KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL |
KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) {
pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag);
goto release;
}
@ -401,22 +375,8 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm)
return ret;
}
ret = amdgpu_amdkfd_validate_vm_bo(NULL, pd);
if (ret) {
pr_err("failed to validate PD\n");
return ret;
}
vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.bo);
if (vm->use_cpu_for_update) {
ret = amdgpu_bo_kmap(pd, NULL);
if (ret) {
pr_err("failed to kmap PD, ret=%d\n", ret);
return ret;
}
}
return 0;
}
@ -450,45 +410,42 @@ static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
switch (adev->asic_type) {
case CHIP_ARCTURUS:
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
if (bo_adev == adev)
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
else
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
case CHIP_ALDEBARAN:
if (coherent && uncached) {
if (adev->gmc.xgmi.connected_to_cpu ||
!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
snoop = true;
mapping_flags |= AMDGPU_VM_MTYPE_UC;
} else if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
if (bo_adev == adev) {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW;
if (adev->gmc.xgmi.connected_to_cpu)
if (uncached)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
else if (coherent)
mapping_flags |= AMDGPU_VM_MTYPE_CC;
else
mapping_flags |= AMDGPU_VM_MTYPE_RW;
if (adev->asic_type == CHIP_ALDEBARAN &&
adev->gmc.xgmi.connected_to_cpu)
snoop = true;
} else {
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
if (uncached || coherent)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
else
mapping_flags |= AMDGPU_VM_MTYPE_NC;
if (amdgpu_xgmi_same_hive(adev, bo_adev))
snoop = true;
}
} else {
if (uncached || coherent)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
else
mapping_flags |= AMDGPU_VM_MTYPE_NC;
snoop = true;
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
}
break;
default:
mapping_flags |= coherent ?
AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC;
if (uncached || coherent)
mapping_flags |= AMDGPU_VM_MTYPE_UC;
else
mapping_flags |= AMDGPU_VM_MTYPE_NC;
if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
snoop = true;
}
pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
@ -1555,16 +1512,10 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
struct amdkfd_process_info *process_info = vm->process_info;
struct amdgpu_bo *pd = vm->root.bo;
if (!process_info)
return;
/* Release eviction fence from PD */
amdgpu_bo_reserve(pd, false);
amdgpu_bo_fence(pd, NULL, false);
amdgpu_bo_unreserve(pd);
/* Update process info */
mutex_lock(&process_info->lock);
process_info->n_vms--;

View file

@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
mutex_destroy(&list->bo_list_mutex);
kvfree(list);
}
@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
mutex_init(&list->bo_list_mutex);
*result = list;
return 0;

View file

@ -47,6 +47,10 @@ struct amdgpu_bo_list {
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
/* Protect access during command submission. */
struct mutex bo_list_mutex;
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,

View file

@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
return r;
}
mutex_lock(&p->bo_list->bo_list_mutex);
/* One for TTM and one for the CS job */
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->tv.num_shared = 2;
@ -651,6 +653,7 @@ out_free_user_pages:
kvfree(e->user_pages);
e->user_pages = NULL;
}
mutex_unlock(&p->bo_list->bo_list_mutex);
}
return r;
}
@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
{
unsigned i;
if (error && backoff)
if (error && backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
mutex_unlock(&parser->bo_list->bo_list_mutex);
}
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
continue;
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
if (r) {
mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
}
r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
if (r)
if (r) {
mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
}
}
r = amdgpu_vm_handle_moved(adev, vm);
@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
mutex_unlock(&p->bo_list->bo_list_mutex);
return 0;

View file

@ -272,32 +272,6 @@ static ktime_t amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
return res;
}
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
struct drm_file *filp, struct amdgpu_ctx *ctx)
{
int r;
r = amdgpu_ctx_priority_permit(filp, priority);
if (r)
return r;
memset(ctx, 0, sizeof(*ctx));
kref_init(&ctx->refcount);
ctx->mgr = mgr;
spin_lock_init(&ctx->ring_lock);
mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
ctx->init_priority = priority;
ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
ctx->stable_pstate = AMDGPU_CTX_STABLE_PSTATE_NONE;
return 0;
}
static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
u32 *stable_pstate)
{
@ -326,6 +300,38 @@ static int amdgpu_ctx_get_stable_pstate(struct amdgpu_ctx *ctx,
return 0;
}
static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
struct drm_file *filp, struct amdgpu_ctx *ctx)
{
u32 current_stable_pstate;
int r;
r = amdgpu_ctx_priority_permit(filp, priority);
if (r)
return r;
memset(ctx, 0, sizeof(*ctx));
kref_init(&ctx->refcount);
ctx->mgr = mgr;
spin_lock_init(&ctx->ring_lock);
mutex_init(&ctx->lock);
ctx->reset_counter = atomic_read(&mgr->adev->gpu_reset_counter);
ctx->reset_counter_query = ctx->reset_counter;
ctx->vram_lost_counter = atomic_read(&mgr->adev->vram_lost_counter);
ctx->init_priority = priority;
ctx->override_priority = AMDGPU_CTX_PRIORITY_UNSET;
r = amdgpu_ctx_get_stable_pstate(ctx, &current_stable_pstate);
if (r)
return r;
ctx->stable_pstate = current_stable_pstate;
return 0;
}
static int amdgpu_ctx_set_stable_pstate(struct amdgpu_ctx *ctx,
u32 stable_pstate)
{
@ -397,7 +403,7 @@ static void amdgpu_ctx_fini(struct kref *ref)
}
if (drm_dev_enter(&adev->ddev, &idx)) {
amdgpu_ctx_set_stable_pstate(ctx, AMDGPU_CTX_STABLE_PSTATE_NONE);
amdgpu_ctx_set_stable_pstate(ctx, ctx->stable_pstate);
drm_dev_exit(idx);
}
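
The reordered ``amdgpu_ctx_init()`` now snapshots whatever stable pstate is
active when the context is created, and ``amdgpu_ctx_fini()`` restores that
snapshot instead of forcing ``AMDGPU_CTX_STABLE_PSTATE_NONE``. A toy model of
the save/restore idiom (simplified, not the driver code)::

  #include <stdio.h>

  static unsigned int hw_pstate = 3;      /* pretend current hardware value */

  struct ctx { unsigned int saved_pstate; };

  static void ctx_init(struct ctx *c) { c->saved_pstate = hw_pstate; }
  static void ctx_fini(struct ctx *c) { hw_pstate = c->saved_pstate; }

  int main(void)
  {
          struct ctx c;

          ctx_init(&c);           /* remember the value found at creation */
          hw_pstate = 1;          /* the context changes the pstate */
          ctx_fini(&c);           /* put back 3, not a fixed default */

          printf("pstate after fini: %u\n", hw_pstate);
          return 0;
  }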


@ -383,12 +383,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
value = RREG32_PCIE(*pos);
r = put_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
}
if (r)
goto out;
result += 4;
buf += 4;
@ -396,11 +392,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return result;
return r;
}
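
This hunk, like the near-identical DIDT, SMC and GFXOFF hunks below, folds
the copy-pasted cleanup of each early return into a single ``out:`` label
that drops the runtime-PM reference exactly once. A compilable sketch of the
resulting shape, with stand-in helpers::

  #include <stddef.h>
  #include <stdio.h>

  static void pm_put(void) { puts("pm reference dropped"); }

  static int copy_word_to_user(unsigned int v)
  {
          (void)v;
          return 0;               /* nonzero would mean a fault */
  }

  static long regs_read(size_t size)
  {
          long result = 0;
          long r;

          while (size) {
                  unsigned int value = 0xdeadbeef; /* pretend register read */

                  r = copy_word_to_user(value);
                  if (r)
                          goto out;   /* was: duplicated cleanup + return */

                  result += 4;
                  size -= 4;
          }

          r = result;
  out:
          pm_put();               /* single cleanup site for both outcomes */
          return r;
  }

  int main(void)
  {
          printf("read %ld bytes\n", regs_read(16));
          return 0;
  }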
/**
@ -441,12 +438,8 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
}
if (r)
goto out;
WREG32_PCIE(*pos, value);
@ -456,11 +449,12 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return result;
return r;
}
/**
@ -502,12 +496,8 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
value = RREG32_DIDT(*pos >> 2);
r = put_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
}
if (r)
goto out;
result += 4;
buf += 4;
@ -515,11 +505,12 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return result;
return r;
}
/**
@ -560,12 +551,8 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
uint32_t value;
r = get_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
}
if (r)
goto out;
WREG32_DIDT(*pos >> 2, value);
@ -575,11 +562,12 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return result;
return r;
}
/**
@ -621,12 +609,8 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
value = RREG32_SMC(*pos);
r = put_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
}
if (r)
goto out;
result += 4;
buf += 4;
@ -634,11 +618,12 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return result;
return r;
}
/**
@ -679,12 +664,8 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
uint32_t value;
r = get_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return r;
}
if (r)
goto out;
WREG32_SMC(*pos, value);
@ -694,11 +675,12 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
amdgpu_virt_disable_access_debugfs(adev);
return result;
return r;
}
/**
@ -1090,11 +1072,8 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
uint32_t value;
r = get_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
if (r)
goto out;
amdgpu_gfx_off_ctrl(adev, value ? true : false);
@ -1104,10 +1083,12 @@ static ssize_t amdgpu_debugfs_gfxoff_write(struct file *f, const char __user *bu
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return result;
return r;
}
@ -1136,21 +1117,11 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
}
while (size) {
uint32_t value;
u32 value = adev->gfx.gfx_off_state;
r = amdgpu_get_gfx_off_status(adev, &value);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
r = put_user(value, (uint32_t *)buf);
if (r) {
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
r = put_user(value, (u32 *)buf);
if (r)
goto out;
result += 4;
buf += 4;
@ -1158,10 +1129,53 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return result;
return r;
}
static ssize_t amdgpu_debugfs_gfxoff_status_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
struct amdgpu_device *adev = file_inode(f)->i_private;
ssize_t result = 0;
int r;
if (size & 0x3 || *pos & 0x3)
return -EINVAL;
r = pm_runtime_get_sync(adev_to_drm(adev)->dev);
if (r < 0) {
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
while (size) {
u32 value;
r = amdgpu_get_gfx_off_status(adev, &value);
if (r)
goto out;
r = put_user(value, (u32 *)buf);
if (r)
goto out;
result += 4;
buf += 4;
*pos += 4;
size -= 4;
}
r = result;
out:
pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
pm_runtime_put_autosuspend(adev_to_drm(adev)->dev);
return r;
}
static const struct file_operations amdgpu_debugfs_regs2_fops = {
@ -1229,6 +1243,12 @@ static const struct file_operations amdgpu_debugfs_gfxoff_fops = {
.llseek = default_llseek
};
static const struct file_operations amdgpu_debugfs_gfxoff_status_fops = {
.owner = THIS_MODULE,
.read = amdgpu_debugfs_gfxoff_status_read,
.llseek = default_llseek
};
static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_regs_fops,
&amdgpu_debugfs_regs2_fops,
@ -1240,6 +1260,7 @@ static const struct file_operations *debugfs_regs[] = {
&amdgpu_debugfs_wave_fops,
&amdgpu_debugfs_gpr_fops,
&amdgpu_debugfs_gfxoff_fops,
&amdgpu_debugfs_gfxoff_status_fops,
};
static const char *debugfs_regs_names[] = {
@ -1253,6 +1274,7 @@ static const char *debugfs_regs_names[] = {
"amdgpu_wave",
"amdgpu_gpr",
"amdgpu_gfxoff",
"amdgpu_gfxoff_status",
};
/**


@ -5109,7 +5109,8 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
*/
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
struct amdgpu_job *job,
struct amdgpu_reset_context *reset_context)
{
struct list_head device_list, *device_list_handle = NULL;
bool job_signaled = false;
@ -5119,9 +5120,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
bool need_emergency_restart = false;
bool audio_suspended = false;
int tmp_vram_lost_counter;
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
/*
* Special case: RAS triggered and full reset isn't supported
@ -5147,12 +5145,8 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
if (hive)
mutex_lock(&hive->hive_lock);
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
reset_context.job = job;
reset_context.hive = hive;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
reset_context->job = job;
reset_context->hive = hive;
/*
* Build list of devices to reset.
* In case we are in XGMI hive mode, resort the device list
@ -5236,8 +5230,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*
* job->base holds a reference to parent fence
*/
if (job && (job->hw_fence.ops != NULL) &&
dma_fence_is_signaled(&job->hw_fence)) {
if (job && dma_fence_is_signaled(&job->hw_fence)) {
job_signaled = true;
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
@ -5245,7 +5238,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
r = amdgpu_device_pre_asic_reset(tmp_adev, &reset_context);
r = amdgpu_device_pre_asic_reset(tmp_adev, reset_context);
/*TODO Should we stop ?*/
if (r) {
dev_err(tmp_adev->dev, "GPU pre asic reset failed with err, %d for drm dev, %s ",
@ -5272,7 +5265,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
amdgpu_ras_resume(adev);
} else {
r = amdgpu_do_asic_reset(device_list_handle, &reset_context);
r = amdgpu_do_asic_reset(device_list_handle, reset_context);
if (r && r == -EAGAIN)
goto retry;
}
@ -5292,7 +5285,7 @@ skip_hw_reset:
if (amdgpu_gpu_recovery == 2 &&
!(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
amdgpu_device_recheck_guilty_jobs(
tmp_adev, device_list_handle, &reset_context);
tmp_adev, device_list_handle, reset_context);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];


@ -1716,6 +1716,7 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(3, 0, 1):
case IP_VERSION(3, 1, 2):
case IP_VERSION(3, 1, 3):
case IP_VERSION(3, 1, 4):
case IP_VERSION(3, 1, 5):
case IP_VERSION(3, 1, 6):
case IP_VERSION(3, 2, 0):
@ -2206,12 +2207,9 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
break;
case IP_VERSION(7, 4, 0):
case IP_VERSION(7, 4, 1):
adev->nbio.funcs = &nbio_v7_4_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
break;
case IP_VERSION(7, 4, 4):
adev->nbio.funcs = &nbio_v7_4_funcs;
adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg_ald;
adev->nbio.hdp_flush_reg = &nbio_v7_4_hdp_flush_reg;
break;
case IP_VERSION(7, 2, 0):
case IP_VERSION(7, 2, 1):
@ -2225,15 +2223,12 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
case IP_VERSION(2, 3, 0):
case IP_VERSION(2, 3, 1):
case IP_VERSION(2, 3, 2):
adev->nbio.funcs = &nbio_v2_3_funcs;
adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
break;
case IP_VERSION(3, 3, 0):
case IP_VERSION(3, 3, 1):
case IP_VERSION(3, 3, 2):
case IP_VERSION(3, 3, 3):
adev->nbio.funcs = &nbio_v2_3_funcs;
adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg_sc;
adev->nbio.hdp_flush_reg = &nbio_v2_3_hdp_flush_reg;
break;
case IP_VERSION(4, 3, 0):
case IP_VERSION(4, 3, 1):


@ -1559,6 +1559,21 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
stime, etime, mode);
}
static bool
amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
{
struct drm_device *dev = adev_to_drm(adev);
struct drm_fb_helper *fb_helper = dev->fb_helper;
if (!fb_helper || !fb_helper->buffer)
return false;
if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
return false;
return true;
}
int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
@ -1594,10 +1609,12 @@ int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
r = amdgpu_bo_reserve(robj, true);
if (r == 0) {
amdgpu_bo_unpin(robj);
amdgpu_bo_unreserve(robj);
if (!amdgpu_display_robj_is_fb(adev, robj)) {
r = amdgpu_bo_reserve(robj, true);
if (r == 0) {
amdgpu_bo_unpin(robj);
amdgpu_bo_unreserve(robj);
}
}
}
return 0;


@ -80,7 +80,7 @@
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
* - 3.27.0 - Add new chunk to to AMDGPU_CS to enable BO_LIST creation.
* - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
* - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
@ -100,10 +100,11 @@
* - 3.44.0 - DCN3 supports DCC independent block settings: !64B && 128B, 64B && 128B
* - 3.45.0 - Add context ioctl stable pstate interface
* - 3.46.0 - To enable hot plug amdgpu tests in libdrm
* * 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
* - 3.47.0 - Add AMDGPU_GEM_CREATE_DISCARDABLE and AMDGPU_VM_NOALLOC flags
* - 3.48.0 - Add IP discovery version info to HW INFO
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 47
#define KMS_DRIVER_MINOR 48
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit;
@ -167,6 +168,7 @@ int amdgpu_smu_pptable_id = -1;
*/
uint amdgpu_dc_feature_mask = 2;
uint amdgpu_dc_debug_mask;
uint amdgpu_dc_visual_confirm;
int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp;
int amdgpu_discovery = -1;
@ -827,6 +829,9 @@ module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
MODULE_PARM_DESC(dcdebugmask, "all debug options disabled (default))");
module_param_named(dcdebugmask, amdgpu_dc_debug_mask, uint, 0444);
MODULE_PARM_DESC(visualconfirm, "Visual confirm (0 = off (default), 1 = MPO, 5 = PSR)");
module_param_named(visualconfirm, amdgpu_dc_visual_confirm, uint, 0444);
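
With the new parameter, the DC visual-confirm overlay can be selected at
module load; per the description above, 0 disables it, 1 selects MPO and 5
selects PSR. Usage example::

  $ modprobe amdgpu visualconfirm=1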
/**
* DOC: abmlevel (uint)
* Override the default ABM (Adaptive Backlight Management) level used for DC
@ -2121,7 +2126,7 @@ retry_init:
if (ret)
DRM_ERROR("Creating debugfs files failed (%d).\n", ret);
if (adev->runpm) {
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
/* only need to skip on ATPX */
if (amdgpu_device_supports_px(ddev))
dev_pm_set_driver_flags(ddev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
@ -2178,7 +2183,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
drm_dev_unplug(dev);
if (adev->runpm) {
if (adev->pm.rpm_mode != AMDGPU_RUNPM_NONE) {
pm_runtime_get_sync(dev->dev);
pm_runtime_forbid(dev->dev);
}
@ -2461,7 +2466,7 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret, i;
if (!adev->runpm) {
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}
@ -2530,7 +2535,7 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)
struct amdgpu_device *adev = drm_to_adev(drm_dev);
int ret;
if (!adev->runpm)
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE)
return -EINVAL;
/* Avoids registers access if device is physically gone */
@ -2574,7 +2579,7 @@ static int amdgpu_pmops_runtime_idle(struct device *dev)
/* we don't want the main rpm_idle to call suspend - we want to autosuspend */
int ret = 1;
if (!adev->runpm) {
if (adev->pm.rpm_mode == AMDGPU_RUNPM_NONE) {
pm_runtime_forbid(dev);
return -EBUSY;
}


@ -47,7 +47,7 @@
* for GPU/CPU synchronization. When the fence is written,
* it is expected that all buffers associated with that fence
* are no longer in use by the associated ring on the GPU and
* that the the relevant GPU caches have been flushed.
* that the relevant GPU caches have been flushed.
*/
struct amdgpu_fence {
@ -844,7 +844,14 @@ static void amdgpu_debugfs_reset_work(struct work_struct *work)
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
reset_work);
amdgpu_device_gpu_recover(adev, NULL);
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
#endif


@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_reset.h"
static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
{
@ -64,7 +65,14 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job)
ti.process_name, ti.tgid, ti.task_name, ti.pid);
if (amdgpu_device_should_recover_gpu(ring->adev)) {
r = amdgpu_device_gpu_recover(ring->adev, job);
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context);
if (r)
DRM_ERROR("GPU Recovery Failed: %d\n", r);
} else {
@ -125,16 +133,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
struct dma_fence *hw_fence;
unsigned i;
if (job->hw_fence.ops == NULL)
hw_fence = job->external_hw_fence;
else
hw_fence = &job->hw_fence;
/* use sched fence if available */
f = job->base.s_fence ? &job->base.s_fence->finished : hw_fence;
f = job->base.s_fence ? &job->base.s_fence->finished : &job->hw_fence;
for (i = 0; i < job->num_ibs; ++i)
amdgpu_ib_free(ring->adev, &job->ibs[i], f);
}
@ -148,11 +150,7 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
/* only put the hw fence if has embedded fence */
if (job->hw_fence.ops != NULL)
dma_fence_put(&job->hw_fence);
else
kfree(job);
dma_fence_put(&job->hw_fence);
}
void amdgpu_job_free(struct amdgpu_job *job)
@ -161,11 +159,7 @@ void amdgpu_job_free(struct amdgpu_job *job)
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
/* only put the hw fence if has embedded fence */
if (job->hw_fence.ops != NULL)
dma_fence_put(&job->hw_fence);
else
kfree(job);
dma_fence_put(&job->hw_fence);
}
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
@ -195,15 +189,12 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
int r;
job->base.sched = &ring->sched;
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
/* record external_hw_fence for direct submit */
job->external_hw_fence = dma_fence_get(*fence);
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job, fence);
if (r)
return r;
amdgpu_job_free(job);
dma_fence_put(*fence);
return 0;
}
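
With ``external_hw_fence`` gone, every job embeds its ``hw_fence``, so both
free paths reduce to a plain ``dma_fence_put()`` and the job's memory is
released when the fence refcount drops. A toy model of the embedded-refcount
idea (not the driver code)::

  #include <stdio.h>
  #include <stdlib.h>

  struct fence { int refcount; };
  struct job   { struct fence hw_fence; /* embedded, not a pointer */ };

  static void fence_put(struct fence *f)
  {
          if (--f->refcount == 0) {
                  /* the driver uses container_of(); hw_fence is the
                   * first member here, so the cast is equivalent */
                  free((struct job *)f);
                  puts("job freed by fence release");
          }
  }

  int main(void)
  {
          struct job *j = malloc(sizeof(*j));

          j->hw_fence.refcount = 1;
          fence_put(&j->hw_fence);    /* was: kfree(job) on a side path */
          return 0;
  }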


@ -50,7 +50,6 @@ struct amdgpu_job {
struct amdgpu_sync sync;
struct amdgpu_sync sched_sync;
struct dma_fence hw_fence;
struct dma_fence *external_hw_fence;
uint32_t preamble_status;
uint32_t preemption_status;
bool vm_needs_flush;


@ -43,17 +43,6 @@
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
static void amdgpu_runtime_pm_quirk(struct amdgpu_device *adev)
{
/*
* Add below quirk on several sienna_cichlid cards to disable
* runtime pm to fix EMI failures.
*/
if (((adev->pdev->device == 0x73A1) && (adev->pdev->revision == 0x00)) ||
((adev->pdev->device == 0x73BF) && (adev->pdev->revision == 0xCF)))
adev->runpm = false;
}
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
@ -158,37 +147,36 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
goto out;
}
adev->pm.rpm_mode = AMDGPU_RUNPM_NONE;
if (amdgpu_device_supports_px(dev) &&
(amdgpu_runtime_pm != 0)) { /* enable runpm by default for atpx */
adev->runpm = true;
(amdgpu_runtime_pm != 0)) { /* enable PX as runtime mode */
adev->pm.rpm_mode = AMDGPU_RUNPM_PX;
dev_info(adev->dev, "Using ATPX for runtime pm\n");
} else if (amdgpu_device_supports_boco(dev) &&
(amdgpu_runtime_pm != 0)) { /* enable runpm by default for boco */
adev->runpm = true;
(amdgpu_runtime_pm != 0)) { /* enable boco as runtime mode */
adev->pm.rpm_mode = AMDGPU_RUNPM_BOCO;
dev_info(adev->dev, "Using BOCO for runtime pm\n");
} else if (amdgpu_device_supports_baco(dev) &&
(amdgpu_runtime_pm != 0)) {
switch (adev->asic_type) {
case CHIP_VEGA20:
case CHIP_ARCTURUS:
/* enable runpm if runpm=1 */
/* enable BACO as runpm mode if runpm=1 */
if (amdgpu_runtime_pm > 0)
adev->runpm = true;
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
case CHIP_VEGA10:
/* turn runpm on if noretry=0 */
/* enable BACO as runpm mode if noretry=0 */
if (!adev->gmc.noretry)
adev->runpm = true;
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
default:
/* enable runpm on CI+ */
adev->runpm = true;
/* enable BACO as runpm mode on CI+ */
adev->pm.rpm_mode = AMDGPU_RUNPM_BACO;
break;
}
amdgpu_runtime_pm_quirk(adev);
if (adev->runpm)
if (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO)
dev_info(adev->dev, "Using BACO for runtime pm\n");
}
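
These hunks replace the ``adev->runpm`` boolean with a runtime-PM mode
enumeration. Reconstructed from the values used here (the member order is an
assumption), it looks roughly like::

  enum amdgpu_runpm_mode {
          AMDGPU_RUNPM_NONE,      /* runtime PM disabled */
          AMDGPU_RUNPM_PX,        /* ATPX dGPU power control */
          AMDGPU_RUNPM_BOCO,      /* Bus Off, Chip Off */
          AMDGPU_RUNPM_BACO,      /* Bus Active, Chip Off */
  };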
@ -473,6 +461,30 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
result->hw_ip_version_major = adev->ip_blocks[i].version->major;
result->hw_ip_version_minor = adev->ip_blocks[i].version->minor;
if (adev->asic_type >= CHIP_VEGA10) {
switch (type) {
case AMD_IP_BLOCK_TYPE_GFX:
result->ip_discovery_version = adev->ip_versions[GC_HWIP][0];
break;
case AMD_IP_BLOCK_TYPE_SDMA:
result->ip_discovery_version = adev->ip_versions[SDMA0_HWIP][0];
break;
case AMD_IP_BLOCK_TYPE_UVD:
case AMD_IP_BLOCK_TYPE_VCN:
case AMD_IP_BLOCK_TYPE_JPEG:
result->ip_discovery_version = adev->ip_versions[UVD_HWIP][0];
break;
case AMD_IP_BLOCK_TYPE_VCE:
result->ip_discovery_version = adev->ip_versions[VCE_HWIP][0];
break;
default:
result->ip_discovery_version = 0;
break;
}
} else {
result->ip_discovery_version = 0;
}
result->capabilities_flags = 0;
result->available_rings = (1 << num_rings) - 1;
result->ib_start_alignment = ib_start_alignment;
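
The new ``ip_discovery_version`` field carries the packed value produced by
the driver's ``IP_VERSION()`` macro. Assuming the usual major/minor/revision
byte layout, userspace could decode it like this::

  #include <stdio.h>

  /* assumed layout, matching the driver's IP_VERSION() encoding */
  #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))

  int main(void)
  {
          unsigned int v = IP_VERSION(10, 3, 0);  /* e.g. a GC 10.3.0 part */

          printf("ip %u.%u.%u (0x%06x)\n",
                 v >> 16, (v >> 8) & 0xff, v & 0xff, v);
          return 0;
  }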


@ -114,8 +114,14 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
size_t doorbell_start_offset;
size_t doorbell_aperture_size;
size_t doorbell_process_limit;
size_t aggregated_doorbell_start;
int i;
doorbell_start_offset = (adev->doorbell_index.max_assignment+1) * sizeof(u32);
aggregated_doorbell_start = (adev->doorbell_index.max_assignment + 1) * sizeof(u32);
aggregated_doorbell_start =
roundup(aggregated_doorbell_start, PAGE_SIZE);
doorbell_start_offset = aggregated_doorbell_start + PAGE_SIZE;
doorbell_start_offset =
roundup(doorbell_start_offset,
amdgpu_mes_doorbell_process_slice(adev));
@ -135,6 +141,11 @@ static int amdgpu_mes_doorbell_init(struct amdgpu_device *adev)
adev->mes.doorbell_id_offset = doorbell_start_offset / sizeof(u32);
adev->mes.max_doorbell_slices = doorbell_process_limit;
/* allocate Qword range for aggregated doorbell */
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
adev->mes.aggregated_doorbells[i] =
aggregated_doorbell_start / sizeof(u32) + i * 2;
DRM_INFO("max_doorbell_slices=%zu\n", doorbell_process_limit);
return 0;
}
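
The aggregated doorbells are carved out just before the regular MES doorbell
range: their start is rounded up to a page, and each priority level gets one
64-bit doorbell, i.e. two dwords. A small program mirroring the index math
(constants are illustrative; five priority levels assumed)::

  #include <stdio.h>

  #define PAGE_SIZE  4096u
  #define NUM_LEVELS 5u   /* assumed AMDGPU_MES_PRIORITY_NUM_LEVELS */
  #define ROUNDUP(x, a) ((((x) + (a) - 1) / (a)) * (a))

  int main(void)
  {
          unsigned int max_assignment = 1000; /* example last doorbell */
          unsigned int start = ROUNDUP((max_assignment + 1) * 4, PAGE_SIZE);
          unsigned int i;

          for (i = 0; i < NUM_LEVELS; i++)
                  printf("level %u -> doorbell index %u\n",
                         i, start / 4 + i * 2);
          return 0;
  }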
@ -150,6 +161,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
idr_init(&adev->mes.queue_id_idr);
ida_init(&adev->mes.doorbell_ida);
spin_lock_init(&adev->mes.queue_id_lock);
spin_lock_init(&adev->mes.ring_lock);
mutex_init(&adev->mes.mutex_hidden);
adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
@ -173,9 +185,6 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
adev->mes.sdma_hqd_mask[i] = 0xfc;
}
for (i = 0; i < AMDGPU_MES_PRIORITY_NUM_LEVELS; i++)
adev->mes.agreegated_doorbells[i] = 0xffffffff;
r = amdgpu_device_wb_get(adev, &adev->mes.sch_ctx_offs);
if (r) {
dev_err(adev->dev,
@ -716,6 +725,7 @@ int amdgpu_mes_add_hw_queue(struct amdgpu_device *adev, int gang_id,
queue->queue_type = qprops->queue_type;
queue->paging = qprops->paging;
queue->gang = gang;
queue->ring->mqd_ptr = queue->mqd_cpu_ptr;
list_add_tail(&queue->list, &gang->queue_list);
amdgpu_mes_unlock(&adev->mes);
@ -794,8 +804,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
struct mes_unmap_legacy_queue_input queue_input;
int r;
amdgpu_mes_lock(&adev->mes);
queue_input.action = action;
queue_input.queue_type = ring->funcs->type;
queue_input.doorbell_offset = ring->doorbell_index;
@ -808,7 +816,6 @@ int amdgpu_mes_unmap_legacy_queue(struct amdgpu_device *adev,
if (r)
DRM_ERROR("failed to unmap legacy queue\n");
amdgpu_mes_unlock(&adev->mes);
return r;
}
@ -817,8 +824,6 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
struct mes_misc_op_input op_input;
int r, val = 0;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_READ_REG;
op_input.read_reg.reg_offset = reg;
op_input.read_reg.buffer_addr = adev->mes.read_val_gpu_addr;
@ -835,7 +840,6 @@ uint32_t amdgpu_mes_rreg(struct amdgpu_device *adev, uint32_t reg)
val = *(adev->mes.read_val_ptr);
error:
amdgpu_mes_unlock(&adev->mes);
return val;
}
@ -845,8 +849,6 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
struct mes_misc_op_input op_input;
int r;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_WRITE_REG;
op_input.write_reg.reg_offset = reg;
op_input.write_reg.reg_value = val;
@ -862,7 +864,6 @@ int amdgpu_mes_wreg(struct amdgpu_device *adev,
DRM_ERROR("failed to write reg (0x%x)\n", reg);
error:
amdgpu_mes_unlock(&adev->mes);
return r;
}
@ -873,8 +874,6 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
struct mes_misc_op_input op_input;
int r;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_WRM_REG_WR_WAIT;
op_input.wrm_reg.reg0 = reg0;
op_input.wrm_reg.reg1 = reg1;
@ -892,7 +891,6 @@ int amdgpu_mes_reg_write_reg_wait(struct amdgpu_device *adev,
DRM_ERROR("failed to reg_write_reg_wait\n");
error:
amdgpu_mes_unlock(&adev->mes);
return r;
}
@ -902,8 +900,6 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
struct mes_misc_op_input op_input;
int r;
amdgpu_mes_lock(&adev->mes);
op_input.op = MES_MISC_OP_WRM_REG_WAIT;
op_input.wrm_reg.reg0 = reg;
op_input.wrm_reg.ref = val;
@ -920,7 +916,6 @@ int amdgpu_mes_reg_wait(struct amdgpu_device *adev, uint32_t reg,
DRM_ERROR("failed to reg_write_reg_wait\n");
error:
amdgpu_mes_unlock(&adev->mes);
return r;
}
@ -1087,6 +1082,12 @@ void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
kfree(ring);
}
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
enum amdgpu_mes_priority_level prio)
{
return adev->mes.aggregated_doorbells[prio];
}
int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
struct amdgpu_mes_ctx_data *ctx_data)
{
@ -1188,6 +1189,63 @@ error:
return r;
}
int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
struct amdgpu_mes_ctx_data *ctx_data)
{
struct amdgpu_bo_va *bo_va = ctx_data->meta_data_va;
struct amdgpu_bo *bo = ctx_data->meta_data_obj;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo_list_entry vm_pd;
struct list_head list, duplicates;
struct dma_fence *fence = NULL;
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
long r = 0;
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
tv.bo = &bo->tbo;
tv.num_shared = 2;
list_add(&tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%ld)\n", r);
return r;
}
amdgpu_vm_bo_del(adev, bo_va);
if (!amdgpu_vm_ready(vm))
goto out_unlock;
r = dma_resv_get_singleton(bo->tbo.base.resv, DMA_RESV_USAGE_BOOKKEEP, &fence);
if (r)
goto out_unlock;
if (fence) {
amdgpu_bo_fence(bo, fence, true);
fence = NULL;
}
r = amdgpu_vm_clear_freed(adev, vm, &fence);
if (r || !fence)
goto out_unlock;
dma_fence_wait(fence, false);
amdgpu_bo_fence(bo, fence, true);
dma_fence_put(fence);
out_unlock:
if (unlikely(r < 0))
dev_err(adev->dev, "failed to clear page tables (%ld)\n", r);
ttm_eu_backoff_reservation(&ticket, &list);
return r;
}
static int amdgpu_mes_test_create_gang_and_queues(struct amdgpu_device *adev,
int pasid, int *gang_id,
int queue_type, int num_queue,
@ -1294,7 +1352,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
r = amdgpu_mes_ctx_alloc_meta_data(adev, &ctx_data);
if (r) {
DRM_ERROR("failed to alloc ctx meta data\n");
goto error_pasid;
goto error_fini;
}
ctx_data.meta_data_gpu_addr = AMDGPU_VA_RESERVED_SIZE;
@ -1349,9 +1407,9 @@ error_queues:
amdgpu_mes_destroy_process(adev, pasid);
error_vm:
BUG_ON(amdgpu_bo_reserve(ctx_data.meta_data_obj, true));
amdgpu_vm_bo_del(adev, ctx_data.meta_data_va);
amdgpu_bo_unreserve(ctx_data.meta_data_obj);
amdgpu_mes_ctx_unmap_meta_data(adev, &ctx_data);
error_fini:
amdgpu_vm_fini(adev, vm);
error_pasid:


@ -83,6 +83,7 @@ struct amdgpu_mes {
uint64_t default_gang_quantum;
struct amdgpu_ring ring;
spinlock_t ring_lock;
const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
@ -112,7 +113,7 @@ struct amdgpu_mes {
uint32_t compute_hqd_mask[AMDGPU_MES_MAX_COMPUTE_PIPES];
uint32_t gfx_hqd_mask[AMDGPU_MES_MAX_GFX_PIPES];
uint32_t sdma_hqd_mask[AMDGPU_MES_MAX_SDMA_PIPES];
uint32_t agreegated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
uint32_t aggregated_doorbells[AMDGPU_MES_PRIORITY_NUM_LEVELS];
uint32_t sch_ctx_offs;
uint64_t sch_ctx_gpu_addr;
uint64_t *sch_ctx_ptr;
@ -346,12 +347,17 @@ int amdgpu_mes_add_ring(struct amdgpu_device *adev, int gang_id,
void amdgpu_mes_remove_ring(struct amdgpu_device *adev,
struct amdgpu_ring *ring);
uint32_t amdgpu_mes_get_aggregated_doorbell_index(struct amdgpu_device *adev,
enum amdgpu_mes_priority_level prio);
int amdgpu_mes_ctx_alloc_meta_data(struct amdgpu_device *adev,
struct amdgpu_mes_ctx_data *ctx_data);
void amdgpu_mes_ctx_free_meta_data(struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_map_meta_data(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_ctx_unmap_meta_data(struct amdgpu_device *adev,
struct amdgpu_mes_ctx_data *ctx_data);
int amdgpu_mes_self_test(struct amdgpu_device *adev);


@ -882,6 +882,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
if (WARN_ON_ONCE(min_offset > max_offset))
return -EINVAL;
/* Check domain to be pinned to against preferred domains */
if (bo->preferred_domains & domain)
domain = bo->preferred_domains & domain;
/* A shared bo cannot be migrated to VRAM */
if (bo->tbo.base.import_attach) {
if (domain & AMDGPU_GEM_DOMAIN_GTT)


@ -2168,6 +2168,21 @@ static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
case AMDGPU_UCODE_ID_RLC_DRAM:
*type = GFX_FW_TYPE_RLC_DRAM_BOOT;
break;
case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
*type = GFX_FW_TYPE_GLOBAL_TAP_DELAYS;
break;
case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
*type = GFX_FW_TYPE_SE0_TAP_DELAYS;
break;
case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
*type = GFX_FW_TYPE_SE1_TAP_DELAYS;
break;
case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
*type = GFX_FW_TYPE_SE2_TAP_DELAYS;
break;
case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
*type = GFX_FW_TYPE_SE3_TAP_DELAYS;
break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
@ -2348,6 +2363,13 @@ static int psp_load_smu_fw(struct psp_context *psp)
&adev->firmware.ucode[AMDGPU_UCODE_ID_SMC];
struct amdgpu_ras *ras = psp->ras_context.ras;
/*
* Skip SMU FW reloading in case of using BACO for runpm only,
* as SMU is always alive.
*/
if (adev->in_runpm && (adev->pm.rpm_mode == AMDGPU_RUNPM_BACO))
return 0;
if (!ucode->fw || amdgpu_sriov_vf(psp->adev))
return 0;


@ -69,8 +69,8 @@ enum psp_bootloader_cmd {
PSP_BL__LOAD_SOSDRV = 0x20000,
PSP_BL__LOAD_KEY_DATABASE = 0x80000,
PSP_BL__LOAD_SOCDRV = 0xB0000,
PSP_BL__LOAD_INTFDRV = 0xC0000,
PSP_BL__LOAD_DBGDRV = 0xD0000,
PSP_BL__LOAD_DBGDRV = 0xC0000,
PSP_BL__LOAD_INTFDRV = 0xD0000,
PSP_BL__DRAM_LONG_TRAIN = 0x100000,
PSP_BL__DRAM_SHORT_TRAIN = 0x200000,
PSP_BL__LOAD_TOS_SPL_TABLE = 0x10000000,


@ -717,27 +717,30 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
if (!con)
return -EINVAL;
info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
if (!info)
return -ENOMEM;
if (head->block == AMDGPU_RAS_BLOCK__GFX) {
info = kzalloc(sizeof(union ta_ras_cmd_input), GFP_KERNEL);
if (!info)
return -ENOMEM;
if (!enable) {
info->disable_features = (struct ta_ras_disable_features_input) {
.block_id = amdgpu_ras_block_to_ta(head->block),
.error_type = amdgpu_ras_error_to_ta(head->type),
};
} else {
info->enable_features = (struct ta_ras_enable_features_input) {
.block_id = amdgpu_ras_block_to_ta(head->block),
.error_type = amdgpu_ras_error_to_ta(head->type),
};
if (!enable) {
info->disable_features = (struct ta_ras_disable_features_input) {
.block_id = amdgpu_ras_block_to_ta(head->block),
.error_type = amdgpu_ras_error_to_ta(head->type),
};
} else {
info->enable_features = (struct ta_ras_enable_features_input) {
.block_id = amdgpu_ras_block_to_ta(head->block),
.error_type = amdgpu_ras_error_to_ta(head->type),
};
}
}
/* Do not enable if it is not allowed. */
WARN_ON(enable && !amdgpu_ras_is_feature_allowed(adev, head));
/* Only enable ras feature operation handle on host side */
if (!amdgpu_sriov_vf(adev) &&
if (head->block == AMDGPU_RAS_BLOCK__GFX &&
!amdgpu_sriov_vf(adev) &&
!amdgpu_ras_intr_triggered()) {
ret = psp_ras_enable_features(&adev->psp, info, enable);
if (ret) {
@ -753,7 +756,8 @@ int amdgpu_ras_feature_enable(struct amdgpu_device *adev,
__amdgpu_ras_feature_enable(adev, head, enable);
ret = 0;
out:
kfree(info);
if (head->block == AMDGPU_RAS_BLOCK__GFX)
kfree(info);
return ret;
}
@ -1938,8 +1942,16 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
amdgpu_put_xgmi_hive(hive);
}
if (amdgpu_device_should_recover_gpu(ras->adev))
amdgpu_device_gpu_recover(ras->adev, NULL);
if (amdgpu_device_should_recover_gpu(ras->adev)) {
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
}
atomic_set(&ras->in_recovery, 0);
}
@ -2150,7 +2162,7 @@ int amdgpu_ras_recovery_init(struct amdgpu_device *adev)
bool exc_err_limit = false;
int ret;
if (!con)
if (!con || amdgpu_sriov_vf(adev))
return 0;
/* Allow access to RAS EEPROM via debugfs, when the ASIC


@ -222,6 +222,11 @@ struct amdgpu_rlc {
u32 rlc_dram_ucode_size_bytes;
u32 rlcp_ucode_size_bytes;
u32 rlcv_ucode_size_bytes;
u32 global_tap_delays_ucode_size_bytes;
u32 se0_tap_delays_ucode_size_bytes;
u32 se1_tap_delays_ucode_size_bytes;
u32 se2_tap_delays_ucode_size_bytes;
u32 se3_tap_delays_ucode_size_bytes;
u32 *register_list_format;
u32 *register_restore;
@ -232,6 +237,11 @@ struct amdgpu_rlc {
u8 *rlc_dram_ucode;
u8 *rlcp_ucode;
u8 *rlcv_ucode;
u8 *global_tap_delays_ucode;
u8 *se0_tap_delays_ucode;
u8 *se1_tap_delays_ucode;
u8 *se2_tap_delays_ucode;
u8 *se3_tap_delays_ucode;
bool is_rlc_v2_1;


@ -561,6 +561,16 @@ const char *amdgpu_ucode_name(enum AMDGPU_UCODE_ID ucode_id)
return "RLC_P";
case AMDGPU_UCODE_ID_RLC_V:
return "RLC_V";
case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
return "GLOBAL_TAP_DELAYS";
case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
return "SE0_TAP_DELAYS";
case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
return "SE1_TAP_DELAYS";
case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
return "SE2_TAP_DELAYS";
case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
return "SE3_TAP_DELAYS";
case AMDGPU_UCODE_ID_IMU_I:
return "IMU_I";
case AMDGPU_UCODE_ID_IMU_D:
@ -745,6 +755,26 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev,
ucode->ucode_size = adev->gfx.rlc.rlcv_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.rlcv_ucode;
break;
case AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS:
ucode->ucode_size = adev->gfx.rlc.global_tap_delays_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.global_tap_delays_ucode;
break;
case AMDGPU_UCODE_ID_SE0_TAP_DELAYS:
ucode->ucode_size = adev->gfx.rlc.se0_tap_delays_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.se0_tap_delays_ucode;
break;
case AMDGPU_UCODE_ID_SE1_TAP_DELAYS:
ucode->ucode_size = adev->gfx.rlc.se1_tap_delays_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.se1_tap_delays_ucode;
break;
case AMDGPU_UCODE_ID_SE2_TAP_DELAYS:
ucode->ucode_size = adev->gfx.rlc.se2_tap_delays_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.se2_tap_delays_ucode;
break;
case AMDGPU_UCODE_ID_SE3_TAP_DELAYS:
ucode->ucode_size = adev->gfx.rlc.se3_tap_delays_ucode_size_bytes;
ucode_addr = adev->gfx.rlc.se3_tap_delays_ucode;
break;
case AMDGPU_UCODE_ID_CP_MES:
ucode->ucode_size = le32_to_cpu(mes_hdr->mes_ucode_size_bytes);
ucode_addr = (u8 *)ucode->fw->data +


@ -266,6 +266,21 @@ struct rlc_firmware_header_v2_3 {
uint32_t rlcv_ucode_offset_bytes;
};
/* version_major=2, version_minor=4 */
struct rlc_firmware_header_v2_4 {
struct rlc_firmware_header_v2_3 v2_3;
uint32_t global_tap_delays_ucode_size_bytes;
uint32_t global_tap_delays_ucode_offset_bytes;
uint32_t se0_tap_delays_ucode_size_bytes;
uint32_t se0_tap_delays_ucode_offset_bytes;
uint32_t se1_tap_delays_ucode_size_bytes;
uint32_t se1_tap_delays_ucode_offset_bytes;
uint32_t se2_tap_delays_ucode_size_bytes;
uint32_t se2_tap_delays_ucode_offset_bytes;
uint32_t se3_tap_delays_ucode_size_bytes;
uint32_t se3_tap_delays_ucode_offset_bytes;
};
/* version_major=1, version_minor=0 */
struct sdma_firmware_header_v1_0 {
struct common_firmware_header header;
@ -426,6 +441,11 @@ enum AMDGPU_UCODE_ID {
AMDGPU_UCODE_ID_CP_MES1_DATA,
AMDGPU_UCODE_ID_IMU_I,
AMDGPU_UCODE_ID_IMU_D,
AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS,
AMDGPU_UCODE_ID_SE0_TAP_DELAYS,
AMDGPU_UCODE_ID_SE1_TAP_DELAYS,
AMDGPU_UCODE_ID_SE2_TAP_DELAYS,
AMDGPU_UCODE_ID_SE3_TAP_DELAYS,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM,
AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM,


@ -41,6 +41,12 @@
#define LOOP_UMC_CH_INST(ch_inst) for ((ch_inst) = 0; (ch_inst) < adev->umc.channel_inst_num; (ch_inst)++)
#define LOOP_UMC_INST_AND_CH(umc_inst, ch_inst) LOOP_UMC_INST((umc_inst)) LOOP_UMC_CH_INST((ch_inst))
#define LOOP_UMC_NODE_INST(node_inst) \
for ((node_inst) = 0; (node_inst) < adev->umc.node_inst_num; (node_inst)++)
#define LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) \
LOOP_UMC_NODE_INST((node_inst)) LOOP_UMC_INST_AND_CH((umc_inst), (ch_inst))
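
The new macros add an outer node loop around the existing
per-instance/per-channel iteration. A self-contained demo of the nesting
(the macros are restated so the snippet builds on its own; the instance
counts are made up)::

  #include <stdio.h>

  static struct {
          struct { int node_inst_num, umc_inst_num, channel_inst_num; } umc;
  } _adev = { { 2, 2, 4 } }, *adev = &_adev;

  #define LOOP_UMC_INST(u) \
          for ((u) = 0; (u) < adev->umc.umc_inst_num; (u)++)
  #define LOOP_UMC_CH_INST(c) \
          for ((c) = 0; (c) < adev->umc.channel_inst_num; (c)++)
  #define LOOP_UMC_INST_AND_CH(u, c) LOOP_UMC_INST(u) LOOP_UMC_CH_INST(c)
  #define LOOP_UMC_NODE_INST(n) \
          for ((n) = 0; (n) < adev->umc.node_inst_num; (n)++)
  #define LOOP_UMC_EACH_NODE_INST_AND_CH(n, u, c) \
          LOOP_UMC_NODE_INST(n) LOOP_UMC_INST_AND_CH(u, c)

  int main(void)
  {
          int node, umc, ch, visits = 0;

          LOOP_UMC_EACH_NODE_INST_AND_CH(node, umc, ch)
                  visits++;

          printf("%d node/instance/channel tuples\n", visits); /* 2*2*4 */
          return 0;
  }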
struct amdgpu_umc_ras {
struct amdgpu_ras_block_object ras_block;
void (*err_cnt_init)(struct amdgpu_device *adev);
@ -62,6 +68,10 @@ struct amdgpu_umc {
uint32_t channel_inst_num;
/* number of umc instance with memory map register access */
uint32_t umc_inst_num;
/*number of umc node instance with memory map register access*/
uint32_t node_inst_num;
/* UMC regiser per channel offset */
uint32_t channel_offs;
/* channel index table of interleaved memory */


@ -496,7 +496,8 @@ static int amdgpu_vkms_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* disable prefer shadow for now due to hibernation issues */
adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;


@ -2168,6 +2168,14 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
} else {
vm->update_funcs = &amdgpu_vm_sdma_funcs;
}
/*
* Make sure root PD gets mapped. As vm_update_mode could be changed
* when turning a GFX VM into a compute VM.
*/
r = vm->update_funcs->map_table(to_amdgpu_bo_vm(vm->root.bo));
if (r)
goto unreserve_bo;
dma_fence_put(vm->last_update);
vm->last_update = NULL;
vm->is_compute_context = true;


@ -395,11 +395,11 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
unsigned long pages_per_block;
int r;
lpfn = place->lpfn << PAGE_SHIFT;
lpfn = (u64)place->lpfn << PAGE_SHIFT;
if (!lpfn)
lpfn = man->size;
fpfn = place->fpfn << PAGE_SHIFT;
fpfn = (u64)place->fpfn << PAGE_SHIFT;
max_bytes = adev->gmc.mc_vram_size;
if (tbo->type != ttm_bo_type_kernel)
@ -439,12 +439,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* Allocate blocks in desired range */
vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
remaining_size = vres->base.num_pages << PAGE_SHIFT;
remaining_size = (u64)vres->base.num_pages << PAGE_SHIFT;
mutex_lock(&mgr->lock);
while (remaining_size) {
if (tbo->page_alignment)
min_block_size = tbo->page_alignment << PAGE_SHIFT;
min_block_size = (u64)tbo->page_alignment << PAGE_SHIFT;
else
min_block_size = mgr->default_page_size;
@ -453,12 +453,12 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
/* Limit maximum size to 2GiB due to SG table limitations */
size = min(remaining_size, 2ULL << 30);
if (size >= pages_per_block << PAGE_SHIFT)
min_block_size = pages_per_block << PAGE_SHIFT;
if (size >= (u64)pages_per_block << PAGE_SHIFT)
min_block_size = (u64)pages_per_block << PAGE_SHIFT;
cur_size = size;
if (fpfn + size != place->lpfn << PAGE_SHIFT) {
if (fpfn + size != (u64)place->lpfn << PAGE_SHIFT) {
/*
* Except for actual range allocation, modify the size and
* min_block_size conforming to continuous flag enablement
@ -498,7 +498,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
LIST_HEAD(temp);
trim_list = &vres->blocks;
original_size = vres->base.num_pages << PAGE_SHIFT;
original_size = (u64)vres->base.num_pages << PAGE_SHIFT;
/*
* If size value is rounded up to min_block_size, trim the last


@ -50,7 +50,7 @@ static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
{
return PAGE_SIZE << drm_buddy_block_order(block);
return (u64)PAGE_SIZE << drm_buddy_block_order(block);
}
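
The casts here and throughout ``amdgpu_vram_mgr_new()`` above fix one class
of bug: the shift is evaluated in the 32-bit type of
``lpfn``/``fpfn``/``num_pages``/``page_alignment`` and truncates before the
assignment to a 64-bit variable, corrupting offsets and sizes past 4 GiB. A
standalone illustration::

  #include <stdint.h>
  #include <stdio.h>

  #define PAGE_SHIFT 12

  int main(void)
  {
          uint32_t lpfn = 0x200000;  /* page 2^21 -> byte offset 8 GiB */

          uint64_t wrong = lpfn << PAGE_SHIFT;           /* 32-bit math */
          uint64_t right = (uint64_t)lpfn << PAGE_SHIFT; /* widen first */

          printf("wrong=0x%llx right=0x%llx\n",
                 (unsigned long long)wrong, (unsigned long long)right);
          return 0;
  }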
static inline struct amdgpu_vram_mgr_resource *


@ -2796,7 +2796,8 @@ static int dce_v10_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* disable prefer shadow for now due to hibernation issues */
adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;


@ -2914,7 +2914,8 @@ static int dce_v11_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* disable prefer shadow for now due to hibernation issues */
adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;


@ -339,7 +339,7 @@ static void dce_v6_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@ -2673,7 +2673,8 @@ static int dce_v6_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_width = 16384;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* disable prefer shadow for now due to hibernation issues */
adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;


@ -333,7 +333,7 @@ static void dce_v8_0_hpd_fini(struct amdgpu_device *adev)
tmp = RREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd]);
tmp &= ~DC_HPD1_CONTROL__DC_HPD1_EN_MASK;
WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], 0);
WREG32(mmDC_HPD1_CONTROL + hpd_offsets[amdgpu_connector->hpd.hpd], tmp);
amdgpu_irq_put(adev, &adev->hpd_irq, amdgpu_connector->hpd.hpd);
}
@ -2693,7 +2693,8 @@ static int dce_v8_0_sw_init(void *handle)
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
adev_to_drm(adev)->mode_config.prefer_shadow = 1;
/* disable prefer shadow for now due to hibernation issues */
adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;


@ -3976,6 +3976,23 @@ static void gfx_v10_0_init_rlc_iram_dram_microcode(struct amdgpu_device *adev)
adev->gfx.rlc.rlc_dram_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->rlc_dram_ucode_offset_bytes);
}
static void gfx_v10_0_init_tap_delays_microcode(struct amdgpu_device *adev)
{
const struct rlc_firmware_header_v2_4 *rlc_hdr;
rlc_hdr = (const struct rlc_firmware_header_v2_4 *)adev->gfx.rlc_fw->data;
adev->gfx.rlc.global_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->global_tap_delays_ucode_size_bytes);
adev->gfx.rlc.global_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->global_tap_delays_ucode_offset_bytes);
adev->gfx.rlc.se0_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_size_bytes);
adev->gfx.rlc.se0_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se0_tap_delays_ucode_offset_bytes);
adev->gfx.rlc.se1_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_size_bytes);
adev->gfx.rlc.se1_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se1_tap_delays_ucode_offset_bytes);
adev->gfx.rlc.se2_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_size_bytes);
adev->gfx.rlc.se2_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se2_tap_delays_ucode_offset_bytes);
adev->gfx.rlc.se3_tap_delays_ucode_size_bytes = le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_size_bytes);
adev->gfx.rlc.se3_tap_delays_ucode = (u8 *)rlc_hdr + le32_to_cpu(rlc_hdr->se3_tap_delays_ucode_offset_bytes);
}
static bool gfx_v10_0_navi10_gfxoff_should_enable(struct amdgpu_device *adev)
{
bool ret = false;
@ -4153,8 +4170,11 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
if (version_major == 2) {
if (version_minor >= 1)
gfx_v10_0_init_rlc_ext_microcode(adev);
if (version_minor == 2)
if (version_minor >= 2)
gfx_v10_0_init_rlc_iram_dram_microcode(adev);
if (version_minor == 4) {
gfx_v10_0_init_tap_delays_microcode(adev);
}
}
}
@ -4251,8 +4271,39 @@ static int gfx_v10_0_init_microcode(struct amdgpu_device *adev)
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.rlc_dram_ucode_size_bytes, PAGE_SIZE);
}
}
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS];
info->ucode_id = AMDGPU_UCODE_ID_GLOBAL_TAP_DELAYS;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.global_tap_delays_ucode_size_bytes, PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE0_TAP_DELAYS];
info->ucode_id = AMDGPU_UCODE_ID_SE0_TAP_DELAYS;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.se0_tap_delays_ucode_size_bytes, PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE1_TAP_DELAYS];
info->ucode_id = AMDGPU_UCODE_ID_SE1_TAP_DELAYS;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.se1_tap_delays_ucode_size_bytes, PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE2_TAP_DELAYS];
info->ucode_id = AMDGPU_UCODE_ID_SE2_TAP_DELAYS;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.se2_tap_delays_ucode_size_bytes, PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SE3_TAP_DELAYS];
info->ucode_id = AMDGPU_UCODE_ID_SE3_TAP_DELAYS;
info->fw = adev->gfx.rlc_fw;
adev->firmware.fw_size +=
ALIGN(adev->gfx.rlc.se3_tap_delays_ucode_size_bytes, PAGE_SIZE);
info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1];
info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1;
info->fw = adev->gfx.mec_fw;
@ -8525,14 +8576,45 @@ static u64 gfx_v10_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
uint64_t wptr_tmp;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
wptr_tmp = ring->wptr & ring->buf_mask;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
*wptr_saved = wptr_tmp;
/* assume doorbell always being used by mes mapped queue */
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, wptr_tmp);
WDOORBELL64(ring->doorbell_index, wptr_tmp);
} else {
WDOORBELL64(ring->doorbell_index, wptr_tmp);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index, wptr_tmp);
}
} else {
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR,
lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, mmCP_RB0_WPTR_HI,
upper_32_bits(ring->wptr));
}
}
}
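
For MES-managed queues the new path mirrors the write pointer into two extra
dwords placed directly after the MQD and, once MES has unmapped the queue,
also rings the per-priority aggregated doorbell so the update is not lost. A
hypothetical view of that trailer, inferred from the pointer arithmetic
above::

  #include <stdint.h>

  /* inferred: ring->mqd_ptr + mqd_size points at wptr_saved, and
   * is_queue_unmap sits one dword later; not a definitive layout */
  struct mes_mqd_trailer {
          uint32_t wptr_saved;      /* last wptr the driver wrote */
          uint32_t is_queue_unmap;  /* set once MES unmapped the queue */
  };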
@ -8557,13 +8639,42 @@ static u64 gfx_v10_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v10_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
uint64_t wptr_tmp;
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
wptr_tmp = ring->wptr & ring->buf_mask;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
*wptr_saved = wptr_tmp;
/* assume doorbell always used by mes mapped queue */
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, wptr_tmp);
WDOORBELL64(ring->doorbell_index, wptr_tmp);
} else {
WDOORBELL64(ring->doorbell_index, wptr_tmp);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index, wptr_tmp);
}
} else {
BUG(); /* only DOORBELL method supported on gfx10 now */
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG(); /* only DOORBELL method supported on gfx10 now */
}
}
}


@ -126,6 +126,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev);
static void gfx_v11_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub, uint8_t dst_sel);
static void gfx_v11_0_set_safe_mode(struct amdgpu_device *adev);
static void gfx_v11_0_unset_safe_mode(struct amdgpu_device *adev);
static void gfx11_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue_mask)
{
@ -4743,63 +4745,143 @@ static int gfx_v11_0_soft_reset(void *handle)
{
u32 grbm_soft_reset = 0;
u32 tmp;
int i, j, k;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
/* GRBM_STATUS */
tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS);
if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__DB_BUSY_MASK |
GRBM_STATUS__CB_BUSY_MASK | GRBM_STATUS__GDS_BUSY_MASK |
GRBM_STATUS__SPI_BUSY_MASK | GRBM_STATUS__GE_BUSY_NO_DMA_MASK)) {
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET, SOFT_RESET_CP,
1);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET, SOFT_RESET_GFX,
1);
tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 0);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 0);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
gfx_v11_0_set_safe_mode(adev);
for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) {
tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
WREG32_SOC15(GC, 0, regCP_HQD_DEQUEUE_REQUEST, 0x2);
WREG32_SOC15(GC, 0, regSPI_COMPUTE_QUEUE_RESET, 0x1);
}
}
}
for (i = 0; i < adev->gfx.me.num_me; ++i) {
for (j = 0; j < adev->gfx.me.num_queue_per_pipe; j++) {
for (k = 0; k < adev->gfx.me.num_pipe_per_me; k++) {
tmp = RREG32_SOC15(GC, 0, regGRBM_GFX_CNTL);
tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, MEID, i);
tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, QUEUEID, j);
tmp = REG_SET_FIELD(tmp, GRBM_GFX_CNTL, PIPEID, k);
WREG32_SOC15(GC, 0, regGRBM_GFX_CNTL, tmp);
WREG32_SOC15(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST, 0x1);
}
}
}
if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK)) {
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET, SOFT_RESET_CP,
1);
WREG32_SOC15(GC, 0, regCP_VMID_RESET, 0xfffffffe);
// Read CP_VMID_RESET register three times.
// to get sufficient time for GFX_HQD_ACTIVE reach 0
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
RREG32_SOC15(GC, 0, regCP_VMID_RESET);
for (i = 0; i < adev->usec_timeout; i++) {
if (!RREG32_SOC15(GC, 0, regCP_HQD_ACTIVE) &&
!RREG32_SOC15(GC, 0, regCP_GFX_HQD_ACTIVE))
break;
udelay(1);
}
if (i >= adev->usec_timeout) {
printk("Failed to wait all pipes clean\n");
return -EINVAL;
}
/* GRBM_STATUS2 */
tmp = RREG32_SOC15(GC, 0, regGRBM_STATUS2);
if (REG_GET_FIELD(tmp, GRBM_STATUS2, RLC_BUSY))
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset,
GRBM_SOFT_RESET,
SOFT_RESET_RLC,
1);
/********** trigger soft reset ***********/
grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CP, 1);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_GFX, 1);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CPF, 1);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CPC, 1);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CPG, 1);
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
/********** exit soft reset ***********/
grbm_soft_reset = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CP, 0);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_GFX, 0);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CPF, 0);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CPC, 0);
grbm_soft_reset = REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET,
SOFT_RESET_CPG, 0);
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, grbm_soft_reset);
if (grbm_soft_reset) {
/* stop the rlc */
gfx_v11_0_rlc_stop(adev);
tmp = RREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL);
tmp = REG_SET_FIELD(tmp, CP_SOFT_RESET_CNTL, CMP_HQD_REG_RESET, 0x1);
WREG32_SOC15(GC, 0, regCP_SOFT_RESET_CNTL, tmp);
/* Disable GFX parsing/prefetching */
gfx_v11_0_cp_gfx_enable(adev, false);
WREG32_SOC15(GC, 0, regCP_ME_CNTL, 0x0);
WREG32_SOC15(GC, 0, regCP_MEC_RS64_CNTL, 0x0);
/* Disable MEC parsing/prefetching */
gfx_v11_0_cp_compute_enable(adev, false);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
tmp |= grbm_soft_reset;
dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
udelay(50);
tmp &= ~grbm_soft_reset;
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
/* Wait a little for things to settle down */
udelay(50);
for (i = 0; i < adev->usec_timeout; i++) {
if (!RREG32_SOC15(GC, 0, regCP_VMID_RESET))
break;
udelay(1);
}
return 0;
if (i >= adev->usec_timeout) {
printk("Failed to wait CP_VMID_RESET to 0\n");
return -EINVAL;
}
tmp = RREG32_SOC15(GC, 0, regCP_INT_CNTL);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CMP_BUSY_INT_ENABLE, 1);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_BUSY_INT_ENABLE, 1);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, CNTX_EMPTY_INT_ENABLE, 1);
tmp = REG_SET_FIELD(tmp, CP_INT_CNTL, GFX_IDLE_INT_ENABLE, 1);
WREG32_SOC15(GC, 0, regCP_INT_CNTL, tmp);
gfx_v11_0_unset_safe_mode(adev);
return gfx_v11_0_cp_resume(adev);
}
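The soft-reset path above repeats one idiom: poll a status register once per microsecond, for at most adev->usec_timeout iterations, and treat loop exhaustion as failure. A minimal sketch of that idiom, assuming a hypothetical read_status() helper in place of the RREG32_SOC15() reads:

#include <linux/delay.h>
#include <linux/errno.h>
#include <linux/types.h>

/* hypothetical stand-in for the RREG32_SOC15() status reads above */
extern u32 read_status(void);

static int wait_for_status_clear(unsigned int usec_timeout)
{
	unsigned int i;

	for (i = 0; i < usec_timeout; i++) {
		if (!read_status())
			return 0;	/* the register reached 0 in time */
		udelay(1);	/* busy-wait one microsecond per iteration */
	}
	return -EINVAL;	/* timed out, as the reset path above does */
}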
static bool gfx_v11_0_check_soft_reset(void *handle)
{
int i, r;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
long tmo = msecs_to_jiffies(1000);
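/* an IB test failure on any gfx or compute ring flags the block for soft reset */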
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
r = amdgpu_ring_test_ib(ring, tmo);
if (r)
return true;
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
r = amdgpu_ring_test_ib(ring, tmo);
if (r)
return true;
}
return false;
}
static uint64_t gfx_v11_0_get_gpu_clock_counter(struct amdgpu_device *adev)
@ -5297,14 +5379,45 @@ static u64 gfx_v11_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_GFX].mqd_size;
uint64_t wptr_tmp;
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
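/* layout assumption: a saved wptr and an unmap flag sit right after the MQD */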
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
ring->hw_prio);
wptr_tmp = ring->wptr & ring->buf_mask;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
*wptr_saved = wptr_tmp;
/* assume the doorbell is always used by a MES-mapped queue */
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, wptr_tmp);
WDOORBELL64(ring->doorbell_index, wptr_tmp);
} else {
WDOORBELL64(ring->doorbell_index, wptr_tmp);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index, wptr_tmp);
}
} else {
WREG32_SOC15(GC, 0, regCP_RB0_WPTR, lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI, upper_32_bits(ring->wptr));
if (ring->use_doorbell) {
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
WREG32_SOC15(GC, 0, regCP_RB0_WPTR,
lower_32_bits(ring->wptr));
WREG32_SOC15(GC, 0, regCP_RB0_WPTR_HI,
upper_32_bits(ring->wptr));
}
}
}
@ -5329,13 +5442,42 @@ static u64 gfx_v11_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
static void gfx_v11_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_COMPUTE].mqd_size;
uint64_t wptr_tmp;
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
ring->hw_prio);
wptr_tmp = ring->wptr & ring->buf_mask;
atomic64_set((atomic64_t *)ring->wptr_cpu_addr, wptr_tmp);
*wptr_saved = wptr_tmp;
/* assume the doorbell is always used by a MES-mapped queue */
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, wptr_tmp);
WDOORBELL64(ring->doorbell_index, wptr_tmp);
} else {
WDOORBELL64(ring->doorbell_index, wptr_tmp);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index, wptr_tmp);
}
} else {
BUG(); /* only DOORBELL method supported on gfx11 now */
/* XXX check if swapping is necessary on BE */
if (ring->use_doorbell) {
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr);
WDOORBELL64(ring->doorbell_index, ring->wptr);
} else {
BUG(); /* only DOORBELL method supported on gfx11 now */
}
}
}
@ -6132,6 +6274,7 @@ static const struct amd_ip_funcs gfx_v11_0_ip_funcs = {
.is_idle = gfx_v11_0_is_idle,
.wait_for_idle = gfx_v11_0_wait_for_idle,
.soft_reset = gfx_v11_0_soft_reset,
.check_soft_reset = gfx_v11_0_check_soft_reset,
.set_clockgating_state = gfx_v11_0_set_clockgating_state,
.set_powergating_state = gfx_v11_0_set_powergating_state,
.get_clockgating_state = gfx_v11_0_get_clockgating_state,

View file

@ -987,23 +987,23 @@ static void gfx_v9_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t scratch = SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0);
uint32_t tmp = 0;
unsigned i;
int r;
WREG32_SOC15(GC, 0, mmSCRATCH_REG0, 0xCAFEDEAD);
WREG32(scratch, 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r)
return r;
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmSCRATCH_REG0) -
PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, scratch - PACKET3_SET_UCONFIG_REG_START);
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_SOC15(GC, 0, mmSCRATCH_REG0);
tmp = RREG32(scratch);
if (tmp == 0xDEADBEEF)
break;
udelay(1);

View file

@ -22,6 +22,9 @@
*/
#include <linux/firmware.h>
#include <linux/pci.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v10_0.h"
@ -834,10 +837,21 @@ static int gmc_v10_0_mc_init(struct amdgpu_device *adev)
adev->gmc.visible_vram_size = adev->gmc.real_vram_size;
/* set the gart size */
if (amdgpu_gart_size == -1)
adev->gmc.gart_size = 512ULL << 20;
else
if (amdgpu_gart_size == -1) {
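/* no override given: default to 512 MiB of GART, or 1 GiB on parts that need scatter/gather display support */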
switch (adev->ip_versions[GC_HWIP][0]) {
default:
adev->gmc.gart_size = 512ULL << 20;
break;
case IP_VERSION(10, 3, 1): /* DCE SG support */
case IP_VERSION(10, 3, 3): /* DCE SG support */
case IP_VERSION(10, 3, 6): /* DCE SG support */
case IP_VERSION(10, 3, 7): /* DCE SG support */
adev->gmc.gart_size = 1024ULL << 20;
break;
}
} else {
adev->gmc.gart_size = (u64)amdgpu_gart_size << 20;
}
gmc_v10_0_vram_gtt_location(adev, &adev->gmc);
@ -969,6 +983,8 @@ static int gmc_v10_0_sw_init(void *handle)
return r;
}
adev->need_swiotlb = drm_need_swiotlb(44);
r = gmc_v10_0_mc_init(adev);
if (r)
return r;

View file

@ -22,10 +22,13 @@
*/
#include <linux/firmware.h>
#include <linux/pci.h>
#include <drm/drm_cache.h>
#include "amdgpu.h"
#include "amdgpu_atomfirmware.h"
#include "gmc_v11_0.h"
#include "umc_v8_7.h"
#include "umc_v8_10.h"
#include "athub/athub_3_0_0_sh_mask.h"
#include "athub/athub_3_0_0_offset.h"
#include "oss/osssys_6_0_0_offset.h"
@ -537,11 +540,36 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
{
switch (adev->ip_versions[UMC_HWIP][0]) {
case IP_VERSION(8, 10, 0):
adev->umc.channel_inst_num = UMC_V8_10_CHANNEL_INSTANCE_NUM;
adev->umc.umc_inst_num = UMC_V8_10_UMC_INSTANCE_NUM;
adev->umc.node_inst_num = adev->gmc.num_umc;
adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];
adev->umc.ras = &umc_v8_10_ras;
break;
case IP_VERSION(8, 11, 0):
break;
default:
break;
}
if (adev->umc.ras) {
amdgpu_ras_register_ras_block(adev, &adev->umc.ras->ras_block);
strcpy(adev->umc.ras->ras_block.ras_comm.name, "umc");
adev->umc.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__UMC;
adev->umc.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__MULTI_UNCORRECTABLE;
adev->umc.ras_if = &adev->umc.ras->ras_block.ras_comm;
/* If no special ras_late_init function is defined, use the default ras_late_init */
if (!adev->umc.ras->ras_block.ras_late_init)
adev->umc.ras->ras_block.ras_late_init = amdgpu_umc_ras_late_init;
/* If no special ras_cb function is defined, use the default ras_cb */
if (!adev->umc.ras->ras_block.ras_cb)
adev->umc.ras->ras_block.ras_cb = amdgpu_umc_process_ras_data_cb;
}
}
@ -750,6 +778,8 @@ static int gmc_v11_0_sw_init(void *handle)
return r;
}
adev->need_swiotlb = drm_need_swiotlb(44);
r = gmc_v11_0_mc_init(adev);
if (r)
return r;

View file

@ -87,21 +87,32 @@ static const struct amdgpu_ring_funcs mes_v10_1_ring_funcs = {
};
static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size)
void *pkt, int size,
int api_status_off)
{
int ndw = size / 4;
signed long r;
union MESAPI__ADD_QUEUE *x_pkt = pkt;
struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
unsigned long flags;
BUG_ON(size % 4 != 0);
if (amdgpu_ring_alloc(ring, ndw))
spin_lock_irqsave(&mes->ring_lock, flags);
if (amdgpu_ring_alloc(ring, ndw)) {
spin_unlock_irqrestore(&mes->ring_lock, flags);
return -ENOMEM;
}
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
amdgpu_ring_write_multiple(ring, pkt, ndw);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
@ -166,13 +177,9 @@ static int mes_v10_1_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.gws_size = input->gws_size;
mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_add_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
}
static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
@ -189,13 +196,9 @@ static int mes_v10_1_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
@ -227,13 +230,9 @@ static int mes_v10_1_unmap_legacy_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
}
mes_remove_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v10_1_suspend_gang(struct amdgpu_mes *mes,
@ -258,13 +257,9 @@ static int mes_v10_1_query_sched_status(struct amdgpu_mes *mes)
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_status_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_status_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_status_pkt, sizeof(mes_status_pkt));
&mes_status_pkt, sizeof(mes_status_pkt),
offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}
static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
@ -299,7 +294,7 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
mes_set_hw_res_pkt.aggregated_doorbells[i] =
mes->agreegated_doorbells[i];
mes->aggregated_doorbells[i];
for (i = 0; i < 5; i++) {
mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
@ -313,13 +308,63 @@ static int mes_v10_1_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.disable_mes_log = 1;
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_set_hw_res_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v10_1_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
static void mes_v10_1_init_aggregated_doorbell(struct amdgpu_mes *mes)
{
struct amdgpu_device *adev = mes->adev;
uint32_t data;
data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1);
data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL1, data);
data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2);
data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL2, data);
data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3);
data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL3, data);
data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4);
data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL4, data);
data = RREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5);
data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, mmCP_MES_DOORBELL_CONTROL5, data);
data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
WREG32_SOC15(GC, 0, mmCP_HQD_GFX_CONTROL, data);
}
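Each CP_MES_DOORBELL_CONTROLn write above is the same read-modify-write: clear the offset, enable and hit fields, then OR in the aggregated doorbell index and the enable bit. A short sketch of that idiom; the masks and shifts below are hypothetical placeholders for the generated register headers:

#include <stdint.h>

/* hypothetical field definitions standing in for the *_MASK/*__SHIFT macros */
#define DB_OFFSET_SHIFT 2
#define DB_OFFSET_MASK  (0x03ffffffu << DB_OFFSET_SHIFT)
#define DB_EN_SHIFT     30
#define DB_EN_MASK      (1u << DB_EN_SHIFT)
#define DB_HIT_MASK     (1u << 31)

static uint32_t set_aggregated_doorbell(uint32_t reg, uint32_t doorbell_index)
{
	/* clear all three fields so stale offset or hit bits cannot survive */
	reg &= ~(DB_OFFSET_MASK | DB_EN_MASK | DB_HIT_MASK);
	reg |= doorbell_index << DB_OFFSET_SHIFT;
	reg |= 1u << DB_EN_SHIFT;
	return reg;
}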
static const struct amdgpu_mes_funcs mes_v10_1_funcs = {
@ -1121,6 +1166,8 @@ static int mes_v10_1_hw_init(void *handle)
if (r)
goto failure;
mes_v10_1_init_aggregated_doorbell(&adev->mes);
r = mes_v10_1_query_sched_status(&adev->mes);
if (r) {
DRM_ERROR("MES is busy\n");
@ -1133,6 +1180,7 @@ static int mes_v10_1_hw_init(void *handle)
* with MES enabled.
*/
adev->gfx.kiq.ring.sched.ready = false;
adev->mes.ring.sched.ready = true;
return 0;
@ -1145,6 +1193,8 @@ static int mes_v10_1_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->mes.ring.sched.ready = false;
mes_v10_1_enable(adev, false);
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {

View file

@ -86,21 +86,32 @@ static const struct amdgpu_ring_funcs mes_v11_0_ring_funcs = {
};
static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
void *pkt, int size)
void *pkt, int size,
int api_status_off)
{
int ndw = size / 4;
signed long r;
union MESAPI__ADD_QUEUE *x_pkt = pkt;
struct MES_API_STATUS *api_status;
struct amdgpu_device *adev = mes->adev;
struct amdgpu_ring *ring = &mes->ring;
unsigned long flags;
BUG_ON(size % 4 != 0);
if (amdgpu_ring_alloc(ring, ndw))
spin_lock_irqsave(&mes->ring_lock, flags);
if (amdgpu_ring_alloc(ring, ndw)) {
spin_unlock_irqrestore(&mes->ring_lock, flags);
return -ENOMEM;
}
api_status = (struct MES_API_STATUS *)((char *)pkt + api_status_off);
api_status->api_completion_fence_addr = mes->ring.fence_drv.gpu_addr;
api_status->api_completion_fence_value = ++mes->ring.fence_drv.sync_seq;
amdgpu_ring_write_multiple(ring, pkt, ndw);
amdgpu_ring_commit(ring);
spin_unlock_irqrestore(&mes->ring_lock, flags);
DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
@ -173,13 +184,9 @@ static int mes_v11_0_add_hw_queue(struct amdgpu_mes *mes,
mes_add_queue_pkt.tma_addr = input->tma_addr;
mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
mes_add_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_add_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt));
&mes_add_queue_pkt, sizeof(mes_add_queue_pkt),
offsetof(union MESAPI__ADD_QUEUE, api_status));
}
static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
@ -196,13 +203,9 @@ static int mes_v11_0_remove_hw_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = input->gang_context_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
@ -216,7 +219,7 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.header.opcode = MES_SCH_API_REMOVE_QUEUE;
mes_remove_queue_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset << 2;
mes_remove_queue_pkt.doorbell_offset = input->doorbell_offset;
mes_remove_queue_pkt.gang_context_addr = 0;
mes_remove_queue_pkt.pipe_id = input->pipe_id;
@ -228,19 +231,14 @@ static int mes_v11_0_unmap_legacy_queue(struct amdgpu_mes *mes,
mes_remove_queue_pkt.tf_data =
lower_32_bits(input->trail_fence_data);
} else {
if (input->queue_type == AMDGPU_RING_TYPE_GFX)
mes_remove_queue_pkt.unmap_legacy_gfx_queue = 1;
else
mes_remove_queue_pkt.unmap_kiq_utility_queue = 1;
mes_remove_queue_pkt.unmap_legacy_queue = 1;
mes_remove_queue_pkt.queue_type =
convert_to_mes_queue_type(input->queue_type);
}
mes_remove_queue_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_remove_queue_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt));
&mes_remove_queue_pkt, sizeof(mes_remove_queue_pkt),
offsetof(union MESAPI__REMOVE_QUEUE, api_status));
}
static int mes_v11_0_suspend_gang(struct amdgpu_mes *mes,
@ -265,13 +263,9 @@ static int mes_v11_0_query_sched_status(struct amdgpu_mes *mes)
mes_status_pkt.header.opcode = MES_SCH_API_QUERY_SCHEDULER_STATUS;
mes_status_pkt.header.dwsize = API_FRAME_SIZE_IN_DWORDS;
mes_status_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_status_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_status_pkt, sizeof(mes_status_pkt));
&mes_status_pkt, sizeof(mes_status_pkt),
offsetof(union MESAPI__QUERY_MES_STATUS, api_status));
}
static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
@ -317,13 +311,9 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes,
return -EINVAL;
}
misc_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
misc_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&misc_pkt, sizeof(misc_pkt));
&misc_pkt, sizeof(misc_pkt),
offsetof(union MESAPI__MISC, api_status));
}
static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
@ -358,7 +348,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
for (i = 0; i < AMD_PRIORITY_NUM_LEVELS; i++)
mes_set_hw_res_pkt.aggregated_doorbells[i] =
mes->agreegated_doorbells[i];
mes->aggregated_doorbells[i];
for (i = 0; i < 5; i++) {
mes_set_hw_res_pkt.gc_base[i] = adev->reg_offset[GC_HWIP][0][i];
@ -373,13 +363,63 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
mes_set_hw_res_pkt.use_different_vmid_compute = 1;
mes_set_hw_res_pkt.oversubscription_timer = 50;
mes_set_hw_res_pkt.api_status.api_completion_fence_addr =
mes->ring.fence_drv.gpu_addr;
mes_set_hw_res_pkt.api_status.api_completion_fence_value =
++mes->ring.fence_drv.sync_seq;
return mes_v11_0_submit_pkt_and_poll_completion(mes,
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt));
&mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt),
offsetof(union MESAPI_SET_HW_RESOURCES, api_status));
}
static void mes_v11_0_init_aggregated_doorbell(struct amdgpu_mes *mes)
{
struct amdgpu_device *adev = mes->adev;
uint32_t data;
data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1);
data &= ~(CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL1__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL1__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_LOW] <<
CP_MES_DOORBELL_CONTROL1__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL1__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL1, data);
data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2);
data &= ~(CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL2__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL2__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_NORMAL] <<
CP_MES_DOORBELL_CONTROL2__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL2__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL2, data);
data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3);
data &= ~(CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL3__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL3__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_MEDIUM] <<
CP_MES_DOORBELL_CONTROL3__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL3__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL3, data);
data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4);
data &= ~(CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL4__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL4__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_HIGH] <<
CP_MES_DOORBELL_CONTROL4__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL4__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL4, data);
data = RREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5);
data &= ~(CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET_MASK |
CP_MES_DOORBELL_CONTROL5__DOORBELL_EN_MASK |
CP_MES_DOORBELL_CONTROL5__DOORBELL_HIT_MASK);
data |= mes->aggregated_doorbells[AMDGPU_MES_PRIORITY_LEVEL_REALTIME] <<
CP_MES_DOORBELL_CONTROL5__DOORBELL_OFFSET__SHIFT;
data |= 1 << CP_MES_DOORBELL_CONTROL5__DOORBELL_EN__SHIFT;
WREG32_SOC15(GC, 0, regCP_MES_DOORBELL_CONTROL5, data);
data = 1 << CP_HQD_GFX_CONTROL__DB_UPDATED_MSG_EN__SHIFT;
WREG32_SOC15(GC, 0, regCP_HQD_GFX_CONTROL, data);
}
static const struct amdgpu_mes_funcs mes_v11_0_funcs = {
@ -1181,6 +1221,8 @@ static int mes_v11_0_hw_init(void *handle)
if (r)
goto failure;
mes_v11_0_init_aggregated_doorbell(&adev->mes);
r = mes_v11_0_query_sched_status(&adev->mes);
if (r) {
DRM_ERROR("MES is busy\n");
@ -1204,6 +1246,9 @@ failure:
static int mes_v11_0_hw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
adev->mes.ring.sched.ready = false;
return 0;
}

View file

@ -283,8 +283,16 @@ flr_done:
/* Trigger recovery for world switch failure if no TDR */
if (amdgpu_device_should_recover_gpu(adev)
&& (!amdgpu_device_has_job_running(adev) ||
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover(adev, NULL);
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT)) {
struct amdgpu_reset_context reset_context;
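/* build a minimal reset context: no fixed reset method and no full-reset request */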
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
}
static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,

View file

@ -310,8 +310,16 @@ flr_done:
adev->sdma_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->gfx_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->compute_timeout == MAX_SCHEDULE_TIMEOUT ||
adev->video_timeout == MAX_SCHEDULE_TIMEOUT))
amdgpu_device_gpu_recover(adev, NULL);
adev->video_timeout == MAX_SCHEDULE_TIMEOUT)) {
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
}
static int xgpu_nv_set_mailbox_rcv_irq(struct amdgpu_device *adev,

View file

@ -522,8 +522,16 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work)
}
/* Trigger recovery due to world switch failure */
if (amdgpu_device_should_recover_gpu(adev))
amdgpu_device_gpu_recover(adev, NULL);
if (amdgpu_device_should_recover_gpu(adev)) {
struct amdgpu_reset_context reset_context;
memset(&reset_context, 0, sizeof(reset_context));
reset_context.method = AMD_RESET_METHOD_NONE;
reset_context.reset_req_dev = adev;
clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
amdgpu_device_gpu_recover(adev, NULL, &reset_context);
}
}
static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev,

View file

@ -328,27 +328,6 @@ const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg = {
.ref_and_mask_sdma1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc = {
.ref_and_mask_cp0 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP2_MASK,
.ref_and_mask_cp3 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP3_MASK,
.ref_and_mask_cp4 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP4_MASK,
.ref_and_mask_cp5 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP5_MASK,
.ref_and_mask_cp6 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP6_MASK,
.ref_and_mask_cp7 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP7_MASK,
.ref_and_mask_cp8 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP8_MASK,
.ref_and_mask_cp9 = BIF_BX_PF_GPU_HDP_FLUSH_DONE__CP9_MASK,
.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
.ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
.ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
.ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
.ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK,
.ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK,
.ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK,
};
static void nbio_v2_3_init_registers(struct amdgpu_device *adev)
{
uint32_t def, data;

View file

@ -27,7 +27,6 @@
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg;
extern const struct nbio_hdp_flush_reg nbio_v2_3_hdp_flush_reg_sc;
extern const struct amdgpu_nbio_funcs nbio_v2_3_funcs;
#endif

View file

@ -339,27 +339,6 @@ const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = {
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK,
};
const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald = {
.ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK,
.ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK,
.ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK,
.ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK,
.ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK,
.ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK,
.ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK,
.ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK,
.ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK,
.ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK,
.ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__RSVD_ENG1_MASK,
.ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__RSVD_ENG2_MASK,
.ref_and_mask_sdma2 = GPU_HDP_FLUSH_DONE__RSVD_ENG3_MASK,
.ref_and_mask_sdma3 = GPU_HDP_FLUSH_DONE__RSVD_ENG4_MASK,
.ref_and_mask_sdma4 = GPU_HDP_FLUSH_DONE__RSVD_ENG5_MASK,
.ref_and_mask_sdma5 = GPU_HDP_FLUSH_DONE__RSVD_ENG6_MASK,
.ref_and_mask_sdma6 = GPU_HDP_FLUSH_DONE__RSVD_ENG7_MASK,
.ref_and_mask_sdma7 = GPU_HDP_FLUSH_DONE__RSVD_ENG8_MASK,
};
static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
{
uint32_t baco_cntl;

View file

@ -27,7 +27,6 @@
#include "soc15_common.h"
extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg;
extern const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg_ald;
extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs;
extern struct amdgpu_nbio_ras nbio_v7_4_ras;

View file

@ -259,6 +259,8 @@ enum psp_gfx_fw_type {
GFX_FW_TYPE_SDMA7 = 57, /* SDMA7 MI */
GFX_FW_TYPE_VCN1 = 58, /* VCN1 MI */
GFX_FW_TYPE_CAP = 62, /* CAP_FW */
GFX_FW_TYPE_SE2_TAP_DELAYS = 65, /* SE2 TAP DELAYS NV */
GFX_FW_TYPE_SE3_TAP_DELAYS = 66, /* SE3 TAP DELAYS NV */
GFX_FW_TYPE_REG_LIST = 67, /* REG_LIST MI */
GFX_FW_TYPE_IMU_I = 68, /* IMU Instruction FW SOC21 */
GFX_FW_TYPE_IMU_D = 69, /* IMU Data FW SOC21 */

View file

@ -389,34 +389,67 @@ static uint64_t sdma_v5_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
DRM_DEBUG("Setting write pointer\n");
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
"lower_32_bits(ring->wptr << 2) == 0x%08x "
"upper_32_bits(ring->wptr << 2) == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
AMDGPU_MES_PRIORITY_LEVEL_NORMAL);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
*wptr_saved = ring->wptr << 2;
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index,
ring->wptr << 2);
}
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
ring->me,
lower_32_bits(ring->wptr << 2),
ring->me,
upper_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR),
lower_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
"lower_32_bits(ring->wptr) << 2 == 0x%08x "
"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
DRM_DEBUG("Not using doorbell -- "
"mmSDMA%i_GFX_RB_WPTR == 0x%08x "
"mmSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
ring->me,
lower_32_bits(ring->wptr << 2),
ring->me,
upper_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
ring->me, mmSDMA0_GFX_RB_WPTR),
lower_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v5_0_get_reg_offset(adev,
ring->me, mmSDMA0_GFX_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
}
}
}

View file

@ -57,6 +57,7 @@ static void sdma_v6_0_set_ring_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_buffer_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_vm_pte_funcs(struct amdgpu_device *adev);
static void sdma_v6_0_set_irq_funcs(struct amdgpu_device *adev);
static int sdma_v6_0_start(struct amdgpu_device *adev);
static u32 sdma_v6_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 internal_offset)
{
@ -245,34 +246,68 @@ static uint64_t sdma_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t *wptr_saved;
uint32_t *is_queue_unmap;
uint64_t aggregated_db_index;
uint32_t mqd_size = adev->mqds[AMDGPU_HW_IP_DMA].mqd_size;
DRM_DEBUG("Setting write pointer\n");
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
"lower_32_bits(ring->wptr) << 2 == 0x%08x "
"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
if (ring->is_mes_queue) {
wptr_saved = (uint32_t *)(ring->mqd_ptr + mqd_size);
is_queue_unmap = (uint32_t *)(ring->mqd_ptr + mqd_size +
sizeof(uint32_t));
aggregated_db_index =
amdgpu_mes_get_aggregated_doorbell_index(adev,
ring->hw_prio);
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
*wptr_saved = ring->wptr << 2;
if (*is_queue_unmap) {
WDOORBELL64(aggregated_db_index, ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
if (*is_queue_unmap)
WDOORBELL64(aggregated_db_index,
ring->wptr << 2);
}
} else {
DRM_DEBUG("Not using doorbell -- "
"regSDMA%i_GFX_RB_WPTR == 0x%08x "
"regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
ring->me,
lower_32_bits(ring->wptr << 2),
ring->me,
upper_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR),
lower_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
if (ring->use_doorbell) {
DRM_DEBUG("Using doorbell -- "
"wptr_offs == 0x%08x "
"lower_32_bits(ring->wptr) << 2 == 0x%08x "
"upper_32_bits(ring->wptr) << 2 == 0x%08x\n",
ring->wptr_offs,
lower_32_bits(ring->wptr << 2),
upper_32_bits(ring->wptr << 2));
/* XXX check if swapping is necessary on BE */
atomic64_set((atomic64_t *)ring->wptr_cpu_addr,
ring->wptr << 2);
DRM_DEBUG("calling WDOORBELL64(0x%08x, 0x%016llx)\n",
ring->doorbell_index, ring->wptr << 2);
WDOORBELL64(ring->doorbell_index, ring->wptr << 2);
} else {
DRM_DEBUG("Not using doorbell -- "
"regSDMA%i_GFX_RB_WPTR == 0x%08x "
"regSDMA%i_GFX_RB_WPTR_HI == 0x%08x\n",
ring->me,
lower_32_bits(ring->wptr << 2),
ring->me,
upper_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
ring->me, regSDMA0_QUEUE0_RB_WPTR),
lower_32_bits(ring->wptr << 2));
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev,
ring->me, regSDMA0_QUEUE0_RB_WPTR_HI),
upper_32_bits(ring->wptr << 2));
}
}
}
@ -771,32 +806,54 @@ static int sdma_v6_0_load_microcode(struct amdgpu_device *adev)
static int sdma_v6_0_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
u32 grbm_soft_reset;
u32 tmp;
int i;
sdma_v6_0_gfx_stop(adev);
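/* per instance: freeze the engine, halt/reset the F32 core, clear pending preemption, then pulse its GRBM soft-reset bit */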
for (i = 0; i < adev->sdma.num_instances; i++) {
grbm_soft_reset = REG_SET_FIELD(0,
GRBM_SOFT_RESET, SOFT_RESET_SDMA0,
1);
grbm_soft_reset <<= i;
tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE));
tmp |= SDMA0_FREEZE__FREEZE_MASK;
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_FREEZE), tmp);
tmp = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
tmp |= SDMA0_F32_CNTL__HALT_MASK;
tmp |= SDMA0_F32_CNTL__TH1_RESET_MASK;
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL), tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
tmp |= grbm_soft_reset;
DRM_DEBUG("GRBM_SOFT_RESET=0x%08X\n", tmp);
WREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_QUEUE0_PREEMPT), 0);
udelay(100);
tmp = GRBM_SOFT_RESET__SOFT_RESET_SDMA0_MASK << i;
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
udelay(50);
udelay(100);
tmp &= ~grbm_soft_reset;
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, tmp);
WREG32_SOC15(GC, 0, regGRBM_SOFT_RESET, 0);
tmp = RREG32_SOC15(GC, 0, regGRBM_SOFT_RESET);
udelay(50);
udelay(100);
}
return 0;
return sdma_v6_0_start(adev);
}
static bool sdma_v6_0_check_soft_reset(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct amdgpu_ring *ring;
int i, r;
long tmo = msecs_to_jiffies(1000);
for (i = 0; i < adev->sdma.num_instances; i++) {
ring = &adev->sdma.instance[i].ring;
r = amdgpu_ring_test_ib(ring, tmo);
if (r)
return true;
}
return false;
}
/**
@ -830,7 +887,6 @@ static int sdma_v6_0_start(struct amdgpu_device *adev)
msleep(1000);
}
sdma_v6_0_soft_reset(adev);
/* unhalt the MEs */
sdma_v6_0_enable(adev, true);
/* enable sdma ring preemption */
@ -1526,6 +1582,7 @@ const struct amd_ip_funcs sdma_v6_0_ip_funcs = {
.is_idle = sdma_v6_0_is_idle,
.wait_for_idle = sdma_v6_0_wait_for_idle,
.soft_reset = sdma_v6_0_soft_reset,
.check_soft_reset = sdma_v6_0_check_soft_reset,
.set_clockgating_state = sdma_v6_0_set_clockgating_state,
.set_powergating_state = sdma_v6_0_set_powergating_state,
.get_clockgating_state = sdma_v6_0_get_clockgating_state,

View file

@ -320,6 +320,7 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
switch (adev->ip_versions[MP1_HWIP][0]) {
case IP_VERSION(13, 0, 0):
case IP_VERSION(13, 0, 7):
return AMD_RESET_METHOD_MODE1;
case IP_VERSION(13, 0, 4):
return AMD_RESET_METHOD_MODE2;
@ -417,7 +418,13 @@ static uint32_t soc21_get_rev_id(struct amdgpu_device *adev)
static bool soc21_need_full_reset(struct amdgpu_device *adev)
{
return true;
switch (adev->ip_versions[GC_HWIP][0]) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 2):
return false;
default:
return true;
}
}
static bool soc21_need_reset_on_init(struct amdgpu_device *adev)

View file

@ -0,0 +1,357 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "umc_v8_10.h"
#include "amdgpu_ras.h"
#include "amdgpu_umc.h"
#include "amdgpu.h"
#include "umc/umc_8_10_0_offset.h"
#include "umc/umc_8_10_0_sh_mask.h"
#define UMC_8_NODE_DIST 0x800000
#define UMC_8_INST_DIST 0x4000
struct channelnum_map_colbit {
uint32_t channel_num;
uint32_t col_bit;
};
const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = {
{24, 13},
{20, 13},
{16, 12},
{14, 12},
{12, 12},
{10, 12},
{6, 11},
};
const uint32_t
umc_v8_10_channel_idx_tbl[]
[UMC_V8_10_UMC_INSTANCE_NUM]
[UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
{{16, 18}, {17, 19}},
{{15, 11}, {3, 7}},
{{1, 5}, {13, 9}},
{{23, 21}, {22, 20}},
{{0, 4}, {12, 8}},
{{14, 10}, {2, 6}}
};
static inline uint32_t get_umc_v8_10_reg_offset(struct amdgpu_device *adev,
uint32_t node_inst,
uint32_t umc_inst,
uint32_t ch_inst)
{
return adev->umc.channel_offs * ch_inst + UMC_8_INST_DIST * umc_inst +
UMC_8_NODE_DIST * node_inst;
}
static void umc_v8_10_clear_error_count_per_channel(struct amdgpu_device *adev,
uint32_t umc_reg_offset)
{
uint32_t ecc_err_cnt_addr;
ecc_err_cnt_addr =
SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
/* clear error count */
WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4,
UMC_V8_10_CE_CNT_INIT);
}
static void umc_v8_10_clear_error_count(struct amdgpu_device *adev)
{
uint32_t node_inst = 0;
uint32_t umc_inst = 0;
uint32_t ch_inst = 0;
uint32_t umc_reg_offset = 0;
LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
umc_reg_offset = get_umc_v8_10_reg_offset(adev,
node_inst,
umc_inst,
ch_inst);
umc_v8_10_clear_error_count_per_channel(adev,
umc_reg_offset);
}
}
static void umc_v8_10_query_correctable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
unsigned long *error_count)
{
uint32_t ecc_err_cnt, ecc_err_cnt_addr;
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
/* UMC 8_10 registers */
ecc_err_cnt_addr =
SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
ecc_err_cnt = RREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4);
*error_count +=
(REG_GET_FIELD(ecc_err_cnt, UMCCH0_0_GeccErrCnt, GeccErrCnt) -
UMC_V8_10_CE_CNT_INIT);
/* Check for SRAM correctable error, MCUMC_STATUS is a 64 bit register */
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)
*error_count += 1;
}
static void umc_v8_10_query_uncorrectable_error_count(struct amdgpu_device *adev,
uint32_t umc_reg_offset,
unsigned long *error_count)
{
uint64_t mc_umc_status;
uint32_t mc_umc_status_addr;
mc_umc_status_addr = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
/* Check the MCUMC_STATUS. */
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if ((REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC) == 1))
*error_count += 1;
}
static void umc_v8_10_query_ras_error_count(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
uint32_t node_inst = 0;
uint32_t umc_inst = 0;
uint32_t ch_inst = 0;
uint32_t umc_reg_offset = 0;
LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
umc_reg_offset = get_umc_v8_10_reg_offset(adev,
node_inst,
umc_inst,
ch_inst);
umc_v8_10_query_correctable_error_count(adev,
umc_reg_offset,
&(err_data->ce_count));
umc_v8_10_query_uncorrectable_error_count(adev,
umc_reg_offset,
&(err_data->ue_count));
}
umc_v8_10_clear_error_count(adev);
}
static uint32_t umc_v8_10_get_col_bit(uint32_t channel_num)
{
uint32_t t = 0;
for (t = 0; t < ARRAY_SIZE(umc_v8_10_channelnum_map_colbit_table); t++)
if (channel_num == umc_v8_10_channelnum_map_colbit_table[t].channel_num)
return umc_v8_10_channelnum_map_colbit_table[t].col_bit;
/* Failed to get col_bit. */
return U32_MAX;
}
/*
* Mapping normal address to soc physical address in swizzle mode.
*/
static int umc_v8_10_swizzle_mode_na_to_pa(struct amdgpu_device *adev,
uint32_t channel_idx,
uint64_t na, uint64_t *soc_pa)
{
uint32_t channel_num = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
uint32_t col_bit = umc_v8_10_get_col_bit(channel_num);
uint64_t tmp_addr;
if (col_bit == U32_MAX)
return -1;
tmp_addr = SWIZZLE_MODE_TMP_ADDR(na, channel_num, channel_idx);
*soc_pa = SWIZZLE_MODE_ADDR_HI(tmp_addr, col_bit) |
SWIZZLE_MODE_ADDR_MID(na, col_bit) |
SWIZZLE_MODE_ADDR_LOW(tmp_addr, col_bit) |
SWIZZLE_MODE_ADDR_LSB(na);
return 0;
}
static void umc_v8_10_query_error_address(struct amdgpu_device *adev,
struct ras_err_data *err_data,
uint32_t umc_reg_offset,
uint32_t node_inst,
uint32_t ch_inst,
uint32_t umc_inst)
{
uint64_t mc_umc_status_addr;
uint64_t mc_umc_status, err_addr;
uint32_t channel_index;
mc_umc_status_addr =
SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_STATUST0);
mc_umc_status = RREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4);
if (mc_umc_status == 0)
return;
if (!err_data->err_addr) {
/* clear umc status */
WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
return;
}
channel_index =
adev->umc.channel_idx_tbl[node_inst * adev->umc.umc_inst_num *
adev->umc.channel_inst_num +
umc_inst * adev->umc.channel_inst_num +
ch_inst];
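/* flatten the [node][umc][channel] triple into the 1-D channel_idx_tbl index */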
/* calculate error address if ue/ce error is detected */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1 &&
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrV) == 1 &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1 ||
REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, CECC) == 1)) {
uint32_t addr_lsb;
uint64_t mc_umc_addrt0;
mc_umc_addrt0 = SOC15_REG_OFFSET(UMC, 0, regMCA_UMC_UMC0_MCUMC_ADDRT0);
err_addr = RREG64_PCIE((mc_umc_addrt0 + umc_reg_offset) * 4);
err_addr = REG_GET_FIELD(err_addr, MCA_UMC_UMC0_MCUMC_ADDRT0, ErrorAddr);
/* the lowest addr_lsb bits of the error address should be ignored */
addr_lsb = REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, AddrLsb);
err_addr &= ~((0x1ULL << addr_lsb) - 1);
/* we only save ue error information currently, ce is skipped */
if (REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UECC) == 1) {
uint64_t na_err_addr_base = err_addr & ~(0x3ULL << UMC_V8_10_NA_C5_BIT);
uint64_t na_err_addr, retired_page_addr;
uint32_t col = 0;
int ret = 0;
/* loop over all possibilities of [C6 C5] in the normal address. */
for (col = 0; col < UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM; col++) {
na_err_addr = na_err_addr_base | (col << UMC_V8_10_NA_C5_BIT);
/* Mapping normal error address to retired soc physical address. */
ret = umc_v8_10_swizzle_mode_na_to_pa(adev, channel_index,
na_err_addr, &retired_page_addr);
if (ret) {
dev_err(adev->dev, "Failed to map pa from umc na.\n");
break;
}
dev_info(adev->dev, "Error Address(PA): 0x%llx\n",
retired_page_addr);
amdgpu_umc_fill_error_record(err_data, na_err_addr,
retired_page_addr, channel_index, umc_inst);
}
}
}
/* clear umc status */
WREG64_PCIE((mc_umc_status_addr + umc_reg_offset) * 4, 0x0ULL);
}
static void umc_v8_10_query_ras_error_address(struct amdgpu_device *adev,
void *ras_error_status)
{
struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
uint32_t node_inst = 0;
uint32_t umc_inst = 0;
uint32_t ch_inst = 0;
uint32_t umc_reg_offset = 0;
LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
umc_reg_offset = get_umc_v8_10_reg_offset(adev,
node_inst,
umc_inst,
ch_inst);
umc_v8_10_query_error_address(adev,
err_data,
umc_reg_offset,
node_inst,
ch_inst,
umc_inst);
}
}
static void umc_v8_10_err_cnt_init_per_channel(struct amdgpu_device *adev,
uint32_t umc_reg_offset)
{
uint32_t ecc_err_cnt_sel, ecc_err_cnt_sel_addr;
uint32_t ecc_err_cnt_addr;
ecc_err_cnt_sel_addr =
SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCntSel);
ecc_err_cnt_addr =
SOC15_REG_OFFSET(UMC, 0, regUMCCH0_0_GeccErrCnt);
ecc_err_cnt_sel = RREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4);
/* set ce error interrupt type to APIC based interrupt */
ecc_err_cnt_sel = REG_SET_FIELD(ecc_err_cnt_sel, UMCCH0_0_GeccErrCntSel,
GeccErrInt, 0x1);
WREG32_PCIE((ecc_err_cnt_sel_addr + umc_reg_offset) * 4, ecc_err_cnt_sel);
/* set error count to initial value */
WREG32_PCIE((ecc_err_cnt_addr + umc_reg_offset) * 4, UMC_V8_10_CE_CNT_INIT);
}
static void umc_v8_10_err_cnt_init(struct amdgpu_device *adev)
{
uint32_t node_inst = 0;
uint32_t umc_inst = 0;
uint32_t ch_inst = 0;
uint32_t umc_reg_offset = 0;
LOOP_UMC_EACH_NODE_INST_AND_CH(node_inst, umc_inst, ch_inst) {
umc_reg_offset = get_umc_v8_10_reg_offset(adev,
node_inst,
umc_inst,
ch_inst);
umc_v8_10_err_cnt_init_per_channel(adev, umc_reg_offset);
}
}
const struct amdgpu_ras_block_hw_ops umc_v8_10_ras_hw_ops = {
.query_ras_error_count = umc_v8_10_query_ras_error_count,
.query_ras_error_address = umc_v8_10_query_ras_error_address,
};
struct amdgpu_umc_ras umc_v8_10_ras = {
.ras_block = {
.hw_ops = &umc_v8_10_ras_hw_ops,
},
.err_cnt_init = umc_v8_10_err_cnt_init,
};

View file

@ -0,0 +1,70 @@
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __UMC_V8_10_H__
#define __UMC_V8_10_H__
#include "soc15_common.h"
#include "amdgpu.h"
/* number of umc channel instances with memory map register access */
#define UMC_V8_10_CHANNEL_INSTANCE_NUM 2
/* number of umc instances with memory map register access */
#define UMC_V8_10_UMC_INSTANCE_NUM 2
/* Total channel instances for all umc nodes */
#define UMC_V8_10_TOTAL_CHANNEL_NUM(adev) \
(UMC_V8_10_CHANNEL_INSTANCE_NUM * UMC_V8_10_UMC_INSTANCE_NUM * (adev)->umc.node_inst_num)
/* UMC register per channel offset */
#define UMC_V8_10_PER_CHANNEL_OFFSET 0x400
/* EccErrCnt max value */
#define UMC_V8_10_CE_CNT_MAX 0xffff
/* umc ce interrupt threshold */
#define UUMC_V8_10_CE_INT_THRESHOLD 0xffff
/* umc ce count initial value */
#define UMC_V8_10_CE_CNT_INIT (UMC_V8_10_CE_CNT_MAX - UUMC_V8_10_CE_INT_THRESHOLD)
#define UMC_V8_10_NA_COL_2BITS_POWER_OF_2_NUM 4
/* The C5 bit in NA address */
#define UMC_V8_10_NA_C5_BIT 14
/* Map to swizzle mode address */
#define SWIZZLE_MODE_TMP_ADDR(na, ch_num, ch_idx) \
((((na) >> 10) * (ch_num) + (ch_idx)) << 10)
#define SWIZZLE_MODE_ADDR_HI(addr, col_bit) \
(((addr) >> ((col_bit) + 2)) << ((col_bit) + 2))
#define SWIZZLE_MODE_ADDR_MID(na, col_bit) ((((na) >> 8) & 0x3) << (col_bit))
#define SWIZZLE_MODE_ADDR_LOW(addr, col_bit) \
((((addr) >> 10) & ((0x1ULL << (col_bit - 8)) - 1)) << 8)
#define SWIZZLE_MODE_ADDR_LSB(na) ((na) & 0xFF)
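/* the four address pieces above are OR'ed back together in
 * umc_v8_10_swizzle_mode_na_to_pa() to rebuild the soc physical address
 */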
extern struct amdgpu_umc_ras umc_v8_10_ras;
extern const uint32_t
umc_v8_10_channel_idx_tbl[]
[UMC_V8_10_UMC_INSTANCE_NUM]
[UMC_V8_10_CHANNEL_INSTANCE_NUM];
#endif

View file

@ -25,6 +25,7 @@
#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "amdgpu_cs.h"
#include "soc15.h"
#include "soc15d.h"
#include "soc15_hw_ip.h"
@ -44,6 +45,9 @@
#define VCN_VID_SOC_ADDRESS_2_0 0x1fb00
#define VCN1_VID_SOC_ADDRESS_3_0 0x48300
#define RDECODE_MSG_CREATE 0x00000000
#define RDECODE_MESSAGE_CREATE 0x00000001
static int amdgpu_ih_clientid_vcns[] = {
SOC15_IH_CLIENTID_VCN,
SOC15_IH_CLIENTID_VCN1
@ -1323,6 +1327,132 @@ static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring)
}
}
static int vcn_v4_0_limit_sched(struct amdgpu_cs_parser *p)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
if (atomic_read(&p->entity->fence_seq))
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
[AMDGPU_RING_PRIO_0].sched;
drm_sched_entity_modify_sched(p->entity, scheds, 1);
return 0;
}
static int vcn_v4_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
uint32_t *msg, num_buffers;
struct amdgpu_bo *bo;
uint64_t start, end;
unsigned int i;
void *ptr;
int r;
addr &= AMDGPU_GMC_HOLE_MASK;
r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
if (r) {
DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
return r;
}
start = map->start * AMDGPU_GPU_PAGE_SIZE;
end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
if (addr & 0x7) {
DRM_ERROR("VCN messages must be 8 byte aligned!\n");
return -EINVAL;
}
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r) {
DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
return r;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
return r;
}
msg = ptr + addr - start;
/* Check length */
if (msg[1] > end - addr) {
r = -EINVAL;
goto out;
}
if (msg[3] != RDECODE_MSG_CREATE)
goto out;
num_buffers = msg[2];
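/* buffer descriptors are four dwords each, starting at msg[6] */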
for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
uint32_t offset, size, *create;
if (msg[0] != RDECODE_MESSAGE_CREATE)
continue;
offset = msg[1];
size = msg[2];
if (offset + size > end) {
r = -EINVAL;
goto out;
}
create = ptr + addr + offset - start;
/* H.264, HEVC and VP9 can run on any instance */
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
r = vcn_v4_0_limit_sched(p);
if (r)
goto out;
}
out:
amdgpu_bo_kunmap(bo);
return r;
}
#define RADEON_VCN_ENGINE_TYPE_DECODE (0x00000003)
static int vcn_v4_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
uint32_t val;
int r = 0;
/* The first instance can decode anything */
if (!ring->me)
return r;
/* unified queue ib header has 8 double words. */
if (ib->length_dw < 8)
return r;
val = amdgpu_ib_get_value(ib, 6); //RADEON_VCN_ENGINE_TYPE
if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib->ptr[10];
if (decode_buffer->valid_buf_flag & 0x1)
r = vcn_v4_0_dec_msg(p, ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
decode_buffer->msg_buffer_address_lo);
}
return r;
}
static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.type = AMDGPU_RING_TYPE_VCN_ENC,
.align_mask = 0x3f,
@ -1331,6 +1461,7 @@ static const struct amdgpu_ring_funcs vcn_v4_0_unified_ring_vm_funcs = {
.get_rptr = vcn_v4_0_unified_ring_get_rptr,
.get_wptr = vcn_v4_0_unified_ring_get_wptr,
.set_wptr = vcn_v4_0_unified_ring_set_wptr,
.patch_cs_in_place = vcn_v4_0_ring_patch_cs_in_place,
.emit_frame_size =
SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +

View file

@ -184,6 +184,8 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
/* Navi2x+, Navi1x+ */
if (gc_version == IP_VERSION(10, 3, 6))
kfd->device_info.no_atomic_fw_version = 14;
else if (gc_version == IP_VERSION(10, 3, 7))
kfd->device_info.no_atomic_fw_version = 3;
else if (gc_version >= IP_VERSION(10, 3, 0))
kfd->device_info.no_atomic_fw_version = 92;
else if (gc_version >= IP_VERSION(10, 1, 1))

View file

@ -1674,14 +1674,13 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
if (q->properties.is_active) {
increment_queue_count(dqm, qpd, q);
if (!dqm->dev->shared_resources.enable_mes) {
if (!dqm->dev->shared_resources.enable_mes)
retval = execute_queues_cpsch(dqm,
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
} else {
KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
else
retval = add_queue_mes(dqm, q, qpd);
if (retval)
goto cleanup_queue;
}
if (retval)
goto cleanup_queue;
}
/*

View file

@@ -1115,6 +1115,15 @@ static void kfd_process_wq_release(struct work_struct *work)
struct kfd_process *p = container_of(work, struct kfd_process,
release_work);
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
/* Signal the eviction fence after user mode queues are
* destroyed. This allows any BOs to be freed without
* triggering pointless evictions or waiting for fences.
*/
dma_fence_signal(p->ef);
kfd_process_remove_sysfs(p);
kfd_iommu_unbind_process(p);
@@ -1179,20 +1188,8 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
cancel_delayed_work_sync(&p->eviction_work);
cancel_delayed_work_sync(&p->restore_work);
mutex_lock(&p->mutex);
kfd_process_dequeue_from_all_devices(p);
pqm_uninit(&p->pqm);
/* Indicate to other users that MM is no longer valid */
p->mm = NULL;
/* Signal the eviction fence after user mode queues are
* destroyed. This allows any BOs to be freed without
* triggering pointless evictions or waiting for fences.
*/
dma_fence_signal(p->ef);
mutex_unlock(&p->mutex);
mmu_notifier_put(&p->mmu_notifier);
}
@@ -1405,6 +1402,11 @@ static struct kfd_process *create_process(const struct task_struct *thread)
hash_add_rcu(kfd_processes_table, &process->kfd_processes,
(uintptr_t)process->mm);
/* Avoid free_notifier to start kfd_process_wq_release if
* mmu_notifier_get failed because of pending signal.
*/
kref_get(&process->ref);
/* MMU notifier registration must be the last call that can fail
* because after this point we cannot unwind the process creation.
* After this point, mmu_notifier_put will trigger the cleanup by
@@ -1417,6 +1419,7 @@ static struct kfd_process *create_process(const struct task_struct *thread)
}
BUG_ON(mn != &process->mmu_notifier);
kfd_unref_process(process);
get_task_struct(process->lead_thread);
return process;

View file

@@ -1775,10 +1775,15 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
pr_debug("invalidate svms 0x%p prange [0x%lx 0x%lx] [0x%lx 0x%lx]\n",
svms, prange->start, prange->last, start, last);
if (!p->xnack_enabled) {
if (!p->xnack_enabled ||
(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) {
int evicted_ranges;
bool mapped = prange->mapped_to_gpu;
list_for_each_entry(pchild, &prange->child_list, child_list) {
if (!pchild->mapped_to_gpu)
continue;
mapped = true;
mutex_lock_nested(&pchild->lock, 1);
if (pchild->start <= last && pchild->last >= start) {
pr_debug("increment pchild invalid [0x%lx 0x%lx]\n",
@@ -1788,6 +1793,9 @@ svm_range_evict(struct svm_range *prange, struct mm_struct *mm,
mutex_unlock(&pchild->lock);
}
if (!mapped)
return r;
if (prange->start <= last && prange->last >= start)
atomic_inc(&prange->invalid);
@@ -3343,7 +3351,9 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
if (r)
goto out_unlock_range;
if (migrated && !p->xnack_enabled) {
if (migrated && (!p->xnack_enabled ||
(prange->flags & KFD_IOCTL_SVM_FLAG_GPU_ALWAYS_MAPPED)) &&
prange->mapped_to_gpu) {
pr_debug("restore_work will update mappings of GPUs\n");
mutex_unlock(&prange->migrate_mutex);
continue;

View file

@@ -6,7 +6,7 @@ config DRM_AMD_DC
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
select DRM_AMD_DC_DCN if (X86 || PPC64)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and

View file

@@ -25,7 +25,13 @@
AMDGPUDM = amdgpu_dm.o amdgpu_dm_irq.o amdgpu_dm_mst_types.o amdgpu_dm_color.o
AMDGPUDM = \
amdgpu_dm.o \
amdgpu_dm_plane.o \
amdgpu_dm_crtc.o \
amdgpu_dm_irq.o \
amdgpu_dm_mst_types.o \
amdgpu_dm_color.o
ifdef CONFIG_DRM_AMD_DC_DCN
AMDGPUDM += dc_fpu.o

File diff suppressed because it is too large

View file

@@ -547,6 +547,14 @@ struct amdgpu_display_manager {
* last successfully applied backlight values.
*/
u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
/**
* @aux_hpd_discon_quirk:
*
* Quirk for an HPD disconnect while an AUX transaction is ongoing.
* Occurs on certain Intel platforms.
*/
bool aux_hpd_discon_quirk;
};
enum dsc_clock_force_state {
@@ -563,6 +571,14 @@ struct dsc_preferred_settings {
bool dsc_force_disable_passthrough;
};
enum mst_progress_status {
MST_STATUS_DEFAULT = 0,
MST_PROBE = BIT(0),
MST_REMOTE_EDID = BIT(1),
MST_ALLOCATE_NEW_PAYLOAD = BIT(2),
MST_CLEAR_ALLOCATED_PAYLOAD = BIT(3),
};
struct amdgpu_dm_connector {
struct drm_connector base;
@@ -615,8 +631,20 @@ struct amdgpu_dm_connector {
struct drm_display_mode freesync_vid_base;
int psr_skip_count;
/* Record progress status of MST */
uint8_t mst_status;
};
static inline void amdgpu_dm_set_mst_status(uint8_t *status,
uint8_t flags, bool set)
{
if (set)
*status |= flags;
else
*status &= ~flags;
}
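The inline helper above is plain bitmask bookkeeping over the MST progress flags. A minimal standalone sketch of the same set/clear pattern (the flag values are hypothetical stand-ins, not the driver definitions)::

    #include <stdint.h>
    #include <stdio.h>

    #define MST_PROBE       (1u << 0) /* hypothetical stand-in for BIT(0) */
    #define MST_REMOTE_EDID (1u << 1) /* hypothetical stand-in for BIT(1) */

    static void set_status(uint8_t *status, uint8_t flags, int set)
    {
            if (set)
                    *status |= flags;  /* mark the given stages done */
            else
                    *status &= ~flags; /* mark the given stages not done */
    }

    int main(void)
    {
            uint8_t status = 0;

            set_status(&status, MST_PROBE | MST_REMOTE_EDID, 1);
            set_status(&status, MST_PROBE, 0);
            printf("0x%02x\n", status); /* prints 0x02 */
            return 0;
    }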
#define to_amdgpu_dm_connector(x) container_of(x, struct amdgpu_dm_connector, base)
extern const struct amdgpu_ip_block_version dm_ip_block;

View file

@@ -0,0 +1,464 @@
// SPDX-License-Identifier: MIT
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include <drm/drm_vblank.h>
#include <drm/drm_atomic_helper.h>
#include "dc.h"
#include "amdgpu.h"
#include "amdgpu_dm_psr.h"
#include "amdgpu_dm_crtc.h"
#include "amdgpu_dm_plane.h"
#include "amdgpu_dm_trace.h"
#include "amdgpu_dm_debugfs.h"
void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
{
struct drm_crtc *crtc = &acrtc->base;
struct drm_device *dev = crtc->dev;
unsigned long flags;
drm_crtc_handle_vblank(crtc);
spin_lock_irqsave(&dev->event_lock, flags);
/* Send completion event for cursor-only commits */
if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
drm_crtc_send_vblank_event(crtc, acrtc->event);
drm_crtc_vblank_put(crtc);
acrtc->event = NULL;
}
spin_unlock_irqrestore(&dev->event_lock, flags);
}
bool modeset_required(struct drm_crtc_state *crtc_state,
struct dc_stream_state *new_stream,
struct dc_stream_state *old_stream)
{
return crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state);
}
bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc)
{
return acrtc->dm_irq_params.freesync_config.state ==
VRR_STATE_ACTIVE_VARIABLE ||
acrtc->dm_irq_params.freesync_config.state ==
VRR_STATE_ACTIVE_FIXED;
}
int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable)
{
enum dc_irq_source irq_source;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = drm_to_adev(crtc->dev);
int rc;
irq_source = IRQ_TYPE_VUPDATE + acrtc->otg_inst;
rc = dc_interrupt_set(adev->dm.dc, irq_source, enable) ? 0 : -EBUSY;
DRM_DEBUG_VBL("crtc %d - vupdate irq %sabling: r=%d\n",
acrtc->crtc_id, enable ? "en" : "dis", rc);
return rc;
}
bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state)
{
return dm_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE ||
dm_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED;
}
static void vblank_control_worker(struct work_struct *work)
{
struct vblank_control_work *vblank_work =
container_of(work, struct vblank_control_work, work);
struct amdgpu_display_manager *dm = vblank_work->dm;
mutex_lock(&dm->dc_lock);
if (vblank_work->enable)
dm->active_vblank_irq_count++;
else if (dm->active_vblank_irq_count)
dm->active_vblank_irq_count--;
dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0);
DRM_DEBUG_KMS("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0);
/*
* Control PSR based on vblank requirements from OS
*
* If panel supports PSR SU, there's no need to disable PSR when OS is
* submitting fast atomic commits (we infer this by whether the OS
* requests vblank events). Fast atomic commits will simply trigger a
* full-frame-update (FFU); a specific case of selective-update (SU)
* where the SU region is the full hactive*vactive region. See
* fill_dc_dirty_rects().
*/
if (vblank_work->stream && vblank_work->stream->link) {
if (vblank_work->enable) {
if (vblank_work->stream->link->psr_settings.psr_version < DC_PSR_VERSION_SU_1 &&
vblank_work->stream->link->psr_settings.psr_allow_active)
amdgpu_dm_psr_disable(vblank_work->stream);
} else if (vblank_work->stream->link->psr_settings.psr_feature_enabled &&
!vblank_work->stream->link->psr_settings.psr_allow_active &&
vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
amdgpu_dm_psr_enable(vblank_work->stream);
}
}
mutex_unlock(&dm->dc_lock);
dc_stream_release(vblank_work->stream);
kfree(vblank_work);
}
static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
{
enum dc_irq_source irq_source;
struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
struct amdgpu_device *adev = drm_to_adev(crtc->dev);
struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
struct amdgpu_display_manager *dm = &adev->dm;
struct vblank_control_work *work;
int rc = 0;
if (enable) {
/* vblank irq on -> Only need vupdate irq in vrr mode */
if (amdgpu_dm_vrr_active(acrtc_state))
rc = dm_set_vupdate_irq(crtc, true);
} else {
/* vblank irq off -> vupdate irq off */
rc = dm_set_vupdate_irq(crtc, false);
}
if (rc)
return rc;
irq_source = IRQ_TYPE_VBLANK + acrtc->otg_inst;
if (!dc_interrupt_set(adev->dm.dc, irq_source, enable))
return -EBUSY;
if (amdgpu_in_reset(adev))
return 0;
if (dm->vblank_control_workqueue) {
work = kzalloc(sizeof(*work), GFP_ATOMIC);
if (!work)
return -ENOMEM;
INIT_WORK(&work->work, vblank_control_worker);
work->dm = dm;
work->acrtc = acrtc;
work->enable = enable;
if (acrtc_state->stream) {
dc_stream_retain(acrtc_state->stream);
work->stream = acrtc_state->stream;
}
queue_work(dm->vblank_control_workqueue, &work->work);
}
return 0;
}
int dm_enable_vblank(struct drm_crtc *crtc)
{
return dm_set_vblank(crtc, true);
}
void dm_disable_vblank(struct drm_crtc *crtc)
{
dm_set_vblank(crtc, false);
}
static void dm_crtc_destroy_state(struct drm_crtc *crtc,
struct drm_crtc_state *state)
{
struct dm_crtc_state *cur = to_dm_crtc_state(state);
/* TODO Destroy dc_stream objects once the stream object is flattened */
if (cur->stream)
dc_stream_release(cur->stream);
__drm_atomic_helper_crtc_destroy_state(state);
kfree(state);
}
static struct drm_crtc_state *dm_crtc_duplicate_state(struct drm_crtc *crtc)
{
struct dm_crtc_state *state, *cur;
cur = to_dm_crtc_state(crtc->state);
if (WARN_ON(!crtc->state))
return NULL;
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (!state)
return NULL;
__drm_atomic_helper_crtc_duplicate_state(crtc, &state->base);
if (cur->stream) {
state->stream = cur->stream;
dc_stream_retain(state->stream);
}
state->active_planes = cur->active_planes;
state->vrr_infopacket = cur->vrr_infopacket;
state->abm_level = cur->abm_level;
state->vrr_supported = cur->vrr_supported;
state->freesync_config = cur->freesync_config;
state->cm_has_degamma = cur->cm_has_degamma;
state->cm_is_degamma_srgb = cur->cm_is_degamma_srgb;
state->crc_skip_count = cur->crc_skip_count;
state->mpo_requested = cur->mpo_requested;
/* TODO Duplicate dc_stream once the stream object is flattened */
return &state->base;
}
static void amdgpu_dm_crtc_destroy(struct drm_crtc *crtc)
{
drm_crtc_cleanup(crtc);
kfree(crtc);
}
static void dm_crtc_reset_state(struct drm_crtc *crtc)
{
struct dm_crtc_state *state;
if (crtc->state)
dm_crtc_destroy_state(crtc, crtc->state);
state = kzalloc(sizeof(*state), GFP_KERNEL);
if (WARN_ON(!state))
return;
__drm_atomic_helper_crtc_reset(crtc, &state->base);
}
#ifdef CONFIG_DEBUG_FS
static int amdgpu_dm_crtc_late_register(struct drm_crtc *crtc)
{
crtc_debugfs_init(crtc);
return 0;
}
#endif
/* Implemented only the options currently available for the driver */
static const struct drm_crtc_funcs amdgpu_dm_crtc_funcs = {
.reset = dm_crtc_reset_state,
.destroy = amdgpu_dm_crtc_destroy,
.set_config = drm_atomic_helper_set_config,
.page_flip = drm_atomic_helper_page_flip,
.atomic_duplicate_state = dm_crtc_duplicate_state,
.atomic_destroy_state = dm_crtc_destroy_state,
.set_crc_source = amdgpu_dm_crtc_set_crc_source,
.verify_crc_source = amdgpu_dm_crtc_verify_crc_source,
.get_crc_sources = amdgpu_dm_crtc_get_crc_sources,
.get_vblank_counter = amdgpu_get_vblank_counter_kms,
.enable_vblank = dm_enable_vblank,
.disable_vblank = dm_disable_vblank,
.get_vblank_timestamp = drm_crtc_vblank_helper_get_vblank_timestamp,
#if defined(CONFIG_DEBUG_FS)
.late_register = amdgpu_dm_crtc_late_register,
#endif
};
static void dm_crtc_helper_disable(struct drm_crtc *crtc)
{
}
static int count_crtc_active_planes(struct drm_crtc_state *new_crtc_state)
{
struct drm_atomic_state *state = new_crtc_state->state;
struct drm_plane *plane;
int num_active = 0;
drm_for_each_plane_mask(plane, state->dev, new_crtc_state->plane_mask) {
struct drm_plane_state *new_plane_state;
/* Cursor planes are "fake". */
if (plane->type == DRM_PLANE_TYPE_CURSOR)
continue;
new_plane_state = drm_atomic_get_new_plane_state(state, plane);
if (!new_plane_state) {
/*
* The plane is enabled on the CRTC and hasn't changed
* state. This means that it previously passed
* validation and is therefore enabled.
*/
num_active += 1;
continue;
}
/* We need a framebuffer to be considered enabled. */
num_active += (new_plane_state->fb != NULL);
}
return num_active;
}
static void dm_update_crtc_active_planes(struct drm_crtc *crtc,
struct drm_crtc_state *new_crtc_state)
{
struct dm_crtc_state *dm_new_crtc_state =
to_dm_crtc_state(new_crtc_state);
dm_new_crtc_state->active_planes = 0;
if (!dm_new_crtc_state->stream)
return;
dm_new_crtc_state->active_planes =
count_crtc_active_planes(new_crtc_state);
}
static bool dm_crtc_helper_mode_fixup(struct drm_crtc *crtc,
const struct drm_display_mode *mode,
struct drm_display_mode *adjusted_mode)
{
return true;
}
static int dm_crtc_helper_atomic_check(struct drm_crtc *crtc,
struct drm_atomic_state *state)
{
struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state,
crtc);
struct amdgpu_device *adev = drm_to_adev(crtc->dev);
struct dc *dc = adev->dm.dc;
struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
int ret = -EINVAL;
trace_amdgpu_dm_crtc_atomic_check(crtc_state);
dm_update_crtc_active_planes(crtc, crtc_state);
if (WARN_ON(unlikely(!dm_crtc_state->stream &&
modeset_required(crtc_state, NULL, dm_crtc_state->stream)))) {
return ret;
}
/*
* We require the primary plane to be enabled whenever the CRTC is, otherwise
* drm_mode_cursor_universal may end up trying to enable the cursor plane while all other
* planes are disabled, which is not supported by the hardware. And there is legacy
* userspace which stops using the HW cursor altogether in response to the resulting EINVAL.
*/
if (crtc_state->enable &&
!(crtc_state->plane_mask & drm_plane_mask(crtc->primary))) {
DRM_DEBUG_ATOMIC("Can't enable a CRTC without enabling the primary plane\n");
return -EINVAL;
}
/* In some use cases, like reset, no stream is attached */
if (!dm_crtc_state->stream)
return 0;
if (dc_validate_stream(dc, dm_crtc_state->stream) == DC_OK)
return 0;
DRM_DEBUG_ATOMIC("Failed DC stream validation\n");
return ret;
}
static const struct drm_crtc_helper_funcs amdgpu_dm_crtc_helper_funcs = {
.disable = dm_crtc_helper_disable,
.atomic_check = dm_crtc_helper_atomic_check,
.mode_fixup = dm_crtc_helper_mode_fixup,
.get_scanout_position = amdgpu_crtc_get_scanout_position,
};
int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
struct drm_plane *plane,
uint32_t crtc_index)
{
struct amdgpu_crtc *acrtc = NULL;
struct drm_plane *cursor_plane;
int res = -ENOMEM;
cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL);
if (!cursor_plane)
goto fail;
cursor_plane->type = DRM_PLANE_TYPE_CURSOR;
res = amdgpu_dm_plane_init(dm, cursor_plane, 0, NULL);
acrtc = kzalloc(sizeof(struct amdgpu_crtc), GFP_KERNEL);
if (!acrtc)
goto fail;
res = drm_crtc_init_with_planes(
dm->ddev,
&acrtc->base,
plane,
cursor_plane,
&amdgpu_dm_crtc_funcs, NULL);
if (res)
goto fail;
drm_crtc_helper_add(&acrtc->base, &amdgpu_dm_crtc_helper_funcs);
/* Create (reset) the plane state */
if (acrtc->base.funcs->reset)
acrtc->base.funcs->reset(&acrtc->base);
acrtc->max_cursor_width = dm->adev->dm.dc->caps.max_cursor_size;
acrtc->max_cursor_height = dm->adev->dm.dc->caps.max_cursor_size;
acrtc->crtc_id = crtc_index;
acrtc->base.enabled = false;
acrtc->otg_inst = -1;
dm->adev->mode_info.crtcs[crtc_index] = acrtc;
drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES,
true, MAX_COLOR_LUT_ENTRIES);
drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
return 0;
fail:
kfree(acrtc);
kfree(cursor_plane);
return res;
}

View file

@@ -0,0 +1,51 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef __AMDGPU_DM_CRTC_H__
#define __AMDGPU_DM_CRTC_H__
void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc);
bool modeset_required(struct drm_crtc_state *crtc_state,
struct dc_stream_state *new_stream,
struct dc_stream_state *old_stream);
int dm_set_vupdate_irq(struct drm_crtc *crtc, bool enable);
bool amdgpu_dm_vrr_active_irq(struct amdgpu_crtc *acrtc);
bool amdgpu_dm_vrr_active(struct dm_crtc_state *dm_state);
int dm_enable_vblank(struct drm_crtc *crtc);
void dm_disable_vblank(struct drm_crtc *crtc);
int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
struct drm_plane *plane,
uint32_t link_index);
#endif

View file

@@ -50,6 +50,13 @@ struct dmub_debugfs_trace_entry {
uint32_t param1;
};
static const char *const mst_progress_status[] = {
"probe",
"remote_edid",
"allocate_new_payload",
"clear_allocated_payload",
};
/* parse_write_buffer_into_params - Helper function to parse debugfs write buffer into an array
*
* Function takes in attributes passed to debugfs write entry
@@ -1226,12 +1233,14 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
struct drm_connector *connector = &aconnector->base;
struct dc_link *link = NULL;
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = drm_to_adev(dev);
enum dc_connection_type new_connection_type = dc_connection_none;
char *wr_buf = NULL;
uint32_t wr_buf_size = 42;
int max_param_num = 1;
long param[1] = {0};
uint8_t param_nums = 0;
bool ret = false;
if (!aconnector || !aconnector->dc_link)
return -EINVAL;
@@ -1254,20 +1263,32 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
return -EINVAL;
}
kfree(wr_buf);
if (param_nums <= 0) {
DRM_DEBUG_DRIVER("user data not be read\n");
kfree(wr_buf);
return -EINVAL;
}
mutex_lock(&aconnector->hpd_lock);
/* Not supported for MST end devices */
if (aconnector->mst_port) {
mutex_unlock(&aconnector->hpd_lock);
return -EINVAL;
}
if (param[0] == 1) {
mutex_lock(&aconnector->hpd_lock);
if (!dc_link_detect_sink(aconnector->dc_link, &new_connection_type) &&
new_connection_type != dc_connection_none)
goto unlock;
if (!dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD))
mutex_lock(&adev->dm.dc_lock);
ret = dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
mutex_unlock(&adev->dm.dc_lock);
if (!ret)
goto unlock;
amdgpu_dm_update_connector_after_detect(aconnector);
@@ -1294,6 +1315,10 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
amdgpu_dm_update_connector_after_detect(aconnector);
/* If the aconnector is the root node in mst topology */
if (aconnector->mst_mgr.mst_state == true)
reset_cur_dp_mst_topology(link);
drm_modeset_lock_all(dev);
dm_restore_drm_connector_state(dev, connector);
drm_modeset_unlock_all(dev);
@@ -1304,7 +1329,6 @@ static ssize_t trigger_hotplug(struct file *f, const char __user *buf,
unlock:
mutex_unlock(&aconnector->hpd_lock);
kfree(wr_buf);
return size;
}
@@ -2523,6 +2547,92 @@ static int target_backlight_show(struct seq_file *m, void *unused)
return 0;
}
/*
* function description: Determine whether the connector is an MST connector
*
* This function helps to determine whether a connector is an MST connector.
* - "root" stands for the root connector of the topology
* - "branch" stands for a branch device of the topology
* - "end" stands for a leaf node connector of the topology
* - "no" stands for a connector that is not part of an MST topology
* Access it with the following command:
*
* cat /sys/kernel/debug/dri/0/DP-X/is_mst_connector
*
*/
static int dp_is_mst_connector_show(struct seq_file *m, void *unused)
{
struct drm_connector *connector = m->private;
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct drm_dp_mst_topology_mgr *mgr = NULL;
struct drm_dp_mst_port *port = NULL;
char *role = NULL;
mutex_lock(&aconnector->hpd_lock);
if (aconnector->mst_mgr.mst_state) {
role = "root";
} else if (aconnector->mst_port &&
aconnector->mst_port->mst_mgr.mst_state) {
role = "end";
mgr = &aconnector->mst_port->mst_mgr;
port = aconnector->port;
drm_modeset_lock(&mgr->base.lock, NULL);
if (port->pdt == DP_PEER_DEVICE_MST_BRANCHING &&
port->mcs)
role = "branch";
drm_modeset_unlock(&mgr->base.lock);
} else {
role = "no";
}
seq_printf(m, "%s\n", role);
mutex_unlock(&aconnector->hpd_lock);
return 0;
}
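From userspace the debugfs file is plain text, so it can be read like any other file. A small sketch in C (the connector name DP-1 below is an assumed example; the actual name varies per system)::

    #include <stdio.h>

    int main(void)
    {
            /* Assumed example path; substitute the real connector name. */
            FILE *f = fopen("/sys/kernel/debug/dri/0/DP-1/is_mst_connector", "r");
            char role[16];

            if (!f)
                    return 1;
            if (fgets(role, sizeof(role), f))
                    printf("MST role: %s", role); /* root, branch, end or no */
            fclose(f);
            return 0;
    }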
/*
* function description: Read out the MST progress status
*
* This function helps to determine the MST progress status of
* an MST connector.
*
* Access it with the following command:
*
* cat /sys/kernel/debug/dri/0/DP-X/mst_progress_status
*
*/
static int dp_mst_progress_status_show(struct seq_file *m, void *unused)
{
struct drm_connector *connector = m->private;
struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
struct amdgpu_device *adev = drm_to_adev(connector->dev);
int i;
mutex_lock(&aconnector->hpd_lock);
mutex_lock(&adev->dm.dc_lock);
if (aconnector->mst_status == MST_STATUS_DEFAULT) {
seq_puts(m, "disabled\n");
} else {
for (i = 0; i < ARRAY_SIZE(mst_progress_status); i++)
seq_printf(m, "%s:%s\n",
mst_progress_status[i],
aconnector->mst_status & BIT(i) ? "done" : "not_done");
}
mutex_unlock(&adev->dm.dc_lock);
mutex_unlock(&aconnector->hpd_lock);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@@ -2532,6 +2642,8 @@ DEFINE_SHOW_ATTRIBUTE(hdcp_sink_capability);
#endif
DEFINE_SHOW_ATTRIBUTE(internal_display);
DEFINE_SHOW_ATTRIBUTE(psr_capability);
DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
.owner = THIS_MODULE,
@@ -2675,6 +2787,8 @@ static const struct {
{"dp_dsc_fec_support", &dp_dsc_fec_support_fops},
{"max_bpc", &dp_max_bpc_debugfs_fops},
{"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops},
{"is_mst_connector", &dp_is_mst_connector_fops},
{"mst_progress_status", &dp_mst_progress_status_fops}
};
#ifdef CONFIG_DRM_AMD_DC_HDCP
@@ -3290,7 +3404,10 @@ static int trigger_hpd_mst_set(void *data, u64 val)
aconnector = to_amdgpu_dm_connector(connector);
if (aconnector->dc_link->type == dc_connection_mst_branch &&
aconnector->mst_mgr.aux) {
mutex_lock(&adev->dm.dc_lock);
dc_link_detect(aconnector->dc_link, DETECT_REASON_HPD);
mutex_unlock(&adev->dm.dc_lock);
drm_dp_mst_topology_mgr_set_mst(&aconnector->mst_mgr, true);
}
}

View file

@@ -302,7 +302,7 @@ static void event_property_update(struct work_struct *work)
mutex_lock(&hdcp_work->mutex);
if (aconnector->base.state->commit) {
if (aconnector->base.state && aconnector->base.state->commit) {
ret = wait_for_completion_interruptible_timeout(&aconnector->base.state->commit->hw_done, 10 * HZ);
if (ret == 0) {
@@ -311,18 +311,26 @@ static void event_property_update(struct work_struct *work)
}
}
if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) {
if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE0 &&
hdcp_work->encryption_status <= MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON)
drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED);
else if (aconnector->base.state->hdcp_content_type == DRM_MODE_HDCP_CONTENT_TYPE1 &&
hdcp_work->encryption_status == MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON)
drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_ENABLED);
} else {
drm_hdcp_update_content_protection(&aconnector->base, DRM_MODE_CONTENT_PROTECTION_DESIRED);
if (aconnector->base.state) {
if (hdcp_work->encryption_status != MOD_HDCP_ENCRYPTION_STATUS_HDCP_OFF) {
if (aconnector->base.state->hdcp_content_type ==
DRM_MODE_HDCP_CONTENT_TYPE0 &&
hdcp_work->encryption_status <=
MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE0_ON)
drm_hdcp_update_content_protection(&aconnector->base,
DRM_MODE_CONTENT_PROTECTION_ENABLED);
else if (aconnector->base.state->hdcp_content_type ==
DRM_MODE_HDCP_CONTENT_TYPE1 &&
hdcp_work->encryption_status ==
MOD_HDCP_ENCRYPTION_STATUS_HDCP2_TYPE1_ON)
drm_hdcp_update_content_protection(&aconnector->base,
DRM_MODE_CONTENT_PROTECTION_ENABLED);
} else {
drm_hdcp_update_content_protection(&aconnector->base,
DRM_MODE_CONTENT_PROTECTION_DESIRED);
}
}
mutex_unlock(&hdcp_work->mutex);
drm_modeset_unlock(&dev->mode_config.connection_mutex);
}
@@ -495,7 +503,9 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
(!!aconnector->base.state) ? aconnector->base.state->content_protection : -1,
(!!aconnector->base.state) ? aconnector->base.state->hdcp_content_type : -1);
hdcp_update_display(hdcp_work, link_index, aconnector, conn_state->hdcp_content_type, false);
if (conn_state)
hdcp_update_display(hdcp_work, link_index, aconnector,
conn_state->hdcp_content_type, false);
}
@@ -667,6 +677,7 @@ struct hdcp_workqueue *hdcp_create_workqueue(struct amdgpu_device *adev, struct
hdcp_work[i].hdcp.config.psp.handle = &adev->psp;
if (dc->ctx->dce_version == DCN_VERSION_3_1 ||
dc->ctx->dce_version == DCN_VERSION_3_14 ||
dc->ctx->dce_version == DCN_VERSION_3_15 ||
dc->ctx->dce_version == DCN_VERSION_3_16)
hdcp_work[i].hdcp.config.psp.caps.dtm_v3_supported = 1;

View file

@@ -312,6 +312,8 @@ bool dm_helpers_dp_mst_send_payload_allocation(
struct amdgpu_dm_connector *aconnector;
struct drm_dp_mst_topology_mgr *mst_mgr;
struct drm_dp_mst_port *mst_port;
enum mst_progress_status set_flag = MST_ALLOCATE_NEW_PAYLOAD;
enum mst_progress_status clr_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
@@ -325,8 +327,20 @@ bool dm_helpers_dp_mst_send_payload_allocation(
if (!mst_mgr->mst_state)
return false;
/* It's OK for this to fail */
drm_dp_update_payload_part2(mst_mgr);
if (!enable) {
set_flag = MST_CLEAR_ALLOCATED_PAYLOAD;
clr_flag = MST_ALLOCATE_NEW_PAYLOAD;
}
if (drm_dp_update_payload_part2(mst_mgr)) {
amdgpu_dm_set_mst_status(&aconnector->mst_status,
set_flag, false);
} else {
amdgpu_dm_set_mst_status(&aconnector->mst_status,
set_flag, true);
amdgpu_dm_set_mst_status(&aconnector->mst_status,
clr_flag, false);
}
if (!enable)
drm_dp_mst_deallocate_vcpi(mst_mgr, mst_port);

View file

@@ -56,6 +56,8 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
ssize_t result = 0;
struct aux_payload payload;
enum aux_return_code_type operation_result;
struct amdgpu_device *adev;
struct ddc_service *ddc;
if (WARN_ON(msg->size > 16))
return -E2BIG;
@@ -74,6 +76,21 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
&operation_result);
/*
* Workaround for certain Intel platforms where HPD unexpectedly pulls low
* during the 1st sideband message transaction, returning
* AUX_RET_ERROR_HPD_DISCON even though the AUX transaction actually
* succeeds in such cases; therefore bypass the error.
*/
ddc = TO_DM_AUX(aux)->ddc_service;
adev = ddc->ctx->driver_context;
if (adev->dm.aux_hpd_discon_quirk) {
if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE &&
operation_result == AUX_RET_ERROR_HPD_DISCON) {
result = 0;
operation_result = AUX_RET_SUCCESS;
}
}
if (payload.write && result >= 0)
result = msg->size;
@@ -160,7 +177,10 @@ amdgpu_dm_mst_connector_early_unregister(struct drm_connector *connector)
dc_sink_release(dc_sink);
aconnector->dc_sink = NULL;
aconnector->edid = NULL;
}
aconnector->mst_status = MST_STATUS_DEFAULT;
drm_modeset_unlock(&root->mst_mgr.base.lock);
}
@@ -261,6 +281,9 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
edid = drm_dp_mst_get_edid(connector, &aconnector->mst_port->mst_mgr, aconnector->port);
if (!edid) {
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID, false);
drm_connector_update_edid_property(
&aconnector->base,
NULL);
@@ -291,6 +314,8 @@ static int dm_dp_mst_get_modes(struct drm_connector *connector)
}
aconnector->edid = edid;
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID, true);
}
if (aconnector->dc_sink && aconnector->dc_sink->sink_signal == SIGNAL_TYPE_VIRTUAL) {
@@ -411,6 +436,11 @@ dm_dp_mst_detect(struct drm_connector *connector,
dc_sink_release(aconnector->dc_sink);
aconnector->dc_sink = NULL;
aconnector->edid = NULL;
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_REMOTE_EDID | MST_ALLOCATE_NEW_PAYLOAD | MST_CLEAR_ALLOCATED_PAYLOAD,
false);
}
return connection_status;
@@ -507,6 +537,8 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
connector = &aconnector->base;
aconnector->port = port;
aconnector->mst_port = master;
amdgpu_dm_set_mst_status(&aconnector->mst_status,
MST_PROBE, true);
if (drm_connector_init(
dev,

File diff suppressed because it is too large

View file

@@ -0,0 +1,73 @@
// SPDX-License-Identifier: MIT
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef __AMDGPU_DM_PLANE_H__
#define __AMDGPU_DM_PLANE_H__
#include "dc.h"
void handle_cursor_update(struct drm_plane *plane,
struct drm_plane_state *old_plane_state);
int fill_dc_scaling_info(struct amdgpu_device *adev,
const struct drm_plane_state *state,
struct dc_scaling_info *scaling_info);
void get_min_max_dc_plane_scaling(struct drm_device *dev,
struct drm_framebuffer *fb,
int *min_downscale, int *max_upscale);
int dm_plane_helper_check_state(struct drm_plane_state *state,
struct drm_crtc_state *new_crtc_state);
bool modifier_has_dcc(uint64_t modifier);
unsigned int modifier_gfx9_swizzle_mode(uint64_t modifier);
int fill_plane_buffer_attributes(struct amdgpu_device *adev,
const struct amdgpu_framebuffer *afb,
const enum surface_pixel_format format,
const enum dc_rotation_angle rotation,
const uint64_t tiling_flags,
union dc_tiling_info *tiling_info,
struct plane_size *plane_size,
struct dc_plane_dcc_param *dcc,
struct dc_plane_address *address,
bool tmz_surface,
bool force_disable_dcc);
int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
struct drm_plane *plane,
unsigned long possible_crtcs,
const struct dc_plane_cap *plane_cap);
const struct drm_format_info *amd_get_format_info(const struct drm_mode_fb_cmd2 *cmd);
void fill_blending_from_plane_state(const struct drm_plane_state *plane_state,
bool *per_pixel_alpha, bool *pre_multiplied_alpha,
bool *global_alpha, int *global_alpha_value);
#endif

View file

@@ -21,11 +21,13 @@
#
#
# Makefile for Display Core (dc) component.
#
DC_LIBS = basics bios dml clk_mgr dce gpio irq link virtual
ifdef CONFIG_DRM_AMD_DC_DCN
KCOV_INSTRUMENT := n
DC_LIBS += dcn20
DC_LIBS += dsc
DC_LIBS += dcn10
@@ -36,6 +38,7 @@ DC_LIBS += dcn301
DC_LIBS += dcn302
DC_LIBS += dcn303
DC_LIBS += dcn31
DC_LIBS += dcn314
DC_LIBS += dcn315
DC_LIBS += dcn316
DC_LIBS += dcn32

View file

@@ -23,8 +23,6 @@
*
*/
#include <linux/slab.h>
#include "dm_services.h"
#include "include/vector.h"

View file

@@ -23,8 +23,6 @@
*
*/
#include <linux/slab.h>
#include "dm_services.h"
#include "ObjectID.h"
@@ -404,7 +402,7 @@ static struct atom_display_object_path_v3 *get_bios_object_from_path_v3(
return NULL;
}
return NULL;
}
static enum bp_result bios_parser_get_i2c_info(struct dc_bios *dcb,
@@ -607,8 +605,8 @@ static enum bp_result bios_parser_get_hpd_info(
default:
object = get_bios_object(bp, id);
if (!object)
return BP_RESULT_BADINPUT;
record = get_hpd_record(bp, object);
@@ -812,10 +810,10 @@ static enum bp_result bios_parser_get_device_tag(
/* getBiosObject will return MXM object */
object = get_bios_object(bp, connector_object_id);
if (!object) {
BREAK_TO_DEBUGGER(); /* Invalid object id */
return BP_RESULT_BADINPUT;
}
info->acpi_device = 0; /* BIOS no longer provides this */
info->dev_id = device_type_from_device_id(object->device_tag);
@@ -1598,7 +1596,7 @@ static bool bios_parser_is_device_id_supported(
break;
}
return false;
}
static uint32_t bios_parser_get_ss_entry_number(
@@ -2081,6 +2079,7 @@ static enum bp_result bios_parser_get_encoder_cap_info(
record = get_encoder_cap_record(bp, object);
if (!record)
return BP_RESULT_NORECORD;
DC_LOG_BIOS("record->encodercaps 0x%x for object_id 0x%x", record->encodercaps, object_id.id);
info->DP_HBR2_CAP = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_HBR2) ? 1 : 0;
@@ -2100,6 +2099,7 @@
ATOM_ENCODER_CAP_RECORD_UHBR20_EN) ? 1 : 0;
info->DP_IS_USB_C = (record->encodercaps &
ATOM_ENCODER_CAP_RECORD_USB_C_TYPE) ? 1 : 0;
DC_LOG_BIOS("\t info->DP_IS_USB_C %d", info->DP_IS_USB_C);
return BP_RESULT_OK;
}
@@ -2946,7 +2946,35 @@ static enum bp_result construct_integrated_info(
if (result != BP_RESULT_OK)
return result;
else {
// Log each external path
for (i = 0; i < MAX_NUMBER_OF_EXT_DISPLAY_PATH; i++) {
if (info->ext_disp_conn_info.path[i].device_tag != 0)
DC_LOG_BIOS("integrated_info:For EXTERNAL DISPLAY PATH %d --------------\n"
"DEVICE_TAG: 0x%x\n"
"DEVICE_ACPI_ENUM: 0x%x\n"
"DEVICE_CONNECTOR_ID: 0x%x\n"
"EXT_AUX_DDC_LUT_INDEX: %d\n"
"EXT_HPD_PIN_LUT_INDEX: %d\n"
"EXT_ENCODER_OBJ_ID: 0x%x\n"
"Encoder CAPS: 0x%x\n",
i,
info->ext_disp_conn_info.path[i].device_tag,
info->ext_disp_conn_info.path[i].device_acpi_enum,
info->ext_disp_conn_info.path[i].device_connector_id.id,
info->ext_disp_conn_info.path[i].ext_aux_ddc_lut_index,
info->ext_disp_conn_info.path[i].ext_hpd_pin_lut_index,
info->ext_disp_conn_info.path[i].ext_encoder_obj_id.id,
info->ext_disp_conn_info.path[i].caps
);
}
// Log the Checksum and Voltage Swing
DC_LOG_BIOS("Integrated info table CHECKSUM: %d\n"
"Integrated info table FIX_DP_VOLTAGE_SWING: %d\n",
info->ext_disp_conn_info.checksum,
info->ext_disp_conn_info.fixdpvoltageswing);
}
/* Sort voltage table from low to high */
for (i = 1; i < NUMBER_OF_DISP_CLK_VOLTAGE; ++i) {
for (j = i; j > 0; --j) {

View file

@@ -75,6 +75,7 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
case DCN_VERSION_3_02:
case DCN_VERSION_3_03:
case DCN_VERSION_3_1:
case DCN_VERSION_3_14:
case DCN_VERSION_3_15:
case DCN_VERSION_3_16:
case DCN_VERSION_3_2:

View file

@@ -107,12 +107,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN201)
###############################################################################
CLK_MGR_DCN21 = rn_clk_mgr.o rn_clk_mgr_vbios_smu.o
# prevent build errors regarding soft-float vs hard-float FP ABI tags
# this code is currently unused on ppc64, as it applies to Renoir APUs only
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn21/rn_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
endif
AMD_DAL_CLK_MGR_DCN21 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn21/,$(CLK_MGR_DCN21))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
@@ -121,12 +115,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN21)
###############################################################################
CLK_MGR_DCN30 = dcn30_clk_mgr.o dcn30_clk_mgr_smu_msg.o
# prevent build errors regarding soft-float vs hard-float FP ABI tags
# this code is currently unused on ppc64, as it applies to VanGogh APUs only
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn30/dcn30_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
endif
AMD_DAL_CLK_MGR_DCN30 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn30/,$(CLK_MGR_DCN30))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
@@ -135,12 +123,6 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN30)
###############################################################################
CLK_MGR_DCN301 = vg_clk_mgr.o dcn301_smu.o
# prevent build errors regarding soft-float vs hard-float FP ABI tags
# this code is currently unused on ppc64, as it applies to VanGogh APUs only
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn301/vg_clk_mgr.o := $(call cc-option,-mno-gnu-attribute)
endif
AMD_DAL_CLK_MGR_DCN301 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn301/,$(CLK_MGR_DCN301))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN301)
@@ -154,6 +136,15 @@ AMD_DAL_CLK_MGR_DCN31 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn31/,$(CLK_MGR_DC
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN31)
###############################################################################
# DCN314
###############################################################################
CLK_MGR_DCN314 = dcn314_smu.o dcn314_clk_mgr.o
AMD_DAL_CLK_MGR_DCN314 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn314/,$(CLK_MGR_DCN314))
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN314)
###############################################################################
# DCN315
###############################################################################
@@ -179,31 +170,6 @@ CLK_MGR_DCN32 = dcn32_clk_mgr.o dcn32_clk_mgr_smu_msg.o
AMD_DAL_CLK_MGR_DCN32 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn32/,$(CLK_MGR_DCN32))
ifdef CONFIG_X86
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -msse
endif
ifdef CONFIG_PPC64
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mhard-float -maltivec
endif
ifdef CONFIG_CC_IS_GCC
ifeq ($(call cc-ifversion, -lt, 0701, y), y)
IS_OLD_GCC = 1
endif
endif
ifdef CONFIG_X86
ifdef IS_OLD_GCC
# Stack alignment mismatch, proceed with caution.
# GCC < 7.1 cannot compile code using `double` and -mpreferred-stack-boundary=3
# (8B stack alignment).
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -mpreferred-stack-boundary=4
else
CFLAGS_$(AMDDALPATH)/dc/clk_mgr/dcn32/dcn32_clk_mgr.o := -msse2
endif
endif
AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
endif

View file

@@ -43,11 +43,11 @@
#include "dcn30/dcn30_clk_mgr.h"
#include "dcn301/vg_clk_mgr.h"
#include "dcn31/dcn31_clk_mgr.h"
#include "dcn314/dcn314_clk_mgr.h"
#include "dcn315/dcn315_clk_mgr.h"
#include "dcn316/dcn316_clk_mgr.h"
#include "dcn32/dcn32_clk_mgr.h"
int clk_mgr_helper_get_active_display_cnt(
struct dc *dc,
struct dc_state *context)
@@ -58,6 +58,12 @@ int clk_mgr_helper_get_active_display_cnt(
for (i = 0; i < context->stream_count; i++) {
const struct dc_stream_state *stream = context->streams[i];
/* Don't count SubVP phantom pipes as part of active
* display count
*/
if (stream->mall_stream_config.type == SUBVP_PHANTOM)
continue;
/*
* Only notify active stream or virtual stream.
* Need to notify virtual stream to work around
@@ -281,6 +287,7 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
return &clk_mgr->base.base;
}
break;
case FAMILY_YELLOW_CARP: {
struct clk_mgr_dcn31 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
@@ -329,6 +336,20 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
return &clk_mgr->base;
break;
}
case AMDGPU_FAMILY_GC_11_0_2: {
struct clk_mgr_dcn314 *clk_mgr = kzalloc(sizeof(*clk_mgr), GFP_KERNEL);
if (clk_mgr == NULL) {
BREAK_TO_DEBUGGER();
return NULL;
}
dcn314_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
return &clk_mgr->base.base;
}
break;
#endif
default:
ASSERT(0); /* Unknown Asic */
@@ -375,6 +396,11 @@ void dc_destroy_clk_mgr(struct clk_mgr *clk_mgr_base)
case AMDGPU_FAMILY_GC_11_0_0:
dcn32_clk_mgr_destroy(clk_mgr);
break;
case AMDGPU_FAMILY_GC_11_0_2:
dcn314_clk_mgr_destroy(clk_mgr);
break;
default:
break;
}

View file

@@ -26,10 +26,9 @@
#include "dccg.h"
#include "clk_mgr_internal.h"
#include "dcn20/dcn20_clk_mgr.h"
#include "rn_clk_mgr.h"
#include "dml/dcn20/dcn20_fpu.h"
#include "dce100/dce_clk_mgr.h"
#include "rn_clk_mgr_vbios_smu.h"
@@ -45,7 +44,6 @@
/* Constants */
#define LPDDR_MEM_RETRAIN_LATENCY 4.977 /* Number obtained from LPDDR4 Training Counter Requirement doc */
#define SMU_VER_55_51_0 0x373300 /* SMU Version that is able to set DISPCLK below 100MHz */
/* Macros */
@@ -613,228 +611,6 @@ static struct clk_bw_params rn_bw_params = {
};
static struct wm_table ddr4_wm_table_gs = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 7.09,
.sr_enter_plus_exit_time_us = 8.14,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 10.12,
.sr_enter_plus_exit_time_us = 11.48,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 10.12,
.sr_enter_plus_exit_time_us = 11.48,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 10.12,
.sr_enter_plus_exit_time_us = 11.48,
.valid = true,
},
}
};
static struct wm_table lpddr4_wm_table_gs = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 5.32,
.sr_enter_plus_exit_time_us = 6.38,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.82,
.sr_enter_plus_exit_time_us = 11.196,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.89,
.sr_enter_plus_exit_time_us = 11.24,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.748,
.sr_enter_plus_exit_time_us = 11.102,
.valid = true,
},
}
};
static struct wm_table lpddr4_wm_table_with_disabled_ppt = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 8.32,
.sr_enter_plus_exit_time_us = 9.38,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.82,
.sr_enter_plus_exit_time_us = 11.196,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.89,
.sr_enter_plus_exit_time_us = 11.24,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.748,
.sr_enter_plus_exit_time_us = 11.102,
.valid = true,
},
}
};
static struct wm_table ddr4_wm_table_rn = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 11.90,
.sr_enter_plus_exit_time_us = 12.80,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.18,
.sr_enter_plus_exit_time_us = 14.30,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.18,
.sr_enter_plus_exit_time_us = 14.30,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.18,
.sr_enter_plus_exit_time_us = 14.30,
.valid = true,
},
}
};
static struct wm_table ddr4_1R_wm_table_rn = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 13.90,
.sr_enter_plus_exit_time_us = 14.80,
.valid = true,
},
}
};
static struct wm_table lpddr4_wm_table_rn = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 7.32,
.sr_enter_plus_exit_time_us = 8.38,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.82,
.sr_enter_plus_exit_time_us = 11.196,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.89,
.sr_enter_plus_exit_time_us = 11.24,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 9.748,
.sr_enter_plus_exit_time_us = 11.102,
.valid = true,
},
}
};
static unsigned int find_socclk_for_voltage(struct dpm_clocks *clock_table, unsigned int voltage)
{
int i;
@@ -914,12 +690,10 @@ static void rn_clk_mgr_helper_populate_bw_params(struct clk_bw_params *bw_params
/*
* WM set D will be re-purposed for memory retraining
*/
bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
bw_params->wm_table.entries[WM_D].wm_inst = WM_D;
bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING;
bw_params->wm_table.entries[WM_D].valid = true;
DC_FP_START();
dcn21_clk_mgr_set_bw_params_wm_table(bw_params);
DC_FP_END();
}
}
void rn_clk_mgr_construct(

View file

@@ -29,6 +29,13 @@
#include "clk_mgr.h"
#include "dm_pp_smu.h"
extern struct wm_table ddr4_wm_table_gs;
extern struct wm_table lpddr4_wm_table_gs;
extern struct wm_table lpddr4_wm_table_with_disabled_ppt;
extern struct wm_table ddr4_wm_table_rn;
extern struct wm_table ddr4_1R_wm_table_rn;
extern struct wm_table lpddr4_wm_table_rn;
struct rn_clk_registers {
uint32_t CLK1_CLK0_CURRENT_CNT; /* DPREFCLK */
};

View file

@@ -101,9 +101,9 @@ static int rn_vbios_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
uint32_t result;
result = rn_smu_wait_for_response(clk_mgr, 10, 200000);
ASSERT(result == VBIOSSMC_Result_OK);
smu_print("SMU response after wait: %d\n", result);
if (result != VBIOSSMC_Result_OK)
smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
@@ -188,6 +188,10 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
#ifdef DBG
smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
#endif
return actual_dcfclk_set_mhz * 1000;
}

View file

@@ -29,6 +29,7 @@
#include "dcn20/dcn20_clk_mgr.h"
#include "dce100/dce_clk_mgr.h"
#include "dcn30/dcn30_clk_mgr.h"
#include "dml/dcn30/dcn30_fpu.h"
#include "reg_helper.h"
#include "core_types.h"
#include "dm_helpers.h"
@@ -97,65 +98,11 @@ static void dcn3_init_single_clock(struct clk_mgr_internal *clk_mgr, uint32_t cl
}
}
static noinline void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
static void dcn3_build_wm_range_table(struct clk_mgr_internal *clk_mgr)
{
/* defaults */
double pstate_latency_us = clk_mgr->base.ctx->dc->dml.soc.dram_clock_change_latency_us;
double sr_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_exit_time_us;
double sr_enter_plus_exit_time_us = clk_mgr->base.ctx->dc->dml.soc.sr_enter_plus_exit_time_us;
uint16_t min_uclk_mhz = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz;
/* Set A - Normal - default values*/
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].valid = true;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us = pstate_latency_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_exit_time_us = sr_exit_time_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_dcfclk = 0;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_dcfclk = 0xFFFF;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.min_uclk = min_uclk_mhz;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_A].pmfw_breakdown.max_uclk = 0xFFFF;
/* Set B - Performance - higher minimum clocks */
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].valid = true;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.pstate_latency_us = pstate_latency_us;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_exit_time_us = sr_exit_time_us;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.wm_type = WATERMARKS_CLOCK_RANGE;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_dcfclk = TUNED VALUE;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_dcfclk = 0xFFFF;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.min_uclk = TUNED VALUE;
// clk_mgr->base.bw_params->wm_table.nv_entries[WM_B].pmfw_breakdown.max_uclk = 0xFFFF;
/* Set C - Dummy P-State - P-State latency set to "dummy p-state" value */
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 0;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.wm_type = WATERMARKS_DUMMY_PSTATE;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_dcfclk = 0;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_dcfclk = 0xFFFF;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = 1600;
clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = 8000;
clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = 10000;
clk_mgr->base.bw_params->dummy_pstate_table[2].dummy_pstate_latency_us = 8;
clk_mgr->base.bw_params->dummy_pstate_table[3].dram_speed_mts = 16000;
clk_mgr->base.bw_params->dummy_pstate_table[3].dummy_pstate_latency_us = 5;
/* Set D - MALL - SR enter and exit times adjusted for MALL */
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].valid = true;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.pstate_latency_us = pstate_latency_us;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_exit_time_us = 2;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].dml_input.sr_enter_plus_exit_time_us = 4;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.wm_type = WATERMARKS_MALL;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_dcfclk = 0;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_dcfclk = 0xFFFF;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.min_uclk = min_uclk_mhz;
clk_mgr->base.bw_params->wm_table.nv_entries[WM_D].pmfw_breakdown.max_uclk = 0xFFFF;
DC_FP_START();
dcn3_fpu_build_wm_range_table(&clk_mgr->base);
DC_FP_END();
}
void dcn3_init_clocks(struct clk_mgr *clk_mgr_base)

View file

@@ -102,7 +102,8 @@ static int dcn301_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
result = dcn301_smu_wait_for_response(clk_mgr, 10, 200000);
smu_print("SMU response after wait: %d\n", result);
if (result != VBIOSSMC_Result_OK)
smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
@@ -179,6 +180,10 @@ int dcn301_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
#ifdef DBG
smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
#endif
return actual_dcfclk_set_mhz * 1000;
}

View file

@@ -32,6 +32,9 @@
// For dcn20_update_clocks_update_dpp_dto
#include "dcn20/dcn20_clk_mgr.h"
// For DML FPU code
#include "dml/dcn20/dcn20_fpu.h"
#include "vg_clk_mgr.h"
#include "dcn301_smu.h"
#include "reg_helper.h"
@@ -526,81 +529,6 @@ static struct clk_bw_params vg_bw_params = {
};
static struct wm_table ddr4_wm_table = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 6.09,
.sr_enter_plus_exit_time_us = 7.14,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 10.12,
.sr_enter_plus_exit_time_us = 11.48,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 10.12,
.sr_enter_plus_exit_time_us = 11.48,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 10.12,
.sr_enter_plus_exit_time_us = 11.48,
.valid = true,
},
}
};
static struct wm_table lpddr5_wm_table = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 13.5,
.sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 13.5,
.sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 13.5,
.sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 13.5,
.sr_enter_plus_exit_time_us = 16.5,
.valid = true,
},
}
};
static unsigned int find_dcfclk_for_voltage(const struct vg_dpm_clocks *clock_table,
unsigned int voltage)
{
@@ -670,10 +598,9 @@ static void vg_clk_mgr_helper_populate_bw_params(
/*
* WM set D will be re-purposed for memory retraining
*/
bw_params->wm_table.entries[WM_D].pstate_latency_us = LPDDR_MEM_RETRAIN_LATENCY;
bw_params->wm_table.entries[WM_D].wm_inst = WM_D;
bw_params->wm_table.entries[WM_D].wm_type = WM_TYPE_RETRAINING;
bw_params->wm_table.entries[WM_D].valid = true;
DC_FP_START();
dcn21_clk_mgr_set_bw_params_wm_table(bw_params);
DC_FP_END();
}
}

View file

@@ -29,6 +29,9 @@
struct watermarks;
extern struct wm_table ddr4_wm_table;
extern struct wm_table lpddr5_wm_table;
struct smu_watermark_set {
struct watermarks *wm_set;
union large_integer mc_address;

View file

@@ -108,9 +108,9 @@ static int dcn31_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
uint32_t result;
result = dcn31_smu_wait_for_response(clk_mgr, 10, 200000);
ASSERT(result == VBIOSSMC_Result_OK);
smu_print("SMU response after wait: %d\n", result);
if (result != VBIOSSMC_Result_OK)
smu_print("SMU Response was not OK. SMU response after wait received is: %d\n", result);
if (result == VBIOSSMC_Status_BUSY) {
return -1;
@@ -202,6 +202,10 @@ int dcn31_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requeste
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
#ifdef DBG
smu_print("actual_dcfclk_set_mhz %d is set to : %d\n", actual_dcfclk_set_mhz, actual_dcfclk_set_mhz * 1000);
#endif
return actual_dcfclk_set_mhz * 1000;
}

View file

@@ -0,0 +1,751 @@
// SPDX-License-Identifier: MIT
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include "dcn314_clk_mgr.h"
#include "dccg.h"
#include "clk_mgr_internal.h"
// For dce12_get_dp_ref_freq_khz
#include "dce100/dce_clk_mgr.h"
// For dcn20_update_clocks_update_dpp_dto
#include "dcn20/dcn20_clk_mgr.h"
#include "reg_helper.h"
#include "core_types.h"
#include "dm_helpers.h"
/* TODO: remove this include once we have ported over the remaining clk mgr functions */
#include "dcn30/dcn30_clk_mgr.h"
#include "dcn31/dcn31_clk_mgr.h"
#include "dc_dmub_srv.h"
#include "dc_link_dp.h"
#include "dcn314_smu.h"
#define MAX_INSTANCE 7
#define MAX_SEGMENT 8
struct IP_BASE_INSTANCE {
unsigned int segment[MAX_SEGMENT];
};
struct IP_BASE {
struct IP_BASE_INSTANCE instance[MAX_INSTANCE];
};
static const struct IP_BASE CLK_BASE = { { { { 0x00016C00, 0x02401800, 0, 0, 0, 0, 0, 0 } },
{ { 0x00016E00, 0x02401C00, 0, 0, 0, 0, 0, 0 } },
{ { 0x00017000, 0x02402000, 0, 0, 0, 0, 0, 0 } },
{ { 0x00017200, 0x02402400, 0, 0, 0, 0, 0, 0 } },
{ { 0x0001B000, 0x0242D800, 0, 0, 0, 0, 0, 0 } },
{ { 0x0001B200, 0x0242DC00, 0, 0, 0, 0, 0, 0 } },
{ { 0x0001B400, 0x0242E000, 0, 0, 0, 0, 0, 0 } } } };
#define regCLK1_CLK_PLL_REQ 0x0237
#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
#define REG(reg_name) \
(CLK_BASE.instance[0].segment[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
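/*
 * Illustrative expansion (derived from the definitions above):
 * regCLK1_CLK_PLL_REQ_BASE_IDX is 0, so REG(CLK1_CLK_PLL_REQ) resolves to
 * CLK_BASE.instance[0].segment[0] + regCLK1_CLK_PLL_REQ
 * = 0x00016C00 + 0x0237 = 0x00016E37.
 */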
#define TO_CLK_MGR_DCN314(clk_mgr)\
container_of(clk_mgr, struct clk_mgr_dcn314, base)
static int dcn314_get_active_display_cnt_wa(
struct dc *dc,
struct dc_state *context)
{
int i, display_count;
bool tmds_present = false;
display_count = 0;
for (i = 0; i < context->stream_count; i++) {
const struct dc_stream_state *stream = context->streams[i];
if (stream->signal == SIGNAL_TYPE_HDMI_TYPE_A ||
stream->signal == SIGNAL_TYPE_DVI_SINGLE_LINK ||
stream->signal == SIGNAL_TYPE_DVI_DUAL_LINK)
tmds_present = true;
}
for (i = 0; i < dc->link_count; i++) {
const struct dc_link *link = dc->links[i];
/* abusing the fact that the dig and phy are coupled to see if the phy is enabled */
if (link->link_enc && link->link_enc->funcs->is_dig_enabled &&
link->link_enc->funcs->is_dig_enabled(link->link_enc))
display_count++;
}
/* WA for hang on HDMI after the display is turned off and back on */
if (display_count == 0 && tmds_present)
display_count = 1;
return display_count;
}
static void dcn314_disable_otg_wa(struct clk_mgr *clk_mgr_base, bool disable)
{
struct dc *dc = clk_mgr_base->ctx->dc;
int i;
for (i = 0; i < dc->res_pool->pipe_count; ++i) {
struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
if (pipe->top_pipe || pipe->prev_odm_pipe)
continue;
if (pipe->stream && (pipe->stream->dpms_off || pipe->plane_state == NULL ||
dc_is_virtual_signal(pipe->stream->signal))) {
if (disable)
pipe->stream_res.tg->funcs->immediate_disable_crtc(pipe->stream_res.tg);
else
pipe->stream_res.tg->funcs->enable_crtc(pipe->stream_res.tg);
}
}
}
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower)
{
union dmub_rb_cmd cmd;
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct dc_clocks *new_clocks = &context->bw_ctx.bw.dcn.clk;
struct dc *dc = clk_mgr_base->ctx->dc;
int display_count;
bool update_dppclk = false;
bool update_dispclk = false;
bool dpp_clock_lowered = false;
if (dc->work_arounds.skip_clock_update)
return;
/*
* if it is safe to lower, but we are already in the lower state, we don't have to do anything
* also if safe to lower is false, we just go to the higher state
*/
if (safe_to_lower) {
if (new_clocks->zstate_support != DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn314_smu_set_zstate_support(clk_mgr, new_clocks->zstate_support);
dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, true);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
if (clk_mgr_base->clks.dtbclk_en && !new_clocks->dtbclk_en) {
dcn314_smu_set_dtbclk(clk_mgr, false);
clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
}
/* check that we're not already in lower */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_LOW_POWER) {
display_count = dcn314_get_active_display_cnt_wa(dc, context);
/* if we can go lower, go lower */
if (display_count == 0) {
union display_idle_optimization_u idle_info = { 0 };
idle_info.idle_info.df_request_disabled = 1;
idle_info.idle_info.phy_ref_clk_off = 1;
idle_info.idle_info.s0i2_rdy = 1;
dcn314_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
/* update power state */
clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_LOW_POWER;
}
}
} else {
if (new_clocks->zstate_support == DCN_ZSTATE_SUPPORT_DISALLOW &&
new_clocks->zstate_support != clk_mgr_base->clks.zstate_support) {
dcn314_smu_set_zstate_support(clk_mgr, DCN_ZSTATE_SUPPORT_DISALLOW);
dm_helpers_enable_periodic_detection(clk_mgr_base->ctx, false);
clk_mgr_base->clks.zstate_support = new_clocks->zstate_support;
}
if (!clk_mgr_base->clks.dtbclk_en && new_clocks->dtbclk_en) {
dcn314_smu_set_dtbclk(clk_mgr, true);
clk_mgr_base->clks.dtbclk_en = new_clocks->dtbclk_en;
}
/* check that we're not already in D0 */
if (clk_mgr_base->clks.pwr_state != DCN_PWR_STATE_MISSION_MODE) {
union display_idle_optimization_u idle_info = { 0 };
dcn314_smu_set_display_idle_optimization(clk_mgr, idle_info.data);
/* update power state */
clk_mgr_base->clks.pwr_state = DCN_PWR_STATE_MISSION_MODE;
}
}
if (should_set_clock(safe_to_lower, new_clocks->dcfclk_khz, clk_mgr_base->clks.dcfclk_khz)) {
clk_mgr_base->clks.dcfclk_khz = new_clocks->dcfclk_khz;
dcn314_smu_set_hard_min_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_khz);
}
if (should_set_clock(safe_to_lower,
new_clocks->dcfclk_deep_sleep_khz, clk_mgr_base->clks.dcfclk_deep_sleep_khz)) {
clk_mgr_base->clks.dcfclk_deep_sleep_khz = new_clocks->dcfclk_deep_sleep_khz;
dcn314_smu_set_min_deep_sleep_dcfclk(clk_mgr, clk_mgr_base->clks.dcfclk_deep_sleep_khz);
}
// workaround: limit dppclk to at least 100 MHz to avoid underflow when a lower-resolution eDP panel has a 4K monitor added.
if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
if (new_clocks->dppclk_khz < 100000)
new_clocks->dppclk_khz = 100000;
}
if (should_set_clock(safe_to_lower, new_clocks->dppclk_khz, clk_mgr->base.clks.dppclk_khz)) {
if (clk_mgr->base.clks.dppclk_khz > new_clocks->dppclk_khz)
dpp_clock_lowered = true;
clk_mgr_base->clks.dppclk_khz = new_clocks->dppclk_khz;
update_dppclk = true;
}
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
dcn314_disable_otg_wa(clk_mgr_base, true);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn314_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
dcn314_disable_otg_wa(clk_mgr_base, false);
update_dispclk = true;
}
if (dpp_clock_lowered) {
// increase per DPP DTO before lowering global dppclk
dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
dcn314_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
} else {
// increase global DPPCLK before lowering per DPP DTO
if (update_dppclk || update_dispclk)
dcn314_smu_set_dppclk(clk_mgr, clk_mgr_base->clks.dppclk_khz);
// always update dtos unless clock is lowered and not safe to lower
if (new_clocks->dppclk_khz >= dc->current_state->bw_ctx.bw.dcn.clk.dppclk_khz)
dcn20_update_clocks_update_dpp_dto(clk_mgr, context, safe_to_lower);
}
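/*
 * Ordering note: in both branches the per-DPP DTOs and the global DPPCLK are
 * reprogrammed in an order chosen so the effective per-pipe DPP clock should
 * never dip below what the active pipes require at any intermediate step.
 */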
// notify DMCUB of latest clocks
memset(&cmd, 0, sizeof(cmd));
cmd.notify_clocks.header.type = DMUB_CMD__CLK_MGR;
cmd.notify_clocks.header.sub_type = DMUB_CMD__CLK_MGR_NOTIFY_CLOCKS;
cmd.notify_clocks.clocks.dcfclk_khz = clk_mgr_base->clks.dcfclk_khz;
cmd.notify_clocks.clocks.dcfclk_deep_sleep_khz =
clk_mgr_base->clks.dcfclk_deep_sleep_khz;
cmd.notify_clocks.clocks.dispclk_khz = clk_mgr_base->clks.dispclk_khz;
cmd.notify_clocks.clocks.dppclk_khz = clk_mgr_base->clks.dppclk_khz;
dc_dmub_srv_cmd_queue(dc->ctx->dmub_srv, &cmd);
dc_dmub_srv_cmd_execute(dc->ctx->dmub_srv);
dc_dmub_srv_wait_idle(dc->ctx->dmub_srv);
}
static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
{
/* get FbMult value */
struct fixed31_32 pll_req;
unsigned int fbmult_frac_val = 0;
unsigned int fbmult_int_val = 0;
/*
* Register value of fbmult is in 8.16 format, we are converting to 31.32
* to leverage the fixed-point operations available in the driver
*/
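/*
 * Worked example (register values assumed for illustration): FbMult_int = 120
 * and FbMult_frac = 0x8000 (0.5 in 0.16 format) give pll_req = 120.5; with
 * dfs_ref_freq_khz = 48000 the computation below yields a VCO frequency of
 * 120.5 * 48000 = 5784000 kHz.
 */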
REG_GET(CLK1_CLK_PLL_REQ, FbMult_frac, &fbmult_frac_val); /* 16 bit fractional part*/
REG_GET(CLK1_CLK_PLL_REQ, FbMult_int, &fbmult_int_val); /* 8 bit integer part */
pll_req = dc_fixpt_from_int(fbmult_int_val);
/*
* since the fractional part is only 16 bits in the register definition but is
* 32 bits in our fixed-point definition, we need to shift left by 16 to obtain the correct value
*/
pll_req.value |= fbmult_frac_val << 16;
/* multiply by REFCLK period */
pll_req = dc_fixpt_mul_int(pll_req, clk_mgr->dfs_ref_freq_khz);
/* integer part is now VCO frequency in kHz */
return dc_fixpt_floor(pll_req);
}
static void dcn314_enable_pme_wa(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
dcn314_smu_enable_pme_wa(clk_mgr);
}
void dcn314_init_clocks(struct clk_mgr *clk_mgr)
{
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
// Assumption is that boot state always supports pstate
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
clk_mgr->clks.zstate_support = DCN_ZSTATE_SUPPORT_UNKNOWN;
}
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b)
{
if (a->dispclk_khz != b->dispclk_khz)
return false;
else if (a->dppclk_khz != b->dppclk_khz)
return false;
else if (a->dcfclk_khz != b->dcfclk_khz)
return false;
else if (a->dcfclk_deep_sleep_khz != b->dcfclk_deep_sleep_khz)
return false;
else if (a->zstate_support != b->zstate_support)
return false;
else if (a->dtbclk_en != b->dtbclk_en)
return false;
return true;
}
static void dcn314_dump_clk_registers(struct clk_state_registers_and_bypass *regs_and_bypass,
struct clk_mgr *clk_mgr_base, struct clk_log_info *log_info)
{
return;
}
static struct clk_bw_params dcn314_bw_params = {
.vram_type = Ddr4MemType,
.num_channels = 1,
.clk_table = {
.num_entries = 4,
},
};
static struct wm_table ddr5_wm_table = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 9,
.sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 9,
.sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 9,
.sr_enter_plus_exit_time_us = 11,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.72,
.sr_exit_time_us = 9,
.sr_enter_plus_exit_time_us = 11,
.valid = true,
},
}
};
static struct wm_table lpddr5_wm_table = {
.entries = {
{
.wm_inst = WM_A,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
},
{
.wm_inst = WM_B,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
},
{
.wm_inst = WM_C,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
},
{
.wm_inst = WM_D,
.wm_type = WM_TYPE_PSTATE_CHG,
.pstate_latency_us = 11.65333,
.sr_exit_time_us = 11.5,
.sr_enter_plus_exit_time_us = 14.5,
.valid = true,
},
}
};
static DpmClocks_t dummy_clocks;
static struct dcn314_watermarks dummy_wms = { 0 };
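/*
 * Sketch of the range construction below (dcfclk values assumed for
 * illustration): with two valid p-state entries whose dcfclk_mhz are 400 and
 * 600, row 0 covers MinMclk 0..400 and row 1 covers 401..600; the tail of the
 * function then widens the first MinMclk to 0 and the last MaxMclk to 0xFFFF
 * so the rows span the whole range.
 */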
static void dcn314_build_watermark_ranges(struct clk_bw_params *bw_params, struct dcn314_watermarks *table)
{
int i, num_valid_sets;
num_valid_sets = 0;
for (i = 0; i < WM_SET_COUNT; i++) {
/* skip empty entries, the smu array has no holes */
if (!bw_params->wm_table.entries[i].valid)
continue;
table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmSetting = bw_params->wm_table.entries[i].wm_inst;
table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType = bw_params->wm_table.entries[i].wm_type;
/* We will not select WM based on fclk, so leave it as unconstrained */
table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
if (table->WatermarkRow[WM_DCFCLK][num_valid_sets].WmType == WM_TYPE_PSTATE_CHG) {
if (i == 0)
table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk = 0;
else {
/* add 1 to make it non-overlapping with next lvl */
table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinMclk =
bw_params->clk_table.entries[i - 1].dcfclk_mhz + 1;
}
table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxMclk =
bw_params->clk_table.entries[i].dcfclk_mhz;
} else {
/* unconstrained for memory retraining */
table->WatermarkRow[WM_DCFCLK][num_valid_sets].MinClock = 0;
table->WatermarkRow[WM_DCFCLK][num_valid_sets].MaxClock = 0xFFFF;
/* Modify previous watermark range to cover up to max */
table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
}
num_valid_sets++;
}
ASSERT(num_valid_sets != 0); /* Must have at least one set of valid watermarks */
/* modify the min and max to make sure we cover the whole range*/
table->WatermarkRow[WM_DCFCLK][0].MinMclk = 0;
table->WatermarkRow[WM_DCFCLK][0].MinClock = 0;
table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxMclk = 0xFFFF;
table->WatermarkRow[WM_DCFCLK][num_valid_sets - 1].MaxClock = 0xFFFF;
/* This is for writeback only; it does not matter currently as there is no writeback support */
table->WatermarkRow[WM_SOCCLK][0].WmSetting = WM_A;
table->WatermarkRow[WM_SOCCLK][0].MinClock = 0;
table->WatermarkRow[WM_SOCCLK][0].MaxClock = 0xFFFF;
table->WatermarkRow[WM_SOCCLK][0].MinMclk = 0;
table->WatermarkRow[WM_SOCCLK][0].MaxMclk = 0xFFFF;
}
static void dcn314_notify_wm_ranges(struct clk_mgr *clk_mgr_base)
{
struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
struct clk_mgr_dcn314 *clk_mgr_dcn314 = TO_CLK_MGR_DCN314(clk_mgr);
struct dcn314_watermarks *table = clk_mgr_dcn314->smu_wm_set.wm_set;
if (!clk_mgr->smu_ver)
return;
if (!table || clk_mgr_dcn314->smu_wm_set.mc_address.quad_part == 0)
return;
memset(table, 0, sizeof(*table));
dcn314_build_watermark_ranges(clk_mgr_base->bw_params, table);
dcn314_smu_set_dram_addr_high(clk_mgr,
clk_mgr_dcn314->smu_wm_set.mc_address.high_part);
dcn314_smu_set_dram_addr_low(clk_mgr,
clk_mgr_dcn314->smu_wm_set.mc_address.low_part);
dcn314_smu_transfer_wm_table_dram_2_smu(clk_mgr);
}
static void dcn314_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
struct dcn314_smu_dpm_clks *smu_dpm_clks)
{
DpmClocks_t *table = smu_dpm_clks->dpm_clks;
if (!clk_mgr->smu_ver)
return;
if (!table || smu_dpm_clks->mc_address.quad_part == 0)
return;
memset(table, 0, sizeof(*table));
dcn314_smu_set_dram_addr_high(clk_mgr,
smu_dpm_clks->mc_address.high_part);
dcn314_smu_set_dram_addr_low(clk_mgr,
smu_dpm_clks->mc_address.low_part);
dcn314_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
}
static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
{
uint32_t max = 0;
int i;
for (i = 0; i < num_clocks; ++i) {
if (clocks[i] > max)
max = clocks[i];
}
return max;
}
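/*
 * find_clk_for_voltage() returns the clock at an exact SocVoltage match,
 * otherwise the clock at the highest table voltage below the request.
 * Example (table values assumed for illustration): SocVoltage = {700, 800,
 * 900} with clocks = {400, 600, 800}; a request for 850 selects 600, the
 * entry for 800.
 */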
static unsigned int find_clk_for_voltage(
const DpmClocks_t *clock_table,
const uint32_t clocks[],
unsigned int voltage)
{
int i;
int max_voltage = 0;
int clock = 0;
for (i = 0; i < NUM_SOC_VOLTAGE_LEVELS; i++) {
if (clock_table->SocVoltage[i] == voltage) {
return clocks[i];
} else if (clock_table->SocVoltage[i] >= max_voltage &&
clock_table->SocVoltage[i] < voltage) {
max_voltage = clock_table->SocVoltage[i];
clock = clocks[i];
}
}
ASSERT(clock);
return clock;
}
static void dcn314_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk_mgr,
struct integrated_info *bios_info,
const DpmClocks_t *clock_table)
{
int i, j;
struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
uint32_t max_dispclk = 0, max_dppclk = 0;
j = -1;
ASSERT(NUM_DF_PSTATE_LEVELS <= MAX_NUM_DPM_LVL);
/* Find lowest DPM, FCLK is filled in reverse order*/
for (i = NUM_DF_PSTATE_LEVELS - 1; i >= 0; i--) {
if (clock_table->DfPstateTable[i].FClk != 0) {
j = i;
break;
}
}
if (j == -1) {
/* clock table is all 0s, just use our own hardcode */
ASSERT(0);
return;
}
bw_params->clk_table.num_entries = j + 1;
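/*
 * Example (FClk values assumed for illustration): DfPstateTable[].FClk =
 * {1600, 1200, 800, 0} gives j = 2 and num_entries = 3; the loop below then
 * fills entries[0..2] from table indices 2..0, i.e. lowest DPM level first.
 */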
/* dispclk and dppclk can be max at any voltage, same number of levels for both */
if (clock_table->NumDispClkLevelsEnabled <= NUM_DISPCLK_DPM_LEVELS &&
clock_table->NumDispClkLevelsEnabled <= NUM_DPPCLK_DPM_LEVELS) {
max_dispclk = find_max_clk_value(clock_table->DispClocks, clock_table->NumDispClkLevelsEnabled);
max_dppclk = find_max_clk_value(clock_table->DppClocks, clock_table->NumDispClkLevelsEnabled);
} else {
ASSERT(0);
}
for (i = 0; i < bw_params->clk_table.num_entries; i++, j--) {
bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[j].FClk;
bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[j].MemClk;
bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[j].Voltage;
switch (clock_table->DfPstateTable[j].WckRatio) {
case WCK_RATIO_1_2:
bw_params->clk_table.entries[i].wck_ratio = 2;
break;
case WCK_RATIO_1_4:
bw_params->clk_table.entries[i].wck_ratio = 4;
break;
default:
bw_params->clk_table.entries[i].wck_ratio = 1;
}
bw_params->clk_table.entries[i].dcfclk_mhz = find_clk_for_voltage(clock_table, clock_table->DcfClocks, clock_table->DfPstateTable[j].Voltage);
bw_params->clk_table.entries[i].socclk_mhz = find_clk_for_voltage(clock_table, clock_table->SocClocks, clock_table->DfPstateTable[j].Voltage);
bw_params->clk_table.entries[i].dispclk_mhz = max_dispclk;
bw_params->clk_table.entries[i].dppclk_mhz = max_dppclk;
}
bw_params->vram_type = bios_info->memory_type;
bw_params->num_channels = bios_info->ma_channel_number;
for (i = 0; i < WM_SET_COUNT; i++) {
bw_params->wm_table.entries[i].wm_inst = i;
if (i >= bw_params->clk_table.num_entries) {
bw_params->wm_table.entries[i].valid = false;
continue;
}
bw_params->wm_table.entries[i].wm_type = WM_TYPE_PSTATE_CHG;
bw_params->wm_table.entries[i].valid = true;
}
}
static struct clk_mgr_funcs dcn314_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
.get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn314_update_clocks,
.init_clocks = dcn314_init_clocks,
.enable_pme_wa = dcn314_enable_pme_wa,
.are_clock_states_equal = dcn314_are_clock_states_equal,
.notify_wm_ranges = dcn314_notify_wm_ranges
};
extern struct clk_mgr_funcs dcn3_fpga_funcs;
void dcn314_clk_mgr_construct(
struct dc_context *ctx,
struct clk_mgr_dcn314 *clk_mgr,
struct pp_smu_funcs *pp_smu,
struct dccg *dccg)
{
struct dcn314_smu_dpm_clks smu_dpm_clks = { 0 };
clk_mgr->base.base.ctx = ctx;
clk_mgr->base.base.funcs = &dcn314_funcs;
clk_mgr->base.pp_smu = pp_smu;
clk_mgr->base.dccg = dccg;
clk_mgr->base.dfs_bypass_disp_clk = 0;
clk_mgr->base.dprefclk_ss_percentage = 0;
clk_mgr->base.dprefclk_ss_divider = 1000;
clk_mgr->base.ss_on_dprefclk = false;
clk_mgr->base.dfs_ref_freq_khz = 48000;
clk_mgr->smu_wm_set.wm_set = (struct dcn314_watermarks *)dm_helpers_allocate_gpu_mem(
clk_mgr->base.base.ctx,
DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
sizeof(struct dcn314_watermarks),
&clk_mgr->smu_wm_set.mc_address.quad_part);
if (!clk_mgr->smu_wm_set.wm_set) {
clk_mgr->smu_wm_set.wm_set = &dummy_wms;
clk_mgr->smu_wm_set.mc_address.quad_part = 0;
}
ASSERT(clk_mgr->smu_wm_set.wm_set);
smu_dpm_clks.dpm_clks = (DpmClocks_t *)dm_helpers_allocate_gpu_mem(
clk_mgr->base.base.ctx,
DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
sizeof(DpmClocks_t),
&smu_dpm_clks.mc_address.quad_part);
if (smu_dpm_clks.dpm_clks == NULL) {
smu_dpm_clks.dpm_clks = &dummy_clocks;
smu_dpm_clks.mc_address.quad_part = 0;
}
ASSERT(smu_dpm_clks.dpm_clks);
if (IS_FPGA_MAXIMUS_DC(ctx->dce_environment)) {
clk_mgr->base.base.funcs = &dcn3_fpga_funcs;
} else {
struct clk_log_info log_info = {0};
clk_mgr->base.smu_ver = dcn314_smu_get_smu_version(&clk_mgr->base);
if (clk_mgr->base.smu_ver)
clk_mgr->base.smu_present = true;
/* TODO: Check we get what we expect during bringup */
clk_mgr->base.base.dentist_vco_freq_khz = get_vco_frequency_from_reg(&clk_mgr->base);
if (ctx->dc_bios->integrated_info->memory_type == LpDdr5MemType)
dcn314_bw_params.wm_table = lpddr5_wm_table;
else
dcn314_bw_params.wm_table = ddr5_wm_table;
/* Saved clocks configured at boot for debug purposes */
dcn314_dump_clk_registers(&clk_mgr->base.base.boot_snapshot, &clk_mgr->base.base, &log_info);
}
clk_mgr->base.base.dprefclk_khz = 600000;
clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
dce_clock_read_ss_info(&clk_mgr->base);
/* if bios enabled SS, the driver needs to adjust the dtb clock; only enable with correct bios */
//clk_mgr->base.dccg->ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(clk_mgr_internal, clk_mgr->base.base.dprefclk_khz);
clk_mgr->base.base.bw_params = &dcn314_bw_params;
if (clk_mgr->base.base.ctx->dc->debug.pstate_enabled) {
dcn314_get_dpm_table_from_smu(&clk_mgr->base, &smu_dpm_clks);
if (ctx->dc_bios && ctx->dc_bios->integrated_info) {
dcn314_clk_mgr_helper_populate_bw_params(
&clk_mgr->base,
ctx->dc_bios->integrated_info,
smu_dpm_clks.dpm_clks);
}
}
if (smu_dpm_clks.dpm_clks && smu_dpm_clks.mc_address.quad_part != 0)
dm_helpers_free_gpu_mem(clk_mgr->base.base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
smu_dpm_clks.dpm_clks);
}
void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int)
{
struct clk_mgr_dcn314 *clk_mgr = TO_CLK_MGR_DCN314(clk_mgr_int);
if (clk_mgr->smu_wm_set.wm_set && clk_mgr->smu_wm_set.mc_address.quad_part != 0)
dm_helpers_free_gpu_mem(clk_mgr_int->base.ctx, DC_MEM_ALLOC_TYPE_FRAME_BUFFER,
clk_mgr->smu_wm_set.wm_set);
}

View file

@@ -1,5 +1,6 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright 2012 Red Hat Inc.
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,37 +20,38 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Ben Skeggs
* Authors: AMD
*
*/
#include "rootnv50.h"
#include "channv50.h"
#include <nvif/class.h>
static const struct nv50_disp_root_func
g84_disp_root = {
.user = {
{{0,0,G82_DISP_CURSOR }, nv50_disp_curs_new },
{{0,0,G82_DISP_OVERLAY }, nv50_disp_oimm_new },
{{0,0,G82_DISP_BASE_CHANNEL_DMA }, g84_disp_base_new },
{{0,0,G82_DISP_CORE_CHANNEL_DMA }, g84_disp_core_new },
{{0,0,G82_DISP_OVERLAY_CHANNEL_DMA}, g84_disp_ovly_new },
{}
},
};
static int
g84_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
void *data, u32 size, struct nvkm_object **pobject)
{
return nv50_disp_root_new_(&g84_disp_root, disp, oclass,
data, size, pobject);
}
const struct nvkm_disp_oclass
g84_disp_root_oclass = {
.base.oclass = G82_DISP,
.base.minver = -1,
.base.maxver = -1,
.ctor = g84_disp_root_new,
};
#ifndef __DCN314_CLK_MGR_H__
#define __DCN314_CLK_MGR_H__
#include "clk_mgr_internal.h"
struct dcn314_watermarks;
struct dcn314_smu_watermark_set {
struct dcn314_watermarks *wm_set;
union large_integer mc_address;
};
struct clk_mgr_dcn314 {
struct clk_mgr_internal base;
struct dcn314_smu_watermark_set smu_wm_set;
};
bool dcn314_are_clock_states_equal(struct dc_clocks *a,
struct dc_clocks *b);
void dcn314_init_clocks(struct clk_mgr *clk_mgr);
void dcn314_update_clocks(struct clk_mgr *clk_mgr_base,
struct dc_state *context,
bool safe_to_lower);
void dcn314_clk_mgr_construct(struct dc_context *ctx,
struct clk_mgr_dcn314 *clk_mgr,
struct pp_smu_funcs *pp_smu,
struct dccg *dccg);
void dcn314_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
#endif //__DCN314_CLK_MGR_H__

View file

@@ -0,0 +1,391 @@
// SPDX-License-Identifier: MIT
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#include "core_types.h"
#include "clk_mgr_internal.h"
#include "reg_helper.h"
#include "dm_helpers.h"
#include "dcn314_smu.h"
#include "mp/mp_13_0_5_offset.h"
/* TODO: Use the real headers when they're correct */
#define MP1_BASE__INST0_SEG0 0x00016000
#define MP1_BASE__INST0_SEG1 0x0243FC00
#define MP1_BASE__INST0_SEG2 0x00DC0000
#define MP1_BASE__INST0_SEG3 0x00E00000
#define MP1_BASE__INST0_SEG4 0x00E40000
#define MP1_BASE__INST0_SEG5 0
#ifdef BASE_INNER
#undef BASE_INNER
#endif
#define BASE_INNER(seg) MP1_BASE__INST0_SEG ## seg
#define BASE(seg) BASE_INNER(seg)
#define REG(reg_name) (BASE(reg##reg_name##_BASE_IDX) + reg##reg_name)
#define FN(reg_name, field) \
FD(reg_name##__##field)
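/*
 * For example, REG(MP1_SMN_C2PMSG_91) expands to
 * BASE(regMP1_SMN_C2PMSG_91_BASE_IDX) + regMP1_SMN_C2PMSG_91, combining the
 * register offsets from mp_13_0_5_offset.h with the segment bases hardcoded
 * above.
 */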
#include "logger_types.h"
#undef DC_LOGGER
#define DC_LOGGER \
CTX->logger
#define smu_print(str, ...) {DC_LOG_SMU(str, ##__VA_ARGS__); }
#define VBIOSSMC_MSG_TestMessage 0x1
#define VBIOSSMC_MSG_GetSmuVersion 0x2
#define VBIOSSMC_MSG_PowerUpGfx 0x3
#define VBIOSSMC_MSG_SetDispclkFreq 0x4
#define VBIOSSMC_MSG_SetDprefclkFreq 0x5 //Not used. DPRef is constant
#define VBIOSSMC_MSG_SetDppclkFreq 0x6
#define VBIOSSMC_MSG_SetHardMinDcfclkByFreq 0x7
#define VBIOSSMC_MSG_SetMinDeepSleepDcfclk 0x8
#define VBIOSSMC_MSG_SetPhyclkVoltageByFreq 0x9 //Keep it in case VMIN does not support phy clk
#define VBIOSSMC_MSG_GetFclkFrequency 0xA
#define VBIOSSMC_MSG_SetDisplayCount 0xB //Not used anymore
#define VBIOSSMC_MSG_EnableTmdp48MHzRefclkPwrDown 0xC //Not used anymore
#define VBIOSSMC_MSG_UpdatePmeRestore 0xD
#define VBIOSSMC_MSG_SetVbiosDramAddrHigh 0xE //Used for WM table txfr
#define VBIOSSMC_MSG_SetVbiosDramAddrLow 0xF
#define VBIOSSMC_MSG_TransferTableSmu2Dram 0x10
#define VBIOSSMC_MSG_TransferTableDram2Smu 0x11
#define VBIOSSMC_MSG_SetDisplayIdleOptimizations 0x12
#define VBIOSSMC_MSG_GetDprefclkFreq 0x13
#define VBIOSSMC_MSG_GetDtbclkFreq 0x14
#define VBIOSSMC_MSG_AllowZstatesEntry 0x15
#define VBIOSSMC_MSG_DisallowZstatesEntry 0x16
#define VBIOSSMC_MSG_SetDtbClk 0x17
#define VBIOSSMC_Message_Count 0x18
#define VBIOSSMC_Status_BUSY 0x0
#define VBIOSSMC_Result_OK 0x1
#define VBIOSSMC_Result_Failed 0xFF
#define VBIOSSMC_Result_UnknownCmd 0xFE
#define VBIOSSMC_Result_CmdRejectedPrereq 0xFD
#define VBIOSSMC_Result_CmdRejectedBusy 0xFC
/*
* Function to be used instead of REG_WAIT macro because the wait ends when
* the register is NOT EQUAL to zero, and because the translation in msg_if.h
* won't work with REG_WAIT.
*/
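/*
 * With the delay_us = 10 and max_retries = 200000 that the callers below
 * pass in, this polls the response register for roughly 2 seconds in the
 * worst case.
 */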
static uint32_t dcn314_smu_wait_for_response(struct clk_mgr_internal *clk_mgr, unsigned int delay_us, unsigned int max_retries)
{
uint32_t res_val = VBIOSSMC_Status_BUSY;
do {
res_val = REG_READ(MP1_SMN_C2PMSG_91);
if (res_val != VBIOSSMC_Status_BUSY)
break;
if (delay_us >= 1000)
msleep(delay_us/1000);
else if (delay_us > 0)
udelay(delay_us);
} while (max_retries--);
return res_val;
}
static int dcn314_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
unsigned int msg_id,
unsigned int param)
{
uint32_t result;
result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000);
ASSERT(result == VBIOSSMC_Result_OK);
smu_print("SMU response after wait: %d\n", result);
if (result == VBIOSSMC_Status_BUSY)
return -1;
/* First clear response register */
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Status_BUSY);
/* Set the parameter register for the SMU message, unit is MHz */
REG_WRITE(MP1_SMN_C2PMSG_83, param);
/* Trigger the message transaction by writing the message ID */
REG_WRITE(MP1_SMN_C2PMSG_67, msg_id);
result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000);
if (result == VBIOSSMC_Result_Failed) {
if (msg_id == VBIOSSMC_MSG_TransferTableDram2Smu &&
param == TABLE_WATERMARKS)
DC_LOG_WARNING("Watermarks table not configured properly by SMU");
else
ASSERT(0);
REG_WRITE(MP1_SMN_C2PMSG_91, VBIOSSMC_Result_OK);
return -1;
}
if (IS_SMU_TIMEOUT(result)) {
ASSERT(0);
dm_helpers_smu_timeout(CTX, msg_id, param, 10 * 200000);
}
return REG_READ(MP1_SMN_C2PMSG_83);
}
int dcn314_smu_get_smu_version(struct clk_mgr_internal *clk_mgr)
{
return dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_GetSmuVersion,
0);
}
int dcn314_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz)
{
int actual_dispclk_set_mhz = -1;
if (!clk_mgr->smu_present)
return requested_dispclk_khz;
/* Unit of SMU msg parameter is MHz */
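/* e.g. a request of 600001 kHz is sent as khz_to_mhz_ceil(600001) = 601 MHz,
 * and the SMU's MHz reply is scaled back to kHz by the return below */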
actual_dispclk_set_mhz = dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetDispclkFreq,
khz_to_mhz_ceil(requested_dispclk_khz));
return actual_dispclk_set_mhz * 1000;
}
int dcn314_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr)
{
int actual_dprefclk_set_mhz = -1;
if (!clk_mgr->smu_present)
return clk_mgr->base.dprefclk_khz;
actual_dprefclk_set_mhz = dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetDprefclkFreq,
khz_to_mhz_ceil(clk_mgr->base.dprefclk_khz));
/* TODO: add code for programming DP DTO, currently this is done by command table */
return actual_dprefclk_set_mhz * 1000;
}
int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz)
{
int actual_dcfclk_set_mhz = -1;
if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
return -1;
if (!clk_mgr->smu_present)
return requested_dcfclk_khz;
actual_dcfclk_set_mhz = dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
khz_to_mhz_ceil(requested_dcfclk_khz));
return actual_dcfclk_set_mhz * 1000;
}
int dcn314_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz)
{
int actual_min_ds_dcfclk_mhz = -1;
if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
return -1;
if (!clk_mgr->smu_present)
return requested_min_ds_dcfclk_khz;
actual_min_ds_dcfclk_mhz = dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetMinDeepSleepDcfclk,
khz_to_mhz_ceil(requested_min_ds_dcfclk_khz));
return actual_min_ds_dcfclk_mhz * 1000;
}
int dcn314_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz)
{
int actual_dppclk_set_mhz = -1;
if (!clk_mgr->smu_present)
return requested_dpp_khz;
actual_dppclk_set_mhz = dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetDppclkFreq,
khz_to_mhz_ceil(requested_dpp_khz));
return actual_dppclk_set_mhz * 1000;
}
void dcn314_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info)
{
if (!clk_mgr->base.ctx->dc->debug.pstate_enabled)
return;
if (!clk_mgr->smu_present)
return;
//TODO: Work with smu team to define optimization options.
dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetDisplayIdleOptimizations,
idle_info);
}
void dcn314_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable)
{
union display_idle_optimization_u idle_info = { 0 };
if (!clk_mgr->smu_present)
return;
if (enable) {
idle_info.idle_info.df_request_disabled = 1;
idle_info.idle_info.phy_ref_clk_off = 1;
}
dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetDisplayIdleOptimizations,
idle_info.data);
}
void dcn314_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr)
{
if (!clk_mgr->smu_present)
return;
dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_UpdatePmeRestore,
0);
}
void dcn314_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high)
{
if (!clk_mgr->smu_present)
return;
dcn314_smu_send_msg_with_param(clk_mgr,
VBIOSSMC_MSG_SetVbiosDramAddrHigh, addr_high);
}
void dcn314_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low)
{
if (!clk_mgr->smu_present)
return;
dcn314_smu_send_msg_with_param(clk_mgr,
VBIOSSMC_MSG_SetVbiosDramAddrLow, addr_low);
}
void dcn314_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr)
{
if (!clk_mgr->smu_present)
return;
dcn314_smu_send_msg_with_param(clk_mgr,
VBIOSSMC_MSG_TransferTableSmu2Dram, TABLE_DPMCLOCKS);
}
void dcn314_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr)
{
if (!clk_mgr->smu_present)
return;
dcn314_smu_send_msg_with_param(clk_mgr,
VBIOSSMC_MSG_TransferTableDram2Smu, TABLE_WATERMARKS);
}
void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support)
{
unsigned int msg_id, param;
if (!clk_mgr->smu_present)
return;
if (!clk_mgr->base.ctx->dc->debug.enable_z9_disable_interface &&
(support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY))
support = DCN_ZSTATE_SUPPORT_DISALLOW;
// Arg[15:0] = 8/9/0 for Z8/Z9/disallow -> existing bits
// Arg[16] = Disallow Z9 -> new bit
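// e.g. DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY below encodes to
// 0x00010008 = (1 << 16) | 8, i.e. the new "disallow Z9" bit on top of the
// existing low bits.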
switch (support) {
case DCN_ZSTATE_SUPPORT_ALLOW:
msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
param = 9;
break;
case DCN_ZSTATE_SUPPORT_DISALLOW:
msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
param = 8;
break;
case DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY:
msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
param = 0x00010008;
break;
default: //DCN_ZSTATE_SUPPORT_UNKNOWN
msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
param = 0;
break;
}
dcn314_smu_send_msg_with_param(
clk_mgr,
msg_id,
param);
}
/* Arg = 1: turn DTB clock on; 0: turn DTB clock off. When it is on, it runs at 600 MHz */
void dcn314_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable)
{
if (!clk_mgr->smu_present)
return;
dcn314_smu_send_msg_with_param(
clk_mgr,
VBIOSSMC_MSG_SetDtbClk,
enable);
}

View file

@@ -0,0 +1,79 @@
/* SPDX-License-Identifier: MIT */
/*
* Copyright 2022 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: AMD
*
*/
#ifndef DAL_DC_314_SMU_H_
#define DAL_DC_314_SMU_H_
#include "smu13_driver_if_v13_0_4.h"
typedef enum {
WCK_RATIO_1_1 = 0, // DDR5, Wck:ck is always 1:1;
WCK_RATIO_1_2,
WCK_RATIO_1_4,
WCK_RATIO_MAX
} WCK_RATIO_e;
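/* The ratio is consumed by dcn314_clk_mgr_helper_populate_bw_params() in
 * dcn314_clk_mgr.c, which maps it to clk_table wck_ratio values of 1, 2 or 4. */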
struct dcn314_watermarks {
// Watermarks
WatermarkRowGeneric_t WatermarkRow[WM_COUNT][NUM_WM_RANGES];
uint32_t MmHubPadding[7]; // SMU internal use
};
struct dcn314_smu_dpm_clks {
DpmClocks_t *dpm_clks;
union large_integer mc_address;
};
struct display_idle_optimization {
unsigned int df_request_disabled : 1;
unsigned int phy_ref_clk_off : 1;
unsigned int s0i2_rdy : 1;
unsigned int reserved : 29;
};
union display_idle_optimization_u {
struct display_idle_optimization idle_info;
uint32_t data;
};
int dcn314_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
int dcn314_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
int dcn314_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_dcfclk_khz);
int dcn314_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
int dcn314_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
void dcn314_smu_set_display_idle_optimization(struct clk_mgr_internal *clk_mgr, uint32_t idle_info);
void dcn314_smu_enable_phy_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
void dcn314_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
void dcn314_smu_set_dram_addr_high(struct clk_mgr_internal *clk_mgr, uint32_t addr_high);
void dcn314_smu_set_dram_addr_low(struct clk_mgr_internal *clk_mgr, uint32_t addr_low);
void dcn314_smu_transfer_dpm_table_smu_2_dram(struct clk_mgr_internal *clk_mgr);
void dcn314_smu_transfer_wm_table_dram_2_smu(struct clk_mgr_internal *clk_mgr);
void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zstate_support_state support);
void dcn314_smu_set_dtbclk(struct clk_mgr_internal *clk_mgr, bool enable);
#endif /* DAL_DC_314_SMU_H_ */

View file

@@ -173,11 +173,14 @@ static void dcn315_update_clocks(struct clk_mgr *clk_mgr_base,
}
if (should_set_clock(safe_to_lower, new_clocks->dispclk_khz, clk_mgr_base->clks.dispclk_khz)) {
/* No need to apply the w/a if we haven't taken over from bios yet */
if (clk_mgr_base->clks.dispclk_khz)
dcn315_disable_otg_wa(clk_mgr_base, true);
clk_mgr_base->clks.dispclk_khz = new_clocks->dispclk_khz;
dcn315_smu_set_dispclk(clk_mgr, clk_mgr_base->clks.dispclk_khz);
if (clk_mgr_base->clks.dispclk_khz)
dcn315_disable_otg_wa(clk_mgr_base, false);
update_dispclk = true;
}

Some files were not shown because too many files have changed in this diff Show more