drm/amdgpu: Fix the warning info in mode1 reset
Fix the warning info below during mode1 reset.
[ +0.000004] Call Trace:
[ +0.000004] <TASK>
[ +0.000006] ? show_regs+0x6e/0x80
[ +0.000011] ? __flush_work.isra.0+0x2e8/0x390
[ +0.000005] ? __warn+0x91/0x150
[ +0.000009] ? __flush_work.isra.0+0x2e8/0x390
[ +0.000006] ? report_bug+0x19d/0x1b0
[ +0.000013] ? handle_bug+0x46/0x80
[ +0.000012] ? exc_invalid_op+0x1d/0x80
[ +0.000011] ? asm_exc_invalid_op+0x1f/0x30
[ +0.000014] ? __flush_work.isra.0+0x2e8/0x390
[ +0.000007] ? __flush_work.isra.0+0x208/0x390
[ +0.000007] ? _prb_read_valid+0x216/0x290
[ +0.000008] __cancel_work_timer+0x11d/0x1a0
[ +0.000007] ? try_to_grab_pending+0xe8/0x190
[ +0.000012] cancel_work_sync+0x14/0x20
[ +0.000008] amddrm_sched_stop+0x3c/0x1d0 [amd_sched]
[ +0.000032] amdgpu_device_gpu_recover+0x29a/0xe90 [amdgpu]
This warning started appearing after applying the patch
"drm/sched: Convert drm scheduler to use a work queue rather than kthread".
The root cause is that the amdgpu driver tries to use an uninitialized
work_struct in struct drm_gpu_scheduler: for rings that never bring up a
scheduler, the reset path ends up calling cancel_work_sync() on work items
that were never set up, which triggers the warning in __flush_work().
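
For illustration only, a minimal sketch of the failure mode (a
hypothetical test module, not part of the patch): cancel_work_sync()
on a work_struct that was never passed through INIT_WORK() leaves
work->func NULL, so __flush_work() trips its WARN_ON() and prints a
trace like the one above.

// SPDX-License-Identifier: GPL-2.0
/* Hypothetical reproducer module; the demo_* names are made up. */
#include <linux/module.h>
#include <linux/workqueue.h>

/* Zero-initialized and never passed to INIT_WORK(), so func == NULL. */
static struct work_struct demo_work;

static int __init demo_init(void)
{
	/* __cancel_work_timer() -> __flush_work() hits WARN_ON(!work->func). */
	cancel_work_sync(&demo_work);
	return 0;
}

static void __exit demo_exit(void)
{
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");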
v2:
- Rename the function to amdgpu_ring_sched_ready and move it to
amdgpu_ring.c (Alex)
v3:
- Fix a few more checks based on Vitaly's patch (Alex)
v4:
- squash in fix noticed by Bert in
https://gitlab.freedesktop.org/drm/amd/-/issues/3139
Fixes: 11b3b9f461 ("drm/sched: Check scheduler ready before calling timeout handling")
Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak@amd.com>
Signed-off-by: Ma Jun <Jun.Ma2@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
parent faf51b201b
commit bb34bc2cd3
5 changed files with 24 additions and 12 deletions
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
@@ -290,7 +290,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus
 	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
 
-		if (!(ring && drm_sched_wqueue_ready(&ring->sched)))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		/* stop secheduler and drain ring. */
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1678,7 +1678,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 		drm_sched_wqueue_stop(&ring->sched);
 	}
@@ -1694,7 +1694,7 @@ static int amdgpu_debugfs_test_ib_show(struct seq_file *m, void *unused)
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 		drm_sched_wqueue_start(&ring->sched);
 	}
@@ -1916,8 +1916,8 @@ static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
 
 	ring = adev->rings[val];
 
-	if (!ring || !ring->funcs->preempt_ib ||
-	    !drm_sched_wqueue_ready(&ring->sched))
+	if (!amdgpu_ring_sched_ready(ring) ||
+	    !ring->funcs->preempt_ib)
 		return -EINVAL;
 
 	/* the last preemption failed */
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -5021,7 +5021,7 @@ bool amdgpu_device_has_job_running(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		spin_lock(&ring->sched.job_list_lock);
@@ -5160,7 +5160,7 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		/* Clear job fence from fence drv to avoid force_completion
@@ -5627,7 +5627,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = tmp_adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		drm_sched_stop(&ring->sched, job ? &job->base : NULL);
@@ -5696,7 +5696,7 @@ skip_hw_reset:
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = tmp_adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		drm_sched_start(&ring->sched, true);
@@ -6051,7 +6051,7 @@ pci_ers_result_t amdgpu_pci_error_detected(struct pci_dev *pdev, pci_channel_sta
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		drm_sched_stop(&ring->sched, NULL);
@@ -6179,7 +6179,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
 
-		if (!ring || !drm_sched_wqueue_ready(&ring->sched))
+		if (!amdgpu_ring_sched_ready(ring))
 			continue;
 
 		drm_sched_start(&ring->sched, true);
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -635,6 +635,7 @@ int amdgpu_ring_test_helper(struct amdgpu_ring *ring)
 			      ring->name);
 
 	ring->sched.ready = !r;
+
 	return r;
 }
 
@@ -717,3 +718,14 @@ void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring)
 	if (ring->is_sw_ring)
 		amdgpu_sw_ring_ib_mark_offset(ring, AMDGPU_MUX_OFFSET_TYPE_DE);
 }
+
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring)
+{
+	if (!ring)
+		return false;
+
+	if (ring->no_scheduler || !drm_sched_wqueue_ready(&ring->sched))
+		return false;
+
+	return true;
+}
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -450,5 +450,5 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 int amdgpu_ib_pool_init(struct amdgpu_device *adev);
 void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
 int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-
+bool amdgpu_ring_sched_ready(struct amdgpu_ring *ring);
 #endif
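
Taken together, the hunks above converge every ring walk on a single
guard. A condensed sketch of the resulting call-site pattern follows
(the loop body is lifted from the amdgpu_device.c hunks; the wrapper
function is hypothetical, not part of the patch):

/* Hypothetical wrapper showing the converged call-site pattern. */
static void stop_all_ring_scheds(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		struct amdgpu_ring *ring = adev->rings[i];

		/* One guard replaces the open-coded NULL check:
		 * amdgpu_ring_sched_ready() also rejects no_scheduler
		 * rings, whose sched work items were never initialized.
		 */
		if (!amdgpu_ring_sched_ready(ring))
			continue;

		drm_sched_stop(&ring->sched, NULL);
	}
}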