mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
drm/amdgpu: add RAS poison creation handler (v2)
Prepare for the implementation of the poison consumption handler. v2: separate the UMC handler from poison creation. Signed-off-by: Tao Zhou <tao.zhou1@amd.com> Reviewed-by: Hawking Zhang <Hawking.Zhang@amd.com> Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent
cc9d82fc96
commit
50a7d025ca
1 changed files with 44 additions and 26 deletions
|
@ -1515,12 +1515,45 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
|
||||||
/* ras fs end */
|
/* ras fs end */
|
||||||
|
|
||||||
/* ih begin */
|
/* ih begin */
|
||||||
|
/*
 * RAS interrupt handler for poison creation.
 *
 * @obj:   RAS manager; only obj->adev->dev is used, as the logging device.
 * @entry: interrupt vector entry; accepted but not read by this function.
 *
 * Only emits an informational message saying that poison was created and
 * that no user action is needed. No error counters are touched here.
 */
static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj,
|
||||||
|
struct amdgpu_iv_entry *entry)
|
||||||
|
{
|
||||||
|
dev_info(obj->adev->dev,
|
||||||
|
"Poison is created, no user action is needed.\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Dispatch one RAS interrupt vector entry to the callback stored in
 * obj->ih_data.cb and fold the error counts it reports into @obj.
 *
 * @obj:   RAS manager whose ih_data.cb is invoked and whose
 *         err_data.ue_count / err_data.ce_count are updated.
 * @entry: interrupt vector entry, forwarded unchanged to the callback.
 *
 * Returns without doing anything when no callback is set. The counts from
 * the zero-initialised local err_data are added to obj->err_data only when
 * the callback returns AMDGPU_RAS_SUCCESS.
 */
static void amdgpu_ras_interrupt_umc_handler(struct ras_manager *obj,
|
||||||
|
struct amdgpu_iv_entry *entry)
|
||||||
|
{
|
||||||
|
struct ras_ih_data *data = &obj->ih_data;
|
||||||
|
struct ras_err_data err_data = {0, 0, 0, NULL};
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* No callback registered: there is nothing to dispatch to. */
if (!data->cb)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Let IP handle its data, maybe we need get the output
|
||||||
|
* from the callback to update the error type/count, etc
|
||||||
|
*/
|
||||||
|
ret = data->cb(obj->adev, &err_data, entry);
|
||||||
|
/* ue will trigger an interrupt, and in that case
|
||||||
|
* we need do a reset to recovery the whole system.
|
||||||
|
* But leave IP do that recovery, here we just dispatch
|
||||||
|
* the error.
|
||||||
|
*/
|
||||||
|
if (ret == AMDGPU_RAS_SUCCESS) {
|
||||||
|
/* these counts could be left as 0 if
|
||||||
|
* some blocks do not count error number
|
||||||
|
*/
|
||||||
|
obj->err_data.ue_count += err_data.ue_count;
|
||||||
|
obj->err_data.ce_count += err_data.ce_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
|
static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
|
||||||
{
|
{
|
||||||
struct ras_ih_data *data = &obj->ih_data;
|
struct ras_ih_data *data = &obj->ih_data;
|
||||||
struct amdgpu_iv_entry entry;
|
struct amdgpu_iv_entry entry;
|
||||||
int ret;
|
|
||||||
struct ras_err_data err_data = {0, 0, 0, NULL};
|
|
||||||
|
|
||||||
while (data->rptr != data->wptr) {
|
while (data->rptr != data->wptr) {
|
||||||
rmb();
|
rmb();
|
||||||
|
@ -1531,30 +1564,15 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
|
||||||
data->rptr = (data->aligned_element_size +
|
data->rptr = (data->aligned_element_size +
|
||||||
data->rptr) % data->ring_size;
|
data->rptr) % data->ring_size;
|
||||||
|
|
||||||
if (data->cb) {
|
if (amdgpu_ras_is_poison_mode_supported(obj->adev)) {
|
||||||
if (amdgpu_ras_is_poison_mode_supported(obj->adev) &&
|
if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
|
||||||
obj->head.block == AMDGPU_RAS_BLOCK__UMC)
|
amdgpu_ras_interrupt_poison_creation_handler(obj, &entry);
|
||||||
dev_info(obj->adev->dev,
|
} else {
|
||||||
"Poison is created, no user action is needed.\n");
|
if (obj->head.block == AMDGPU_RAS_BLOCK__UMC)
|
||||||
else {
|
amdgpu_ras_interrupt_umc_handler(obj, &entry);
|
||||||
/* Let IP handle its data, maybe we need get the output
|
else
|
||||||
* from the callback to udpate the error type/count, etc
|
dev_warn(obj->adev->dev,
|
||||||
*/
|
"No RAS interrupt handler for non-UMC block with poison disabled.\n");
|
||||||
memset(&err_data, 0, sizeof(err_data));
|
|
||||||
ret = data->cb(obj->adev, &err_data, &entry);
|
|
||||||
/* ue will trigger an interrupt, and in that case
|
|
||||||
* we need do a reset to recovery the whole system.
|
|
||||||
* But leave IP do that recovery, here we just dispatch
|
|
||||||
* the error.
|
|
||||||
*/
|
|
||||||
if (ret == AMDGPU_RAS_SUCCESS) {
|
|
||||||
/* these counts could be left as 0 if
|
|
||||||
* some blocks do not count error number
|
|
||||||
*/
|
|
||||||
obj->err_data.ue_count += err_data.ue_count;
|
|
||||||
obj->err_data.ce_count += err_data.ce_count;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Reference in a new issue