// SPDX-License-Identifier: MIT
/*
 * Copyright © 2014-2018 Intel Corporation
 */

#include "i915_drv.h"
#include "i915_reg.h"
#include "intel_context.h"
#include "intel_engine_pm.h"
#include "intel_engine_regs.h"
#include "intel_gpu_commands.h"
#include "intel_gt.h"
#include "intel_gt_ccs_mode.h"
#include "intel_gt_mcr.h"
#include "intel_gt_print.h"
#include "intel_gt_regs.h"
#include "intel_ring.h"
#include "intel_workarounds.h"

#include "display/intel_fbc_regs.h"

/**
 * DOC: Hardware workarounds
 *
 * Hardware workarounds are register programming documented to be executed in
 * the driver that fall outside of the normal programming sequences for a
 * platform. There are some basic categories of workarounds, depending on
 * how/when they are applied:
 *
 * - Context workarounds: workarounds that touch registers that are
 *   saved/restored to/from the HW context image. The list is emitted (via Load
 *   Register Immediate commands) once when initializing the device and saved in
 *   the default context. That default context is then used on every context
 *   creation to have a "primed golden context", i.e. a context image that
 *   already contains the changes needed to all the registers.
 *
 *   Context workarounds should be implemented in the \*_ctx_workarounds_init()
 *   variants respective to the targeted platforms.
 *
 * - Engine workarounds: the list of these WAs is applied whenever the specific
 *   engine is reset. It's also possible that a set of engine classes share a
 *   common power domain and they are reset together. This happens on some
 *   platforms with render and compute engines. In this case (at least) one of
 *   them needs to keep the workaround programming: the approach taken in the
 *   driver is to tie those workarounds to the first compute/render engine that
 *   is registered. When executing with GuC submission, engine resets are
 *   outside of kernel driver control, hence the list of registers involved is
 *   written once, on engine initialization, and then passed to GuC, that
 *   saves/restores their values before/after the reset takes place. See
 *   ``drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c`` for reference.
 *
 *   Workarounds for registers specific to RCS and CCS should be implemented in
 *   rcs_engine_wa_init() and ccs_engine_wa_init(), respectively; those for
 *   registers belonging to BCS, VCS or VECS should be implemented in
 *   xcs_engine_wa_init(). Workarounds for registers not belonging to a specific
 *   engine's MMIO range but that are part of the common RCS/CCS reset domain
 *   should be implemented in general_render_compute_wa_init(). The settings
 *   for CCS load balancing should be added in ccs_engine_wa_mode().
 *
 * - GT workarounds: the list of these WAs is applied whenever these registers
 *   revert to their default values: on GPU reset, suspend/resume [1]_, etc.
 *
 *   GT workarounds should be implemented in the \*_gt_workarounds_init()
 *   variants respective to the targeted platforms.
 *
 * - Register whitelist: some workarounds need to be implemented in userspace,
 *   but need to touch privileged registers. The whitelist in the kernel
 *   instructs the hardware to allow the access to happen. From the kernel side,
 *   this is just a special case of an MMIO workaround (as we write the list of
 *   these to-be-whitelisted registers to some special HW registers).
 *
 *   Register whitelisting should be done in the \*_whitelist_build() variants
 *   respective to the targeted platforms.
 *
 * - Workaround batchbuffers: buffers that get executed automatically by the
 *   hardware on every HW context restore. These buffers are created and
 *   programmed in the default context so the hardware always goes through those
 *   programming sequences when switching contexts. The support for workaround
 *   batchbuffers is enabled by these hardware mechanisms:
 *
 *   #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
 *      context, pointing the hardware to jump to that location when that offset
 *      is reached in the context restore. The workaround batchbuffer in the
 *      driver currently uses this mechanism for all platforms.
 *
 *   #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
 *      pointing the hardware to a buffer to continue executing after the
 *      engine registers are restored in a context restore sequence. This is
 *      currently not used in the driver.
 *
 * - Other: There are WAs that, due to their nature, cannot be applied from a
 *   central place. Those are peppered around the rest of the code, as needed.
 *   Workarounds related to the display IP are the main example.
 *
 * .. [1] Technically, some registers are powercontext saved & restored, so they
 *    survive a suspend/resume. In practice, writing them again is not too
 *    costly and simplifies things, so it's the approach taken in the driver.
 */

static void wa_init_start(struct i915_wa_list *wal, struct intel_gt *gt,
			  const char *name, const char *engine_name)
{
	wal->gt = gt;
	wal->name = name;
	wal->engine_name = engine_name;
}

#define WA_LIST_CHUNK (1 << 4)
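
/*
 * Note (added for clarity): workaround lists grow in WA_LIST_CHUNK-sized
 * steps in _wa_add() below, and wa_init_finish() trims the final allocation
 * back to the number of entries actually used.
 */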

static void wa_init_finish(struct i915_wa_list *wal)
{
	/* Trim unused entries. */
	if (!IS_ALIGNED(wal->count, WA_LIST_CHUNK)) {
		struct i915_wa *list = kmemdup_array(wal->list, wal->count,
						     sizeof(*list), GFP_KERNEL);

		if (list) {
			kfree(wal->list);
			wal->list = list;
		}
	}

	if (!wal->count)
		return;

	gt_dbg(wal->gt, "Initialized %u %s workarounds on %s\n",
	       wal->wa_count, wal->name, wal->engine_name);
}

static enum forcewake_domains
wal_get_fw_for_rmw(struct intel_uncore *uncore, const struct i915_wa_list *wal)
{
	enum forcewake_domains fw = 0;
	struct i915_wa *wa;
	unsigned int i;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		fw |= intel_uncore_forcewake_for_reg(uncore,
						     wa->reg,
						     FW_REG_READ |
						     FW_REG_WRITE);

	return fw;
}

static void _wa_add(struct i915_wa_list *wal, const struct i915_wa *wa)
{
	unsigned int addr = i915_mmio_reg_offset(wa->reg);
	struct drm_i915_private *i915 = wal->gt->i915;
	unsigned int start = 0, end = wal->count;
	const unsigned int grow = WA_LIST_CHUNK;
	struct i915_wa *wa_;

	GEM_BUG_ON(!is_power_of_2(grow));

	if (IS_ALIGNED(wal->count, grow)) { /* Either uninitialized or full. */
		struct i915_wa *list;

		list = kmalloc_array(ALIGN(wal->count + 1, grow), sizeof(*list),
				     GFP_KERNEL);
		if (!list) {
			drm_err(&i915->drm, "No space for workaround init!\n");
			return;
		}

		if (wal->list) {
			memcpy(list, wal->list, sizeof(*wa) * wal->count);
			kfree(wal->list);
		}

		wal->list = list;
	}

	while (start < end) {
		unsigned int mid = start + (end - start) / 2;

		if (i915_mmio_reg_offset(wal->list[mid].reg) < addr) {
			start = mid + 1;
		} else if (i915_mmio_reg_offset(wal->list[mid].reg) > addr) {
			end = mid;
		} else {
			wa_ = &wal->list[mid];

			if ((wa->clr | wa_->clr) && !(wa->clr & ~wa_->clr)) {
				drm_err(&i915->drm,
					"Discarding overwritten w/a for reg %04x (clear: %08x, set: %08x)\n",
					i915_mmio_reg_offset(wa_->reg),
					wa_->clr, wa_->set);

				wa_->set &= ~wa->clr;
			}

			wal->wa_count++;
			wa_->set |= wa->set;
			wa_->clr |= wa->clr;
			wa_->read |= wa->read;
			return;
		}
	}

	wal->wa_count++;
	wa_ = &wal->list[wal->count++];
	*wa_ = *wa;

	while (wa_-- > wal->list) {
		GEM_BUG_ON(i915_mmio_reg_offset(wa_[0].reg) ==
			   i915_mmio_reg_offset(wa_[1].reg));
		if (i915_mmio_reg_offset(wa_[1].reg) >
		    i915_mmio_reg_offset(wa_[0].reg))
			break;

		swap(wa_[1], wa_[0]);
	}
}

static void wa_add(struct i915_wa_list *wal, i915_reg_t reg,
		   u32 clear, u32 set, u32 read_mask, bool masked_reg)
{
	struct i915_wa wa = {
		.reg  = reg,
		.clr  = clear,
		.set  = set,
		.read = read_mask,
		.masked_reg = masked_reg,
	};

	_wa_add(wal, &wa);
}

static void wa_mcr_add(struct i915_wa_list *wal, i915_mcr_reg_t reg,
		       u32 clear, u32 set, u32 read_mask, bool masked_reg)
{
	struct i915_wa wa = {
		.mcr_reg = reg,
		.clr  = clear,
		.set  = set,
		.read = read_mask,
		.masked_reg = masked_reg,
		.is_mcr = 1,
	};

	_wa_add(wal, &wa);
}

static void
wa_write_clr_set(struct i915_wa_list *wal, i915_reg_t reg, u32 clear, u32 set)
{
	wa_add(wal, reg, clear, set, clear | set, false);
}

static void
wa_mcr_write_clr_set(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clear, u32 set)
{
	wa_mcr_add(wal, reg, clear, set, clear | set, false);
}

static void
wa_write(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, ~0, set);
}

static void
wa_write_or(struct i915_wa_list *wal, i915_reg_t reg, u32 set)
{
	wa_write_clr_set(wal, reg, set, set);
}

static void
wa_mcr_write_or(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 set)
{
	wa_mcr_write_clr_set(wal, reg, set, set);
}

static void
wa_write_clr(struct i915_wa_list *wal, i915_reg_t reg, u32 clr)
{
	wa_write_clr_set(wal, reg, clr, 0);
}

static void
wa_mcr_write_clr(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 clr)
{
	wa_mcr_write_clr_set(wal, reg, clr, 0);
}
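
/*
 * Illustrative summary (added comment, not from the original file): these
 * helpers only record the rmw request in the list; the write happens when the
 * list is applied. wa_write() replaces the whole register (clear mask ~0),
 * wa_write_or() only sets bits (clear == set leaves other bits untouched),
 * and wa_write_clr() only clears bits. E.g., for a hypothetical register FOO,
 * wa_write_clr_set(wal, FOO, GENMASK(7, 0), 5) asks for an rmw that zeroes
 * bits 7:0 and then sets that field to 5, leaving bits 31:8 alone.
 */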

/*
 * WA operations on "masked register". A masked register has the upper 16 bits
 * documented as "masked" in b-spec. Its purpose is to allow writing to just a
 * portion of the register without an rmw: you simply write in the upper 16 bits
 * the mask of bits you are going to modify.
 *
 * The wa_masked_* family of functions already does the necessary operations to
 * calculate the mask based on the parameters passed, so the user only has to
 * provide the lower 16 bits of that register.
 */
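
/*
 * Worked example (added for illustration): _MASKED_BIT_ENABLE(BIT(3)) expands
 * to (BIT(3) << 16) | BIT(3) == 0x00080008; the upper 16 bits select bit 3
 * for update and the lower 16 bits carry its new value, so the hardware can
 * apply the change without the driver doing a read-modify-write.
 */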

static void
wa_masked_en(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
}

static void
wa_mcr_masked_en(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_ENABLE(val), val, true);
}

static void
wa_masked_dis(struct i915_wa_list *wal, i915_reg_t reg, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}

static void
wa_mcr_masked_dis(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_BIT_DISABLE(val), val, true);
}

static void
wa_masked_field_set(struct i915_wa_list *wal, i915_reg_t reg,
		    u32 mask, u32 val)
{
	wa_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}

static void
wa_mcr_masked_field_set(struct i915_wa_list *wal, i915_mcr_reg_t reg,
			u32 mask, u32 val)
{
	wa_mcr_add(wal, reg, 0, _MASKED_FIELD(mask, val), mask, true);
}
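
/*
 * Illustrative example (assumed values, not from the original file): with a
 * field mask of GENMASK(9, 8), wa_masked_field_set(wal, reg, GENMASK(9, 8),
 * 0x200) records _MASKED_FIELD(GENMASK(9, 8), 0x200) == 0x03000200, which
 * updates only bits 9:8 of the register to 0b10.
 */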

static void gen6_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen7_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);
}

static void gen8_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	wa_masked_en(wal, INSTPM, INSTPM_FORCE_ORDERING);

	/* WaDisableAsyncFlipPerfMode:bdw,chv */
	wa_masked_en(wal, RING_MI_MODE(RENDER_RING_BASE), ASYNC_FLIP_PERF_DISABLE);

	/* WaDisablePartialInstShootdown:bdw,chv */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:bdw,chv */
	/* WaHdcDisableFetchWhenMasked:bdw,chv */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_DONOT_FETCH_MEM_WHEN_MASKED |
		     HDC_FORCE_NON_COHERENT);

	/* From the Haswell PRM, Command Reference: Registers, CACHE_MODE_0:
	 * "The Hierarchical Z RAW Stall Optimization allows non-overlapping
	 * polygons in the same 8x4 pixel/sample area to be processed without
	 * stalling waiting for the earlier ones to write to Hierarchical Z
	 * buffer."
	 *
	 * This optimization is off by default for BDW and CHV; turn it on.
	 */
	wa_masked_dis(wal, CACHE_MODE_0_GEN7, HIZ_RAW_STALL_OPT_DISABLE);

	/* Wa4x4STCOptimizationDisable:bdw,chv */
	wa_masked_en(wal, CACHE_MODE_1, GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);
}

static void bdw_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw
	 *
	 * Also see the related UCGTCL1 write in bdw_init_clock_gating()
	 * to disable EUTC clock gating.
	 */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
			 DOP_CLOCK_GATING_DISABLE);

	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
			 GEN8_SAMPLER_POWER_BYPASS_DIS);

	wa_masked_en(wal, HDC_CHICKEN0,
		     /* WaForceContextSaveRestoreNonCoherent:bdw */
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     /* WaDisableFenceDestinationToSLM:bdw (pre-prod) */
		     (INTEL_INFO(i915)->gt == 3 ? HDC_FENCE_DEST_SLM_DISABLE : 0));
}

static void chv_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen8_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:chv */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN, STALL_DOP_GATING_DISABLE);

	/* Improve HiZ throughput on CHV. */
	wa_masked_en(wal, HIZ_CHICKEN, CHV_HZ_8X8_MODE_IN_1X);
}

static void gen9_ctx_workarounds_init(struct intel_engine_cs *engine,
				      struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN9_PBE_COMPRESSED_HASH_SELECTION);
		wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
				 GEN9_SAMPLER_HASH_COMPRESSED_READ_ADDR);
	}

	/* WaClearFlowControlGpgpuContextSave:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialInstShootdown:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 FLOW_CONTROL_ENABLE |
			 PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE);

	/* WaEnableYV12BugFixInHalfSliceChicken7:skl,bxt,kbl,glk,cfl */
	/* WaEnableSamplerGPGPUPreemptionSupport:skl,bxt,kbl,cfl */
	wa_mcr_masked_en(wal, GEN9_HALF_SLICE_CHICKEN7,
			 GEN9_ENABLE_YV12_BUGFIX |
			 GEN9_ENABLE_GPGPU_PREEMPTION);

	/* Wa4x4STCOptimizationDisable:skl,bxt,kbl,glk,cfl */
	/* WaDisablePartialResolveInVc:skl,bxt,kbl,cfl */
	wa_masked_en(wal, CACHE_MODE_1,
		     GEN8_4x4_STC_OPTIMIZATION_DISABLE |
		     GEN9_PARTIAL_RESOLVE_IN_VC_DISABLE);

	/* WaCcsTlbPrefetchDisable:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_dis(wal, GEN9_HALF_SLICE_CHICKEN5,
			  GEN9_CCS_TLB_PREFETCH_ENABLE);

	/* WaForceContextSaveRestoreNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_CONTEXT_SAVE_RESTORE_NON_COHERENT |
		     HDC_FORCE_CSR_NON_COHERENT_OVR_DISABLE);

	/* WaForceEnableNonCoherent and WaDisableHDCInvalidation are
	 * both tied to WaForceContextSaveRestoreNonCoherent
	 * in some hsds for skl. We keep the tie for all gen9. The
	 * documentation is a bit hazy and so we want to get common behaviour,
	 * even though there is no clear evidence we would need both on kbl/bxt.
	 * This area has been a source of system hangs so we play it safe
	 * and mimic the skl regardless of what bspec says.
	 *
	 * Use Force Non-Coherent whenever executing a 3D context. This
	 * is a workaround for a possible hang in the unlikely event
	 * a TLB invalidation occurs during a PSD flush.
	 */

	/* WaForceEnableNonCoherent:skl,bxt,kbl,cfl */
	wa_masked_en(wal, HDC_CHICKEN0,
		     HDC_FORCE_NON_COHERENT);

	/* WaDisableSamplerPowerBypassForSOPingPong:skl,bxt,kbl,cfl */
	if (IS_SKYLAKE(i915) ||
	    IS_KABYLAKE(i915) ||
	    IS_COFFEELAKE(i915) ||
	    IS_COMETLAKE(i915))
		wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN3,
				 GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* WaDisableSTUnitPowerOptimization:skl,bxt,kbl,glk,cfl */
	wa_mcr_masked_en(wal, HALF_SLICE_CHICKEN2, GEN8_ST_PO_DISABLE);

	/*
	 * Supporting preemption with fine-granularity requires changes in the
	 * batch buffer programming. Since we can't break old userspace, we
	 * need to set our default preemption level to a safe value. Userspace
	 * is still able to use more fine-grained preemption levels, since in
	 * WaEnablePreemptionGranularityControlByUMD we're whitelisting the
	 * per-ctx register. As such, WaDisable{3D,GPGPU}MidCmdPreemption are
	 * not real HW workarounds, but merely a way to start using preemption
	 * while maintaining old contract with userspace.
	 */

	/* WaDisable3DMidCmdPreemption:skl,bxt,glk,cfl,[cnl] */
	wa_masked_dis(wal, GEN8_CS_CHICKEN1, GEN9_PREEMPT_3D_OBJECT_LEVEL);

	/* WaDisableGPGPUMidCmdPreemption:skl,bxt,blk,cfl,[cnl] */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_COMMAND_LEVEL);

	/* WaClearHIZ_WM_CHICKEN3:bxt,glk */
	if (IS_GEN9_LP(i915))
		wa_masked_en(wal, GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ);
}

static void skl_tune_iz_hashing(struct intel_engine_cs *engine,
				struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;
	u8 vals[3] = { 0, 0, 0 };
	unsigned int i;

	for (i = 0; i < 3; i++) {
		u8 ss;

		/*
		 * Only consider slices where one, and only one, subslice has 7
		 * EUs
		 */
		if (!is_power_of_2(gt->info.sseu.subslice_7eu[i]))
			continue;

		/*
		 * subslice_7eu[i] != 0 (because of the check above) and
		 * ss_max == 4 (maximum number of subslices possible per slice)
		 *
		 * -> 0 <= ss <= 3;
		 */
		ss = ffs(gt->info.sseu.subslice_7eu[i]) - 1;
		vals[i] = 3 - ss;
	}

	if (vals[0] == 0 && vals[1] == 0 && vals[2] == 0)
		return;

	/* Tune IZ hashing. See intel_device_info_runtime_init() */
	wa_masked_field_set(wal, GEN7_GT_MODE,
			    GEN9_IZ_HASHING_MASK(2) |
			    GEN9_IZ_HASHING_MASK(1) |
			    GEN9_IZ_HASHING_MASK(0),
			    GEN9_IZ_HASHING(2, vals[2]) |
			    GEN9_IZ_HASHING(1, vals[1]) |
			    GEN9_IZ_HASHING(0, vals[0]));
}

static void skl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);
	skl_tune_iz_hashing(engine, wal);
}

static void bxt_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaDisableThreadStallDopClockGating:bxt */
	wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN,
			 STALL_DOP_GATING_DISABLE);

	/* WaToEnableHwFixForPushConstHWBug:bxt */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void kbl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:kbl */
	if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_C0, STEP_FOREVER))
		wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
			     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:kbl */
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void glk_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:glk */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);
}

static void cfl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen9_ctx_workarounds_init(engine, wal);

	/* WaToEnableHwFixForPushConstHWBug:cfl */
	wa_masked_en(wal, COMMON_SLICE_CHICKEN2,
		     GEN8_SBE_DISABLE_REPLAY_BUF_OPTIMIZATION);

	/* WaDisableSbeCacheDispatchPortSharing:cfl */
	wa_mcr_masked_en(wal, GEN8_HALF_SLICE_CHICKEN1,
			 GEN7_SBE_SS_CACHE_DISPATCH_PORT_SHARING_DISABLE);
}

static void icl_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	/* Wa_1406697149 (WaDisableBankHangMode:icl) */
	wa_write(wal, GEN8_L3CNTLREG, GEN8_ERRDETBCTRL);

	/* WaForceEnableNonCoherent:icl
	 * This is not the same workaround as in early Gen9 platforms, where
	 * lacking this could cause system hangs, but coherency performance
	 * overhead is high and only a few compute workloads really need it
	 * (the register is whitelisted in hardware now, so UMDs can opt in
	 * for coherency if they have a good reason).
	 */
	wa_mcr_masked_en(wal, ICL_HDC_MODE, HDC_FORCE_NON_COHERENT);

	/* WaEnableFloatBlendOptimization:icl */
	wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
		   _MASKED_BIT_ENABLE(FLOAT_BLEND_OPTIMIZATION_ENABLE),
		   0 /* write-only, so skip validation */,
		   true);

	/* WaDisableGPGPUMidThreadPreemption:icl */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/* allow headerless messages for preemptible GPGPU context */
	wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
			 GEN11_SAMPLER_ENABLE_HEADLESS_MSG);

	/* Wa_1604278689:icl,ehl */
	wa_write(wal, IVB_FBC_RT_BASE, 0xFFFFFFFF & ~ILK_FBC_RT_VALID);
	wa_write_clr_set(wal, IVB_FBC_RT_BASE_UPPER,
			 0,
			 0xFFFFFFFF);

	/* Wa_1406306137:icl,ehl */
	wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN11_DIS_PICK_2ND_EU);
}

/*
 * These settings aren't actually workarounds, but general tuning settings that
 * need to be programmed on the dg2 platform.
 */
static void dg2_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
	wa_mcr_masked_en(wal, CHICKEN_RASTER_2, TBIMR_FAST_CLIP);
	wa_mcr_write_clr_set(wal, XEHP_L3SQCREG5, L3_PWM_TIMER_INIT_VAL_MASK,
			     REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f));
	wa_mcr_write_clr_set(wal, XEHP_FF_MODE2, FF_MODE2_TDS_TIMER_MASK,
			     FF_MODE2_TDS_TIMER_128);
}

static void gen12_ctx_workarounds_init(struct intel_engine_cs *engine,
				       struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/*
	 * Wa_1409142259:tgl,dg1,adl-p,adl-n
	 * Wa_1409347922:tgl,dg1,adl-p
	 * Wa_1409252684:tgl,dg1,adl-p
	 * Wa_1409217633:tgl,dg1,adl-p
	 * Wa_1409207793:tgl,dg1,adl-p
	 * Wa_1409178076:tgl,dg1,adl-p,adl-n
	 * Wa_1408979724:tgl,dg1,adl-p,adl-n
	 * Wa_14010443199:tgl,rkl,dg1,adl-p,adl-n
	 * Wa_14010698770:tgl,rkl,dg1,adl-s,adl-p,adl-n
	 * Wa_1409342910:tgl,rkl,dg1,adl-s,adl-p,adl-n
	 * Wa_22010465259:tgl,rkl,dg1,adl-s,adl-p,adl-n
	 */
	wa_masked_en(wal, GEN11_COMMON_SLICE_CHICKEN3,
		     GEN12_DISABLE_CPS_AWARE_COLOR_PIPE);

	/* WaDisableGPGPUMidThreadPreemption:gen12 */
	wa_masked_field_set(wal, GEN8_CS_CHICKEN1,
			    GEN9_PREEMPT_GPGPU_LEVEL_MASK,
			    GEN9_PREEMPT_GPGPU_THREAD_GROUP_LEVEL);

	/*
	 * Wa_16011163337 - GS_TIMER
	 *
	 * TDS_TIMER: Although some platforms refer to it as Wa_1604555607, we
	 * need to program it even on those that don't explicitly list that
	 * workaround.
	 *
	 * Note that the programming of GEN12_FF_MODE2 is further modified
	 * according to the FF_MODE2 guidance given by Wa_1608008084.
	 * Wa_1608008084 tells us the FF_MODE2 register will return the wrong
	 * value when read from the CPU.
	 *
	 * The default value for this register is zero for all fields.
	 * So instead of doing a RMW we should just write the desired values
	 * for TDS and GS timers. Note that since the readback can't be trusted,
	 * the clear mask is just set to ~0 to make sure other bits are not
	 * inadvertently set. For the same reason read verification is ignored.
	 */
	wa_add(wal,
	       GEN12_FF_MODE2,
	       ~0,
	       FF_MODE2_TDS_TIMER_128 | FF_MODE2_GS_TIMER_224,
	       0, false);

	if (!IS_DG1(i915)) {
		/* Wa_1806527549 */
		wa_masked_en(wal, HIZ_CHICKEN, HZ_DEPTH_TEST_LE_GE_OPT_DISABLE);

		/* Wa_1606376872 */
		wa_masked_en(wal, COMMON_SLICE_CHICKEN4, DISABLE_TDC_LOAD_BALANCING_CALC);
	}

	/*
	 * This bit must be set to enable performance optimization for fast
	 * clears.
	 */
	wa_mcr_write_or(wal, GEN8_WM_CHICKEN2, WAIT_ON_DEPTH_STALL_DONE_DISABLE);
}

static void dg1_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	gen12_ctx_workarounds_init(engine, wal);

	/* Wa_1409044764 */
	wa_masked_dis(wal, GEN11_COMMON_SLICE_CHICKEN3,
		      DG1_FLOAT_POINT_BLEND_OPT_STRICT_MODE_EN);

	/* Wa_22010493298 */
	wa_masked_en(wal, HIZ_CHICKEN,
		     DG1_HZ_READ_SUPPRESSION_OPTIMIZATION_DISABLE);
}

static void dg2_ctx_workarounds_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	dg2_ctx_gt_tuning_init(engine, wal);

	/* Wa_16013271637:dg2 */
	wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
			 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);

	/* Wa_14014947963:dg2 */
	wa_masked_field_set(wal, VF_PREEMPTION, PREEMPTION_VERTEX_COUNT, 0x4000);

	/* Wa_18018764978:dg2 */
	wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);

	/* Wa_18019271663:dg2 */
	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);

	/* Wa_14019877138:dg2 */
	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}

static void xelpg_ctx_gt_tuning_init(struct intel_engine_cs *engine,
				     struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;

	dg2_ctx_gt_tuning_init(engine, wal);

	/*
	 * Due to Wa_16014892111, the DRAW_WATERMARK tuning must be done in
	 * gen12_emit_indirect_ctx_rcs() rather than here on some early
	 * steppings.
	 */
	if (!(IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	      IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)))
		wa_add(wal, DRAW_WATERMARK, VERT_WM_VAL, 0x3FF, 0, false);
}

static void xelpg_ctx_workarounds_init(struct intel_engine_cs *engine,
				       struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;

	xelpg_ctx_gt_tuning_init(engine, wal);

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
		/* Wa_14014947963 */
		wa_masked_field_set(wal, VF_PREEMPTION,
				    PREEMPTION_VERTEX_COUNT, 0x4000);

		/* Wa_16013271637 */
		wa_mcr_masked_en(wal, XEHP_SLICE_COMMON_ECO_CHICKEN1,
				 MSC_MSAA_REODER_BUF_BYPASS_DISABLE);

		/* Wa_18019627453 */
		wa_mcr_masked_en(wal, VFLSKPD, VF_PREFETCH_TLB_DIS);

		/* Wa_18018764978 */
		wa_mcr_masked_en(wal, XEHP_PSS_MODE2, SCOREBOARD_STALL_FLUSH_CONTROL);
	}

	/* Wa_18019271663 */
	wa_masked_en(wal, CACHE_MODE_1, MSAA_OPTIMIZATION_REDUC_DISABLE);

	/* Wa_14019877138 */
	wa_mcr_masked_en(wal, XEHP_PSS_CHICKEN, FD_END_COLLECT);
}

static void fakewa_disable_nestedbb_mode(struct intel_engine_cs *engine,
					 struct i915_wa_list *wal)
{
	/*
	 * This is a "fake" workaround defined by software to ensure we
	 * maintain reliable, backward-compatible behavior for userspace with
	 * regards to how nested MI_BATCH_BUFFER_START commands are handled.
	 *
	 * The per-context setting of MI_MODE[12] determines whether the bits
	 * of a nested MI_BATCH_BUFFER_START instruction should be interpreted
	 * in the traditional manner or whether they should instead use a new
	 * tgl+ meaning that breaks backward compatibility, but allows nesting
	 * into 3rd-level batchbuffers. When this new capability was first
	 * added in TGL, it remained off by default unless a context
	 * intentionally opted in to the new behavior. However Xe_HPG now
	 * flips this on by default and requires that we explicitly opt out if
	 * we don't want the new behavior.
	 *
	 * From a SW perspective, we want to maintain the backward-compatible
	 * behavior for userspace, so we'll apply a fake workaround to set it
	 * back to the legacy behavior on platforms where the hardware default
	 * is to break compatibility. At the moment there is no Linux
	 * userspace that utilizes third-level batchbuffers, so this will
	 * prevent userspace from needing to make any changes; using the
	 * legacy meaning is the correct thing to do. If/when we have
	 * userspace consumers that want to utilize third-level batch nesting,
	 * we can provide a context parameter to allow them to opt in.
	 */
	wa_masked_dis(wal, RING_MI_MODE(engine->mmio_base), TGL_NESTED_BB_EN);
}

static void gen12_ctx_gt_mocs_init(struct intel_engine_cs *engine,
				   struct i915_wa_list *wal)
{
	u8 mocs;

	/*
	 * Some blitter commands do not have a field for MOCS; those
	 * commands will use the MOCS index pointed to by BLIT_CCTL.
	 * BLIT_CCTL registers need to be programmed as un-cached.
	 */
	if (engine->class == COPY_ENGINE_CLASS) {
		mocs = engine->gt->mocs.uc_index;
		wa_write_clr_set(wal,
				 BLIT_CCTL(engine->mmio_base),
				 BLIT_CCTL_MASK,
				 BLIT_CCTL_MOCS(mocs, mocs));
	}
}

/*
 * gen12_ctx_gt_fake_wa_init() isn't programming an official workaround
 * defined by the hardware team, but is programming general context registers.
 * Adding that context register programming to the context workarounds
 * allows us to use the wa framework for proper application and validation.
 */
static void
gen12_ctx_gt_fake_wa_init(struct intel_engine_cs *engine,
			  struct i915_wa_list *wal)
{
	if (GRAPHICS_VER_FULL(engine->i915) >= IP_VER(12, 55))
		fakewa_disable_nestedbb_mode(engine, wal);

	gen12_ctx_gt_mocs_init(engine, wal);
}

static void
__intel_engine_init_ctx_wa(struct intel_engine_cs *engine,
			   struct i915_wa_list *wal,
			   const char *name)
{
	struct drm_i915_private *i915 = engine->i915;

	wa_init_start(wal, engine->gt, name, engine->name);

	/* Applies to all engines */
	/*
	 * Fake workarounds are not actual workarounds but the programming of
	 * context registers using the workaround framework.
	 */
	if (GRAPHICS_VER(i915) >= 12)
		gen12_ctx_gt_fake_wa_init(engine, wal);

	if (engine->class != RENDER_CLASS)
		goto done;

	if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
		xelpg_ctx_workarounds_init(engine, wal);
	else if (IS_DG2(i915))
		dg2_ctx_workarounds_init(engine, wal);
	else if (IS_DG1(i915))
		dg1_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 12)
		gen12_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 11)
		icl_ctx_workarounds_init(engine, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_ctx_workarounds_init(engine, wal);
	else if (IS_GEMINILAKE(i915))
		glk_ctx_workarounds_init(engine, wal);
	else if (IS_KABYLAKE(i915))
		kbl_ctx_workarounds_init(engine, wal);
	else if (IS_BROXTON(i915))
		bxt_ctx_workarounds_init(engine, wal);
	else if (IS_SKYLAKE(i915))
		skl_ctx_workarounds_init(engine, wal);
	else if (IS_CHERRYVIEW(i915))
		chv_ctx_workarounds_init(engine, wal);
	else if (IS_BROADWELL(i915))
		bdw_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 7)
		gen7_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) == 6)
		gen6_ctx_workarounds_init(engine, wal);
	else if (GRAPHICS_VER(i915) < 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));

done:
	wa_init_finish(wal);
}

void intel_engine_init_ctx_wa(struct intel_engine_cs *engine)
{
	__intel_engine_init_ctx_wa(engine, &engine->ctx_wa_list, "context");
}
int intel_engine_emit_ctx_wa(struct i915_request *rq)
{
	struct i915_wa_list *wal = &rq->engine->ctx_wa_list;
	struct intel_uncore *uncore = rq->engine->uncore;
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;
	u32 *cs;
	int ret;

	if (wal->count == 0)
		return 0;

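	/*
	 * Bracket the register writes with barrier flushes (here and after
	 * intel_ring_advance() below) so they are ordered against the rest
	 * of the request.
	 */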
	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

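	/*
	 * Wa_14019789679 needs 4 extra dwords on top of the LRI packet for
	 * the 3DSTATE_MESH_CONTROL emitted further below.
	 */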
	if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
	     IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS)
		cs = intel_ring_begin(rq, (wal->count * 2 + 6));
	else
		cs = intel_ring_begin(rq, (wal->count * 2 + 2));

	if (IS_ERR(cs))
		return PTR_ERR(cs);

	fw = wal_get_fw_for_rmw(uncore, wal);
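
	/*
	 * Hold the MCR lock across the MMIO read-backs below so the
	 * steering cannot change underneath us, then take the uncore lock
	 * and forcewake for the raw register accesses.
	 */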
	intel_gt_mcr_lock(wal->gt, &flags);
	spin_lock(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw);

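	/*
	 * For masked registers, wa->set already encodes both the bits to
	 * clear and the bits to set, so it can be emitted as-is. For
	 * non-masked registers a plain write of wa->set would clobber the
	 * other bits, so read the current value and fold in clr/set first.
	 */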
	*cs++ = MI_LOAD_REGISTER_IMM(wal->count);
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 val;

		/* Skip reading the register if it's not really needed */
		if (wa->masked_reg || (wa->clr | wa->set) == U32_MAX) {
			val = wa->set;
		} else {
			val = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(wal->gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);
			val &= ~wa->clr;
			val |= wa->set;
		}

		*cs++ = i915_mmio_reg_offset(wa->reg);
		*cs++ = val;
	}
	*cs++ = MI_NOOP;

	/* Wa_14019789679 */
	if ((IS_GFX_GT_IP_RANGE(rq->engine->gt, IP_VER(12, 70), IP_VER(12, 74)) ||
	     IS_DG2(rq->i915)) && rq->engine->class == RENDER_CLASS) {
		*cs++ = CMD_3DSTATE_MESH_CONTROL;
		*cs++ = 0;
		*cs++ = 0;
		*cs++ = MI_NOOP;
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(wal->gt, flags);

	intel_ring_advance(rq, cs);

	ret = rq->engine->emit_flush(rq, EMIT_BARRIER);
	if (ret)
		return ret;

	return 0;
}

static void
gen4_gt_workarounds_init(struct intel_gt *gt,
			 struct i915_wa_list *wal)
{
	/* WaDisable_RenderCache_OperationalFlush:gen4,ilk */
	wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);
}

static void
g4x_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen4_gt_workarounds_init(gt, wal);

	/* WaDisableRenderCachePipelinedFlush:g4x,ilk */
	wa_masked_en(wal, CACHE_MODE_0, CM0_PIPELINED_RENDER_FLUSH_DISABLE);
}

static void
ilk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	g4x_gt_workarounds_init(gt, wal);

	wa_masked_en(wal, _3D_CHICKEN2, _3D_CHICKEN2_WM_READ_PIPELINED);
}

static void
snb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
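	/* No GT-level workarounds are currently defined for Sandybridge. */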
}

static void
ivb_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* Apply the WaDisableRHWOOptimizationForRenderHang:ivb workaround. */
	wa_masked_dis(wal,
		      GEN7_COMMON_SLICE_CHICKEN1,
		      GEN7_CSC1_RHWO_OPT_DISABLE_IN_RCC);

	/* WaApplyL3ControlAndL3ChickenMode:ivb */
	wa_write(wal, GEN7_L3CNTLREG1, GEN7_WA_FOR_GEN7_L3_CONTROL);
	wa_write(wal, GEN7_L3_CHICKEN_MODE_REGISTER, GEN7_WA_L3_CHICKEN_MODE);

	/* WaForceL3Serialization:ivb */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);
}

static void
vlv_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* WaForceL3Serialization:vlv */
	wa_write_clr(wal, GEN7_L3SQCREG4, L3SQ_URB_READ_CAM_MATCH_DISABLE);

	/*
	 * WaIncreaseL3CreditsForVLVB0:vlv
	 * This is the hardware default actually.
	 */
	wa_write(wal, GEN7_L3SQCREG1, VLV_B0_WA_L3SQCREG1_VALUE);
}

static void
hsw_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* L3 caching of data atomics doesn't work -- disable it. */
	wa_write(wal, HSW_SCRATCH1, HSW_SCRATCH1_L3_DATA_ATOMICS_DISABLE);

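	/*
	 * Added via wa_add() directly; the zero read mask skips readback
	 * verification since it is not certain the register can be read
	 * back (see the XXX note below).
	 */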
	wa_add(wal,
	       HSW_ROW_CHICKEN3, 0,
	       _MASKED_BIT_ENABLE(HSW_ROW_CHICKEN3_L3_GLOBAL_ATOMICS_DISABLE),
	       0 /* XXX does this reg exist? */, true);

	/* WaVSRefCountFullforceMissDisable:hsw */
	wa_write_clr(wal, GEN7_FF_THREAD_MODE, GEN7_FF_VS_REF_CNT_FFME);
}

static void
gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &to_gt(i915)->info.sseu;
	unsigned int slice, subslice;
	u32 mcr, mcr_mask;

	GEM_BUG_ON(GRAPHICS_VER(i915) != 9);

	/*
	 * WaProgramMgsrForCorrectSliceSpecificMmioReads:gen9,glk,kbl,cml
	 * Before any MMIO read into slice/subslice specific registers, MCR
	 * packet control register needs to be programmed to point to any
	 * enabled s/ss pair. Otherwise, incorrect values will be returned.
	 * This means each subsequent MMIO read will be forwarded to a
	 * specific s/ss combination, but this is OK since these registers
	 * are consistent across s/ss in almost all cases. On the rare
	 * occasions, such as INSTDONE, where this value is dependent
	 * on s/ss combo, the read should be done with read_subslice_reg.
	 */
	slice = ffs(sseu->slice_mask) - 1;
	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask.hsw));
	subslice = ffs(intel_sseu_get_hsw_subslices(sseu, slice));
	GEM_BUG_ON(!subslice);
	subslice--;

	/*
	 * We use GEN8_MCR..() macros to calculate the |mcr| value for
	 * Gen9 to address WaProgramMgsrForCorrectSliceSpecificMmioReads
	 */
	mcr = GEN8_MCR_SLICE(slice) | GEN8_MCR_SUBSLICE(subslice);
	mcr_mask = GEN8_MCR_SLICE_MASK | GEN8_MCR_SUBSLICE_MASK;

	drm_dbg(&i915->drm, "MCR slice:%d/subslice:%d = %x\n", slice, subslice, mcr);

	wa_write_clr_set(wal, GEN8_MCR_SELECTOR, mcr_mask, mcr);
}

static void
gen9_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	/* WaProgramMgsrForCorrectSliceSpecificMmioReads:glk,kbl,cml,gen9 */
	gen9_wa_init_mcr(i915, wal);

	/* WaDisableKillLogic:bxt,skl,kbl */
	if (!IS_COFFEELAKE(i915) && !IS_COMETLAKE(i915))
		wa_write_or(wal,
			    GAM_ECOCHK,
			    ECOCHK_DIS_TLB);

	if (HAS_LLC(i915)) {
		/* WaCompressedResourceSamplerPbeMediaNewHashMode:skl,kbl
		 *
		 * Must match Display Engine. See
		 * WaCompressedResourceDisplayNewHashMode.
		 */
		wa_write_or(wal,
			    MMCD_MISC_CTRL,
			    MMCD_PCLA | MMCD_HOTSPOT_EN);
	}

	/* WaDisableHDCInvalidation:skl,bxt,kbl,cfl */
	wa_write_or(wal,
		    GAM_ECOCHK,
		    BDW_DISABLE_HDC_INVALIDATION);
}

static void
skl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);

	/* WaDisableGafsUnitClkGating:skl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:skl */
	if (IS_SKYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, STEP_A0, STEP_H0))
		wa_write_or(wal,
			    GEN9_GAMT_ECO_REG_RW_IA,
			    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
kbl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);

	/* WaDisableDynamicCreditSharing:kbl */
	if (IS_KABYLAKE(gt->i915) && IS_GRAPHICS_STEP(gt->i915, 0, STEP_C0))
		wa_write_or(wal,
			    GAMT_CHKN_BIT_REG,
			    GAMT_CHKN_DISABLE_DYNAMIC_CREDIT_SHARING);

	/* WaDisableGafsUnitClkGating:kbl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:kbl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void
glk_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);
}

static void
cfl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen9_gt_workarounds_init(gt, wal);

	/* WaDisableGafsUnitClkGating:cfl */
	wa_write_or(wal,
		    GEN7_UCGCTL4,
		    GEN8_EU_GAUNIT_CLOCK_GATE_DISABLE);

	/* WaInPlaceDecompressionHang:cfl */
	wa_write_or(wal,
		    GEN9_GAMT_ECO_REG_RW_IA,
		    GAMT_ECO_ENABLE_IN_PLACE_DECOMPRESS);
}

static void __set_mcr_steering(struct i915_wa_list *wal,
			       i915_reg_t steering_reg,
			       unsigned int slice, unsigned int subslice)
{
	u32 mcr, mcr_mask;

	mcr = GEN11_MCR_SLICE(slice) | GEN11_MCR_SUBSLICE(subslice);
	mcr_mask = GEN11_MCR_SLICE_MASK | GEN11_MCR_SUBSLICE_MASK;

	wa_write_clr_set(wal, steering_reg, mcr_mask, mcr);
}

static void debug_dump_steering(struct intel_gt *gt)
{
	struct drm_printer p = drm_dbg_printer(&gt->i915->drm, DRM_UT_DRIVER,
					       "MCR Steering:");

	if (drm_debug_enabled(DRM_UT_DRIVER))
		intel_gt_mcr_report_steering(&p, gt, false);
}

static void __add_mcr_wa(struct intel_gt *gt, struct i915_wa_list *wal,
			 unsigned int slice, unsigned int subslice)
{
	__set_mcr_steering(wal, GEN8_MCR_SELECTOR, slice, subslice);

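	/* Record the default steering; it is included in the debug dump below. */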
	gt->default_steering.groupid = slice;
	gt->default_steering.instanceid = subslice;

	debug_dump_steering(gt);
}

static void
icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	unsigned int subslice;

	GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);

	/*
	 * Although a platform may have subslices, we need to always steer
	 * reads to the lowest instance that isn't fused off. When Render
	 * Power Gating is enabled, grabbing forcewake will only power up a
	 * single subslice (the "minconfig") if there isn't a real workload
	 * that needs to be run; this means that if we steer register reads to
	 * one of the higher subslices, we run the risk of reading back 0's or
	 * random garbage.
	 */
	subslice = __ffs(intel_sseu_get_hsw_subslices(sseu, 0));

	/*
	 * If the subslice we picked above also steers us to a valid L3 bank,
	 * then we can just rely on the default steering and won't need to
	 * worry about explicitly re-steering L3BANK reads later.
	 */
	if (gt->info.l3bank_mask & BIT(subslice))
		gt->steering_table[L3BANK] = NULL;

	__add_mcr_wa(gt, wal, 0, subslice);
}

static void
xehp_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
{
	const struct sseu_dev_info *sseu = &gt->info.sseu;
	unsigned long slice, subslice = 0, slice_mask = 0;
	u32 lncf_mask = 0;
	int i;

	/*
	 * On Xe_HP the steering increases in complexity. There are now several
	 * more units that require steering and we're not guaranteed to be able
	 * to find a common setting for all of them. These are:
	 * - GSLICE (fusable)
	 * - DSS (sub-unit within gslice; fusable)
	 * - L3 Bank (fusable)
	 * - MSLICE (fusable)
	 * - LNCF (sub-unit within mslice; always present if mslice is present)
	 *
	 * We'll do our default/implicit steering based on GSLICE (in the
	 * sliceid field) and DSS (in the subsliceid field). If we can
	 * find overlap between the valid MSLICE and/or LNCF values with
	 * a suitable GSLICE, then we can just reuse the default value and
	 * skip any explicit steering at runtime.
	 *
	 * We only need to look for overlap between GSLICE/MSLICE/LNCF to find
	 * a valid sliceid value. DSS steering is the only type of steering
	 * that utilizes the 'subsliceid' bits.
	 *
	 * Also note that, even though the steering domain is called "GSlice"
	 * and it is encoded in the register using the gslice format, the spec
	 * says that the combined (geometry | compute) fuse should be used to
	 * select the steering.
	 */

	/* Find the potential gslice candidates */
	slice_mask = intel_slicemask_from_xehp_dssmask(sseu->subslice_mask,
						       GEN_DSS_PER_GSLICE);

	/*
	 * Find the potential LNCF candidates. Either LNCF within a valid
	 * mslice is fine.
	 */
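	/* Each mslice hosts two LNCF instances, hence two mask bits per mslice. */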
	for_each_set_bit(i, &gt->info.mslice_mask, GEN12_MAX_MSLICES)
		lncf_mask |= (0x3 << (i * 2));

	/*
	 * Are there any sliceid values that work for both GSLICE and LNCF
	 * steering?
	 */
	if (slice_mask & lncf_mask) {
		slice_mask &= lncf_mask;
		gt->steering_table[LNCF] = NULL;
	}

	/* How about sliceid values that also work for MSLICE steering? */
	if (slice_mask & gt->info.mslice_mask) {
		slice_mask &= gt->info.mslice_mask;
		gt->steering_table[MSLICE] = NULL;
	}

	slice = __ffs(slice_mask);
	subslice = intel_sseu_find_first_xehp_dss(sseu, GEN_DSS_PER_GSLICE, slice) %
		GEN_DSS_PER_GSLICE;

	__add_mcr_wa(gt, wal, slice, subslice);

	/*
	 * SQIDI ranges are special because they use different steering
	 * registers than everything else we work with. On XeHP SDV and
	 * DG2-G10, any value in the steering registers will work fine since
	 * all instances are present, but DG2-G11 only has SQIDI instances at
	 * ID's 2 and 3, so we need to steer to one of those. For simplicity
	 * we'll just steer to a hardcoded "2" since that value will work
	 * everywhere.
	 */
	__set_mcr_steering(wal, MCFG_MCR_SELECTOR, 0, 2);
	__set_mcr_steering(wal, SF_MCR_SELECTOR, 0, 2);

	/*
	 * On DG2, GAM registers have a dedicated steering control register
	 * and must always be programmed to a hardcoded groupid of "1."
	 */
	if (IS_DG2(gt->i915))
		__set_mcr_steering(wal, GAM_MCR_SELECTOR, 1, 0);
}

static void
icl_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	icl_wa_init_mcr(gt, wal);

	/* WaModifyGamTlbPartitioning:icl */
	wa_write_clr_set(wal,
			 GEN11_GACB_PERF_CTRL,
			 GEN11_HASH_CTRL_MASK,
			 GEN11_HASH_CTRL_BIT0 | GEN11_HASH_CTRL_BIT4);

	/* Wa_1405766107:icl
	 * Formerly known as WaCL2SFHalfMaxAlloc
	 */
	wa_write_or(wal,
		    GEN11_LSN_UNSLCVC,
		    GEN11_LSN_UNSLCVC_GAFS_HALF_SF_MAXALLOC |
		    GEN11_LSN_UNSLCVC_GAFS_HALF_CL2_MAXALLOC);

	/* Wa_220166154:icl
	 * Formerly known as WaDisCtxReload
	 */
	wa_write_or(wal,
		    GEN8_GAMW_ECO_DEV_RW_IA,
		    GAMW_ECO_DEV_CTX_RELOAD_DISABLE);

	/* Wa_1406463099:icl
	 * Formerly known as WaGamTlbPendError
	 */
	wa_write_or(wal,
		    GAMT_CHKN_BIT_REG,
		    GAMT_CHKN_DISABLE_L3_COH_PIPE);

	/*
	 * Wa_1408615072:icl,ehl (vsunit)
	 * Wa_1407596294:icl,ehl (hsunit)
	 */
	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
		    VSUNIT_CLKGATE_DIS | HSUNIT_CLKGATE_DIS);

	/* Wa_1407352427:icl,ehl */
	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2,
		    PSDUNIT_CLKGATE_DIS);

	/* Wa_1406680159:icl,ehl */
	wa_mcr_write_or(wal,
			GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
			GWUNIT_CLKGATE_DIS);

	/* Wa_1607087056:icl,ehl,jsl */
	if (IS_ICELAKE(i915) ||
	    ((IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) &&
	     IS_GRAPHICS_STEP(i915, STEP_A0, STEP_B0)))
		wa_write_or(wal,
			    GEN11_SLICE_UNIT_LEVEL_CLKGATE,
			    L3_CLKGATE_DIS | L3_CR2X_CLKGATE_DIS);

	/*
	 * This is not a documented workaround, but rather an optimization
	 * to reduce sampler power.
	 */
	wa_mcr_write_clr(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);
}

/*
 * Though there are per-engine instances of these registers,
 * they retain their value through engine resets and should
 * only be provided on the GT workaround list rather than
 * the engine-specific workaround list.
 */
static void
wa_14011060649(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct intel_engine_cs *engine;
	int id;

	for_each_engine(engine, gt, id) {
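		/*
		 * Only even-numbered video decode instances are programmed;
		 * odd instances appear to share the unit with the preceding
		 * even one, so they are skipped below.
		 */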
		if (engine->class != VIDEO_DECODE_CLASS ||
		    (engine->instance % 2))
			continue;

		wa_write_or(wal, VDBOX_CGCTL3F10(engine->mmio_base),
			    IECPUNIT_CLKGATE_DIS);
	}
}

static void
gen12_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	icl_wa_init_mcr(gt, wal);

	/* Wa_14011060649:tgl,rkl,dg1,adl-s,adl-p */
	wa_14011060649(gt, wal);

	/* Wa_14011059788:tgl,rkl,adl-s,dg1,adl-p */
	wa_mcr_write_or(wal, GEN10_DFR_RATIO_EN_AND_CHICKEN, DFR_DISABLE);

	/*
	 * Wa_14015795083
	 *
	 * Firmware on some gen12 platforms locks the MISCCPCTL register,
	 * preventing i915 from modifying it for this workaround. Skip the
	 * readback verification for this workaround on debug builds; if the
	 * workaround doesn't stick due to firmware behavior, it's not an error
	 * that we want CI to flag.
	 */
	wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
	       0, 0, false);
}

static void
dg1_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	gen12_gt_workarounds_init(gt, wal);

	/* Wa_1409420604:dg1 */
	wa_mcr_write_or(wal, SUBSLICE_UNIT_LEVEL_CLKGATE2,
			CPSSUNIT_CLKGATE_DIS);

	/* Wa_1408615072:dg1 */
	/* Empirical testing shows this register is unaffected by engine reset. */
	wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE2, VSUNIT_CLKGATE_DIS_TGL);
}

static void
dg2_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	xehp_init_mcr(gt, wal);

	/* Wa_14011060649:dg2 */
	wa_14011060649(gt, wal);

	if (IS_DG2_G10(gt->i915)) {
		/* Wa_22010523718:dg2 */
		wa_write_or(wal, UNSLICE_UNIT_LEVEL_CLKGATE,
			    CG3DDISCFEG_CLKGATE_DIS);

		/* Wa_14011006942:dg2 */
		wa_mcr_write_or(wal, GEN11_SUBSLICE_UNIT_LEVEL_CLKGATE,
				DSS_ROUTER_CLKGATE_DIS);
	}

	/* Wa_14014830051:dg2 */
	wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);

	/*
	 * Wa_14015795083
	 * Skip verification for possibly locked register.
	 */
	wa_add(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE,
	       0, 0, false);

	/* Wa_18018781329 */
	wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);
	wa_mcr_write_or(wal, XEHP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);
	wa_mcr_write_or(wal, XEHP_VEBX_MOD_CTRL, FORCE_MISS_FTLB);

	/* Wa_1509235366:dg2 */
	wa_mcr_write_or(wal, XEHP_GAMCNTRL_CTRL,
			INVALIDATION_BROADCAST_MODE_DIS | GLOBAL_INVALIDATION_MODE);

	/* Wa_14010648519:dg2 */
	wa_mcr_write_or(wal, XEHP_L3NODEARBCFG, XEHP_LNESPARE);
}

static void
xelpg_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	/* Wa_14018575942 / Wa_18018781329 */
	wa_mcr_write_or(wal, RENDER_MOD_CTRL, FORCE_MISS_FTLB);
	wa_mcr_write_or(wal, COMP_MOD_CTRL, FORCE_MISS_FTLB);

	/* Wa_22016670082 */
	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
		/* Wa_14014830051 */
		wa_mcr_write_clr(wal, SARB_CHICKEN1, COMP_CKN_IN);

		/* Wa_14015795083 */
		wa_write_clr(wal, GEN7_MISCCPCTL, GEN12_DOP_CLOCK_GATE_RENDER_ENABLE);
	}

	/*
	 * Unlike older platforms, we no longer setup implicit steering here;
	 * all MCR accesses are explicitly steered.
	 */
	debug_dump_steering(gt);
}


static void
wa_16021867713(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct intel_engine_cs *engine;
	int id;

	for_each_engine(engine, gt, id)
		if (engine->class == VIDEO_DECODE_CLASS)
			wa_write_or(wal, VDBOX_CGCTL3F1C(engine->mmio_base),
				    MFXPIPE_CLKGATE_DIS);
}

static void
xelpmp_gt_workarounds_init(struct intel_gt *gt, struct i915_wa_list *wal)
{
	wa_16021867713(gt, wal);

	/*
	 * Wa_14018778641
	 * Wa_18018781329
	 *
	 * Note that although these registers are MCR on the primary
	 * GT, the media GT's versions are regular singleton registers.
	 */
	wa_write_or(wal, XELPMP_GSC_MOD_CTRL, FORCE_MISS_FTLB);

	/*
	 * Wa_14018575942
	 *
	 * The issue is seen on media KPI tests running on the VDBOX engine,
	 * especially VP9 encoding workloads.
	 */
	wa_write_or(wal, XELPMP_VDBX_MOD_CTRL, FORCE_MISS_FTLB);

	/* Wa_22016670082 */
	wa_write_or(wal, GEN12_SQCNT1, GEN12_STRICT_RAR_ENABLE);

	debug_dump_steering(gt);
}

/*
 * The bspec performance guide has recommended MMIO tuning settings. These
 * aren't truly "workarounds" but we want to program them through the
 * workaround infrastructure to make sure they're (re)applied at the proper
 * times.
 *
 * The programming in this function is for settings that persist through
 * engine resets and also are not part of any engine's register state context.
 * I.e., settings that only need to be re-applied in the event of a full GT
 * reset.
 */
static void gt_tuning_settings(struct intel_gt *gt, struct i915_wa_list *wal)
{
	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74))) {
		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
		wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
	}

	if (IS_DG2(gt->i915)) {
		wa_mcr_write_or(wal, XEHP_L3SCQREG7, BLEND_FILL_CACHING_OPT_DIS);
		wa_mcr_write_or(wal, XEHP_SQCM, EN_32B_ACCESS);
	}
}

static void
gt_init_workarounds(struct intel_gt *gt, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	gt_tuning_settings(gt, wal);

	if (gt->type == GT_MEDIA) {
		if (MEDIA_VER_FULL(i915) == IP_VER(13, 0))
			xelpmp_gt_workarounds_init(gt, wal);
		else
			MISSING_CASE(MEDIA_VER_FULL(i915));

		return;
	}

	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)))
		xelpg_gt_workarounds_init(gt, wal);
	else if (IS_DG2(i915))
		dg2_gt_workarounds_init(gt, wal);
	else if (IS_DG1(i915))
		dg1_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 12)
		gen12_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 11)
		icl_gt_workarounds_init(gt, wal);
	else if (IS_COFFEELAKE(i915) || IS_COMETLAKE(i915))
		cfl_gt_workarounds_init(gt, wal);
	else if (IS_GEMINILAKE(i915))
		glk_gt_workarounds_init(gt, wal);
	else if (IS_KABYLAKE(i915))
		kbl_gt_workarounds_init(gt, wal);
	else if (IS_BROXTON(i915))
		gen9_gt_workarounds_init(gt, wal);
	else if (IS_SKYLAKE(i915))
		skl_gt_workarounds_init(gt, wal);
	else if (IS_HASWELL(i915))
		hsw_gt_workarounds_init(gt, wal);
	else if (IS_VALLEYVIEW(i915))
		vlv_gt_workarounds_init(gt, wal);
	else if (IS_IVYBRIDGE(i915))
		ivb_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 6)
		snb_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 5)
		ilk_gt_workarounds_init(gt, wal);
	else if (IS_G4X(i915))
		g4x_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) == 4)
		gen4_gt_workarounds_init(gt, wal);
	else if (GRAPHICS_VER(i915) <= 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));
}

void intel_gt_init_workarounds(struct intel_gt *gt)
{
	struct i915_wa_list *wal = &gt->wa_list;

	wa_init_start(wal, gt, "GT", "global");
	gt_init_workarounds(gt, wal);
	wa_init_finish(wal);
}

static bool
wa_verify(struct intel_gt *gt, const struct i915_wa *wa, u32 cur,
	  const char *name, const char *from)
{
	if ((cur ^ wa->set) & wa->read) {
		gt_err(gt,
		       "%s workaround lost on %s! (reg[%x]=0x%x, relevant bits were 0x%x vs expected 0x%x)\n",
		       name, from, i915_mmio_reg_offset(wa->reg),
		       cur, cur & wa->read, wa->set & wa->read);

		return false;
	}

	return true;
}
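
/*
 * Illustrative sketch, not part of the driver: the check above asks
 * whether, within the bits that can meaningfully be read back
 * (wa->read), the current register value matches what was programmed
 * (wa->set). The helper name is hypothetical.
 */
static inline bool wa_bits_match(u32 cur, u32 set, u32 read)
{
	return ((cur ^ set) & read) == 0;
}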

static void wa_list_apply(const struct i915_wa_list *wal)
{
	struct intel_gt *gt = wal->gt;
	struct intel_uncore *uncore = gt->uncore;
	enum forcewake_domains fw;
	unsigned long flags;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	fw = wal_get_fw_for_rmw(uncore, wal);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 val, old = 0;

		/* open-coded rmw due to steering */
		if (wa->clr)
			old = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);
		val = (old & ~wa->clr) | wa->set;
		if (val != old || !wa->clr) {
			if (wa->is_mcr)
				intel_gt_mcr_multicast_write_fw(gt, wa->mcr_reg, val);
			else
				intel_uncore_write_fw(uncore, wa->reg, val);
		}

		if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) {
			u32 val = wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg);

			wa_verify(gt, wa, val, wal->name, "application");
		}
	}

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);
}
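
/*
 * Illustrative sketch, not part of the driver: the open-coded
 * read-modify-write above boils down to "drop the wa->clr bits from
 * the old value, then OR in the wa->set bits". The helper name is
 * hypothetical.
 */
static inline u32 wa_rmw_value(u32 old, u32 clr, u32 set)
{
	return (old & ~clr) | set;
}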

void intel_gt_apply_workarounds(struct intel_gt *gt)
{
	wa_list_apply(&gt->wa_list);
}

static bool wa_list_verify(struct intel_gt *gt,
			   const struct i915_wa_list *wal,
			   const char *from)
{
	struct intel_uncore *uncore = gt->uncore;
	struct i915_wa *wa;
	enum forcewake_domains fw;
	unsigned long flags;
	unsigned int i;
	bool ok = true;

	fw = wal_get_fw_for_rmw(uncore, wal);

	intel_gt_mcr_lock(gt, &flags);
	spin_lock(&uncore->lock);
	intel_uncore_forcewake_get__locked(uncore, fw);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		ok &= wa_verify(wal->gt, wa, wa->is_mcr ?
				intel_gt_mcr_read_any_fw(gt, wa->mcr_reg) :
				intel_uncore_read_fw(uncore, wa->reg),
				wal->name, from);

	intel_uncore_forcewake_put__locked(uncore, fw);
	spin_unlock(&uncore->lock);
	intel_gt_mcr_unlock(gt, flags);

	return ok;
}

bool intel_gt_verify_workarounds(struct intel_gt *gt, const char *from)
{
	return wa_list_verify(gt, &gt->wa_list, from);
}

__maybe_unused
static bool is_nonpriv_flags_valid(u32 flags)
{
	/* Check only valid flag bits are set */
	if (flags & ~RING_FORCE_TO_NONPRIV_MASK_VALID)
		return false;

	/* NB: Only 3 out of 4 enum values are valid for access field */
	if ((flags & RING_FORCE_TO_NONPRIV_ACCESS_MASK) ==
	    RING_FORCE_TO_NONPRIV_ACCESS_INVALID)
		return false;

	return true;
}

static void
whitelist_reg_ext(struct i915_wa_list *wal, i915_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.reg = reg
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.reg.reg |= flags;
	_wa_add(wal, &wa);
}
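
/*
 * Illustrative sketch, not part of the driver: whitelist entries pack
 * the access/range flags into the same 32-bit word as the MMIO offset
 * (the "wa.reg.reg |= flags" above), so a single RING_FORCE_TO_NONPRIV
 * slot write carries both pieces of information. The helper name is
 * hypothetical.
 */
static inline u32 nonpriv_slot_value(u32 mmio_offset, u32 flags)
{
	return mmio_offset | flags;
}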

static void
whitelist_mcr_reg_ext(struct i915_wa_list *wal, i915_mcr_reg_t reg, u32 flags)
{
	struct i915_wa wa = {
		.mcr_reg = reg,
		.is_mcr = 1,
	};

	if (GEM_DEBUG_WARN_ON(wal->count >= RING_MAX_NONPRIV_SLOTS))
		return;

	if (GEM_DEBUG_WARN_ON(!is_nonpriv_flags_valid(flags)))
		return;

	wa.mcr_reg.reg |= flags;
	_wa_add(wal, &wa);
}

static void
whitelist_reg(struct i915_wa_list *wal, i915_reg_t reg)
{
	whitelist_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void
whitelist_mcr_reg(struct i915_wa_list *wal, i915_mcr_reg_t reg)
{
	whitelist_mcr_reg_ext(wal, reg, RING_FORCE_TO_NONPRIV_ACCESS_RW);
}

static void gen9_whitelist_build(struct i915_wa_list *w)
{
	/* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */
	whitelist_reg(w, GEN9_CTX_PREEMPT_REG);

	/* WaEnablePreemptionGranularityControlByUMD:skl,bxt,kbl,cfl,[cnl] */
	whitelist_reg(w, GEN8_CS_CHICKEN1);

	/* WaAllowUMDToModifyHDCChicken1:skl,bxt,kbl,glk,cfl */
	whitelist_reg(w, GEN8_HDC_CHICKEN1);

	/* WaSendPushConstantsFromMMIO:skl,bxt */
	whitelist_reg(w, COMMON_SLICE_CHICKEN2);
}

static void skl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:skl */
	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}

static void bxt_whitelist_build(struct intel_engine_cs *engine)
{
	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(&engine->whitelist);
}

static void kbl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WaDisableLSQCROPERFforOCL:kbl */
	whitelist_mcr_reg(w, GEN8_L3SQCREG4);
}

static void glk_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/* WA #0862: Userspace has to set "Barrier Mode" to avoid hangs. */
	whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);
}

static void cfl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		return;

	gen9_whitelist_build(w);

	/*
	 * WaAllowPMDepthAndInvocationCountAccessFromUMD:cfl,whl,cml,aml
	 *
	 * This covers 4 registers which are next to one another :
	 *   - PS_INVOCATION_COUNT
	 *   - PS_INVOCATION_COUNT_UDW
	 *   - PS_DEPTH_COUNT
	 *   - PS_DEPTH_COUNT_UDW
	 */
	whitelist_reg_ext(w, PS_INVOCATION_COUNT,
			  RING_FORCE_TO_NONPRIV_ACCESS_RD |
			  RING_FORCE_TO_NONPRIV_RANGE_4);
}
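
/*
 * Illustrative sketch, not part of the driver: an entry flagged with
 * RING_FORCE_TO_NONPRIV_RANGE_4, as above, covers four consecutive
 * dwords starting at the named register, which is how the single
 * PS_INVOCATION_COUNT entry also exposes PS_INVOCATION_COUNT_UDW,
 * PS_DEPTH_COUNT and PS_DEPTH_COUNT_UDW. The helper name is
 * hypothetical.
 */
static inline bool in_nonpriv_range4(u32 base, u32 offset)
{
	return offset >= base && offset < base + 4 * sizeof(u32);
}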

static void allow_read_ctx_timestamp(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	if (engine->class != RENDER_CLASS)
		whitelist_reg_ext(w,
				  RING_CTX_TIMESTAMP(engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
}

static void cml_whitelist_build(struct intel_engine_cs *engine)
{
	allow_read_ctx_timestamp(engine);

	cfl_whitelist_build(engine);
}

static void icl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/* WaAllowUMDToModifyHalfSliceChicken7:icl */
		whitelist_mcr_reg(w, GEN9_HALF_SLICE_CHICKEN7);

		/* WaAllowUMDToModifySamplerMode:icl */
		whitelist_mcr_reg(w, GEN10_SAMPLER_MODE);

		/* WaEnableStateCacheRedirectToCS:icl */
		whitelist_reg(w, GEN9_SLICE_COMMON_ECO_CHICKEN1);

		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:icl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);
		break;

	case VIDEO_DECODE_CLASS:
		/* hucStatusRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2000 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucUKernelHdrInfoRegOffset */
		whitelist_reg_ext(w, _MMIO(0x2014 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		/* hucStatus2RegOffset */
		whitelist_reg_ext(w, _MMIO(0x23B0 + engine->mmio_base),
				  RING_FORCE_TO_NONPRIV_ACCESS_RD);
		break;

	default:
		break;
	}
}

static void tgl_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	allow_read_ctx_timestamp(engine);

	switch (engine->class) {
	case RENDER_CLASS:
		/*
		 * WaAllowPMDepthAndInvocationCountAccessFromUMD:tgl
		 * Wa_1408556865:tgl
		 *
		 * This covers 4 registers which are next to one another :
		 *   - PS_INVOCATION_COUNT
		 *   - PS_INVOCATION_COUNT_UDW
		 *   - PS_DEPTH_COUNT
		 *   - PS_DEPTH_COUNT_UDW
		 */
		whitelist_reg_ext(w, PS_INVOCATION_COUNT,
				  RING_FORCE_TO_NONPRIV_ACCESS_RD |
				  RING_FORCE_TO_NONPRIV_RANGE_4);

		/*
		 * Wa_1808121037:tgl
		 * Wa_14012131227:dg1
		 * Wa_1508744258:tgl,rkl,dg1,adl-s,adl-p
		 */
		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);

		/* Wa_1806527549:tgl */
		whitelist_reg(w, HIZ_CHICKEN);

		/* Required by recommended tuning setting (not a workaround) */
		whitelist_reg(w, GEN11_COMMON_SLICE_CHICKEN3);

		break;
	default:
		break;
	}
}

static void dg2_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* Required by recommended tuning setting (not a workaround) */
		whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
		break;
	default:
		break;
	}
}

static void xelpg_whitelist_build(struct intel_engine_cs *engine)
{
	struct i915_wa_list *w = &engine->whitelist;

	switch (engine->class) {
	case RENDER_CLASS:
		/* Required by recommended tuning setting (not a workaround) */
		whitelist_mcr_reg(w, XEHP_COMMON_SLICE_CHICKEN3);
		whitelist_reg(w, GEN7_COMMON_SLICE_CHICKEN1);
		break;
	default:
		break;
	}
}

void intel_engine_init_whitelist(struct intel_engine_cs *engine)
{
	struct drm_i915_private *i915 = engine->i915;
	struct i915_wa_list *w = &engine->whitelist;

	wa_init_start(w, engine->gt, "whitelist", engine->name);

	if (engine->gt->type == GT_MEDIA)
		; /* none yet */
	else if (IS_GFX_GT_IP_RANGE(engine->gt, IP_VER(12, 70), IP_VER(12, 74)))
		xelpg_whitelist_build(engine);
	else if (IS_DG2(i915))
		dg2_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) == 12)
		tgl_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) == 11)
		icl_whitelist_build(engine);
	else if (IS_COMETLAKE(i915))
		cml_whitelist_build(engine);
	else if (IS_COFFEELAKE(i915))
		cfl_whitelist_build(engine);
	else if (IS_GEMINILAKE(i915))
		glk_whitelist_build(engine);
	else if (IS_KABYLAKE(i915))
		kbl_whitelist_build(engine);
	else if (IS_BROXTON(i915))
		bxt_whitelist_build(engine);
	else if (IS_SKYLAKE(i915))
		skl_whitelist_build(engine);
	else if (GRAPHICS_VER(i915) <= 8)
		;
	else
		MISSING_CASE(GRAPHICS_VER(i915));

	wa_init_finish(w);
}

void intel_engine_apply_whitelist(struct intel_engine_cs *engine)
{
	const struct i915_wa_list *wal = &engine->whitelist;
	struct intel_uncore *uncore = engine->uncore;
	const u32 base = engine->mmio_base;
	struct i915_wa *wa;
	unsigned int i;

	if (!wal->count)
		return;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(wa->reg));

	/* And clear the rest just in case of garbage */
	for (; i < RING_MAX_NONPRIV_SLOTS; i++)
		intel_uncore_write(uncore,
				   RING_FORCE_TO_NONPRIV(base, i),
				   i915_mmio_reg_offset(RING_NOPID(base)));
}
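
/*
 * Illustrative sketch, not part of the driver: the NONPRIV slots are
 * consecutive dwords at a fixed offset from the engine's mmio base, so
 * programming slot i is a single write. The 0x4d0 base offset here is
 * an assumption for illustration, not taken from the register headers.
 */
static inline u32 nonpriv_slot_offset(u32 mmio_base, unsigned int i)
{
	return mmio_base + 0x4d0 + i * 4;	/* 0x4d0 is illustrative */
}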

/*
 * engine_fake_wa_init(), a placeholder to program registers that are
 * not part of an official workaround defined by the hardware team.
 * Adding the programming of those registers to a workaround list lets
 * us use the wa framework for proper application and verification.
 */
static void
engine_fake_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	u8 mocs_w, mocs_r;

	/*
	 * RING_CMD_CCTL specifies the default MOCS entry that will be used
	 * by the command streamer when executing commands that don't have
	 * a way to explicitly specify a MOCS setting. The default should
	 * usually reference whichever MOCS entry corresponds to uncached
	 * behavior, although use of a WB cached entry is recommended by the
	 * spec in certain circumstances on specific platforms.
	 */
	if (GRAPHICS_VER(engine->i915) >= 12) {
		mocs_r = engine->gt->mocs.uc_index;
		mocs_w = engine->gt->mocs.uc_index;

		if (HAS_L3_CCS_READ(engine->i915) &&
		    engine->class == COMPUTE_CLASS) {
			mocs_r = engine->gt->mocs.wb_index;

			/*
			 * Even on the few platforms where MOCS 0 is a
			 * legitimate table entry, it's never the correct
			 * setting to use here; we can assume the MOCS init
			 * just forgot to initialize wb_index.
			 */
			drm_WARN_ON(&engine->i915->drm, mocs_r == 0);
		}

		wa_masked_field_set(wal,
				    RING_CMD_CCTL(engine->mmio_base),
				    CMD_CCTL_MOCS_MASK,
				    CMD_CCTL_MOCS_OVERRIDE(mocs_w, mocs_r));
	}
}
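
/*
 * Illustrative sketch, not part of the driver: "masked" registers keep
 * a write-enable mask in their upper 16 bits, so a field update like
 * the wa_masked_field_set() above is encoded as a single write of
 * (mask << 16) | value and leaves all other bits untouched. The helper
 * name is hypothetical.
 */
static inline u32 masked_field(u32 mask, u32 value)
{
	return (mask << 16) | value;
}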

static void
rcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_gt *gt = engine->gt;

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0)) {
		/* Wa_22014600077 */
		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
				 ENABLE_EU_COUNT_FOR_TDL_FLUSH);
	}

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
	    IS_DG2(i915)) {
		/* Wa_1509727124 */
		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
				 SC_DISABLE_POWER_OPTIMIZATION_EBB);
	}

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_DG2(i915)) {
		/* Wa_22012856258 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
				 GEN12_DISABLE_READ_SUPPRESSION);
	}

	if (IS_DG2(i915)) {
		/*
		 * Wa_22010960976:dg2
		 * Wa_14013347512:dg2
		 */
		wa_mcr_masked_dis(wal, XEHP_HDC_CHICKEN0,
				  LSC_L1_FLUSH_CTL_3D_DATAPORT_FLUSH_EVENTS_MASK);
	}

	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 71)) ||
	    IS_DG2(i915)) {
		/* Wa_14015150844 */
		wa_mcr_add(wal, XEHP_HDC_CHICKEN0, 0,
			   _MASKED_BIT_ENABLE(DIS_ATOMIC_CHAINING_TYPED_WRITES),
			   0, true);
	}

	if (IS_DG2(i915) || IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
	    IS_DG1(i915) || IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/*
		 * Wa_1606700617:tgl,dg1,adl-p
		 * Wa_22010271021:tgl,rkl,dg1,adl-s,adl-p
		 * Wa_14010826681:tgl,dg1,rkl,adl-p
		 * Wa_18019627453:dg2
		 */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);
	}

	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) || IS_DG1(i915) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1606931601:tgl,rkl,dg1,adl-s,adl-p */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, GEN12_DISABLE_EARLY_READ);

		/*
		 * Wa_1407928979:tgl A*
		 * Wa_18011464164:tgl[B0+],dg1[B0+]
		 * Wa_22010931296:tgl[B0+],dg1[B0+]
		 * Wa_14010919138:rkl,dg1,adl-s,adl-p
		 */
		wa_write_or(wal, GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/* Wa_1406941453:tgl,rkl,dg1,adl-s,adl-p */
		wa_mcr_masked_en(wal,
				 GEN10_SAMPLER_MODE,
				 ENABLE_SMALLPL);
	}

	if (IS_ALDERLAKE_P(i915) || IS_ALDERLAKE_S(i915) ||
	    IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915)) {
		/* Wa_1409804808 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2,
				 GEN12_PUSH_CONST_DEREF_HOLD_DIS);

		/* Wa_14010229206 */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, GEN12_DISABLE_TDL_PUSH);
	}

	if (IS_ROCKETLAKE(i915) || IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) {
		/*
		 * Wa_1607297627
		 *
		 * On TGL and RKL there are multiple entries for this WA in the
		 * BSpec; some indicate this is an A0-only WA, others indicate
		 * it applies to all steppings so we trust the "all steppings."
		 */
		wa_masked_en(wal,
			     RING_PSMI_CTL(RENDER_RING_BASE),
			     GEN12_WAIT_FOR_EVENT_POWER_DOWN_DISABLE |
			     GEN8_RC_SEMA_IDLE_MSG_DISABLE);
	}

	if (IS_JASPERLAKE(i915) || IS_ELKHARTLAKE(i915)) {
		/*
		 * "Disable Repacking for Compression (masked R/W access)
		 *  before rendering compressed surfaces for display."
		 */
		wa_masked_en(wal, CACHE_MODE_0_GEN7,
			     DISABLE_REPACKING_FOR_COMPRESSION);
	}

	if (GRAPHICS_VER(i915) == 11) {
		/* This is not a Wa. Enable for better image quality */
		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     _3D_CHICKEN3_AA_LINE_QUALITY_FIX_ENABLE);

		/*
		 * Wa_1405543622:icl
		 * Formerly known as WaGAPZPriorityScheme
		 */
		wa_write_or(wal,
			    GEN8_GARBCNTL,
			    GEN11_ARBITRATION_PRIO_ORDER_MASK);

		/*
		 * Wa_1604223664:icl
		 * Formerly known as WaL3BankAddressHashing
		 */
		wa_write_clr_set(wal,
				 GEN8_GARBCNTL,
				 GEN11_HASH_CTRL_EXCL_MASK,
				 GEN11_HASH_CTRL_EXCL_BIT0);
		wa_write_clr_set(wal,
				 GEN11_GLBLINVL,
				 GEN11_BANK_HASH_ADDR_EXCL_MASK,
				 GEN11_BANK_HASH_ADDR_EXCL_BIT0);

		/*
		 * Wa_1405733216:icl
		 * Formerly known as WaDisableCleanEvicts
		 */
		wa_mcr_write_or(wal,
				GEN8_L3SQCREG4,
				GEN11_LQSC_CLEAN_EVICT_DISABLE);

		/* Wa_1606682166:icl */
		wa_write_or(wal,
			    GEN7_SARCHKMD,
			    GEN7_DISABLE_SAMPLER_PREFETCH);

		/* Wa_1409178092:icl */
		wa_mcr_write_clr_set(wal,
				     GEN11_SCRATCH2,
				     GEN11_COHERENT_PARTIAL_WRITE_MERGE_ENABLE,
				     0);

		/* WaEnable32PlaneMode:icl */
		wa_masked_en(wal, GEN9_CSFE_CHICKEN1_RCS,
			     GEN11_ENABLE_32_PLANE_MODE);

		/*
		 * Wa_1408767742:icl[a2..forever],ehl[all]
		 * Wa_1605460711:icl[a0..c0]
		 */
		wa_write_or(wal,
			    GEN7_FF_THREAD_MODE,
			    GEN12_FF_TESSELATION_DOP_GATE_DISABLE);

		/* Wa_22010271021 */
		wa_masked_en(wal,
			     GEN9_CS_DEBUG_MODE1,
			     FF_DOP_CLOCK_GATE_DISABLE);
	}

	/*
	 * Intel platforms that support fine-grained preemption (i.e., gen9 and
	 * beyond) allow the kernel-mode driver to choose between two different
	 * options for controlling preemption granularity and behavior.
	 *
	 * Option 1 (hardware default):
	 *   Preemption settings are controlled in a global manner via
	 *   kernel-only register CS_DEBUG_MODE1 (0x20EC). Any granularity
	 *   and settings chosen by the kernel-mode driver will apply to all
	 *   userspace clients.
	 *
	 * Option 2:
	 *   Preemption settings are controlled on a per-context basis via
	 *   register CS_CHICKEN1 (0x2580). CS_CHICKEN1 is saved/restored on
	 *   context switch and is writable by userspace (e.g., via
	 *   MI_LOAD_REGISTER_IMMEDIATE instructions placed in a batch buffer)
	 *   which allows different userspace drivers/clients to select
	 *   different settings, or to change those settings on the fly in
	 *   response to runtime needs. This option was known by name
	 *   "FtrPerCtxtPreemptionGranularityControl" at one time, although
	 *   that name is somewhat misleading as other non-granularity
	 *   preemption settings are also impacted by this decision.
	 *
	 * On Linux, our policy has always been to let userspace drivers
	 * control preemption granularity/settings (Option 2). This was
	 * originally mandatory on gen9 to prevent ABI breakage (old gen9
	 * userspace developed before object-level preemption was enabled would
	 * not behave well if i915 were to go with Option 1 and enable that
	 * preemption in a global manner). On gen9 each context would have
	 * object-level preemption disabled by default (see
	 * WaDisable3DMidCmdPreemption in gen9_ctx_workarounds_init), but
	 * userspace drivers could opt-in to object-level preemption as they
	 * saw fit. For post-gen9 platforms, we continue to utilize Option 2;
	 * even though it is no longer necessary for ABI compatibility when
	 * enabling a new platform, it does ensure that userspace will be able
	 * to implement any workarounds that show up requiring temporary
	 * adjustments to preemption behavior at runtime.
	 *
	 * Notes/Workarounds:
	 *  - Wa_14015141709: On DG2 and early steppings of MTL,
	 *      CS_CHICKEN1[0] does not disable object-level preemption as
	 *      it is supposed to (nor does CS_DEBUG_MODE1[0] if we had been
	 *      using Option 1). Effectively this means userspace is unable
	 *      to disable object-level preemption on these platforms/steppings
	 *      despite the setting here.
	 *
	 *  - Wa_16013994831: May require that userspace program
	 *      CS_CHICKEN1[10] when certain runtime conditions are true.
	 *      Userspace requires Option 2 to be in effect for their update of
	 *      CS_CHICKEN1[10] to be effective.
	 *
	 * Other workarounds may appear in the future that will also require
	 * Option 2 behavior to allow proper userspace implementation.
	 */
	if (GRAPHICS_VER(i915) >= 9)
		wa_masked_en(wal,
			     GEN7_FF_SLICE_CS_CHICKEN1,
			     GEN9_FFSC_PERCTX_PREEMPT_CTRL);
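
	/*
	 * Illustrative sketch, not part of the driver: with Option 2 in
	 * effect, a userspace driver can adjust its own preemption settings
	 * by emitting a register write into its batch buffer, roughly:
	 *
	 *	cs[0] = MI_LOAD_REGISTER_IMM(1);
	 *	cs[1] = 0x2580;		(CS_CHICKEN1, per the comment above)
	 *	cs[2] = desired settings, in masked-register encoding;
	 */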
|
|
|
|
|
2020-06-02 15:05:40 +01:00
|
|
|
if (IS_SKYLAKE(i915) ||
|
|
|
|
IS_KABYLAKE(i915) ||
|
|
|
|
IS_COFFEELAKE(i915) ||
|
|
|
|
IS_COMETLAKE(i915)) {
|
2018-12-03 13:33:41 +00:00
|
|
|
/* WaEnableGapsTsvCreditFix:skl,kbl,cfl */
|
|
|
|
wa_write_or(wal,
|
|
|
|
GEN8_GARBCNTL,
|
|
|
|
GEN9_GAPS_TSV_CREDIT_DISABLE);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (IS_BROXTON(i915)) {
|
|
|
|
/* WaDisablePooledEuLoadBalancingFix:bxt */
|
|
|
|
wa_masked_en(wal,
|
|
|
|
FF_SLICE_CS_CHICKEN2,
|
|
|
|
GEN9_POOLED_EU_LOAD_BALANCING_FIX_DISABLE);
|
|
|
|
}
|
|
|
|
|
2021-06-05 08:53:52 -07:00
|
|
|
if (GRAPHICS_VER(i915) == 9) {
|
2018-12-03 13:33:41 +00:00
|
|
|
/* WaContextSwitchWithConcurrentTLBInvalidate:skl,bxt,kbl,glk,cfl */
|
|
|
|
wa_masked_en(wal,
|
|
|
|
GEN9_CSFE_CHICKEN1_RCS,
|
|
|
|
GEN9_PREEMPT_GPGPU_SYNC_SWITCH_DISABLE);
|
|
|
|
|
|
|
|
/* WaEnableLbsSlaRetryTimerDecrement:skl,bxt,kbl,glk,cfl */
|
2022-10-14 16:02:36 -07:00
|
|
|
wa_mcr_write_or(wal,
|
|
|
|
BDW_SCRATCH1,
|
|
|
|
GEN9_LBS_SLA_RETRY_TIMER_DECREMENT_ENABLE);
|
2018-12-03 13:33:41 +00:00
|
|
|
|
|
|
|
/* WaProgramL3SqcReg1DefaultForPerf:bxt,glk */
|
|
|
|
if (IS_GEN9_LP(i915))
|
2022-10-14 16:02:36 -07:00
|
|
|
wa_mcr_write_clr_set(wal,
|
|
|
|
GEN8_L3SQCREG1,
|
|
|
|
L3_PRIO_CREDITS_MASK,
|
|
|
|
L3_GENERAL_PRIO_CREDITS(62) |
|
|
|
|
L3_HIGH_PRIO_CREDITS(2));
|
2018-12-03 13:33:41 +00:00
|
|
|
|
|
|
|
/* WaOCLCoherentLineFlush:skl,bxt,kbl,cfl */
|
2022-10-14 16:02:36 -07:00
|
|
|
wa_mcr_write_or(wal,
|
|
|
|
GEN8_L3SQCREG4,
|
|
|
|
GEN8_LQSC_FLUSH_COHERENT_LINES);
|
2021-01-25 22:01:52 +00:00
|
|
|
|
|
|
|
/* Disable atomics in L3 to prevent unrecoverable hangs */
|
|
|
|
wa_write_clr_set(wal, GEN9_SCRATCH_LNCF1,
|
|
|
|
GEN9_LNCF_NONIA_COHERENT_ATOMICS_ENABLE, 0);
|
2022-10-14 16:02:36 -07:00
|
|
|
wa_mcr_write_clr_set(wal, GEN8_L3SQCREG4,
|
|
|
|
GEN8_LQSQ_NONIA_COHERENT_ATOMICS_ENABLE, 0);
|
|
|
|
wa_mcr_write_clr_set(wal, GEN9_SCRATCH1,
|
|
|
|
EVICTION_PERF_FIX_ENABLE, 0);
|
2018-12-03 13:33:41 +00:00
|
|
|
}
|
2020-02-01 19:40:04 +00:00
|
|
|
|
2021-01-04 11:49:14 +00:00
|
|
|
if (IS_HASWELL(i915)) {
|
|
|
|
/* WaSampleCChickenBitEnable:hsw */
|
|
|
|
wa_masked_en(wal,
|
2022-10-14 16:02:26 -07:00
|
|
|
HSW_HALF_SLICE_CHICKEN3, HSW_SAMPLE_C_PERFORMANCE);
|
2021-01-04 11:49:14 +00:00
|
|
|
|
|
|
|
wa_masked_dis(wal,
|
|
|
|
CACHE_MODE_0_GEN7,
|
|
|
|
/* enable HiZ Raw Stall Optimization */
|
|
|
|
HIZ_RAW_STALL_OPT_DISABLE);
|
2021-01-13 22:51:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (IS_VALLEYVIEW(i915)) {
|
|
|
|
/* WaDisableEarlyCull:vlv */
|
|
|
|
wa_masked_en(wal,
|
|
|
|
_3D_CHICKEN3,
|
|
|
|
_3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
|
2021-01-04 11:49:14 +00:00
|
|
|
|
|
|
|
/*
|
2021-01-13 22:51:44 +00:00
|
|
|
* WaVSThreadDispatchOverride:ivb,vlv
|
2021-01-04 11:49:14 +00:00
|
|
|
*
|
2021-01-13 22:51:44 +00:00
|
|
|
* This actually overrides the dispatch
|
|
|
|
* mode for all thread types.
|
2021-01-04 11:49:14 +00:00
|
|
|
*/
|
2021-01-13 22:51:44 +00:00
|
|
|
wa_write_clr_set(wal,
|
|
|
|
GEN7_FF_THREAD_MODE,
|
|
|
|
GEN7_FF_SCHED_MASK,
|
|
|
|
GEN7_FF_TS_SCHED_HW |
|
|
|
|
GEN7_FF_VS_SCHED_HW |
|
|
|
|
GEN7_FF_DS_SCHED_HW);
|
|
|
|
|
|
|
|
/* WaPsdDispatchEnable:vlv */
|
|
|
|
/* WaDisablePSDDualDispatchEnable:vlv */
|
|
|
|
wa_masked_en(wal,
|
|
|
|
GEN7_HALF_SLICE_CHICKEN1,
|
|
|
|
GEN7_MAX_PS_THREAD_DEP |
|
|
|
|
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
|
2021-01-04 11:49:14 +00:00
|
|
|
}
|
|
|
|
|
2021-01-13 22:51:44 +00:00
|
|
|
if (IS_IVYBRIDGE(i915)) {
|
|
|
|
/* WaDisableEarlyCull:ivb */
|
2021-01-13 22:51:43 +00:00
|
|
|
wa_masked_en(wal,
|
|
|
|
_3D_CHICKEN3,
|
|
|
|
_3D_CHICKEN_SF_DISABLE_OBJEND_CULL);
|
|
|
|
|
2021-01-13 22:51:44 +00:00
|
|
|
if (0) { /* causes HiZ corruption on ivb:gt1 */
|
|
|
|
/* enable HiZ Raw Stall Optimization */
|
|
|
|
wa_masked_dis(wal,
|
|
|
|
CACHE_MODE_0_GEN7,
|
|
|
|
HIZ_RAW_STALL_OPT_DISABLE);
|
|
|
|
}
|
|
|
|
|
2021-01-13 22:51:43 +00:00
|
|
|
/*
|
|
|
|
* WaVSThreadDispatchOverride:ivb,vlv
|
|
|
|
*
|
|
|
|
* This actually overrides the dispatch
|
|
|
|
* mode for all thread types.
|
|
|
|
*/
|
|
|
|
wa_write_clr_set(wal,
|
|
|
|
GEN7_FF_THREAD_MODE,
|
|
|
|
GEN7_FF_SCHED_MASK,
|
|
|
|
GEN7_FF_TS_SCHED_HW |
|
|
|
|
GEN7_FF_VS_SCHED_HW |
|
|
|
|
GEN7_FF_DS_SCHED_HW);
|
|
|
|
|
2021-01-13 22:51:44 +00:00
|
|
|
/* WaDisablePSDDualDispatchEnable:ivb */
|
2024-09-30 15:49:48 +03:00
|
|
|
if (INTEL_INFO(i915)->gt == 1)
|
2021-01-13 22:51:44 +00:00
|
|
|
wa_masked_en(wal,
|
|
|
|
GEN7_HALF_SLICE_CHICKEN1,
|
|
|
|
GEN7_PSD_SINGLE_PORT_DISPATCH_ENABLE);
|
|
|
|
}
|
|
|
|
|
2021-06-05 08:53:52 -07:00
|
|
|
if (GRAPHICS_VER(i915) == 7) {
|
2021-01-13 22:51:44 +00:00
|
|
|
/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
|
|
|
|
wa_masked_en(wal,
|
2022-01-10 21:15:54 -08:00
|
|
|
RING_MODE_GEN7(RENDER_RING_BASE),
|
2021-01-13 22:51:44 +00:00
|
|
|
GFX_TLB_INVALIDATE_EXPLICIT | GFX_REPLAY_MODE);
|
|
|
|
|
|
|
|
/* WaDisable_RenderCache_OperationalFlush:ivb,vlv,hsw */
|
2021-01-13 22:51:43 +00:00
|
|
|
wa_masked_dis(wal, CACHE_MODE_0_GEN7, RC_OP_FLUSH_ENABLE);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BSpec says this must be set, even though
|
2021-01-13 22:51:44 +00:00
|
|
|
* WaDisable4x2SubspanOptimization:ivb,hsw
|
2021-01-13 22:51:43 +00:00
|
|
|
* WaDisable4x2SubspanOptimization isn't listed for VLV.
|
|
|
|
*/
|
|
|
|
wa_masked_en(wal,
|
|
|
|
CACHE_MODE_1,
|
|
|
|
PIXEL_SUBSPAN_COLLECT_OPT_DISABLE);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* BSpec recommends 8x4 when MSAA is used,
|
|
|
|
* however in practice 16x4 seems fastest.
|
|
|
|
*
|
|
|
|
* Note that PS/WM thread counts depend on the WIZ hashing
|
|
|
|
* disable bit, which we don't touch here, but it's good
|
|
|
|
* to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
|
|
|
|
*/
|
2021-07-26 17:23:30 -07:00
|
|
|
wa_masked_field_set(wal,
|
|
|
|
GEN7_GT_MODE,
|
|
|
|
GEN6_WIZ_HASHING_MASK,
|
|
|
|
GEN6_WIZ_HASHING_16x4);
|
2021-01-13 22:51:43 +00:00
|
|
|
}

	if (IS_GRAPHICS_VER(i915, 6, 7))
		/*
		 * We need to disable the AsyncFlip performance optimisations in
		 * order to use MI_WAIT_FOR_EVENT within the CS. It should
		 * already be programmed to '1' on all products.
		 *
		 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv
		 */
		wa_masked_en(wal,
			     RING_MI_MODE(RENDER_RING_BASE),
			     ASYNC_FLIP_PERF_DISABLE);

	if (GRAPHICS_VER(i915) == 6) {
		/*
		 * Required for the hardware to program scanline values for
		 * waiting
		 * WaEnableFlushTlbInvalidationMode:snb
		 */
		wa_masked_en(wal,
			     GFX_MODE,
			     GFX_TLB_INVALIDATE_EXPLICIT);

		/* WaDisableHiZPlanesWhenMSAAEnabled:snb */
		wa_masked_en(wal,
			     _3D_CHICKEN,
			     _3D_CHICKEN_HIZ_PLANE_DISABLE_MSAA_4X_SNB);

		wa_masked_en(wal,
			     _3D_CHICKEN3,
			     /* WaStripsFansDisableFastClipPerformanceFix:snb */
			     _3D_CHICKEN3_SF_DISABLE_FASTCLIP_CULL |
			     /*
			      * Bspec says:
			      * "This bit must be set if 3DSTATE_CLIP clip mode is set
			      * to normal and 3DSTATE_SF number of SF output attributes
			      * is more than 16."
			      */
			     _3D_CHICKEN3_SF_DISABLE_PIPELINED_ATTR_FETCH);

		/*
		 * BSpec recommends 8x4 when MSAA is used,
		 * however in practice 16x4 seems fastest.
		 *
		 * Note that PS/WM thread counts depend on the WIZ hashing
		 * disable bit, which we don't touch here, but it's good
		 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
		 */
		wa_masked_field_set(wal,
				    GEN6_GT_MODE,
				    GEN6_WIZ_HASHING_MASK,
				    GEN6_WIZ_HASHING_16x4);

		/* WaDisable_RenderCache_OperationalFlush:snb */
		wa_masked_dis(wal, CACHE_MODE_0, RC_OP_FLUSH_ENABLE);

		/*
		 * From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset. LRA replacement
		 *  policy is not supported."
		 */
		wa_masked_dis(wal,
			      CACHE_MODE_0,
			      CM0_STC_EVICT_DISABLE_LRA_SNB);
	}

	if (IS_GRAPHICS_VER(i915, 4, 6))
		/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
		wa_add(wal, RING_MI_MODE(RENDER_RING_BASE),
		       0, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH),
		       /* XXX bit doesn't stick on Broadwater */
		       IS_I965G(i915) ? 0 : VS_TIMER_DISPATCH, true);

	if (GRAPHICS_VER(i915) == 4)
		/*
		 * Disable CONSTANT_BUFFER before it is loaded from the context
		 * image. For as soon as it is loaded, it is executed and the
		 * stored address may no longer be valid, leading to a GPU hang.
		 *
		 * This imposes the requirement that userspace reload their
		 * CONSTANT_BUFFER on every batch, fortunately a requirement
		 * they are already accustomed to from before contexts were
		 * enabled.
		 */
		wa_add(wal, ECOSKPD(RENDER_RING_BASE),
		       0, _MASKED_BIT_ENABLE(ECO_CONSTANT_BUFFER_SR_DISABLE),
		       0 /* XXX bit doesn't stick on Broadwater */,
		       true);
}
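
/*
 * A hedged sketch of the wa_add() argument convention used at the end of
 * rcs_engine_wa_init() above (assuming the i915_wa entry layout used by
 * this file): "clear"/"set" describe the register update, "read_mask"
 * selects the bits checked on readback, and the final bool marks a
 * masked-style register:
 *
 *	wa_add(wal, reg,
 *	       0,                         <-- bits to clear first
 *	       _MASKED_BIT_ENABLE(bit),   <-- value to write
 *	       IS_I965G(i915) ? 0 : bit,  <-- readback mask; 0 skips checks
 *	       true);                     <-- register uses a 31:16 mask
 *
 * where "bit" stands in for VS_TIMER_DISPATCH in the first call.
 */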

static void
xcs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;

	/* WaKBLVECSSemaphoreWaitPoll:kbl */
	if (IS_KABYLAKE(i915) && IS_GRAPHICS_STEP(i915, STEP_A0, STEP_F0)) {
		wa_write(wal,
			 RING_SEMA_WAIT_POLL(engine->mmio_base),
			 1);
	}

	/* Wa_16018031267, Wa_16018063123 */
	if (NEEDS_FASTCOLOR_BLT_WABB(engine))
		wa_masked_field_set(wal, ECOSKPD(engine->mmio_base),
				    XEHP_BLITTER_SCHEDULING_MODE_MASK,
				    XEHP_BLITTER_ROUND_ROBIN_MODE);
}

static void
ccs_engine_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	/* boilerplate for any CCS engine workaround */
}

/*
 * The bspec performance guide has recommended MMIO tuning settings. These
 * aren't truly "workarounds" but we want to program them with the same
 * workaround infrastructure to ensure that they're automatically added to
 * the GuC save/restore lists, re-applied at the right times, and checked for
 * any conflicting programming requested by real workarounds.
 *
 * Programming settings should be added here only if their registers are not
 * part of an engine's register state context. If a register is part of a
 * context, then any tuning settings should be programmed in an appropriate
 * function invoked by __intel_engine_init_ctx_wa().
 */
static void
add_render_compute_tuning_settings(struct intel_gt *gt,
				   struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = gt->i915;

	if (IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 70), IP_VER(12, 74)) || IS_DG2(i915))
		wa_mcr_write_clr_set(wal, RT_CTRL, STACKID_CTRL, STACKID_CTRL_512);

	/*
	 * This tuning setting proves beneficial only on ATS-M designs; the
	 * default "age based" setting is optimal on regular DG2 and other
	 * platforms.
	 */
	if (INTEL_INFO(i915)->tuning_thread_rr_after_dep)
		wa_mcr_masked_field_set(wal, GEN9_ROW_CHICKEN4, THREAD_EX_ARB_MODE,
					THREAD_EX_ARB_MODE_RR_AFTER_DEP);

	if (GRAPHICS_VER(i915) == 12 && GRAPHICS_VER_FULL(i915) < IP_VER(12, 55))
		wa_write_clr(wal, GEN8_GARBCNTL, GEN12_BUS_HASH_CTL_BIT_EXC);
}
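
/*
 * A minimal sketch (not the actual apply path) of what the
 * wa_mcr_write_clr_set(RT_CTRL, ...) entry above amounts to when the
 * list is applied: a steered read-modify-write of a multicast register,
 * assuming the helpers from intel_gt_mcr.c:
 *
 *	u32 val;
 *
 *	val = intel_gt_mcr_read_any(gt, RT_CTRL);
 *	val = (val & ~STACKID_CTRL) | STACKID_CTRL_512;
 *	intel_gt_mcr_multicast_write(gt, RT_CTRL, val);
 */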

static void ccs_engine_wa_mode(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct intel_gt *gt = engine->gt;
	u32 mode;

	if (!IS_DG2(gt->i915))
		return;

	/*
	 * Wa_14019159160: This workaround, along with others, leads to
	 * significant challenges in utilizing load balancing among the
	 * CCS slices. Consequently, an architectural decision has been
	 * made to completely disable automatic CCS load balancing.
	 */
	wa_masked_en(wal, GEN12_RCU_MODE, XEHP_RCU_MODE_FIXED_SLICE_CCS_MODE);

	/*
	 * After having disabled automatic load balancing we need to
	 * assign all slices to a single CCS. We will call it CCS mode 1.
	 */
	mode = intel_gt_apply_ccs_mode(gt);
	wa_masked_en(wal, XEHP_CCS_MODE, mode);
}
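
/*
 * For reference, a sketch of what the XEHP_CCS_MODE entry above records,
 * assuming wa_masked_en() (defined earlier in this file) follows the
 * usual wa_add() convention for masked registers:
 *
 *	wa_add(wal, XEHP_CCS_MODE, 0, _MASKED_BIT_ENABLE(mode), mode, true);
 *
 * i.e. the returned mode value doubles as the readback-verification mask.
 */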

/*
 * The workarounds in this function apply to shared registers in
 * the general render reset domain that aren't tied to a
 * specific engine. Since all render+compute engines get reset
 * together, and the contents of these registers are lost during
 * the shared render domain reset, we'll define such workarounds
 * here and then add them to just a single RCS or CCS engine's
 * workaround list (whichever engine has the
 * I915_ENGINE_FIRST_RENDER_COMPUTE flag).
 */
static void
general_render_compute_wa_init(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	struct drm_i915_private *i915 = engine->i915;
	struct intel_gt *gt = engine->gt;

	add_render_compute_tuning_settings(gt, wal);

	if (GRAPHICS_VER(i915) >= 11) {
		/*
		 * This is not a Wa (although referred to as
		 * WaSetInidrectStateOverride in places); it allows
		 * applications that reference sampler states through
		 * the BindlessSamplerStateBaseAddress to have their
		 * border color relative to DynamicStateBaseAddress
		 * rather than BindlessSamplerStateBaseAddress.
		 *
		 * Otherwise SAMPLER_STATE border colors have to be
		 * copied in multiple heaps (DynamicStateBaseAddress &
		 * BindlessSamplerStateBaseAddress)
		 *
		 * BSpec: 46052
		 */
		wa_mcr_masked_en(wal,
				 GEN10_SAMPLER_MODE,
				 GEN11_INDIRECT_STATE_BASE_ADDR_OVERRIDE);
	}

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_B0, STEP_FOREVER) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_B0, STEP_FOREVER) ||
	    IS_GFX_GT_IP_RANGE(gt, IP_VER(12, 74), IP_VER(12, 74))) {
		/* Wa_14017856879 */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN3, MTL_DISABLE_FIX_FOR_EOT_FLUSH);

		/* Wa_14020495402 */
		wa_mcr_masked_en(wal, GEN8_ROW_CHICKEN2, XELPG_DISABLE_TDL_SVHS_GATING);
	}

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
		/*
		 * Wa_14017066071
		 * Wa_14017654203
		 */
		wa_mcr_masked_en(wal, GEN10_SAMPLER_MODE,
				 MTL_DISABLE_SAMPLER_SC_OOO);

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0))
		/* Wa_22015279794 */
		wa_mcr_masked_en(wal, GEN10_CACHE_MODE_SS,
				 DISABLE_PREFETCH_INTO_IC);

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
	    IS_DG2(i915)) {
		/* Wa_22013037850 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW,
				DISABLE_128B_EVICTION_COMMAND_UDW);

		/* Wa_18017747507 */
		wa_masked_en(wal, VFG_PREEMPTION_CHICKEN, POLYGON_TRIFAN_LINELOOP_DISABLE);
	}

	if (IS_GFX_GT_IP_STEP(gt, IP_VER(12, 70), STEP_A0, STEP_B0) ||
	    IS_GFX_GT_IP_STEP(gt, IP_VER(12, 71), STEP_A0, STEP_B0) ||
	    IS_DG2(i915)) {
		/* Wa_22014226127 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0, DISABLE_D8_D16_COASLESCE);
	}

	if (IS_DG2(i915)) {
		/* Wa_14015227452:dg2,pvc */
		wa_mcr_masked_en(wal, GEN9_ROW_CHICKEN4, XEHP_DIS_BBL_SYSPIPE);

		/*
		 * Wa_16011620976:dg2_g11
		 * Wa_22015475538:dg2
		 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, DIS_CHAIN_2XSIMD8);

		/* Wa_18028616096 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0_UDW, UGM_FRAGMENT_THRESHOLD_TO_3);
	}

	if (IS_DG2_G11(i915)) {
		/*
		 * Wa_22012826095:dg2
		 * Wa_22013059131:dg2
		 */
		wa_mcr_write_clr_set(wal, LSC_CHICKEN_BIT_0_UDW,
				     MAXREQS_PER_BANK,
				     REG_FIELD_PREP(MAXREQS_PER_BANK, 2));

		/* Wa_22013059131:dg2 */
		wa_mcr_write_or(wal, LSC_CHICKEN_BIT_0,
				FORCE_1_SUB_MESSAGE_PER_FRAGMENT);

		/*
		 * Wa_22012654132
		 *
		 * Note that register 0xE420 is write-only and cannot be read
		 * back for verification on DG2 (due to Wa_14012342262), so
		 * we need to explicitly skip the readback.
		 */
		wa_mcr_add(wal, GEN10_CACHE_MODE_SS, 0,
			   _MASKED_BIT_ENABLE(ENABLE_PREFETCH_INTO_IC),
			   0 /* write-only, so skip validation */,
			   true);
	}
}
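
/*
 * A hedged note on the zero verification mask passed to wa_mcr_add()
 * above: readback checking (see wa_verify() earlier in this file)
 * accepts a workaround when
 *
 *	((cur ^ wa->set) & wa->read_mask) == 0
 *
 * so a read_mask of 0 makes any readback value acceptable, which is
 * exactly what a write-only register such as 0xE420 requires.
 */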

static void
engine_init_workarounds(struct intel_engine_cs *engine, struct i915_wa_list *wal)
{
	if (GRAPHICS_VER(engine->i915) < 4)
		return;

	engine_fake_wa_init(engine, wal);

	/*
	 * These are common workarounds that just need to be applied
	 * to a single RCS/CCS engine's workaround list since
	 * they're reset as part of the general render domain reset.
	 */
	if (engine->flags & I915_ENGINE_FIRST_RENDER_COMPUTE) {
		general_render_compute_wa_init(engine, wal);
		ccs_engine_wa_mode(engine, wal);
	}

	if (engine->class == COMPUTE_CLASS)
		ccs_engine_wa_init(engine, wal);
	else if (engine->class == RENDER_CLASS)
		rcs_engine_wa_init(engine, wal);
	else
		xcs_engine_wa_init(engine, wal);
}

void intel_engine_init_workarounds(struct intel_engine_cs *engine)
{
	struct i915_wa_list *wal = &engine->wa_list;

	wa_init_start(wal, engine->gt, "engine", engine->name);
	engine_init_workarounds(engine, wal);
	wa_init_finish(wal);
}

void intel_engine_apply_workarounds(struct intel_engine_cs *engine)
{
	wa_list_apply(&engine->wa_list);
}

static const struct i915_range mcr_ranges_gen8[] = {
	{ .start = 0x5500, .end = 0x55ff },
	{ .start = 0x7000, .end = 0x7fff },
	{ .start = 0x9400, .end = 0x97ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xe000, .end = 0xe7ff },
	{},
};

static const struct i915_range mcr_ranges_gen12[] = {
	{ .start = 0x8150, .end = 0x815f },
	{ .start = 0x9520, .end = 0x955f },
	{ .start = 0xb100, .end = 0xb3ff },
	{ .start = 0xde80, .end = 0xe8ff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

static const struct i915_range mcr_ranges_xehp[] = {
	{ .start = 0x4000, .end = 0x4aff },
	{ .start = 0x5200, .end = 0x52ff },
	{ .start = 0x5400, .end = 0x7fff },
	{ .start = 0x8140, .end = 0x815f },
	{ .start = 0x8c80, .end = 0x8dff },
	{ .start = 0x94d0, .end = 0x955f },
	{ .start = 0x9680, .end = 0x96ff },
	{ .start = 0xb000, .end = 0xb3ff },
	{ .start = 0xc800, .end = 0xcfff },
	{ .start = 0xd800, .end = 0xd8ff },
	{ .start = 0xdc00, .end = 0xffff },
	{ .start = 0x17000, .end = 0x17fff },
	{ .start = 0x24a00, .end = 0x24a7f },
	{},
};

static bool mcr_range(struct drm_i915_private *i915, u32 offset)
{
	const struct i915_range *mcr_ranges;
	int i;

	if (GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55))
		mcr_ranges = mcr_ranges_xehp;
	else if (GRAPHICS_VER(i915) >= 12)
		mcr_ranges = mcr_ranges_gen12;
	else if (GRAPHICS_VER(i915) >= 8)
		mcr_ranges = mcr_ranges_gen8;
	else
		return false;

	/*
	 * Registers in these ranges are affected by the MCR selector
	 * which only controls CPU initiated MMIO. Routing does not
	 * work for CS access so we cannot verify them on this path.
	 */
	for (i = 0; mcr_ranges[i].start; i++)
		if (offset >= mcr_ranges[i].start &&
		    offset <= mcr_ranges[i].end)
			return true;

	return false;
}
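
/*
 * Worked example: GEN10_CACHE_MODE_SS lives at offset 0xE420, which falls
 * inside the 0xe000-0xe7ff gen8 range and the 0xdc00-0xffff Xe_HP range
 * above, so mcr_range() returns true and the SRM-based verification below
 * leaves that register unchecked.
 */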

static int
wa_list_srm(struct i915_request *rq,
	    const struct i915_wa_list *wal,
	    struct i915_vma *vma)
{
	struct drm_i915_private *i915 = rq->i915;
	unsigned int i, count = 0;
	const struct i915_wa *wa;
	u32 srm, *cs;

	srm = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
	if (GRAPHICS_VER(i915) >= 8)
		srm++;

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (!mcr_range(i915, i915_mmio_reg_offset(wa->reg)))
			count++;
	}

	cs = intel_ring_begin(rq, 4 * count);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		u32 offset = i915_mmio_reg_offset(wa->reg);

		if (mcr_range(i915, offset))
			continue;

		*cs++ = srm;
		*cs++ = offset;
		*cs++ = i915_ggtt_offset(vma) + sizeof(u32) * i;
		*cs++ = 0;
	}
	intel_ring_advance(rq, cs);

	return 0;
}
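
/*
 * For reference, the 4-dword MI_STORE_REGISTER_MEM packet emitted per
 * non-MCR workaround above (a sketch of the command-stream layout; on
 * gen8+ the length field is bumped by one for the extra address dword):
 *
 *	dw0: MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT (+1 on gen8+)
 *	dw1: MMIO offset of the register to sample
 *	dw2: low 32 bits of the GGTT destination address
 *	dw3: high address bits on gen8+ (zero here); a no-op dword before
 */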

static int engine_wa_list_verify(struct intel_context *ce,
				 const struct i915_wa_list * const wal,
				 const char *from)
{
	const struct i915_wa *wa;
	struct i915_request *rq;
	struct i915_vma *vma;
	struct i915_gem_ww_ctx ww;
	unsigned int i;
	u32 *results;
	int err;

	if (!wal->count)
		return 0;

	vma = __vm_create_scratch_for_read(&ce->engine->gt->ggtt->vm,
					   wal->count * sizeof(u32));
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	intel_engine_pm_get(ce->engine);
	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_gem_object_lock(vma->obj, &ww);
	if (err == 0)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto err_pm;

	err = i915_vma_pin_ww(vma, &ww, 0, 0,
			      i915_vma_is_ggtt(vma) ? PIN_GLOBAL : PIN_USER);
	if (err)
		goto err_unpin;

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto err_vma;
	}

	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (err == 0)
		err = wa_list_srm(rq, wal, vma);

	i915_request_get(rq);
	if (err)
		i915_request_set_error_once(rq, err);
	i915_request_add(rq);

	if (err)
		goto err_rq;

	if (i915_request_wait(rq, 0, HZ / 5) < 0) {
		err = -ETIME;
		goto err_rq;
	}

	results = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
	if (IS_ERR(results)) {
		err = PTR_ERR(results);
		goto err_rq;
	}

	err = 0;
	for (i = 0, wa = wal->list; i < wal->count; i++, wa++) {
		if (mcr_range(rq->i915, i915_mmio_reg_offset(wa->reg)))
			continue;

		if (!wa_verify(wal->gt, wa, results[i], wal->name, from))
			err = -ENXIO;
	}

	i915_gem_object_unpin_map(vma->obj);

err_rq:
	i915_request_put(rq);
err_vma:
	i915_vma_unpin(vma);
err_unpin:
	intel_context_unpin(ce);
err_pm:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	i915_vma_put(vma);
	return err;
}

int intel_engine_verify_workarounds(struct intel_engine_cs *engine,
				    const char *from)
{
	return engine_wa_list_verify(engine->kernel_context,
				     &engine->wa_list,
				     from);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_workarounds.c"
#endif