// SPDX-License-Identifier: MIT
/*
 * Copyright © 2020 Intel Corporation
 */

#include <linux/slab.h>

#include "gem/i915_gem_lmem.h"

#include "i915_trace.h"
#include "intel_gt.h"
#include "intel_gtt.h"
#include "gen6_ppgtt.h"
#include "gen8_ppgtt.h"
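
/*
 * alloc_pt() - allocate a single page-table node and its backing DMA page of
 * @sz bytes via the vm's alloc_pt_dma() hook. Returns ERR_PTR(-ENOMEM) if
 * either allocation fails.
 */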
struct i915_page_table *alloc_pt(struct i915_address_space *vm, int sz)
{
	struct i915_page_table *pt;

	pt = kmalloc(sizeof(*pt), I915_GFP_ALLOW_FAIL);
	if (unlikely(!pt))
		return ERR_PTR(-ENOMEM);

	pt->base = vm->alloc_pt_dma(vm, sz);
	if (IS_ERR(pt->base)) {
		kfree(pt);
		return ERR_PTR(-ENOMEM);
	}

	pt->is_compact = false;
	atomic_set(&pt->used, 0);
	return pt;
}
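
/*
 * __alloc_pd() - allocate the CPU-side page-directory structure together
 * with a zeroed array of @count entry pointers. The backing DMA page is
 * attached separately by the caller.
 */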
struct i915_page_directory *__alloc_pd(int count)
{
	struct i915_page_directory *pd;

	pd = kzalloc(sizeof(*pd), I915_GFP_ALLOW_FAIL);
	if (unlikely(!pd))
		return NULL;

	pd->entry = kcalloc(count, sizeof(*pd->entry), I915_GFP_ALLOW_FAIL);
	if (unlikely(!pd->entry)) {
		kfree(pd);
		return NULL;
	}

	spin_lock_init(&pd->lock);
	return pd;
}
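
/*
 * alloc_pd() - allocate a page directory with I915_PDES entries plus a 4K
 * backing page for the hardware descriptors. Returns ERR_PTR(-ENOMEM) if
 * either allocation fails.
 */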
struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
	struct i915_page_directory *pd;

	pd = __alloc_pd(I915_PDES);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	pd->pt.base = vm->alloc_pt_dma(vm, I915_GTT_PAGE_SIZE_4K);
	if (IS_ERR(pd->pt.base)) {
		kfree(pd->entry);
		kfree(pd);
		return ERR_PTR(-ENOMEM);
	}

	return pd;
}
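
/*
 * free_px() - release a page table or page directory node. For directories
 * (lvl != 0) the entry array is freed as well; the backing GEM object, if
 * any, is dropped before the node itself is freed.
 */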
void free_px(struct i915_address_space *vm, struct i915_page_table *pt, int lvl)
{
	BUILD_BUG_ON(offsetof(struct i915_page_directory, pt));

	if (lvl) {
		struct i915_page_directory *pd =
			container_of(pt, typeof(*pd), pt);

		kfree(pd->entry);
	}

	if (pt->base)
		i915_gem_object_put(pt->base);

	kfree(pt);
}
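
/*
 * write_dma_entry() - write one 64-bit descriptor into the CPU mapping of
 * the backing page and flush the cacheline so the update is visible to the
 * device.
 */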
static void
write_dma_entry(struct drm_i915_gem_object * const pdma,
		const unsigned short idx,
		const u64 encoded_entry)
{
	u64 * const vaddr = __px_vaddr(pdma);

	vaddr[idx] = encoded_entry;
	drm_clflush_virt_range(&vaddr[idx], sizeof(u64));
}
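
/*
 * __set_pd_entry() - install @to at slot @idx of @pd: bump the directory's
 * use count, record the CPU pointer and write the encoded DMA address of
 * @to into the directory page.
 */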
void
__set_pd_entry(struct i915_page_directory * const pd,
	       const unsigned short idx,
	       struct i915_page_table * const to,
	       u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
	/* Each thread pre-pins the pd, and we may have a thread per pde. */
	GEM_BUG_ON(atomic_read(px_used(pd)) > NALLOC * I915_PDES);

	atomic_inc(px_used(pd));
	pd->entry[idx] = to;
	write_dma_entry(px_base(pd), idx, encode(px_dma(to), I915_CACHE_LLC));
}
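
/*
 * clear_pd_entry() - point slot @idx back at the scratch page and drop the
 * directory's use count. Counterpart of __set_pd_entry().
 */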
void
clear_pd_entry(struct i915_page_directory * const pd,
	       const unsigned short idx,
	       const struct drm_i915_gem_object * const scratch)
{
	GEM_BUG_ON(atomic_read(px_used(pd)) == 0);

	write_dma_entry(px_base(pd), idx, scratch->encode);
	pd->entry[idx] = NULL;
	atomic_dec(px_used(pd));
}
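
/*
 * release_pd_entry() - drop one reference on @pt and, if it was the last,
 * clear its slot in @pd under the directory lock. Returns true when the
 * caller should now free @pt.
 */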
bool
release_pd_entry(struct i915_page_directory * const pd,
		 const unsigned short idx,
		 struct i915_page_table * const pt,
		 const struct drm_i915_gem_object * const scratch)
{
	bool free = false;

	if (atomic_add_unless(&pt->used, -1, 1))
		return false;

	spin_lock(&pd->lock);
	if (atomic_dec_and_test(&pt->used)) {
		clear_pd_entry(pd, idx, scratch);
		free = true;
	}
	spin_unlock(&pd->lock);

	return free;
}
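
/*
 * i915_ppgtt_init_hw() - apply the GTT workarounds and, on graphics
 * version 6/7, enable the per-process GTT in hardware; nothing extra is
 * done here for later generations.
 */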
int i915_ppgtt_init_hw(struct intel_gt *gt)
{
	struct drm_i915_private *i915 = gt->i915;

	gtt_write_workarounds(gt);

	if (GRAPHICS_VER(i915) == 6)
		gen6_ppgtt_enable(gt);
	else if (GRAPHICS_VER(i915) == 7)
		gen7_ppgtt_enable(gt);

	return 0;
}
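
/* Select the gen6 or gen8 ppgtt backend based on the graphics version. */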
static struct i915_ppgtt *
__ppgtt_create(struct intel_gt *gt, unsigned long lmem_pt_obj_flags)
{
	if (GRAPHICS_VER(gt->i915) < 8)
		return gen6_ppgtt_create(gt);
	else
		return gen8_ppgtt_create(gt, lmem_pt_obj_flags);
}
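
/*
 * i915_ppgtt_create() - create a full ppgtt for @gt and emit the creation
 * tracepoint on success.
 */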
struct i915_ppgtt *i915_ppgtt_create(struct intel_gt *gt,
				     unsigned long lmem_pt_obj_flags)
{
	struct i915_ppgtt *ppgtt;

	ppgtt = __ppgtt_create(gt, lmem_pt_obj_flags);
	if (IS_ERR(ppgtt))
		return ppgtt;

	trace_i915_ppgtt_create(&ppgtt->vm);

	return ppgtt;
}
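
/*
 * ppgtt_bind_vma() - on first bind, allocate the VA range using the
 * preallocated stash, then write the PTEs with the requested PAT index and
 * any read-only/local-memory flags.
 */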
void ppgtt_bind_vma(struct i915_address_space *vm,
		    struct i915_vm_pt_stash *stash,
		    struct i915_vma_resource *vma_res,
		    unsigned int pat_index,
		    u32 flags)
{
	u32 pte_flags;

	if (!vma_res->allocated) {
		vm->allocate_va_range(vm, stash, vma_res->start,
				      vma_res->vma_size);
		vma_res->allocated = true;
	}

	/* Applicable to VLV, and gen8+ */
	pte_flags = 0;
	if (vma_res->bi.readonly)
		pte_flags |= PTE_READ_ONLY;
	if (vma_res->bi.lmem)
		pte_flags |= PTE_LM;

	vm->insert_entries(vm, vma_res, pat_index, pte_flags);
	wmb();
}
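
/*
 * ppgtt_unbind_vma() - clear the PTE range of a previously bound vma and
 * invalidate any TLB entries covering it.
 */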
void ppgtt_unbind_vma(struct i915_address_space *vm,
		      struct i915_vma_resource *vma_res)
{
	if (!vma_res->allocated)
		return;

	vm->clear_range(vm, vma_res->start, vma_res->vma_size);
	vma_invalidate_tlb(vm, vma_res->tlb);
}
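
/*
 * pd_count() - number of page-table nodes needed to cover @size at the
 * given shift, rounded up with extra slack for a misaligned start.
 */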
static unsigned long pd_count(u64 size, int shift)
{
	/* Beware later misalignment */
	return (size + 2 * (BIT_ULL(shift) - 1)) >> shift;
}
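
/*
 * i915_vm_alloc_pt_stash() - preallocate every page table and page
 * directory that could be needed to map @size bytes, chaining the nodes
 * onto the stash for use at bind time.
 */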
int i915_vm_alloc_pt_stash(struct i915_address_space *vm,
			   struct i915_vm_pt_stash *stash,
			   u64 size)
{
	unsigned long count;
	int shift, n, pt_sz;

	shift = vm->pd_shift;
	if (!shift)
		return 0;

	pt_sz = stash->pt_sz;
	if (!pt_sz)
		pt_sz = I915_GTT_PAGE_SIZE_4K;
	else
		GEM_BUG_ON(!IS_DGFX(vm->i915));

	GEM_BUG_ON(!is_power_of_2(pt_sz));

	count = pd_count(size, shift);
	while (count--) {
		struct i915_page_table *pt;

		pt = alloc_pt(vm, pt_sz);
		if (IS_ERR(pt)) {
			i915_vm_free_pt_stash(vm, stash);
			return PTR_ERR(pt);
		}

		pt->stash = stash->pt[0];
		stash->pt[0] = pt;
	}

	for (n = 1; n < vm->top; n++) {
		shift += ilog2(I915_PDES); /* Each PD holds 512 entries */
		count = pd_count(size, shift);
		while (count--) {
			struct i915_page_directory *pd;

			pd = alloc_pd(vm);
			if (IS_ERR(pd)) {
				i915_vm_free_pt_stash(vm, stash);
				return PTR_ERR(pd);
			}

			pd->pt.stash = stash->pt[1];
			stash->pt[1] = &pd->pt;
		}
	}

	return 0;
}
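
/*
 * i915_vm_map_pt_stash() - map the DMA pages of every node sitting in the
 * stash so their entries can be written during binding.
 */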
int i915_vm_map_pt_stash(struct i915_address_space *vm,
			 struct i915_vm_pt_stash *stash)
{
	struct i915_page_table *pt;
	int n, err;

	for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
		for (pt = stash->pt[n]; pt; pt = pt->stash) {
			err = map_pt_dma_locked(vm, pt->base);
			if (err)
				return err;
		}
	}

	return 0;
}
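
/*
 * i915_vm_free_pt_stash() - release every node still left in the stash,
 * e.g. after an allocation failure or when fewer nodes were needed than
 * were preallocated.
 */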
void i915_vm_free_pt_stash(struct i915_address_space *vm,
			   struct i915_vm_pt_stash *stash)
{
	struct i915_page_table *pt;
	int n;

	for (n = 0; n < ARRAY_SIZE(stash->pt); n++) {
		while ((pt = stash->pt[n])) {
			stash->pt[n] = pt->stash;
			free_px(vm, pt, n);
		}
	}
}
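
/*
 * ppgtt_init() - common ppgtt setup: wire up the gt/device pointers, size
 * the address space from the runtime ppgtt_size, and install the ppgtt vma
 * bind/unbind ops.
 */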
void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt,
		unsigned long lmem_pt_obj_flags)
{
	struct drm_i915_private *i915 = gt->i915;

	ppgtt->vm.gt = gt;
	ppgtt->vm.i915 = i915;
	ppgtt->vm.dma = i915->drm.dev;
	ppgtt->vm.total = BIT_ULL(RUNTIME_INFO(i915)->ppgtt_size);
	ppgtt->vm.lmem_pt_obj_flags = lmem_pt_obj_flags;

	dma_resv_init(&ppgtt->vm._resv);
	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);

	ppgtt->vm.vma_ops.bind_vma = ppgtt_bind_vma;
	ppgtt->vm.vma_ops.unbind_vma = ppgtt_unbind_vma;
}