/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2017 Intel Corporation
 */

#include <linux/prime_numbers.h>
#include <linux/string_helpers.h>
#include <linux/swap.h>

#include "i915_selftest.h"

#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_pm.h"
#include "gem/i915_gem_region.h"

#include "gt/intel_gt.h"

#include "igt_gem_utils.h"
#include "mock_context.h"

#include "selftests/mock_drm.h"
#include "selftests/mock_gem_device.h"
#include "selftests/mock_region.h"
#include "selftests/i915_random.h"

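/*
 * Create a GEM context for the live huge-page tests and, if the context
 * has a private VM, enable 64K PTE scrubbing on that VM.
 */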
static struct i915_gem_context *hugepage_ctx(struct drm_i915_private *i915,
					     struct file *file)
{
	struct i915_gem_context *ctx = live_context(i915, file);
	struct i915_address_space *vm;

	if (IS_ERR(ctx))
		return ctx;

	vm = ctx->vm;
	if (vm)
		WRITE_ONCE(vm->scrub_64K, true);

	return ctx;
}

static const unsigned int page_sizes[] = {
	I915_GTT_PAGE_SIZE_2M,
	I915_GTT_PAGE_SIZE_64K,
	I915_GTT_PAGE_SIZE_4K,
};

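/* Return the largest device-supported page size that fits in @rem, or 0. */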
static unsigned int get_largest_page_size(struct drm_i915_private *i915,
					  u64 rem)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
		unsigned int page_size = page_sizes[i];

		if (HAS_PAGE_SIZES(i915, page_size) && rem >= page_size)
			return page_size;
	}

	return 0;
}

static void huge_pages_free_pages(struct sg_table *st)
{
	struct scatterlist *sg;

	for (sg = st->sgl; sg; sg = __sg_next(sg)) {
		if (sg_page(sg))
			__free_pages(sg_page(sg), get_order(sg->length));
	}

	sg_free_table(st);
	kfree(st);
}

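/*
 * get_pages backend for huge_page_ops: back the object with real pages of
 * every size present in obj->mm.page_mask, largest first.
 */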
static int get_huge_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
	unsigned int page_mask = obj->mm.page_mask;
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int sg_page_sizes;
	u64 rem;

	/* restricted by sg_alloc_table */
	if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int))
		return -E2BIG;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	sg_page_sizes = 0;

	/*
	 * Our goal here is simple, we want to greedily fill the object from
	 * largest to smallest page-size, while ensuring that we use *every*
	 * page-size as per the given page-mask.
	 */
	do {
		unsigned int bit = ilog2(page_mask);
		unsigned int page_size = BIT(bit);
		int order = get_order(page_size);

		do {
			struct page *page;

			GEM_BUG_ON(order > MAX_PAGE_ORDER);
			page = alloc_pages(GFP | __GFP_ZERO, order);
			if (!page)
				goto err;

			sg_set_page(sg, page, page_size, 0);
			sg_page_sizes |= page_size;
			st->nents++;

			rem -= page_size;
			if (!rem) {
				sg_mark_end(sg);
				break;
			}

			sg = __sg_next(sg);
		} while ((rem - ((page_size-1) & page_mask)) >= page_size);

		page_mask &= (page_size-1);
	} while (page_mask);

	if (i915_gem_gtt_prepare_pages(obj, st))
		goto err;

	GEM_BUG_ON(sg_page_sizes != obj->mm.page_mask);
	__i915_gem_object_set_pages(obj, st);

	return 0;

err:
	sg_set_page(sg, NULL, 0, 0);
	sg_mark_end(sg);
	huge_pages_free_pages(st);

	return -ENOMEM;
}

static void put_huge_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	i915_gem_gtt_finish_pages(obj, pages);
	huge_pages_free_pages(pages);

	obj->mm.dirty = false;

	__start_cpu_write(obj);
}

static const struct drm_i915_gem_object_ops huge_page_ops = {
	.name = "huge-gem",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = get_huge_pages,
	.put_pages = put_huge_pages,
};

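/*
 * Create an internal object whose backing store is populated by
 * get_huge_pages() according to the given mask of GTT page sizes.
 */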
static struct drm_i915_gem_object *
huge_pages_object(struct drm_i915_private *i915,
		  u64 size,
		  unsigned int page_mask)
{
	static struct lock_class_key lock_class;
	struct drm_i915_gem_object *obj;
	unsigned int cache_level;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, BIT(__ffs(page_mask))));

	if (size >> PAGE_SHIFT > INT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);
	i915_gem_object_init(obj, &huge_page_ops, &lock_class, 0);
	obj->mem_flags |= I915_BO_FLAG_STRUCT_PAGE;
	i915_gem_object_set_volatile(obj);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;

	cache_level = HAS_LLC(i915) ? I915_CACHE_LLC : I915_CACHE_NONE;
	i915_gem_object_set_cache_coherency(obj, cache_level);

	obj->mm.page_mask = page_mask;

	return obj;
}

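/*
 * Fake get_pages backend: no real memory is allocated. Each scatterlist
 * entry simply records a length and stashes the chosen page size in the
 * dma address, which is enough to exercise the GTT insertion paths.
 */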
static int fake_get_huge_pages(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	const u64 max_len = rounddown_pow_of_two(UINT_MAX);
	struct sg_table *st;
	struct scatterlist *sg;
	u64 rem;

	/* restricted by sg_alloc_table */
	if (overflows_type(obj->base.size >> PAGE_SHIFT, unsigned int))
		return -E2BIG;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, obj->base.size >> PAGE_SHIFT, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	/* Use optimal page sized chunks to fill in the sg table */
	rem = obj->base.size;
	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int page_size = get_largest_page_size(i915, rem);
		unsigned int len = min(page_size * div_u64(rem, page_size),
				       max_len);

		GEM_BUG_ON(!page_size);

		sg->offset = 0;
		sg->length = len;
		sg_dma_len(sg) = len;
		sg_dma_address(sg) = page_size;

		st->nents++;

		rem -= len;
		if (!rem) {
			sg_mark_end(sg);
			break;
		}

		sg = sg_next(sg);
	} while (1);

	i915_sg_trim(st);

	__i915_gem_object_set_pages(obj, st);

	return 0;
}

static int fake_get_huge_pages_single(struct drm_i915_gem_object *obj)
{
	struct drm_i915_private *i915 = to_i915(obj->base.dev);
	struct sg_table *st;
	struct scatterlist *sg;
	unsigned int page_size;

	st = kmalloc(sizeof(*st), GFP);
	if (!st)
		return -ENOMEM;

	if (sg_alloc_table(st, 1, GFP)) {
		kfree(st);
		return -ENOMEM;
	}

	sg = st->sgl;
	st->nents = 1;

	page_size = get_largest_page_size(i915, obj->base.size);
	GEM_BUG_ON(!page_size);

	sg->offset = 0;
	sg->length = obj->base.size;
	sg_dma_len(sg) = obj->base.size;
	sg_dma_address(sg) = page_size;

	__i915_gem_object_set_pages(obj, st);

	return 0;
#undef GFP
}

static void fake_free_huge_pages(struct drm_i915_gem_object *obj,
				 struct sg_table *pages)
{
	sg_free_table(pages);
	kfree(pages);
}

static void fake_put_huge_pages(struct drm_i915_gem_object *obj,
				struct sg_table *pages)
{
	fake_free_huge_pages(obj, pages);
	obj->mm.dirty = false;
}

static const struct drm_i915_gem_object_ops fake_ops = {
	.name = "fake-gem",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages,
	.put_pages = fake_put_huge_pages,
};

static const struct drm_i915_gem_object_ops fake_ops_single = {
	.name = "fake-gem",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_huge_pages_single,
	.put_pages = fake_put_huge_pages,
};

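/*
 * Create an object backed by one of the fake page backends above; @single
 * selects a single scatterlist entry instead of one entry per chunk.
 */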
static struct drm_i915_gem_object *
fake_huge_pages_object(struct drm_i915_private *i915, u64 size, bool single)
{
	static struct lock_class_key lock_class;
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));

	if (size >> PAGE_SHIFT > UINT_MAX)
		return ERR_PTR(-E2BIG);

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		return ERR_PTR(-ENOMEM);

	drm_gem_private_object_init(&i915->drm, &obj->base, size);

	if (single)
		i915_gem_object_init(obj, &fake_ops_single, &lock_class, 0);
	else
		i915_gem_object_init(obj, &fake_ops, &lock_class, 0);

	i915_gem_object_set_volatile(obj);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);

	return obj;
}

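/*
 * Check that the vma's notion of its page sizes is supported by the device
 * and consistent with the bookkeeping on the backing object.
 */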
static int igt_check_page_sizes(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	/* We have to wait for the async bind to complete before our asserts */
	err = i915_vma_sync(vma);
	if (err)
		return err;

	if (!HAS_PAGE_SIZES(i915, vma->page_sizes.sg)) {
		pr_err("unsupported page_sizes.sg=%u, supported=%u\n",
		       vma->page_sizes.sg & ~supported, supported);
		err = -EINVAL;
	}

	if (!HAS_PAGE_SIZES(i915, vma->resource->page_sizes_gtt)) {
		pr_err("unsupported page_sizes.gtt=%u, supported=%u\n",
		       vma->resource->page_sizes_gtt & ~supported, supported);
		err = -EINVAL;
	}

	if (vma->page_sizes.phys != obj->mm.page_sizes.phys) {
		pr_err("vma->page_sizes.phys(%u) != obj->mm.page_sizes.phys(%u)\n",
		       vma->page_sizes.phys, obj->mm.page_sizes.phys);
		err = -EINVAL;
	}

	if (vma->page_sizes.sg != obj->mm.page_sizes.sg) {
		pr_err("vma->page_sizes.sg(%u) != obj->mm.page_sizes.sg(%u)\n",
		       vma->page_sizes.sg, obj->mm.page_sizes.sg);
		err = -EINVAL;
	}

	/*
	 * The dma-api is like a box of chocolates when it comes to the
	 * alignment of dma addresses, however for LMEM we have total control
	 * and so can guarantee alignment, likewise when we allocate our blocks
	 * they should appear in descending order, and if we know that we align
	 * to the largest page size for the GTT address, we should be able to
	 * assert that if we see 2M physical pages then we should also get 2M
	 * GTT pages. If we don't then something might be wrong in our
	 * construction of the backing pages.
	 *
	 * Maintaining alignment is required to utilise huge pages in the ppGGT.
	 */
	if (i915_gem_object_is_lmem(obj) &&
	    IS_ALIGNED(i915_vma_offset(vma), SZ_2M) &&
	    vma->page_sizes.sg & SZ_2M &&
	    vma->resource->page_sizes_gtt < SZ_2M) {
		pr_err("gtt pages mismatch for LMEM, expected 2M GTT pages, sg(%u), gtt(%u)\n",
		       vma->page_sizes.sg, vma->resource->page_sizes_gtt);
		err = -EINVAL;
	}

	return err;
}

static int igt_mock_exhaust_device_supported_pages(void *arg)
{
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned int saved_mask = RUNTIME_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int i, j, single;
	int err;

	/*
	 * Sanity check creating objects with every valid page support
	 * combination for our mock device.
	 */

	for (i = 1; i < BIT(ARRAY_SIZE(page_sizes)); i++) {
		unsigned int combination = SZ_4K; /* Required for ppGTT */

		for (j = 0; j < ARRAY_SIZE(page_sizes); j++) {
			if (i & BIT(j))
				combination |= page_sizes[j];
		}

		RUNTIME_INFO(i915)->page_sizes = combination;

		for (single = 0; single <= 1; ++single) {
			obj = fake_huge_pages_object(i915, combination, !!single);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_device;
			}

			if (obj->base.size != combination) {
				pr_err("obj->base.size=%zu, expected=%u\n",
				       obj->base.size, combination);
				err = -EINVAL;
				goto out_put;
			}

			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_put;
			}

			err = i915_vma_pin(vma, 0, 0, PIN_USER);
			if (err)
				goto out_put;

			err = igt_check_page_sizes(vma);

			if (vma->page_sizes.sg != combination) {
				pr_err("page_sizes.sg=%u, expected=%u\n",
				       vma->page_sizes.sg, combination);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);
			i915_gem_object_put(obj);

			if (err)
				goto out_device;
		}
	}

	goto out_device;

out_put:
	i915_gem_object_put(obj);
out_device:
	RUNTIME_INFO(i915)->page_sizes = saved_mask;

	return err;
}

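/*
 * Allocate an object of each supported page size from a mock memory region
 * and check that the dma address is suitably aligned and that the GTT
 * mapping actually uses that page size.
 */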
static int igt_mock_memory_region_huge_pages(void *arg)
{
	const unsigned int flags[] = { 0, I915_BO_ALLOC_CONTIGUOUS };
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
	struct intel_memory_region *mem;
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int bit;
	int err = 0;

	mem = mock_region_create(i915, 0, SZ_2G, I915_GTT_PAGE_SIZE_4K, 0, 0);
	if (IS_ERR(mem)) {
		pr_err("%s failed to create memory region\n", __func__);
		return PTR_ERR(mem);
	}

	for_each_set_bit(bit, &supported, ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		unsigned int page_size = BIT(bit);
		resource_size_t phys;
		int i;

		for (i = 0; i < ARRAY_SIZE(flags); ++i) {
			obj = i915_gem_object_create_region(mem,
							    page_size, page_size,
							    flags[i]);
			if (IS_ERR(obj)) {
				err = PTR_ERR(obj);
				goto out_region;
			}

			vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
			if (IS_ERR(vma)) {
				err = PTR_ERR(vma);
				goto out_put;
			}

			err = i915_vma_pin(vma, 0, 0, PIN_USER);
			if (err)
				goto out_put;

			err = igt_check_page_sizes(vma);
			if (err)
				goto out_unpin;

			phys = i915_gem_object_get_dma_address(obj, 0);
			if (!IS_ALIGNED(phys, page_size)) {
				pr_err("%s addr misaligned(%pa) page_size=%u\n",
				       __func__, &phys, page_size);
				err = -EINVAL;
				goto out_unpin;
			}

			if (vma->resource->page_sizes_gtt != page_size) {
				pr_err("%s page_sizes.gtt=%u, expected=%u\n",
				       __func__, vma->resource->page_sizes_gtt,
				       page_size);
				err = -EINVAL;
				goto out_unpin;
			}

			i915_vma_unpin(vma);
			__i915_gem_object_put_pages(obj);
			i915_gem_object_put(obj);
		}
	}

	goto out_region;

out_unpin:
	i915_vma_unpin(vma);
out_put:
	i915_gem_object_put(obj);
out_region:
	intel_memory_region_destroy(mem);
	return err;
}

static int igt_mock_ppgtt_misaligned_dma(void *arg)
{
	struct i915_ppgtt *ppgtt = arg;
	struct drm_i915_private *i915 = ppgtt->vm.i915;
	unsigned long supported = RUNTIME_INFO(i915)->page_sizes;
	struct drm_i915_gem_object *obj;
	int bit;
	int err;

	/*
	 * Sanity check dma misalignment for huge pages -- the dma addresses we
	 * insert into the paging structures need to always respect the page
	 * size alignment.
	 */

	bit = ilog2(I915_GTT_PAGE_SIZE_64K);

	for_each_set_bit_from(bit, &supported,
			      ilog2(I915_GTT_MAX_PAGE_SIZE) + 1) {
		IGT_TIMEOUT(end_time);
		unsigned int page_size = BIT(bit);
		unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
		unsigned int offset;
		unsigned int size =
			round_up(page_size, I915_GTT_PAGE_SIZE_2M) << 1;
		struct i915_vma *vma;

		obj = fake_huge_pages_object(i915, size, true);
		if (IS_ERR(obj))
			return PTR_ERR(obj);

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zu, expected=%u\n",
			       obj->base.size, size);
			err = -EINVAL;
			goto out_put;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err)
			goto out_put;

		/* Force the page size for this object */
		obj->mm.page_sizes.sg = page_size;

		vma = i915_vma_instance(obj, &ppgtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out_unpin;
		}

		err = i915_vma_pin(vma, 0, 0, flags);
		if (err)
			goto out_unpin;

		err = igt_check_page_sizes(vma);

		if (vma->resource->page_sizes_gtt != page_size) {
			pr_err("page_sizes.gtt=%u, expected %u\n",
			       vma->resource->page_sizes_gtt, page_size);
			err = -EINVAL;
		}

		i915_vma_unpin(vma);

		if (err)
			goto out_unpin;

		/*
		 * Try all the other valid offsets until the next
		 * boundary -- should always fall back to using 4K
		 * pages.
		 */
		for (offset = 4096; offset < page_size; offset += 4096) {
			err = i915_vma_unbind_unlocked(vma);
			if (err)
				goto out_unpin;

			err = i915_vma_pin(vma, 0, 0, flags | offset);
			if (err)
				goto out_unpin;

			err = igt_check_page_sizes(vma);

			if (vma->resource->page_sizes_gtt != I915_GTT_PAGE_SIZE_4K) {
				pr_err("page_sizes.gtt=%u, expected %llu\n",
				       vma->resource->page_sizes_gtt,
				       I915_GTT_PAGE_SIZE_4K);
				err = -EINVAL;
			}

			i915_vma_unpin(vma);

			if (err)
				goto out_unpin;

			if (igt_timeout(end_time,
					"%s timed out at offset %x with page-size %x\n",
					__func__, offset, page_size))
				break;
		}

		i915_gem_object_lock(obj, NULL);
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
	}

	return 0;

out_unpin:
	i915_gem_object_lock(obj, NULL);
	i915_gem_object_unpin_pages(obj);
	i915_gem_object_unlock(obj);
out_put:
	i915_gem_object_put(obj);

	return err;
}

static void close_object_list(struct list_head *objects)
{
	struct drm_i915_gem_object *obj, *on;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		list_del(&obj->st_link);
		i915_gem_object_lock(obj, NULL);
		i915_gem_object_unpin_pages(obj);
		__i915_gem_object_put_pages(obj);
		i915_gem_object_unlock(obj);
		i915_gem_object_put(obj);
	}
}

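/*
 * Fill the ppGTT with objects sized in prime numbers of pages, alternating
 * between the fake backends, and verify the GTT page sizes chosen for each
 * binding.
 */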
static int igt_ppgtt_huge_fill(void *arg)
{
	struct drm_i915_private *i915 = arg;
	unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
	bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55);
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	unsigned long max_pages;
	unsigned long page_num;
	struct file *file;
	bool single = false;
	LIST_HEAD(objects);
	IGT_TIMEOUT(end_time);
	int err = -ENODEV;

	if (supported == I915_GTT_PAGE_SIZE_4K)
		return 0;

	file = mock_file(i915);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ctx = hugepage_ctx(i915, file);
	if (IS_ERR(ctx)) {
		err = PTR_ERR(ctx);
		goto out;
	}
	vm = i915_gem_context_get_eb_vm(ctx);
	max_pages = vm->total >> PAGE_SHIFT;

	for_each_prime_number_from(page_num, 1, max_pages) {
		struct drm_i915_gem_object *obj;
		u64 size = page_num << PAGE_SHIFT;
		struct i915_vma *vma;
		unsigned int expected_gtt = 0;
		int i;

		obj = fake_huge_pages_object(i915, size, single);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		if (obj->base.size != size) {
			pr_err("obj->base.size=%zd, expected=%llu\n",
			       obj->base.size, size);
			i915_gem_object_put(obj);
			err = -EINVAL;
			break;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			break;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			break;
		}

		/* vma start must be aligned to BIT(21) to allow 2M PTEs */
		err = i915_vma_pin(vma, 0, BIT(21), PIN_USER);
		if (err)
			break;

		err = igt_check_page_sizes(vma);
		if (err) {
			i915_vma_unpin(vma);
			break;
		}

		/*
		 * Figure out the expected gtt page size knowing that we go from
		 * largest to smallest page size sg chunks, and that we align to
		 * the largest page size.
		 */
		for (i = 0; i < ARRAY_SIZE(page_sizes); ++i) {
			unsigned int page_size = page_sizes[i];

			if (HAS_PAGE_SIZES(i915, page_size) &&
			    size >= page_size) {
				expected_gtt |= page_size;
				size &= page_size-1;
			}
		}

		GEM_BUG_ON(!expected_gtt);
		GEM_BUG_ON(size);

		if (!has_pte64 && (obj->base.size < I915_GTT_PAGE_SIZE_2M ||
				   expected_gtt & I915_GTT_PAGE_SIZE_2M))
			expected_gtt &= ~I915_GTT_PAGE_SIZE_64K;

		i915_vma_unpin(vma);

		if (!has_pte64 && vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
			if (!IS_ALIGNED(vma->node.start,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.start(%llx) not aligned to 2M\n",
				       vma->node.start);
				err = -EINVAL;
				break;
			}

			if (!IS_ALIGNED(vma->node.size,
					I915_GTT_PAGE_SIZE_2M)) {
				pr_err("node.size(%llx) not aligned to 2M\n",
				       vma->node.size);
				err = -EINVAL;
				break;
			}
		}

		if (vma->resource->page_sizes_gtt != expected_gtt) {
			pr_err("gtt=%#x, expected=%#x, size=0x%zx, single=%s\n",
			       vma->resource->page_sizes_gtt, expected_gtt,
			       obj->base.size, str_yes_no(!!single));
			err = -EINVAL;
			break;
		}

		if (igt_timeout(end_time,
				"%s timed out at size %zd\n",
				__func__, obj->base.size))
			break;

		single = !single;
	}

	close_object_list(&objects);

	if (err == -ENOMEM || err == -ENOSPC)
		err = 0;

	i915_vm_put(vm);
out:
	fput(file);
	return err;
}

static int igt_ppgtt_64K(void *arg)
|
2017-10-06 23:18:29 +01:00
|
|
|
{
|
2023-04-26 23:28:48 +02:00
|
|
|
struct drm_i915_private *i915 = arg;
|
2024-03-19 23:03:01 -07:00
|
|
|
bool has_pte64 = GRAPHICS_VER_FULL(i915) >= IP_VER(12, 55);
|
2017-10-06 23:18:29 +01:00
|
|
|
struct drm_i915_gem_object *obj;
|
2023-04-26 23:28:48 +02:00
|
|
|
struct i915_address_space *vm;
|
|
|
|
struct i915_gem_context *ctx;
|
|
|
|
struct file *file;
|
2017-10-06 23:18:29 +01:00
|
|
|
const struct object_info {
|
|
|
|
unsigned int size;
|
|
|
|
unsigned int gtt;
|
|
|
|
unsigned int offset;
|
|
|
|
} objects[] = {
|
|
|
|
/* Cases with forced padding/alignment */
|
|
|
|
{
|
|
|
|
.size = SZ_64K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_64K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_64K + SZ_4K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_4K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_64K - SZ_4K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_4K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_2M,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_64K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_2M - SZ_4K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_4K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_2M + SZ_4K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_64K | I915_GTT_PAGE_SIZE_4K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_2M + SZ_64K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_64K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_2M - SZ_64K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_64K,
|
|
|
|
.offset = 0,
|
|
|
|
},
|
|
|
|
/* Try without any forced padding/alignment */
|
|
|
|
{
|
|
|
|
.size = SZ_64K,
|
|
|
|
.offset = SZ_2M,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_4K,
|
|
|
|
},
|
|
|
|
{
|
|
|
|
.size = SZ_128K,
|
|
|
|
.offset = SZ_2M - SZ_64K,
|
|
|
|
.gtt = I915_GTT_PAGE_SIZE_4K,
|
|
|
|
},
|
|
|
|
};
|
|
|
|
struct i915_vma *vma;
|
|
|
|
int i, single;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sanity check some of the trickiness with 64K pages -- either we can
|
|
|
|
* safely mark the whole page-table(2M block) as 64K, or we have to
|
|
|
|
* always fallback to 4K.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!HAS_PAGE_SIZES(i915, I915_GTT_PAGE_SIZE_64K))
|
|
|
|
return 0;
|
|
|
|
|
2023-04-26 23:28:48 +02:00
|
|
|
file = mock_file(i915);
|
|
|
|
if (IS_ERR(file))
|
|
|
|
return PTR_ERR(file);
|
|
|
|
|
|
|
|
ctx = hugepage_ctx(i915, file);
|
|
|
|
if (IS_ERR(ctx)) {
|
|
|
|
err = PTR_ERR(ctx);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
vm = i915_gem_context_get_eb_vm(ctx);
|
|
|
|
|
2017-10-06 23:18:29 +01:00
|
|
|
for (i = 0; i < ARRAY_SIZE(objects); ++i) {
|
|
|
|
unsigned int size = objects[i].size;
|
|
|
|
unsigned int expected_gtt = objects[i].gtt;
|
|
|
|
unsigned int offset = objects[i].offset;
|
|
|
|
unsigned int flags = PIN_USER;
|
|
|
|
|
2023-04-26 23:28:48 +02:00
|
|
|
/*
|
|
|
|
* For modern GTT models, the requirements for marking a page-table
|
|
|
|
* as 64K have been relaxed. Account for this.
|
|
|
|
*/
|
|
|
|
if (has_pte64) {
|
|
|
|
expected_gtt = 0;
|
|
|
|
if (size >= SZ_64K)
|
|
|
|
expected_gtt |= I915_GTT_PAGE_SIZE_64K;
|
|
|
|
if (size & (SZ_64K - 1))
|
|
|
|
expected_gtt |= I915_GTT_PAGE_SIZE_4K;
|
|
|
|
}
|
|
|
|
|
2017-10-06 23:18:29 +01:00
|
|
|
for (single = 0; single <= 1; single++) {
|
|
|
|
obj = fake_huge_pages_object(i915, size, !!single);
|
2023-04-26 23:28:48 +02:00
|
|
|
if (IS_ERR(obj)) {
|
|
|
|
err = PTR_ERR(obj);
|
|
|
|
goto out_vm;
|
|
|
|
}
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2021-03-23 16:50:30 +01:00
|
|
|
err = i915_gem_object_pin_pages_unlocked(obj);
|
2017-10-06 23:18:29 +01:00
|
|
|
if (err)
|
|
|
|
goto out_object_put;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disable 2M pages -- We only want to use 64K/4K pages
|
|
|
|
* for this test.
|
|
|
|
*/
|
|
|
|
obj->mm.page_sizes.sg &= ~I915_GTT_PAGE_SIZE_2M;
|
|
|
|
|
2023-04-26 23:28:48 +02:00
|
|
|
vma = i915_vma_instance(obj, vm, NULL);
|
2017-10-06 23:18:29 +01:00
|
|
|
if (IS_ERR(vma)) {
|
|
|
|
err = PTR_ERR(vma);
|
|
|
|
goto out_object_unpin;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (offset)
|
|
|
|
flags |= PIN_OFFSET_FIXED | offset;
|
|
|
|
|
|
|
|
err = i915_vma_pin(vma, 0, 0, flags);
|
|
|
|
if (err)
|
2020-04-22 20:05:58 +01:00
|
|
|
goto out_object_unpin;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
err = igt_check_page_sizes(vma);
|
|
|
|
if (err)
|
|
|
|
goto out_vma_unpin;
|
|
|
|
|
2023-04-26 23:28:48 +02:00
|
|
|
if (!has_pte64 && !offset &&
|
|
|
|
vma->page_sizes.sg & I915_GTT_PAGE_SIZE_64K) {
|
2017-10-06 23:18:29 +01:00
|
|
|
if (!IS_ALIGNED(vma->node.start,
|
|
|
|
I915_GTT_PAGE_SIZE_2M)) {
|
|
|
|
pr_err("node.start(%llx) not aligned to 2M\n",
|
|
|
|
vma->node.start);
|
|
|
|
err = -EINVAL;
|
|
|
|
goto out_vma_unpin;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!IS_ALIGNED(vma->node.size,
|
|
|
|
I915_GTT_PAGE_SIZE_2M)) {
|
|
|
|
pr_err("node.size(%llx) not aligned to 2M\n",
|
|
|
|
vma->node.size);
|
|
|
|
err = -EINVAL;
|
|
|
|
goto out_vma_unpin;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-10 18:22:15 +01:00
|
|
|
if (vma->resource->page_sizes_gtt != expected_gtt) {
|
2023-04-26 23:28:48 +02:00
|
|
|
pr_err("gtt=%#x, expected=%#x, i=%d, single=%s offset=%#x size=%#x\n",
|
2022-01-10 18:22:15 +01:00
|
|
|
vma->resource->page_sizes_gtt,
|
2023-04-26 23:28:48 +02:00
|
|
|
expected_gtt, i, str_yes_no(!!single),
|
|
|
|
offset, size);
|
2017-10-06 23:18:29 +01:00
|
|
|
err = -EINVAL;
|
|
|
|
goto out_vma_unpin;
|
|
|
|
}
|
|
|
|
|
|
|
|
i915_vma_unpin(vma);
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_lock(obj, NULL);
|
2017-10-06 23:18:29 +01:00
|
|
|
i915_gem_object_unpin_pages(obj);
|
drm/i915: Switch obj->mm.lock lockdep annotations on its head
The trouble with having a plain nesting flag for locks which do not
naturally nest (unlike block devices and their partitions, which is
the original motivation for nesting levels) is that lockdep will
never spot a true deadlock if you screw up.
This patch is an attempt at trying better, by highlighting a bit more
of the actual nature of the nesting that's going on. Essentially we
have two kinds of objects:
- objects without pages allocated, which cannot be on any lru and are
hence inaccessible to the shrinker.
- objects which have pages allocated, which are on an lru, and which
the shrinker can decide to throw out.
For the former type of object, memory allocations while holding
obj->mm.lock are permissible. For the latter they are not. And
get/put_pages transitions between the two types of objects.
This is still not entirely fool-proof since the rules might change.
But as long as we run such a code ever at runtime lockdep should be
able to observe the inconsistency and complain (like with any other
lockdep class that we've split up in multiple classes). But there are
a few clear benefits:
- We can drop the nesting flag parameter from
__i915_gem_object_put_pages, because that function by definition is
never going allocate memory, and calling it on an object which
doesn't have its pages allocated would be a bug.
- We strictly catch more bugs, since there's not only one place in the
entire tree which is annotated with the special class. All the
other places that had explicit lockdep nesting annotations we're now
going to leave up to lockdep again.
- Specifically this catches stuff like calling get_pages from
put_pages (which isn't really a good idea, if we can call get_pages
so could the shrinker). I've seen patches do exactly that.
Of course I fully expect CI will show me for the fool I am with this
one here :-)
v2: There can only be one (lockdep only has a cache for the first
subclass, not for deeper ones, and we don't want to make these locks
even slower). Still separate enums for better documentation.
Real fix: don't forget about phys objs and pin_map(), and fix the
shrinker to have the right annotations ... silly me.
v3: Forgot usertptr too ...
v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
and instead prime lockdep (Chris).
v5: Appease checkpatch, no double empty lines (Chris)
v6: More rebasing over selftest changes. Also somehow I forgot to
push this patch :-/
Also format comments consistently while at it.
v7: Fix typo in commit message (Joonas)
Also drop the priming, with the lmem merge we now have allocations
while holding the lmem lock, which wreaks the generic priming I've
done in earlier patches. Should probably be resurrected when lmem is
fixed. See
commit 232a6ebae419193f5b8da4fa869ae5089ab105c2
Author: Matthew Auld <matthew.auld@intel.com>
Date: Tue Oct 8 17:01:14 2019 +0100
drm/i915: introduce intel_memory_region
I'm keeping the priming patch locally so it wont get lost.
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Tang, CQ" <cq.tang@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v5)
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> (v6)
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191105090148.30269-1-daniel.vetter@ffwll.ch
[mlankhorst: Fix commit typos pointed out by Michael Ruhl]
2019-11-05 10:01:48 +01:00
|
|
|
__i915_gem_object_put_pages(obj);
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_unlock(obj);
|
2017-10-06 23:18:29 +01:00
|
|
|
i915_gem_object_put(obj);
|
2021-10-28 13:58:53 +01:00
|
|
|
|
|
|
|
i915_gem_drain_freed_objects(i915);
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-26 23:28:48 +02:00
|
|
|
goto out_vm;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
out_vma_unpin:
|
|
|
|
i915_vma_unpin(vma);
|
|
|
|
out_object_unpin:
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_lock(obj, NULL);
|
2017-10-06 23:18:29 +01:00
|
|
|
i915_gem_object_unpin_pages(obj);
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_unlock(obj);
|
2017-10-06 23:18:29 +01:00
|
|
|
out_object_put:
|
|
|
|
i915_gem_object_put(obj);
|
2023-04-26 23:28:48 +02:00
|
|
|
out_vm:
|
|
|
|
i915_vm_put(vm);
|
|
|
|
out:
|
|
|
|
fput(file);
|
2017-10-06 23:18:29 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
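/*
 * Use the GPU to write @val into dword index @dw of every page backing
 * @vma, after flushing the object to the GTT write domain.
 */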
static int gpu_write(struct intel_context *ce,
|
|
|
|
struct i915_vma *vma,
|
2019-08-10 11:50:08 +01:00
|
|
|
u32 dw,
|
|
|
|
u32 val)
|
2017-10-06 23:18:29 +01:00
|
|
|
{
|
|
|
|
int err;
|
|
|
|
|
2020-08-19 16:08:45 +02:00
|
|
|
i915_gem_object_lock(vma->obj, NULL);
|
2019-08-10 11:50:08 +01:00
|
|
|
err = i915_gem_object_set_to_gtt_domain(vma->obj, true);
|
|
|
|
i915_gem_object_unlock(vma->obj);
|
2017-10-06 23:18:29 +01:00
|
|
|
if (err)
|
2019-08-10 11:50:08 +01:00
|
|
|
return err;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
return igt_gpu_fill_dw(ce, vma, dw * sizeof(u32),
|
2019-08-10 11:50:08 +01:00
|
|
|
vma->size >> PAGE_SHIFT, val);
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
2019-10-25 16:37:26 +01:00
|
|
|
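/*
 * Read back dword @dword of every page through a CPU kmap and check that
 * it matches @val, clflushing beforehand if the read requires it.
 */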
static int
|
|
|
|
__cpu_check_shmem(struct drm_i915_gem_object *obj, u32 dword, u32 val)
|
2017-10-06 23:18:29 +01:00
|
|
|
{
|
|
|
|
unsigned int needs_flush;
|
|
|
|
unsigned long n;
|
|
|
|
int err;
|
|
|
|
|
2020-08-19 16:08:46 +02:00
|
|
|
i915_gem_object_lock(obj, NULL);
|
2019-05-28 10:29:48 +01:00
|
|
|
err = i915_gem_object_prepare_read(obj, &needs_flush);
|
2017-10-06 23:18:29 +01:00
|
|
|
if (err)
|
2020-08-19 16:08:46 +02:00
|
|
|
goto err_unlock;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
for (n = 0; n < obj->base.size >> PAGE_SHIFT; ++n) {
|
2023-12-03 21:29:42 +08:00
|
|
|
u32 *ptr = kmap_local_page(i915_gem_object_get_page(obj, n));
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
if (needs_flush & CLFLUSH_BEFORE)
|
|
|
|
drm_clflush_virt_range(ptr, PAGE_SIZE);
|
|
|
|
|
|
|
|
if (ptr[dword] != val) {
|
|
|
|
pr_err("n=%lu ptr[%u]=%u, val=%u\n",
|
|
|
|
n, dword, ptr[dword], val);
|
2023-12-03 21:29:42 +08:00
|
|
|
kunmap_local(ptr);
|
2017-10-06 23:18:29 +01:00
|
|
|
err = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2023-12-03 21:29:42 +08:00
|
|
|
kunmap_local(ptr);
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
2019-05-28 10:29:48 +01:00
|
|
|
i915_gem_object_finish_access(obj);
|
2020-08-19 16:08:46 +02:00
|
|
|
err_unlock:
|
|
|
|
i915_gem_object_unlock(obj);
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2020-01-02 20:42:15 +00:00
|
|
|
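/*
 * CPU check for objects without struct pages: wait for the object to be
 * idle, then verify dword @dword of every page through a WC vmap.
 */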
static int __cpu_check_vmap(struct drm_i915_gem_object *obj, u32 dword, u32 val)
|
2019-10-25 16:37:26 +01:00
|
|
|
{
|
2020-01-02 20:42:15 +00:00
|
|
|
unsigned long n = obj->base.size >> PAGE_SHIFT;
|
|
|
|
u32 *ptr;
|
2019-10-25 16:37:26 +01:00
|
|
|
int err;
|
|
|
|
|
2020-01-02 20:42:15 +00:00
|
|
|
err = i915_gem_object_wait(obj, 0, MAX_SCHEDULE_TIMEOUT);
|
2019-10-25 16:37:26 +01:00
|
|
|
if (err)
|
|
|
|
return err;
|
|
|
|
|
2021-03-23 16:50:30 +01:00
|
|
|
ptr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WC);
|
2020-01-02 20:42:15 +00:00
|
|
|
if (IS_ERR(ptr))
|
|
|
|
return PTR_ERR(ptr);
|
2019-10-25 16:37:26 +01:00
|
|
|
|
2020-01-02 20:42:15 +00:00
|
|
|
ptr += dword;
|
|
|
|
while (n--) {
|
|
|
|
if (*ptr != val) {
|
|
|
|
pr_err("base[%u]=%08x, val=%08x\n",
|
|
|
|
dword, *ptr, val);
|
2019-10-25 16:37:26 +01:00
|
|
|
err = -EINVAL;
|
|
|
|
break;
|
|
|
|
}
|
2020-01-02 20:42:15 +00:00
|
|
|
|
|
|
|
ptr += PAGE_SIZE / sizeof(*ptr);
|
2019-10-25 16:37:26 +01:00
|
|
|
}
|
|
|
|
|
2020-01-02 20:42:15 +00:00
|
|
|
i915_gem_object_unpin_map(obj);
|
2019-10-25 16:37:26 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
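/* Dispatch to the shmem or vmap check depending on the backing store. */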
static int cpu_check(struct drm_i915_gem_object *obj, u32 dword, u32 val)
|
|
|
|
{
|
|
|
|
if (i915_gem_object_has_struct_page(obj))
|
|
|
|
return __cpu_check_shmem(obj, dword, val);
|
2020-01-02 20:42:15 +00:00
|
|
|
else
|
|
|
|
return __cpu_check_vmap(obj, dword, val);
|
2019-10-25 16:37:26 +01:00
|
|
|
}
|
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
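/*
 * Pin @obj at the fixed GTT @offset, sanity check the resulting page
 * sizes, then write @val from the GPU and verify it from the CPU.
 */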
static int __igt_write_huge(struct intel_context *ce,
|
2017-11-23 13:54:21 +00:00
|
|
|
struct drm_i915_gem_object *obj,
|
|
|
|
u64 size, u64 offset,
|
|
|
|
u32 dword, u32 val)
|
|
|
|
{
|
|
|
|
unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
|
|
|
|
struct i915_vma *vma;
|
|
|
|
int err;
|
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
vma = i915_vma_instance(obj, ce->vm, NULL);
|
2017-11-23 13:54:21 +00:00
|
|
|
if (IS_ERR(vma))
|
|
|
|
return PTR_ERR(vma);
|
|
|
|
|
|
|
|
err = i915_vma_pin(vma, size, 0, flags | offset);
|
|
|
|
if (err) {
|
|
|
|
/*
|
|
|
|
* The ggtt may have some pages reserved so
|
|
|
|
* refrain from erroring out.
|
|
|
|
*/
|
2019-08-24 00:51:41 +01:00
|
|
|
if (err == -ENOSPC && i915_is_ggtt(ce->vm))
|
2017-11-23 13:54:21 +00:00
|
|
|
err = 0;
|
|
|
|
|
2020-04-22 20:05:58 +01:00
|
|
|
return err;
|
2017-11-23 13:54:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
err = igt_check_page_sizes(vma);
|
|
|
|
if (err)
|
|
|
|
goto out_vma_unpin;
|
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
err = gpu_write(ce, vma, dword, val);
|
2017-11-23 13:54:21 +00:00
|
|
|
if (err) {
|
|
|
|
pr_err("gpu-write failed at offset=%llx\n", offset);
|
|
|
|
goto out_vma_unpin;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = cpu_check(obj, dword, val);
|
|
|
|
if (err) {
|
|
|
|
pr_err("cpu-check failed at offset=%llx\n", offset);
|
|
|
|
goto out_vma_unpin;
|
|
|
|
}
|
|
|
|
|
|
|
|
out_vma_unpin:
|
|
|
|
i915_vma_unpin(vma);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2021-10-28 13:58:53 +01:00
|
|
|
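/*
 * Hammer GPU writes to @obj at prime-derived offsets from both ends of
 * the address space, cycling through all dword-capable engines in a
 * randomised order until the timeout expires.
 */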
static int igt_write_huge(struct drm_i915_private *i915,
|
2017-10-10 14:30:30 +01:00
|
|
|
struct drm_i915_gem_object *obj)
|
2017-10-06 23:18:29 +01:00
|
|
|
{
|
2019-08-24 00:51:41 +01:00
|
|
|
struct i915_gem_engines *engines;
|
|
|
|
struct i915_gem_engines_iter it;
|
|
|
|
struct intel_context *ce;
|
2017-11-23 13:54:20 +00:00
|
|
|
I915_RND_STATE(prng);
|
|
|
|
IGT_TIMEOUT(end_time);
|
2017-10-06 23:18:29 +01:00
|
|
|
unsigned int max_page_size;
|
2019-08-24 00:51:41 +01:00
|
|
|
unsigned int count;
|
2021-10-28 13:58:53 +01:00
|
|
|
struct i915_gem_context *ctx;
|
|
|
|
struct file *file;
|
2017-10-06 23:18:29 +01:00
|
|
|
u64 max;
|
|
|
|
u64 num;
|
|
|
|
u64 size;
|
2017-11-23 13:54:20 +00:00
|
|
|
int *order;
|
|
|
|
int i, n;
|
2017-10-06 23:18:29 +01:00
|
|
|
int err = 0;
|
|
|
|
|
2021-10-28 13:58:53 +01:00
|
|
|
file = mock_file(i915);
|
|
|
|
if (IS_ERR(file))
|
|
|
|
return PTR_ERR(file);
|
|
|
|
|
|
|
|
ctx = hugepage_ctx(i915, file);
|
|
|
|
if (IS_ERR(ctx)) {
|
|
|
|
err = PTR_ERR(ctx);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
2017-10-06 23:18:29 +01:00
|
|
|
GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
|
|
|
|
|
|
|
|
size = obj->base.size;
|
2022-10-04 12:49:14 +01:00
|
|
|
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
|
|
|
|
!HAS_64K_PAGES(i915))
|
2017-10-06 23:18:29 +01:00
|
|
|
size = round_up(size, I915_GTT_PAGE_SIZE_2M);
|
|
|
|
|
2017-11-23 13:54:20 +00:00
|
|
|
n = 0;
|
2019-08-24 00:51:41 +01:00
|
|
|
count = 0;
|
|
|
|
max = U64_MAX;
|
|
|
|
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
|
|
|
|
count++;
|
|
|
|
if (!intel_engine_can_store_dword(ce->engine))
|
2017-10-06 23:18:29 +01:00
|
|
|
continue;
|
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
max = min(max, ce->vm->total);
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
i915_gem_context_unlock_engines(ctx);
|
2017-11-23 13:54:20 +00:00
|
|
|
if (!n)
|
2021-10-28 13:58:53 +01:00
|
|
|
goto out;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2017-11-23 13:54:20 +00:00
|
|
|
/*
|
|
|
|
* To keep things interesting when alternating between engines in our
|
|
|
|
* randomized order, let's also make feeding to the same engine a few
|
|
|
|
* times in succession a possibility by enlarging the permutation array.
|
|
|
|
*/
|
2019-08-24 00:51:41 +01:00
|
|
|
order = i915_random_order(count * count, &prng);
|
2023-07-17 20:49:31 +02:00
|
|
|
if (!order) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto out;
|
|
|
|
}
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
max_page_size = rounddown_pow_of_two(obj->mm.page_sizes.sg);
|
|
|
|
max = div_u64(max - size, max_page_size);
|
|
|
|
|
2017-11-23 13:54:20 +00:00
|
|
|
/*
|
2017-11-23 13:54:21 +00:00
|
|
|
* Try various offsets in an ascending/descending fashion until we
|
|
|
|
* time out -- we want to avoid issues hidden by effectively always using
|
|
|
|
* offset = 0.
|
2017-11-23 13:54:20 +00:00
|
|
|
*/
|
|
|
|
i = 0;
|
2019-08-24 00:51:41 +01:00
|
|
|
engines = i915_gem_context_lock_engines(ctx);
|
2017-11-23 13:54:20 +00:00
|
|
|
for_each_prime_number_from(num, 0, max) {
|
2017-11-23 13:54:21 +00:00
|
|
|
u64 offset_low = num * max_page_size;
|
|
|
|
u64 offset_high = (max - num) * max_page_size;
|
|
|
|
u32 dword = offset_in_page(num) / 4;
|
2019-08-24 00:51:41 +01:00
|
|
|
struct intel_context *ce;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
ce = engines->engines[order[i] % engines->num_engines];
|
|
|
|
i = (i + 1) % (count * count);
|
|
|
|
if (!ce || !intel_engine_can_store_dword(ce->engine))
|
|
|
|
continue;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2018-10-29 20:37:34 +00:00
|
|
|
/*
|
|
|
|
* In order to utilize 64K pages we need to both pad the vma
|
|
|
|
* size and ensure the vma offset is at the start of the pt
|
|
|
|
* boundary, however to improve coverage we opt for testing both
|
|
|
|
* aligned and unaligned offsets.
|
2022-10-04 12:49:14 +01:00
|
|
|
*
|
|
|
|
* With PS64 this is no longer the case, but to ensure we
|
|
|
|
* sometimes get the compact layout for smaller objects, apply
|
|
|
|
* the round_up anyway.
|
2018-10-29 20:37:34 +00:00
|
|
|
*/
|
|
|
|
if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
|
|
|
|
offset_low = round_down(offset_low,
|
|
|
|
I915_GTT_PAGE_SIZE_2M);
|
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
err = __igt_write_huge(ce, obj, size, offset_low,
|
2018-10-29 20:37:34 +00:00
|
|
|
dword, num + 1);
|
2017-11-23 13:54:21 +00:00
|
|
|
if (err)
|
|
|
|
break;
|
2017-11-23 13:54:20 +00:00
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
err = __igt_write_huge(ce, obj, size, offset_high,
|
2018-10-29 20:37:34 +00:00
|
|
|
dword, num + 1);
|
2017-11-23 13:54:21 +00:00
|
|
|
if (err)
|
|
|
|
break;
|
2017-11-23 13:54:20 +00:00
|
|
|
|
|
|
|
if (igt_timeout(end_time,
|
2019-08-24 00:51:41 +01:00
|
|
|
"%s timed out on %s, offset_low=%llx offset_high=%llx, max_page_size=%x\n",
|
|
|
|
__func__, ce->engine->name, offset_low, offset_high,
|
2018-10-29 20:37:34 +00:00
|
|
|
max_page_size))
|
2017-11-23 13:54:20 +00:00
|
|
|
break;
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
2019-08-24 00:51:41 +01:00
|
|
|
i915_gem_context_unlock_engines(ctx);
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2017-11-23 13:54:20 +00:00
|
|
|
kfree(order);
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2021-10-28 13:58:53 +01:00
|
|
|
out:
|
|
|
|
fput(file);
|
2017-10-06 23:18:29 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
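/* Object-creation backends exercised by the smoke and sanity tests. */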
typedef struct drm_i915_gem_object *
|
|
|
|
(*igt_create_fn)(struct drm_i915_private *i915, u32 size, u32 flags);
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
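/* THP needs both our private gemfs mount and kernel THP support. */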
static inline bool igt_can_allocate_thp(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
return i915->mm.gemfs && has_transparent_hugepage();
|
|
|
|
}
|
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
static struct drm_i915_gem_object *
|
|
|
|
igt_create_shmem(struct drm_i915_private *i915, u32 size, u32 flags)
|
2017-10-06 23:18:29 +01:00
|
|
|
{
|
|
|
|
if (!igt_can_allocate_thp(i915)) {
|
2019-10-25 16:37:27 +01:00
|
|
|
pr_info("%s missing THP support, skipping\n", __func__);
|
|
|
|
return ERR_PTR(-ENODEV);
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
return i915_gem_object_create_shmem(i915, size);
|
|
|
|
}
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
static struct drm_i915_gem_object *
|
|
|
|
igt_create_internal(struct drm_i915_private *i915, u32 size, u32 flags)
|
|
|
|
{
|
|
|
|
return i915_gem_object_create_internal(i915, size);
|
|
|
|
}
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-10-25 16:37:28 +01:00
|
|
|
static struct drm_i915_gem_object *
|
|
|
|
igt_create_system(struct drm_i915_private *i915, u32 size, u32 flags)
|
|
|
|
{
|
|
|
|
return huge_pages_object(i915, size, size);
|
|
|
|
}
|
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
static struct drm_i915_gem_object *
|
|
|
|
igt_create_local(struct drm_i915_private *i915, u32 size, u32 flags)
|
|
|
|
{
|
|
|
|
return i915_gem_object_create_lmem(i915, size, flags);
|
|
|
|
}
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
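/*
 * Pick a random page-aligned size in [min_page_size, 2 * max_page_size),
 * always keeping at least the min_page_size bit set.
 */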
static u32 igt_random_size(struct rnd_state *prng,
|
|
|
|
u32 min_page_size,
|
|
|
|
u32 max_page_size)
|
|
|
|
{
|
|
|
|
u64 mask;
|
|
|
|
u32 size;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
GEM_BUG_ON(!is_power_of_2(min_page_size));
|
|
|
|
GEM_BUG_ON(!is_power_of_2(max_page_size));
|
|
|
|
GEM_BUG_ON(min_page_size < PAGE_SIZE);
|
|
|
|
GEM_BUG_ON(min_page_size > max_page_size);
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
mask = ((max_page_size << 1ULL) - 1) & PAGE_MASK;
|
|
|
|
size = prandom_u32_state(prng) & mask;
|
|
|
|
if (size < min_page_size)
|
|
|
|
size |= min_page_size;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
return size;
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
static int igt_ppgtt_smoke_huge(void *arg)
|
2019-10-25 16:37:26 +01:00
|
|
|
{
|
2021-10-28 13:58:53 +01:00
|
|
|
struct drm_i915_private *i915 = arg;
|
2019-10-25 16:37:26 +01:00
|
|
|
struct drm_i915_gem_object *obj;
|
2019-10-25 16:37:27 +01:00
|
|
|
I915_RND_STATE(prng);
|
|
|
|
struct {
|
|
|
|
igt_create_fn fn;
|
|
|
|
u32 min;
|
|
|
|
u32 max;
|
|
|
|
} backends[] = {
|
|
|
|
{ igt_create_internal, SZ_64K, SZ_2M, },
|
|
|
|
{ igt_create_shmem, SZ_64K, SZ_32M, },
|
|
|
|
{ igt_create_local, SZ_64K, SZ_1G, },
|
2019-10-25 16:37:26 +01:00
|
|
|
};
|
|
|
|
int err;
|
2019-10-25 16:37:27 +01:00
|
|
|
int i;
|
2019-10-25 16:37:26 +01:00
|
|
|
|
|
|
|
/*
|
2019-10-25 16:37:27 +01:00
|
|
|
* Sanity check that the HW uses huge pages correctly through our
|
|
|
|
* various backends -- ensure that our writes land in the right place.
|
2019-10-25 16:37:26 +01:00
|
|
|
*/
|
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
for (i = 0; i < ARRAY_SIZE(backends); ++i) {
|
|
|
|
u32 min = backends[i].min;
|
|
|
|
u32 max = backends[i].max;
|
|
|
|
u32 size = max;
|
2021-10-28 13:58:53 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
try_again:
|
|
|
|
size = igt_random_size(&prng, min, rounddown_pow_of_two(size));
|
2019-10-25 16:37:26 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
obj = backends[i].fn(i915, size, 0);
|
2019-10-25 16:37:26 +01:00
|
|
|
if (IS_ERR(obj)) {
|
|
|
|
err = PTR_ERR(obj);
|
|
|
|
if (err == -E2BIG) {
|
2019-10-25 16:37:27 +01:00
|
|
|
size >>= 1;
|
|
|
|
goto try_again;
|
|
|
|
} else if (err == -ENODEV) {
|
|
|
|
err = 0;
|
|
|
|
continue;
|
2019-10-25 16:37:26 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2021-03-23 16:50:30 +01:00
|
|
|
err = i915_gem_object_pin_pages_unlocked(obj);
|
2019-10-25 16:37:27 +01:00
|
|
|
if (err) {
|
2022-02-28 12:36:06 +00:00
|
|
|
if (err == -ENXIO || err == -E2BIG || err == -ENOMEM) {
|
2019-10-25 16:37:27 +01:00
|
|
|
i915_gem_object_put(obj);
|
|
|
|
size >>= 1;
|
|
|
|
goto try_again;
|
|
|
|
}
|
2019-10-25 16:37:26 +01:00
|
|
|
goto out_put;
|
2019-10-25 16:37:27 +01:00
|
|
|
}
|
2019-10-25 16:37:26 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
if (obj->mm.page_sizes.phys < min) {
|
|
|
|
pr_info("%s unable to allocate huge-page(s) with size=%u, i=%d\n",
|
|
|
|
__func__, size, i);
|
|
|
|
err = -ENOMEM;
|
2019-10-25 16:37:26 +01:00
|
|
|
goto out_unpin;
|
|
|
|
}
|
|
|
|
|
2021-10-28 13:58:53 +01:00
|
|
|
err = igt_write_huge(i915, obj);
|
2019-10-25 16:37:26 +01:00
|
|
|
if (err) {
|
2019-10-25 16:37:27 +01:00
|
|
|
pr_err("%s write-huge failed with size=%u, i=%d\n",
|
|
|
|
__func__, size, i);
|
2019-10-25 16:37:26 +01:00
|
|
|
}
|
2019-10-25 16:37:27 +01:00
|
|
|
out_unpin:
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_lock(obj, NULL);
|
2019-10-25 16:37:26 +01:00
|
|
|
i915_gem_object_unpin_pages(obj);
|
2019-11-05 10:01:48 +01:00
|
|
|
__i915_gem_object_put_pages(obj);
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_unlock(obj);
|
2019-10-25 16:37:27 +01:00
|
|
|
out_put:
|
2019-10-25 16:37:26 +01:00
|
|
|
i915_gem_object_put(obj);
|
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
if (err == -ENOMEM || err == -ENXIO)
|
|
|
|
err = 0;
|
2019-10-25 16:37:26 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
if (err)
|
|
|
|
break;
|
2019-10-25 16:37:26 +01:00
|
|
|
|
2019-10-25 16:37:27 +01:00
|
|
|
cond_resched();
|
|
|
|
}
|
2019-10-25 16:37:26 +01:00
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-10-25 16:37:28 +01:00
|
|
|
static int igt_ppgtt_sanity_check(void *arg)
|
|
|
|
{
|
2021-10-28 13:58:53 +01:00
|
|
|
struct drm_i915_private *i915 = arg;
|
2022-08-19 15:02:39 +03:00
|
|
|
unsigned int supported = RUNTIME_INFO(i915)->page_sizes;
|
2019-10-25 16:37:28 +01:00
|
|
|
struct {
|
|
|
|
igt_create_fn fn;
|
|
|
|
unsigned int flags;
|
|
|
|
} backends[] = {
|
|
|
|
{ igt_create_system, 0, },
|
2020-11-30 14:18:08 +00:00
|
|
|
{ igt_create_local, 0, },
|
2019-10-25 16:37:28 +01:00
|
|
|
{ igt_create_local, I915_BO_ALLOC_CONTIGUOUS, },
|
|
|
|
};
|
|
|
|
struct {
|
|
|
|
u32 size;
|
|
|
|
u32 pages;
|
|
|
|
} combos[] = {
|
|
|
|
{ SZ_64K, SZ_64K },
|
|
|
|
{ SZ_2M, SZ_2M },
|
|
|
|
{ SZ_2M, SZ_64K },
|
|
|
|
{ SZ_2M - SZ_64K, SZ_64K },
|
|
|
|
{ SZ_2M - SZ_4K, SZ_64K | SZ_4K },
|
|
|
|
{ SZ_2M + SZ_4K, SZ_64K | SZ_4K },
|
|
|
|
{ SZ_2M + SZ_4K, SZ_2M | SZ_4K },
|
|
|
|
{ SZ_2M + SZ_64K, SZ_2M | SZ_64K },
|
2022-10-04 12:49:14 +01:00
|
|
|
{ SZ_2M + SZ_64K, SZ_64K },
|
2019-10-25 16:37:28 +01:00
|
|
|
};
|
|
|
|
int i, j;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
if (supported == I915_GTT_PAGE_SIZE_4K)
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sanity check that the HW behaves with a limited set of combinations.
|
|
|
|
* We already have a bunch of randomised testing, which should give us
|
|
|
|
* a decent amount of variation between runs, however we should keep
|
|
|
|
* this to limit the chances of introducing a temporary regression, by
|
|
|
|
* testing the most obvious cases that might make something blow up.
|
|
|
|
*/
|
|
|
|
|
|
|
|
for (i = 0; i < ARRAY_SIZE(backends); ++i) {
|
|
|
|
for (j = 0; j < ARRAY_SIZE(combos); ++j) {
|
|
|
|
struct drm_i915_gem_object *obj;
|
|
|
|
u32 size = combos[j].size;
|
|
|
|
u32 pages = combos[j].pages;
|
|
|
|
|
|
|
|
obj = backends[i].fn(i915, size, backends[i].flags);
|
|
|
|
if (IS_ERR(obj)) {
|
|
|
|
err = PTR_ERR(obj);
|
|
|
|
if (err == -ENODEV) {
|
|
|
|
pr_info("Device lacks local memory, skipping\n");
|
|
|
|
err = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2021-03-23 16:50:30 +01:00
|
|
|
err = i915_gem_object_pin_pages_unlocked(obj);
|
2019-10-25 16:37:28 +01:00
|
|
|
if (err) {
|
|
|
|
i915_gem_object_put(obj);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
|
|
|
|
GEM_BUG_ON(pages > obj->base.size);
|
|
|
|
pages = pages & supported;
|
|
|
|
|
|
|
|
if (pages)
|
|
|
|
obj->mm.page_sizes.sg = pages;
|
|
|
|
|
2021-10-28 13:58:53 +01:00
|
|
|
err = igt_write_huge(i915, obj);
|
2019-10-25 16:37:28 +01:00
|
|
|
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_lock(obj, NULL);
|
2019-10-25 16:37:28 +01:00
|
|
|
i915_gem_object_unpin_pages(obj);
|
2019-11-05 10:01:48 +01:00
|
|
|
__i915_gem_object_put_pages(obj);
|
2021-03-23 16:50:30 +01:00
|
|
|
i915_gem_object_unlock(obj);
|
2019-10-25 16:37:28 +01:00
|
|
|
i915_gem_object_put(obj);
|
|
|
|
|
|
|
|
if (err) {
|
|
|
|
pr_err("%s write-huge failed with size=%u pages=%u i=%d, j=%d\n",
|
|
|
|
__func__, size, pages, i, j);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
cond_resched();
|
|
|
|
}
|
|
|
|
|
|
|
|
out:
|
|
|
|
if (err == -ENOMEM)
|
|
|
|
err = 0;
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2022-02-19 00:17:44 +05:30
|
|
|
static int igt_ppgtt_compact(void *arg)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *i915 = arg;
|
|
|
|
struct drm_i915_gem_object *obj;
|
|
|
|
int err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Simple test to catch issues with compact 64K pages -- since the pt is
|
|
|
|
* compacted to 256B that gives us 32 entries per pt, however since the
|
|
|
|
* backing page for the pt is 4K, any extra entries we might incorrectly
|
|
|
|
* write out should be ignored by the HW. If we ever hit such a case this
|
|
|
|
* test should catch it since some of our writes would land in scratch.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!HAS_64K_PAGES(i915)) {
|
|
|
|
pr_info("device lacks compact 64K page support, skipping\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!HAS_LMEM(i915)) {
|
|
|
|
pr_info("device lacks LMEM support, skipping\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* We want the range to cover multiple page-table boundaries. */
|
|
|
|
obj = i915_gem_object_create_lmem(i915, SZ_4M, 0);
|
|
|
|
if (IS_ERR(obj))
|
|
|
|
return PTR_ERR(obj);
|
|
|
|
|
|
|
|
err = i915_gem_object_pin_pages_unlocked(obj);
|
|
|
|
if (err)
|
|
|
|
goto out_put;
|
|
|
|
|
|
|
|
if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_64K) {
|
|
|
|
pr_info("LMEM compact unable to allocate huge-page(s)\n");
|
|
|
|
goto out_unpin;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Disable 2M GTT pages by forcing the page-size to 64K for the GTT
|
|
|
|
* insertion.
|
|
|
|
*/
|
|
|
|
obj->mm.page_sizes.sg = I915_GTT_PAGE_SIZE_64K;
|
|
|
|
|
|
|
|
err = igt_write_huge(i915, obj);
|
|
|
|
if (err)
|
|
|
|
pr_err("LMEM compact write-huge failed\n");
|
|
|
|
|
|
|
|
out_unpin:
|
|
|
|
i915_gem_object_unpin_pages(obj);
|
|
|
|
out_put:
|
|
|
|
i915_gem_object_put(obj);
|
|
|
|
|
|
|
|
if (err == -ENOMEM)
|
|
|
|
err = 0;
|
|
|
|
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2022-10-04 12:49:14 +01:00
|
|
|
static int igt_ppgtt_mixed(void *arg)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *i915 = arg;
|
|
|
|
const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
|
|
|
|
struct drm_i915_gem_object *obj, *on;
|
|
|
|
struct i915_gem_engines *engines;
|
|
|
|
struct i915_gem_engines_iter it;
|
|
|
|
struct i915_address_space *vm;
|
|
|
|
struct i915_gem_context *ctx;
|
|
|
|
struct intel_context *ce;
|
|
|
|
struct file *file;
|
|
|
|
I915_RND_STATE(prng);
|
|
|
|
LIST_HEAD(objects);
|
|
|
|
struct intel_memory_region *mr;
|
|
|
|
struct i915_vma *vma;
|
|
|
|
unsigned int count;
|
|
|
|
u32 i, addr;
|
|
|
|
int *order;
|
|
|
|
int n, err;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Sanity check mixing 4K and 64K pages within the same page-table via
|
|
|
|
* the new PS64 TLB hint.
|
|
|
|
*/
|
|
|
|
|
|
|
|
if (!HAS_64K_PAGES(i915)) {
|
|
|
|
pr_info("device lacks PS64, skipping\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
file = mock_file(i915);
|
|
|
|
if (IS_ERR(file))
|
|
|
|
return PTR_ERR(file);
|
|
|
|
|
|
|
|
ctx = hugepage_ctx(i915, file);
|
|
|
|
if (IS_ERR(ctx)) {
|
|
|
|
err = PTR_ERR(ctx);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
vm = i915_gem_context_get_eb_vm(ctx);
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
addr = 0;
|
|
|
|
do {
|
|
|
|
u32 sz;
|
|
|
|
|
|
|
|
sz = i915_prandom_u32_max_state(SZ_4M, &prng);
|
|
|
|
sz = max_t(u32, sz, SZ_4K);
|
|
|
|
|
|
|
|
mr = i915->mm.regions[INTEL_REGION_LMEM_0];
|
|
|
|
if (i & 1)
|
|
|
|
mr = i915->mm.regions[INTEL_REGION_SMEM];
|
|
|
|
|
|
|
|
obj = i915_gem_object_create_region(mr, sz, 0, 0);
|
|
|
|
if (IS_ERR(obj)) {
|
|
|
|
err = PTR_ERR(obj);
|
|
|
|
goto out_vm;
|
|
|
|
}
|
|
|
|
|
|
|
|
list_add_tail(&obj->st_link, &objects);
|
|
|
|
|
|
|
|
vma = i915_vma_instance(obj, vm, NULL);
|
|
|
|
if (IS_ERR(vma)) {
|
|
|
|
err = PTR_ERR(vma);
|
|
|
|
goto err_put;
|
|
|
|
}
|
|
|
|
|
|
|
|
addr = round_up(addr, mr->min_page_size);
|
|
|
|
err = i915_vma_pin(vma, 0, 0, addr | flags);
|
|
|
|
if (err)
|
|
|
|
goto err_put;
|
|
|
|
|
|
|
|
if (mr->type == INTEL_MEMORY_LOCAL &&
|
|
|
|
(vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
|
|
|
|
err = -EINVAL;
|
|
|
|
goto err_put;
|
|
|
|
}
|
|
|
|
|
|
|
|
addr += obj->base.size;
|
|
|
|
i++;
|
|
|
|
} while (addr <= SZ_16M);
|
|
|
|
|
|
|
|
n = 0;
|
|
|
|
count = 0;
|
|
|
|
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
|
|
|
|
count++;
|
|
|
|
if (!intel_engine_can_store_dword(ce->engine))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
n++;
|
|
|
|
}
|
|
|
|
i915_gem_context_unlock_engines(ctx);
|
|
|
|
if (!n)
|
|
|
|
goto err_put;
|
|
|
|
|
|
|
|
order = i915_random_order(count * count, &prng);
|
|
|
|
if (!order) {
|
|
|
|
err = -ENOMEM;
|
|
|
|
goto err_put;
|
|
|
|
}
|
|
|
|
|
|
|
|
i = 0;
|
|
|
|
addr = 0;
|
|
|
|
engines = i915_gem_context_lock_engines(ctx);
|
|
|
|
list_for_each_entry(obj, &objects, st_link) {
|
|
|
|
u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
|
|
|
|
|
|
|
|
addr = round_up(addr, obj->mm.region->min_page_size);
|
|
|
|
|
|
|
|
ce = engines->engines[order[i] % engines->num_engines];
|
|
|
|
i = (i + 1) % (count * count);
|
|
|
|
if (!ce || !intel_engine_can_store_dword(ce->engine))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
|
|
|
|
err = __igt_write_huge(ce, obj, obj->base.size, addr,
|
|
|
|
offset_in_page(rnd) / sizeof(u32), rnd + 1);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
|
|
|
|
err = __igt_write_huge(ce, obj, obj->base.size, addr,
|
|
|
|
(PAGE_SIZE / sizeof(u32)) - 1,
|
|
|
|
rnd + 2);
|
|
|
|
if (err)
|
|
|
|
break;
|
|
|
|
|
|
|
|
addr += obj->base.size;
|
|
|
|
|
|
|
|
cond_resched();
|
|
|
|
}
|
|
|
|
|
|
|
|
i915_gem_context_unlock_engines(ctx);
|
|
|
|
kfree(order);
|
|
|
|
err_put:
|
|
|
|
list_for_each_entry_safe(obj, on, &objects, st_link) {
|
|
|
|
list_del(&obj->st_link);
|
|
|
|
i915_gem_object_put(obj);
|
|
|
|
}
|
|
|
|
out_vm:
|
|
|
|
i915_vm_put(vm);
|
|
|
|
out:
|
|
|
|
fput(file);
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2017-10-06 23:18:29 +01:00
|
|
|
static int igt_tmpfs_fallback(void *arg)
|
|
|
|
{
|
2021-10-28 13:58:53 +01:00
|
|
|
struct drm_i915_private *i915 = arg;
|
|
|
|
struct i915_address_space *vm;
|
|
|
|
struct i915_gem_context *ctx;
|
2017-10-06 23:18:29 +01:00
|
|
|
struct vfsmount *gemfs = i915->mm.gemfs;
|
|
|
|
struct drm_i915_gem_object *obj;
|
|
|
|
struct i915_vma *vma;
|
2021-10-28 13:58:53 +01:00
|
|
|
struct file *file;
|
2017-10-06 23:18:29 +01:00
|
|
|
u32 *vaddr;
|
|
|
|
int err = 0;
|
|
|
|
|
2021-10-28 13:58:53 +01:00
|
|
|
file = mock_file(i915);
|
|
|
|
if (IS_ERR(file))
|
|
|
|
return PTR_ERR(file);
|
|
|
|
|
|
|
|
ctx = hugepage_ctx(i915, file);
|
|
|
|
if (IS_ERR(ctx)) {
|
|
|
|
err = PTR_ERR(ctx);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
vm = i915_gem_context_get_eb_vm(ctx);
|
|
|
|
|
2017-10-06 23:18:29 +01:00
|
|
|
/*
|
|
|
|
* Make sure that we don't burst into a ball of flames upon falling back
|
2025-01-20 13:45:12 +05:30
|
|
|
* to tmpfs, which we rely on if, on the off-chance, we encounter a failure
|
2017-10-06 23:18:29 +01:00
|
|
|
* when setting up gemfs.
|
|
|
|
*/
|
|
|
|
|
|
|
|
i915->mm.gemfs = NULL;
|
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
obj = i915_gem_object_create_shmem(i915, PAGE_SIZE);
|
2017-10-06 23:18:29 +01:00
|
|
|
if (IS_ERR(obj)) {
|
|
|
|
err = PTR_ERR(obj);
|
|
|
|
goto out_restore;
|
|
|
|
}
|
|
|
|
|
2021-03-23 16:50:30 +01:00
|
|
|
vaddr = i915_gem_object_pin_map_unlocked(obj, I915_MAP_WB);
|
2017-10-06 23:18:29 +01:00
|
|
|
if (IS_ERR(vaddr)) {
|
|
|
|
err = PTR_ERR(vaddr);
|
|
|
|
goto out_put;
|
|
|
|
}
|
|
|
|
*vaddr = 0xdeadbeaf;
|
|
|
|
|
2019-03-21 16:19:07 +00:00
|
|
|
__i915_gem_object_flush_map(obj, 0, 64);
|
2017-10-06 23:18:29 +01:00
|
|
|
i915_gem_object_unpin_map(obj);
|
|
|
|
|
|
|
|
vma = i915_vma_instance(obj, vm, NULL);
|
|
|
|
if (IS_ERR(vma)) {
|
|
|
|
err = PTR_ERR(vma);
|
|
|
|
goto out_put;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = i915_vma_pin(vma, 0, 0, PIN_USER);
|
|
|
|
if (err)
|
2020-04-22 20:05:58 +01:00
|
|
|
goto out_put;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
err = igt_check_page_sizes(vma);
|
|
|
|
|
|
|
|
i915_vma_unpin(vma);
|
|
|
|
out_put:
|
|
|
|
i915_gem_object_put(obj);
|
|
|
|
out_restore:
|
|
|
|
i915->mm.gemfs = gemfs;
|
|
|
|
|
2019-10-04 14:40:09 +01:00
|
|
|
i915_vm_put(vm);
|
2021-10-28 13:58:53 +01:00
|
|
|
out:
|
|
|
|
fput(file);
|
2017-10-06 23:18:29 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int igt_shrink_thp(void *arg)
|
|
|
|
{
|
2021-10-28 13:58:53 +01:00
|
|
|
struct drm_i915_private *i915 = arg;
|
|
|
|
struct i915_address_space *vm;
|
|
|
|
struct i915_gem_context *ctx;
|
2017-10-06 23:18:29 +01:00
|
|
|
struct drm_i915_gem_object *obj;
|
2019-08-24 00:51:41 +01:00
|
|
|
struct i915_gem_engines_iter it;
|
|
|
|
struct intel_context *ce;
|
2017-10-06 23:18:29 +01:00
|
|
|
struct i915_vma *vma;
|
2021-10-28 13:58:53 +01:00
|
|
|
struct file *file;
|
2017-10-06 23:18:29 +01:00
|
|
|
unsigned int flags = PIN_USER;
|
2019-07-04 22:23:43 +01:00
|
|
|
unsigned int n;
|
2022-07-06 16:47:38 +01:00
|
|
|
intel_wakeref_t wf;
|
2021-09-06 10:17:29 +01:00
|
|
|
bool should_swap;
|
2021-10-28 13:58:53 +01:00
|
|
|
int err;
|
|
|
|
|
|
|
|
if (!igt_can_allocate_thp(i915)) {
|
|
|
|
pr_info("missing THP support, skipping\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
file = mock_file(i915);
|
|
|
|
if (IS_ERR(file))
|
|
|
|
return PTR_ERR(file);
|
|
|
|
|
|
|
|
ctx = hugepage_ctx(i915, file);
|
|
|
|
if (IS_ERR(ctx)) {
|
|
|
|
err = PTR_ERR(ctx);
|
|
|
|
goto out;
|
|
|
|
}
|
|
|
|
vm = i915_gem_context_get_eb_vm(ctx);
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Sanity check shrinking huge-paged object -- make sure nothing blows
|
|
|
|
* up.
|
|
|
|
*/
|
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
obj = i915_gem_object_create_shmem(i915, SZ_2M);
|
2019-10-04 14:40:09 +01:00
|
|
|
if (IS_ERR(obj)) {
|
|
|
|
err = PTR_ERR(obj);
|
|
|
|
goto out_vm;
|
|
|
|
}
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
vma = i915_vma_instance(obj, vm, NULL);
|
|
|
|
if (IS_ERR(vma)) {
|
|
|
|
err = PTR_ERR(vma);
|
|
|
|
goto out_put;
|
|
|
|
}
|
|
|
|
|
2022-07-06 16:47:38 +01:00
|
|
|
wf = intel_runtime_pm_get(&i915->runtime_pm); /* active shrink */
|
|
|
|
|
2017-10-06 23:18:29 +01:00
|
|
|
err = i915_vma_pin(vma, 0, 0, flags);
|
|
|
|
if (err)
|
2022-07-06 16:47:38 +01:00
|
|
|
goto out_wf;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
if (obj->mm.page_sizes.phys < I915_GTT_PAGE_SIZE_2M) {
|
|
|
|
pr_info("failed to allocate THP, finishing test early\n");
|
|
|
|
goto out_unpin;
|
|
|
|
}
|
|
|
|
|
|
|
|
err = igt_check_page_sizes(vma);
|
|
|
|
if (err)
|
|
|
|
goto out_unpin;
|
|
|
|
|
2019-07-04 22:23:43 +01:00
|
|
|
n = 0;
|
2019-08-24 00:51:41 +01:00
|
|
|
|
|
|
|
for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
|
|
|
|
if (!intel_engine_can_store_dword(ce->engine))
|
2019-07-04 22:23:43 +01:00
|
|
|
continue;
|
|
|
|
|
2019-08-24 00:51:41 +01:00
|
|
|
err = gpu_write(ce, vma, n++, 0xdeadbeaf);
|
2019-07-04 22:23:43 +01:00
|
|
|
if (err)
|
2019-08-24 00:51:41 +01:00
|
|
|
break;
|
2019-07-04 22:23:43 +01:00
|
|
|
}
|
2019-08-24 00:51:41 +01:00
|
|
|
i915_gem_context_unlock_engines(ctx);
|
2021-09-06 10:17:29 +01:00
|
|
|
/*
|
|
|
|
* Nuke everything *before* we unpin the pages so we can be reasonably
|
|
|
|
* sure that, when later checking get_nr_swap_pages(), some random
|
|
|
|
* leftover object doesn't steal the remaining swap space.
|
|
|
|
*/
|
|
|
|
i915_gem_shrink(NULL, i915, -1UL, NULL,
|
|
|
|
I915_SHRINK_BOUND |
|
|
|
|
I915_SHRINK_UNBOUND |
|
|
|
|
I915_SHRINK_ACTIVE);
|
2017-10-06 23:18:29 +01:00
|
|
|
i915_vma_unpin(vma);
|
2019-08-24 00:51:41 +01:00
|
|
|
if (err)
|
2023-01-17 13:32:34 +01:00
|
|
|
goto out_wf;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
|
|
|
/*
|
2021-09-06 10:17:29 +01:00
|
|
|
* Now that the pages are *unpinned* shrinking should invoke
|
|
|
|
* shmem to truncate our pages, if we have available swap.
|
2017-10-06 23:18:29 +01:00
|
|
|
*/
|
2021-09-06 10:17:29 +01:00
|
|
|
should_swap = get_nr_swap_pages() > 0;
|
|
|
|
i915_gem_shrink(NULL, i915, -1UL, NULL,
|
|
|
|
I915_SHRINK_BOUND |
|
|
|
|
I915_SHRINK_UNBOUND |
|
2021-09-21 15:21:16 +01:00
|
|
|
I915_SHRINK_ACTIVE |
|
|
|
|
I915_SHRINK_WRITEBACK);
|
2021-09-06 10:17:29 +01:00
|
|
|
if (should_swap == i915_gem_object_has_pages(obj)) {
|
|
|
|
pr_err("unexpected pages mismatch, should_swap=%s\n",
|
2022-02-25 15:46:28 -08:00
|
|
|
str_yes_no(should_swap));
|
2017-10-06 23:18:29 +01:00
|
|
|
err = -EINVAL;
|
2023-01-17 13:32:34 +01:00
|
|
|
goto out_wf;
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
2021-09-06 10:17:29 +01:00
|
|
|
if (should_swap == (obj->mm.page_sizes.sg || obj->mm.page_sizes.phys)) {
|
|
|
|
pr_err("unexpected residual page-size bits, should_swap=%s\n",
|
2022-02-25 15:46:28 -08:00
|
|
|
str_yes_no(should_swap));
|
2017-10-06 23:18:29 +01:00
|
|
|
err = -EINVAL;
|
2023-01-17 13:32:34 +01:00
|
|
|
goto out_wf;
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
err = i915_vma_pin(vma, 0, 0, flags);
|
|
|
|
if (err)
|
2023-01-17 13:32:34 +01:00
|
|
|
goto out_wf;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2019-07-04 22:23:43 +01:00
|
|
|
while (n--) {
|
|
|
|
err = cpu_check(obj, n, 0xdeadbeaf);
|
|
|
|
if (err)
|
2019-08-24 00:51:41 +01:00
|
|
|
break;
|
2019-07-04 22:23:43 +01:00
|
|
|
}
|
|
|
|
|
2017-10-06 23:18:29 +01:00
|
|
|
out_unpin:
|
|
|
|
i915_vma_unpin(vma);
|
2022-07-06 16:47:38 +01:00
|
|
|
out_wf:
|
|
|
|
intel_runtime_pm_put(&i915->runtime_pm, wf);
|
2017-10-06 23:18:29 +01:00
|
|
|
out_put:
|
|
|
|
i915_gem_object_put(obj);
|
2019-10-04 14:40:09 +01:00
|
|
|
out_vm:
|
|
|
|
i915_vm_put(vm);
|
2021-10-28 13:58:53 +01:00
|
|
|
out:
|
|
|
|
fput(file);
|
2017-10-06 23:18:29 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
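/*
 * Mock selftests: run against a fake device that pretends to have a full
 * 48b PPGTT, so no real hardware is required.
 */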
int i915_gem_huge_page_mock_selftests(void)
|
|
|
|
{
|
|
|
|
static const struct i915_subtest tests[] = {
|
|
|
|
SUBTEST(igt_mock_exhaust_device_supported_pages),
|
2019-10-08 17:01:14 +01:00
|
|
|
SUBTEST(igt_mock_memory_region_huge_pages),
|
2017-10-06 23:18:29 +01:00
|
|
|
SUBTEST(igt_mock_ppgtt_misaligned_dma),
|
|
|
|
};
|
2024-03-28 08:18:33 +01:00
|
|
|
struct drm_i915_private *i915;
|
2019-06-11 10:12:38 +01:00
|
|
|
struct i915_ppgtt *ppgtt;
|
2017-10-06 23:18:29 +01:00
|
|
|
int err;
|
|
|
|
|
2024-03-28 08:18:33 +01:00
|
|
|
i915 = mock_gem_device();
|
|
|
|
if (!i915)
|
2017-10-06 23:18:29 +01:00
|
|
|
return -ENOMEM;
|
|
|
|
|
|
|
|
/* Pretend to be a device which supports the 48b PPGTT */
|
2024-03-28 08:18:33 +01:00
|
|
|
RUNTIME_INFO(i915)->ppgtt_type = INTEL_PPGTT_FULL;
|
|
|
|
RUNTIME_INFO(i915)->ppgtt_size = 48;
|
2017-10-06 23:18:29 +01:00
|
|
|
|
2024-03-28 08:18:33 +01:00
|
|
|
ppgtt = i915_ppgtt_create(to_gt(i915), 0);
|
2017-10-06 23:18:29 +01:00
|
|
|
if (IS_ERR(ppgtt)) {
|
|
|
|
err = PTR_ERR(ppgtt);
|
|
|
|
goto out_unlock;
|
|
|
|
}
|
|
|
|
|
2019-03-14 22:38:38 +00:00
|
|
|
if (!i915_vm_is_4lvl(&ppgtt->vm)) {
|
2017-10-06 23:18:29 +01:00
|
|
|
pr_err("failed to create 48b PPGTT\n");
|
|
|
|
err = -EINVAL;
|
2020-04-22 20:05:58 +01:00
|
|
|
goto out_put;
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
/* If we ever hit this then it's time to mock the 64K scratch */
|
2018-06-05 16:37:58 +01:00
|
|
|
if (!i915_vm_has_scratch_64K(&ppgtt->vm)) {
|
2017-10-06 23:18:29 +01:00
|
|
|
pr_err("PPGTT missing 64K scratch page\n");
|
|
|
|
err = -EINVAL;
|
2020-04-22 20:05:58 +01:00
|
|
|
goto out_put;
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
err = i915_subtests(tests, ppgtt);
|
|
|
|
|
2020-04-22 20:05:58 +01:00
|
|
|
out_put:
|
2019-06-11 10:12:37 +01:00
|
|
|
i915_vm_put(&ppgtt->vm);
|
2017-10-06 23:18:29 +01:00
|
|
|
out_unlock:
|
2024-03-28 08:18:33 +01:00
|
|
|
mock_destroy_device(i915);
|
2017-10-06 23:18:29 +01:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
2019-07-12 20:29:53 +01:00
|
|
|
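/*
 * Live selftests: exercise the real hardware, and require PPGTT support
 * and an unwedged GT.
 */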
int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
|
2017-10-06 23:18:29 +01:00
|
|
|
{
|
|
|
|
static const struct i915_subtest tests[] = {
|
|
|
|
SUBTEST(igt_shrink_thp),
|
|
|
|
SUBTEST(igt_tmpfs_fallback),
|
2019-10-25 16:37:27 +01:00
|
|
|
SUBTEST(igt_ppgtt_smoke_huge),
|
2019-10-25 16:37:28 +01:00
|
|
|
SUBTEST(igt_ppgtt_sanity_check),
|
2022-02-19 00:17:44 +05:30
|
|
|
SUBTEST(igt_ppgtt_compact),
|
2022-10-04 12:49:14 +01:00
|
|
|
SUBTEST(igt_ppgtt_mixed),
|
2023-04-26 23:28:48 +02:00
|
|
|
SUBTEST(igt_ppgtt_huge_fill),
|
|
|
|
SUBTEST(igt_ppgtt_64K),
|
2017-10-06 23:18:29 +01:00
|
|
|
};
|
|
|
|
|
2019-07-12 20:29:53 +01:00
|
|
|
if (!HAS_PPGTT(i915)) {
|
2017-10-06 23:18:29 +01:00
|
|
|
pr_info("PPGTT not supported, skipping live-selftests\n");
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2021-12-14 21:33:35 +02:00
|
|
|
if (intel_gt_is_wedged(to_gt(i915)))
|
2018-07-06 07:53:13 +01:00
|
|
|
return 0;
|
|
|
|
|
2021-10-28 13:58:53 +01:00
|
|
|
return i915_live_subtests(tests, i915);
|
2017-10-06 23:18:29 +01:00
|
|
|
}
|