/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/list_sort.h>
#include <linux/prime_numbers.h>

#include "gem/i915_gem_context.h"
#include "gem/i915_gem_internal.h"
#include "gem/i915_gem_lmem.h"
#include "gem/i915_gem_region.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_context.h"
#include "gt/intel_gpu_commands.h"
#include "gt/intel_gtt.h"

#include "i915_random.h"
#include "i915_selftest.h"
#include "i915_vma_resource.h"

#include "mock_drm.h"
#include "mock_gem_device.h"
#include "mock_gtt.h"
#include "igt_flush_test.h"

static void cleanup_freed_objects(struct drm_i915_private *i915)
{
	i915_gem_drain_freed_objects(i915);
}

static void fake_free_pages(struct drm_i915_gem_object *obj,
			    struct sg_table *pages)
{
	sg_free_table(pages);
	kfree(pages);
}

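/*
 * Fake backing store for the test objects: build an sg_table whose entries
 * all point at a fixed, biased pfn instead of allocating real pages.  This
 * lets the GTT tests exercise very large "objects" without consuming system
 * memory; the pages must never be touched by the CPU.
 */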
static int fake_get_pages(struct drm_i915_gem_object *obj)
{
#define GFP (GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY)
#define PFN_BIAS 0x1000
	struct sg_table *pages;
	struct scatterlist *sg;
	typeof(obj->base.size) rem;

	pages = kmalloc(sizeof(*pages), GFP);
	if (!pages)
		return -ENOMEM;

	rem = round_up(obj->base.size, BIT(31)) >> 31;
	/* restricted by sg_alloc_table */
	if (overflows_type(rem, unsigned int)) {
		kfree(pages);
		return -E2BIG;
	}

	if (sg_alloc_table(pages, rem, GFP)) {
		kfree(pages);
		return -ENOMEM;
	}

	rem = obj->base.size;
	for (sg = pages->sgl; sg; sg = sg_next(sg)) {
		unsigned long len = min_t(typeof(rem), rem, BIT(31));

		GEM_BUG_ON(!len);
		sg_set_page(sg, pfn_to_page(PFN_BIAS), len, 0);
		sg_dma_address(sg) = page_to_phys(sg_page(sg));
		sg_dma_len(sg) = len;

		rem -= len;
	}
	GEM_BUG_ON(rem);

	__i915_gem_object_set_pages(obj, pages);

	return 0;
#undef GFP
}

static void fake_put_pages(struct drm_i915_gem_object *obj,
			   struct sg_table *pages)
{
	fake_free_pages(obj, pages);
	obj->mm.dirty = false;
}

static const struct drm_i915_gem_object_ops fake_ops = {
	.name = "fake-gem",
	.flags = I915_GEM_OBJECT_IS_SHRINKABLE,
	.get_pages = fake_get_pages,
	.put_pages = fake_put_pages,
};

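/*
 * Create a GEM object backed by fake_ops.  The object is marked volatile and
 * its fake backing store is pinned once up front, purely to check that page
 * construction succeeds before the object is handed to a test.
 */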
static struct drm_i915_gem_object *
fake_dma_object(struct drm_i915_private *i915, u64 size)
{
	static struct lock_class_key lock_class;
	struct drm_i915_gem_object *obj;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));

	if (overflows_type(size, obj->base.size))
		return ERR_PTR(-E2BIG);

	obj = i915_gem_object_alloc();
	if (!obj)
		goto err;

	drm_gem_private_object_init(&i915->drm, &obj->base, size);
	i915_gem_object_init(obj, &fake_ops, &lock_class, 0);

	i915_gem_object_set_volatile(obj);

	obj->write_domain = I915_GEM_DOMAIN_CPU;
	obj->read_domains = I915_GEM_DOMAIN_CPU;
	obj->pat_index = i915_gem_get_pat_index(i915, I915_CACHE_NONE);

	/* Preallocate the "backing storage" */
	if (i915_gem_object_pin_pages_unlocked(obj))
		goto err_obj;

	i915_gem_object_unpin_pages(obj);
	return obj;

err_obj:
	i915_gem_object_put(obj);
err:
	return ERR_PTR(-ENOMEM);
}

static int igt_ppgtt_alloc(void *arg)
{
	struct drm_i915_private *dev_priv = arg;
	struct i915_ppgtt *ppgtt;
	struct i915_gem_ww_ctx ww;
	u64 size, last, limit;
	int err = 0;

	/* Allocate a ppgtt and try to fill the entire range */

	if (!HAS_PPGTT(dev_priv))
		return 0;

	ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (!ppgtt->vm.allocate_va_range)
		goto ppgtt_vm_put;

	/*
	 * While we only allocate the page tables here and so we could
	 * address a much larger GTT than we could actually fit into
	 * RAM, a practical limit is the amount of physical pages in the system.
	 * This should ensure that we do not run into the oomkiller during
	 * the test and take down the machine wilfully.
	 */
	limit = totalram_pages() << PAGE_SHIFT;
	limit = min(ppgtt->vm.total, limit);

	i915_gem_ww_ctx_init(&ww, false);
retry:
	err = i915_vm_lock_objects(&ppgtt->vm, &ww);
	if (err)
		goto err_ppgtt_cleanup;

	/* Check we can allocate the entire range */
	for (size = 4096; size <= limit; size <<= 2) {
		struct i915_vm_pt_stash stash = {};

		err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size);
		if (err)
			goto err_ppgtt_cleanup;

		err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
		if (err) {
			i915_vm_free_pt_stash(&ppgtt->vm, &stash);
			goto err_ppgtt_cleanup;
		}

		ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash, 0, size);
		cond_resched();

		ppgtt->vm.clear_range(&ppgtt->vm, 0, size);

		i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	}

	/* Check we can incrementally allocate the entire range */
	for (last = 0, size = 4096; size <= limit; last = size, size <<= 2) {
		struct i915_vm_pt_stash stash = {};

		err = i915_vm_alloc_pt_stash(&ppgtt->vm, &stash, size - last);
		if (err)
			goto err_ppgtt_cleanup;

		err = i915_vm_map_pt_stash(&ppgtt->vm, &stash);
		if (err) {
			i915_vm_free_pt_stash(&ppgtt->vm, &stash);
			goto err_ppgtt_cleanup;
		}

		ppgtt->vm.allocate_va_range(&ppgtt->vm, &stash,
					    last, size - last);
		cond_resched();

		i915_vm_free_pt_stash(&ppgtt->vm, &stash);
	}

err_ppgtt_cleanup:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
ppgtt_vm_put:
	i915_vm_put(&ppgtt->vm);
	return err;
}

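/*
 * Drive the low-level vm hooks (allocate_va_range, insert_entries,
 * clear_range) directly through a mock vma resource, bypassing the normal
 * i915_vma binding paths, at randomised offsets throughout the hole.
 */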
static int lowlevel_hole(struct i915_address_space *vm,
			 u64 hole_start, u64 hole_end,
			 unsigned long end_time)
{
	const unsigned int min_alignment =
		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
	I915_RND_STATE(seed_prng);
	struct i915_vma_resource *mock_vma_res;
	unsigned int size;

	mock_vma_res = kzalloc(sizeof(*mock_vma_res), GFP_KERNEL);
	if (!mock_vma_res)
		return -ENOMEM;

	/* Keep creating larger objects until one cannot fit into the hole */
	for (size = 12; (hole_end - hole_start) >> size; size++) {
		I915_RND_SUBSTATE(prng, seed_prng);
		struct drm_i915_gem_object *obj;
		unsigned int *order, count, n;
		u64 hole_size, aligned_size;

		aligned_size = max_t(u32, ilog2(min_alignment), size);
		hole_size = (hole_end - hole_start) >> aligned_size;
		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
		count = hole_size >> 1;
		if (!count) {
			pr_debug("%s: hole is too small [%llx - %llx] >> %d: %lld\n",
				 __func__, hole_start, hole_end, size, hole_size);
			break;
		}

		do {
			order = i915_random_order(count, &prng);
			if (order)
				break;
		} while (count >>= 1);
		if (!count) {
			kfree(mock_vma_res);
			return -ENOMEM;
		}
		GEM_BUG_ON(!order);

		GEM_BUG_ON(count * BIT_ULL(aligned_size) > vm->total);
		GEM_BUG_ON(hole_start + count * BIT_ULL(aligned_size) > hole_end);

		/*
		 * Ignore allocation failures (i.e. don't report them as
		 * a test failure) as we are purposefully allocating very
		 * large objects without checking that we have sufficient
		 * memory. We expect to hit -ENOMEM.
		 */

		obj = fake_dma_object(vm->i915, BIT_ULL(size));
		if (IS_ERR(obj)) {
			kfree(order);
			break;
		}

		GEM_BUG_ON(obj->base.size != BIT_ULL(size));

		if (i915_gem_object_pin_pages_unlocked(obj)) {
			i915_gem_object_put(obj);
			kfree(order);
			break;
		}

		for (n = 0; n < count; n++) {
			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
			intel_wakeref_t wakeref;

			GEM_BUG_ON(addr + BIT_ULL(aligned_size) > vm->total);

			if (igt_timeout(end_time,
					"%s timed out before %d/%d\n",
					__func__, n, count)) {
				hole_end = hole_start; /* quit */
				break;
			}

			if (vm->allocate_va_range) {
				struct i915_vm_pt_stash stash = {};
				struct i915_gem_ww_ctx ww;
				int err;

				i915_gem_ww_ctx_init(&ww, false);
retry:
				err = i915_vm_lock_objects(vm, &ww);
				if (err)
					goto alloc_vm_end;

				err = -ENOMEM;
				if (i915_vm_alloc_pt_stash(vm, &stash,
							   BIT_ULL(size)))
					goto alloc_vm_end;

				err = i915_vm_map_pt_stash(vm, &stash);
				if (!err)
					vm->allocate_va_range(vm, &stash,
							      addr, BIT_ULL(size));
				i915_vm_free_pt_stash(vm, &stash);
alloc_vm_end:
				if (err == -EDEADLK) {
					err = i915_gem_ww_ctx_backoff(&ww);
					if (!err)
						goto retry;
				}
				i915_gem_ww_ctx_fini(&ww);

				if (err)
					break;
			}

			mock_vma_res->bi.pages = obj->mm.pages;
			mock_vma_res->node_size = BIT_ULL(aligned_size);
			mock_vma_res->start = addr;

			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
				vm->insert_entries(vm, mock_vma_res,
						   i915_gem_get_pat_index(vm->i915,
									  I915_CACHE_NONE),
						   0);
		}
		count = n;

		i915_random_reorder(order, count, &prng);
		for (n = 0; n < count; n++) {
			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);
			intel_wakeref_t wakeref;

			GEM_BUG_ON(addr + BIT_ULL(size) > vm->total);
			with_intel_runtime_pm(vm->gt->uncore->rpm, wakeref)
				vm->clear_range(vm, addr, BIT_ULL(size));
		}

		i915_gem_object_unpin_pages(obj);
		i915_gem_object_put(obj);

		kfree(order);

		cleanup_freed_objects(vm->i915);
	}

	kfree(mock_vma_res);
	return 0;
}

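/* Unbind (best effort) and release every object on the test list. */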
static void close_object_list(struct list_head *objects,
			      struct i915_address_space *vm)
{
	struct drm_i915_gem_object *obj, *on;
	int __maybe_unused ignored;

	list_for_each_entry_safe(obj, on, objects, st_link) {
		struct i915_vma *vma;

		vma = i915_vma_instance(obj, vm, NULL);
		if (!IS_ERR(vma))
			ignored = i915_vma_unbind_unlocked(vma);

		list_del(&obj->st_link);
		i915_gem_object_put(obj);
	}
}

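/*
 * Pack objects of differing sizes against both edges of the hole, in
 * top-down and bottom-up phases, verifying that every binding lands at the
 * exact offset requested and survives re-checking and unbinding.
 */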
static int fill_hole(struct i915_address_space *vm,
		     u64 hole_start, u64 hole_end,
		     unsigned long end_time)
{
	const u64 hole_size = hole_end - hole_start;
	struct drm_i915_gem_object *obj;
	const unsigned int min_alignment =
		i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);
	const unsigned long max_pages =
		min_t(u64, ULONG_MAX - 1, (hole_size / 2) >> ilog2(min_alignment));
	const unsigned long max_step = max(int_sqrt(max_pages), 2UL);
	unsigned long npages, prime, flags;
	struct i915_vma *vma;
	LIST_HEAD(objects);
	int err;

	/* Try binding many VMA working inwards from either edge */

	flags = PIN_OFFSET_FIXED | PIN_USER;
	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

	for_each_prime_number_from(prime, 2, max_step) {
		for (npages = 1; npages <= max_pages; npages *= prime) {
			const u64 full_size = npages << PAGE_SHIFT;
			const struct {
				const char *name;
				u64 offset;
				int step;
			} phases[] = {
				{ "top-down", hole_end, -1, },
				{ "bottom-up", hole_start, 1, },
				{ }
			}, *p;

			obj = fake_dma_object(vm->i915, full_size);
			if (IS_ERR(obj))
				break;

			list_add(&obj->st_link, &objects);

			/*
			 * Align differing sized objects against the edges, and
			 * check we don't walk off into the void when binding
			 * them into the GTT.
			 */
			for (p = phases; p->name; p++) {
				u64 offset;

				offset = p->offset;
				list_for_each_entry(obj, &objects, st_link) {
					u64 aligned_size = round_up(obj->base.size,
								    min_alignment);

					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
						if (offset < hole_start + aligned_size)
							break;
						offset -= aligned_size;
					}

					err = i915_vma_pin(vma, 0, 0, offset | flags);
					if (err) {
						pr_err("%s(%s) pin (forward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n",
						       __func__, p->name, err, npages, prime, offset);
						goto err;
					}

					if (!drm_mm_node_allocated(&vma->node) ||
					    i915_vma_misplaced(vma, 0, 0, offset | flags)) {
						pr_err("%s(%s) (forward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n",
						       __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node),
						       offset);
						err = -EINVAL;
						goto err;
					}

					i915_vma_unpin(vma);

					if (p->step > 0) {
						if (offset + aligned_size > hole_end)
							break;
						offset += aligned_size;
					}
				}

				offset = p->offset;
				list_for_each_entry(obj, &objects, st_link) {
					u64 aligned_size = round_up(obj->base.size,
								    min_alignment);

					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
						if (offset < hole_start + aligned_size)
							break;
						offset -= aligned_size;
					}

					if (!drm_mm_node_allocated(&vma->node) ||
					    i915_vma_misplaced(vma, 0, 0, offset | flags)) {
						pr_err("%s(%s) (forward) moved vma.node=%llx + %llx, expected offset %llx\n",
						       __func__, p->name, vma->node.start, vma->node.size,
						       offset);
						err = -EINVAL;
						goto err;
					}

					err = i915_vma_unbind_unlocked(vma);
					if (err) {
						pr_err("%s(%s) (forward) unbind of vma.node=%llx + %llx failed with err=%d\n",
						       __func__, p->name, vma->node.start, vma->node.size,
						       err);
						goto err;
					}

					if (p->step > 0) {
						if (offset + aligned_size > hole_end)
							break;
						offset += aligned_size;
					}
				}

				offset = p->offset;
				list_for_each_entry_reverse(obj, &objects, st_link) {
					u64 aligned_size = round_up(obj->base.size,
								    min_alignment);

					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
						if (offset < hole_start + aligned_size)
							break;
						offset -= aligned_size;
					}

					err = i915_vma_pin(vma, 0, 0, offset | flags);
					if (err) {
						pr_err("%s(%s) pin (backward) failed with err=%d on size=%lu pages (prime=%lu), offset=%llx\n",
						       __func__, p->name, err, npages, prime, offset);
						goto err;
					}

					if (!drm_mm_node_allocated(&vma->node) ||
					    i915_vma_misplaced(vma, 0, 0, offset | flags)) {
						pr_err("%s(%s) (backward) insert failed: vma.node=%llx + %llx [allocated? %d], expected offset %llx\n",
						       __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node),
						       offset);
						err = -EINVAL;
						goto err;
					}

					i915_vma_unpin(vma);

					if (p->step > 0) {
						if (offset + aligned_size > hole_end)
							break;
						offset += aligned_size;
					}
				}

				offset = p->offset;
				list_for_each_entry_reverse(obj, &objects, st_link) {
					u64 aligned_size = round_up(obj->base.size,
								    min_alignment);

					vma = i915_vma_instance(obj, vm, NULL);
					if (IS_ERR(vma))
						continue;

					if (p->step < 0) {
						if (offset < hole_start + aligned_size)
							break;
						offset -= aligned_size;
					}

					if (!drm_mm_node_allocated(&vma->node) ||
					    i915_vma_misplaced(vma, 0, 0, offset | flags)) {
						pr_err("%s(%s) (backward) moved vma.node=%llx + %llx [allocated? %d], expected offset %llx\n",
						       __func__, p->name, vma->node.start, vma->node.size, drm_mm_node_allocated(&vma->node),
						       offset);
						err = -EINVAL;
						goto err;
					}

					err = i915_vma_unbind_unlocked(vma);
					if (err) {
						pr_err("%s(%s) (backward) unbind of vma.node=%llx + %llx failed with err=%d\n",
						       __func__, p->name, vma->node.start, vma->node.size,
						       err);
						goto err;
					}

					if (p->step > 0) {
						if (offset + aligned_size > hole_end)
							break;
						offset += aligned_size;
					}
				}
			}

			if (igt_timeout(end_time, "%s timed out (npages=%lu, prime=%lu)\n",
					__func__, npages, prime)) {
				err = -EINTR;
				goto err;
			}
		}

		close_object_list(&objects, vm);
		cleanup_freed_objects(vm->i915);
	}

	return 0;

err:
	close_object_list(&objects, vm);
	return err;
}

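/*
 * Step a single object of each prime page count through the hole, binding
 * and unbinding it at every aligned offset and checking exact placement.
 */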
static int walk_hole(struct i915_address_space *vm,
		     u64 hole_start, u64 hole_end,
		     unsigned long end_time)
{
	const u64 hole_size = hole_end - hole_start;
	const unsigned long max_pages =
		min_t(u64, ULONG_MAX - 1, hole_size >> PAGE_SHIFT);
	unsigned long min_alignment;
	unsigned long flags;
	u64 size;

	/* Try binding a single VMA in different positions within the hole */

	flags = PIN_OFFSET_FIXED | PIN_USER;
	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);

	for_each_prime_number_from(size, 1, max_pages) {
		struct drm_i915_gem_object *obj;
		struct i915_vma *vma;
		u64 addr;
		int err = 0;

		obj = fake_dma_object(vm->i915, size << PAGE_SHIFT);
		if (IS_ERR(obj))
			break;

		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_put;
		}

		for (addr = hole_start;
		     addr + obj->base.size < hole_end;
		     addr += round_up(obj->base.size, min_alignment)) {
			err = i915_vma_pin(vma, 0, 0, addr | flags);
			if (err) {
				pr_err("%s bind failed at %llx + %llx [hole %llx- %llx] with err=%d\n",
				       __func__, addr, vma->size,
				       hole_start, hole_end, err);
				goto err_put;
			}
			i915_vma_unpin(vma);

			if (!drm_mm_node_allocated(&vma->node) ||
			    i915_vma_misplaced(vma, 0, 0, addr | flags)) {
				pr_err("%s incorrect at %llx + %llx\n",
				       __func__, addr, vma->size);
				err = -EINVAL;
				goto err_put;
			}

			err = i915_vma_unbind_unlocked(vma);
			if (err) {
				pr_err("%s unbind failed at %llx + %llx with err=%d\n",
				       __func__, addr, vma->size, err);
				goto err_put;
			}

			GEM_BUG_ON(drm_mm_node_allocated(&vma->node));

			if (igt_timeout(end_time,
					"%s timed out at %llx\n",
					__func__, addr)) {
				err = -EINTR;
				goto err_put;
			}
		}

err_put:
		i915_gem_object_put(obj);
		if (err)
			return err;

		cleanup_freed_objects(vm->i915);
	}

	return 0;
}

static int pot_hole(struct i915_address_space *vm,
		    u64 hole_start, u64 hole_end,
		    unsigned long end_time)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	unsigned int min_alignment;
	unsigned long flags;
	unsigned int pot;
	int err = 0;

	flags = PIN_OFFSET_FIXED | PIN_USER;
	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);

	obj = i915_gem_object_create_internal(vm->i915, 2 * I915_GTT_PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_obj;
	}

	/* Insert a pair of pages across every pot boundary within the hole */
	for (pot = fls64(hole_end - 1) - 1;
	     pot > ilog2(2 * min_alignment);
	     pot--) {
		u64 step = BIT_ULL(pot);
		u64 addr;

		for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
		     hole_end > addr && hole_end - addr >= 2 * min_alignment;
		     addr += step) {
			err = i915_vma_pin(vma, 0, 0, addr | flags);
			if (err) {
				pr_err("%s failed to pin object at %llx in hole [%llx - %llx], with err=%d\n",
				       __func__,
				       addr,
				       hole_start, hole_end,
				       err);
				goto err_obj;
			}

			if (!drm_mm_node_allocated(&vma->node) ||
			    i915_vma_misplaced(vma, 0, 0, addr | flags)) {
				pr_err("%s incorrect at %llx + %llx\n",
				       __func__, addr, vma->size);
				i915_vma_unpin(vma);
				err = i915_vma_unbind_unlocked(vma);
				err = -EINVAL;
				goto err_obj;
			}

			i915_vma_unpin(vma);
			err = i915_vma_unbind_unlocked(vma);
			GEM_BUG_ON(err);
		}

		if (igt_timeout(end_time,
				"%s timed out after %d/%d\n",
				__func__, pot, fls64(hole_end - 1) - 1)) {
			err = -EINTR;
			goto err_obj;
		}
	}

err_obj:
	i915_gem_object_put(obj);
	return err;
}

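/*
 * Bind a single object at randomly ordered offsets throughout the hole,
 * unbinding after each placement, to catch any dependence on insertion
 * order in the allocator.
 */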
static int drunk_hole(struct i915_address_space *vm,
		      u64 hole_start, u64 hole_end,
		      unsigned long end_time)
{
	I915_RND_STATE(prng);
	unsigned int min_alignment;
	unsigned int size;
	unsigned long flags;

	flags = PIN_OFFSET_FIXED | PIN_USER;
	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);

	/* Keep creating larger objects until one cannot fit into the hole */
	for (size = 12; (hole_end - hole_start) >> size; size++) {
		struct drm_i915_gem_object *obj;
		unsigned int *order, count, n;
		struct i915_vma *vma;
		u64 hole_size, aligned_size;
		int err = -ENODEV;

		aligned_size = max_t(u32, ilog2(min_alignment), size);
		hole_size = (hole_end - hole_start) >> aligned_size;
		if (hole_size > KMALLOC_MAX_SIZE / sizeof(u32))
			hole_size = KMALLOC_MAX_SIZE / sizeof(u32);
		count = hole_size >> 1;
		if (!count) {
			pr_debug("%s: hole is too small [%llx - %llx] >> %d: %lld\n",
				 __func__, hole_start, hole_end, size, hole_size);
			break;
		}

		do {
			order = i915_random_order(count, &prng);
			if (order)
				break;
		} while (count >>= 1);
		if (!count)
			return -ENOMEM;
		GEM_BUG_ON(!order);

		/*
		 * Ignore allocation failures (i.e. don't report them as
		 * a test failure) as we are purposefully allocating very
		 * large objects without checking that we have sufficient
		 * memory. We expect to hit -ENOMEM.
		 */

		obj = fake_dma_object(vm->i915, BIT_ULL(size));
		if (IS_ERR(obj)) {
			kfree(order);
			break;
		}

		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_obj;
		}

		GEM_BUG_ON(vma->size != BIT_ULL(size));

		for (n = 0; n < count; n++) {
			u64 addr = hole_start + order[n] * BIT_ULL(aligned_size);

			err = i915_vma_pin(vma, 0, 0, addr | flags);
			if (err) {
				pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n",
				       __func__,
				       addr, BIT_ULL(size),
				       hole_start, hole_end,
				       err);
				goto err_obj;
			}

			if (!drm_mm_node_allocated(&vma->node) ||
			    i915_vma_misplaced(vma, 0, 0, addr | flags)) {
				pr_err("%s incorrect at %llx + %llx\n",
				       __func__, addr, BIT_ULL(size));
				i915_vma_unpin(vma);
				err = i915_vma_unbind_unlocked(vma);
				err = -EINVAL;
				goto err_obj;
			}

			i915_vma_unpin(vma);
			err = i915_vma_unbind_unlocked(vma);
			GEM_BUG_ON(err);

			if (igt_timeout(end_time,
					"%s timed out after %d/%d\n",
					__func__, n, count)) {
				err = -EINTR;
				goto err_obj;
			}
		}

err_obj:
		i915_gem_object_put(obj);
		kfree(order);
		if (err)
			return err;

		cleanup_freed_objects(vm->i915);
	}

	return 0;
}

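/*
 * Fill the hole with progressively larger objects while the caller injects
 * allocation faults into the vm, exercising the error and shrinker paths
 * taken during page-table allocation.
 */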
static int __shrink_hole(struct i915_address_space *vm,
			 u64 hole_start, u64 hole_end,
			 unsigned long end_time)
{
	struct drm_i915_gem_object *obj;
	unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
	unsigned int min_alignment;
	unsigned int order = 12;
	LIST_HEAD(objects);
	int err = 0;
	u64 addr;

	min_alignment = i915_vm_min_alignment(vm, INTEL_MEMORY_SYSTEM);

	/* Keep creating larger objects until one cannot fit into the hole */
	for (addr = hole_start; addr < hole_end; ) {
		struct i915_vma *vma;
		u64 size = BIT_ULL(order++);

		size = min(size, hole_end - addr);
		obj = fake_dma_object(vm->i915, size);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			break;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			break;
		}

		GEM_BUG_ON(vma->size != size);

		err = i915_vma_pin(vma, 0, 0, addr | flags);
		if (err) {
			pr_err("%s failed to pin object at %llx + %llx in hole [%llx - %llx], with err=%d\n",
			       __func__, addr, size, hole_start, hole_end, err);
			break;
		}

		if (!drm_mm_node_allocated(&vma->node) ||
		    i915_vma_misplaced(vma, 0, 0, addr | flags)) {
			pr_err("%s incorrect at %llx + %llx\n",
			       __func__, addr, size);
			i915_vma_unpin(vma);
			err = i915_vma_unbind_unlocked(vma);
			err = -EINVAL;
			break;
		}

		i915_vma_unpin(vma);
		addr += round_up(size, min_alignment);

		/*
		 * Since we are injecting allocation faults at random intervals,
		 * wait for this allocation to complete before we change the
		 * fault injection.
		 */
		err = i915_vma_sync(vma);
		if (err)
			break;

		if (igt_timeout(end_time,
				"%s timed out at offset %llx [%llx - %llx]\n",
				__func__, addr, hole_start, hole_end)) {
			err = -EINTR;
			break;
		}
	}

	close_object_list(&objects, vm);
	cleanup_freed_objects(vm->i915);
	return err;
}

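/*
 * Run __shrink_hole with vm fault injection enabled, retrying with every
 * prime interval so allocations fail at varying points.
 */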
static int shrink_hole(struct i915_address_space *vm,
		       u64 hole_start, u64 hole_end,
		       unsigned long end_time)
{
	unsigned long prime;
	int err;

	vm->fault_attr.probability = 999;
	atomic_set(&vm->fault_attr.times, -1);

	for_each_prime_number_from(prime, 0, ULONG_MAX - 1) {
		vm->fault_attr.interval = prime;
		err = __shrink_hole(vm, hole_start, hole_end, end_time);
		if (err)
			break;
	}

	memset(&vm->fault_attr, 0, sizeof(vm->fault_attr));

	return err;
}

static int shrink_boom(struct i915_address_space *vm,
		       u64 hole_start, u64 hole_end,
		       unsigned long end_time)
{
	unsigned int sizes[] = { SZ_2M, SZ_1G };
	struct drm_i915_gem_object *purge;
	struct drm_i915_gem_object *explode;
	int err;
	int i;

	/*
	 * Catch the case which shrink_hole seems to miss. The setup here
	 * requires invoking the shrinker as we do the alloc_pt/alloc_pd, while
	 * ensuring that all vma associated with the respective pd/pdp are
	 * unpinned at the time.
	 */

	for (i = 0; i < ARRAY_SIZE(sizes); ++i) {
		unsigned int flags = PIN_USER | PIN_OFFSET_FIXED;
		unsigned int size = sizes[i];
		struct i915_vma *vma;

		purge = fake_dma_object(vm->i915, size);
		if (IS_ERR(purge))
			return PTR_ERR(purge);

		vma = i915_vma_instance(purge, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_purge;
		}

		err = i915_vma_pin(vma, 0, 0, flags);
		if (err)
			goto err_purge;

		/* Should now be ripe for purging */
		i915_vma_unpin(vma);

		explode = fake_dma_object(vm->i915, size);
		if (IS_ERR(explode)) {
			err = PTR_ERR(explode);
			goto err_purge;
		}

		vm->fault_attr.probability = 100;
		vm->fault_attr.interval = 1;
		atomic_set(&vm->fault_attr.times, -1);

		vma = i915_vma_instance(explode, vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto err_explode;
		}

		err = i915_vma_pin(vma, 0, 0, flags | size);
		if (err)
			goto err_explode;

		i915_vma_unpin(vma);

		i915_gem_object_put(purge);
		i915_gem_object_put(explode);

		memset(&vm->fault_attr, 0, sizeof(vm->fault_attr));
		cleanup_freed_objects(vm->i915);
	}

	return 0;

err_explode:
	i915_gem_object_put(explode);
err_purge:
	i915_gem_object_put(purge);
	memset(&vm->fault_attr, 0, sizeof(vm->fault_attr));
	return err;
}

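/*
 * Pin a single object from the given memory region at a fixed offset and
 * check the resulting vma and node sizes against the minimum page-size
 * rules for that region (e.g. 64K granularity for local memory).
 */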
static int misaligned_case(struct i915_address_space *vm, struct intel_memory_region *mr,
			   u64 addr, u64 size, unsigned long flags)
{
	struct drm_i915_gem_object *obj;
	struct i915_vma *vma;
	int err = 0;
	u64 expected_vma_size, expected_node_size;
	bool is_stolen = mr->type == INTEL_MEMORY_STOLEN_SYSTEM ||
			 mr->type == INTEL_MEMORY_STOLEN_LOCAL;

	obj = i915_gem_object_create_region(mr, size, 0, I915_BO_ALLOC_GPU_ONLY);
	if (IS_ERR(obj)) {
		/* if iGVT-g or DMAR is active, stolen mem will be uninitialized */
		if (PTR_ERR(obj) == -ENODEV && is_stolen)
			return 0;
		return PTR_ERR(obj);
	}

	vma = i915_vma_instance(obj, vm, NULL);
	if (IS_ERR(vma)) {
		err = PTR_ERR(vma);
		goto err_put;
	}

	err = i915_vma_pin(vma, 0, 0, addr | flags);
	if (err)
		goto err_put;
	i915_vma_unpin(vma);

	if (!drm_mm_node_allocated(&vma->node)) {
		err = -EINVAL;
		goto err_put;
	}

	if (i915_vma_misplaced(vma, 0, 0, addr | flags)) {
		err = -EINVAL;
		goto err_put;
	}

	expected_vma_size = round_up(size, 1 << (ffs(vma->resource->page_sizes_gtt) - 1));
	expected_node_size = expected_vma_size;

	if (HAS_64K_PAGES(vm->i915) && i915_gem_object_is_lmem(obj)) {
		expected_vma_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
		expected_node_size = round_up(size, I915_GTT_PAGE_SIZE_64K);
	}

	if (vma->size != expected_vma_size || vma->node.size != expected_node_size) {
		err = i915_vma_unbind_unlocked(vma);
		err = -EBADSLT;
		goto err_put;
	}

	err = i915_vma_unbind_unlocked(vma);
	if (err)
		goto err_put;

	GEM_BUG_ON(drm_mm_node_allocated(&vma->node));

err_put:
	i915_gem_object_put(obj);
	cleanup_freed_objects(vm->i915);
	return err;
}

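/*
 * For each memory region, check that misaligned fixed offsets are rejected
 * with -EINVAL and that undersized objects have vma->size expanded to the
 * region's minimum page size rather than being silently misplaced.
 */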
static int misaligned_pin(struct i915_address_space *vm,
			  u64 hole_start, u64 hole_end,
			  unsigned long end_time)
{
	struct intel_memory_region *mr;
	enum intel_region_id id;
	unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
	int err = 0;
	u64 hole_size = hole_end - hole_start;

	if (i915_is_ggtt(vm))
		flags |= PIN_GLOBAL;

	for_each_memory_region(mr, vm->i915, id) {
		u64 min_alignment = i915_vm_min_alignment(vm, mr->type);
		u64 size = min_alignment;
		u64 addr = round_down(hole_start + (hole_size / 2), min_alignment);

		/* avoid -ENOSPC on very small hole setups */
		if (hole_size < 3 * min_alignment)
			continue;

		/* we can't test < 4k alignment due to flags being encoded in lower bits */
		if (min_alignment != I915_GTT_PAGE_SIZE_4K) {
			err = misaligned_case(vm, mr, addr + (min_alignment / 2), size, flags);
			/* misaligned should error with -EINVAL */
			if (!err)
				err = -EBADSLT;
			if (err != -EINVAL)
				return err;
		}

		/* test for vma->size expansion to min page size */
		err = misaligned_case(vm, mr, addr, PAGE_SIZE, flags);
		if (err)
			return err;

		/* test for intermediate size not expanding vma->size for large alignments */
		err = misaligned_case(vm, mr, addr, size / 2, flags);
		if (err)
			return err;
	}

	return 0;
}

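/*
 * Instantiate a full ppGTT and run the given hole exerciser over its entire
 * address range.
 */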
static int exercise_ppgtt(struct drm_i915_private *dev_priv,
			  int (*func)(struct i915_address_space *vm,
				      u64 hole_start, u64 hole_end,
				      unsigned long end_time))
{
	struct i915_ppgtt *ppgtt;
	IGT_TIMEOUT(end_time);
	struct file *file;
	int err;

	if (!HAS_FULL_PPGTT(dev_priv))
		return 0;

	file = mock_file(dev_priv);
	if (IS_ERR(file))
		return PTR_ERR(file);

	ppgtt = i915_ppgtt_create(to_gt(dev_priv), 0);
	if (IS_ERR(ppgtt)) {
		err = PTR_ERR(ppgtt);
		goto out_free;
	}
	GEM_BUG_ON(offset_in_page(ppgtt->vm.total));
	assert_vm_alive(&ppgtt->vm);

	err = func(&ppgtt->vm, 0, ppgtt->vm.total, end_time);

	i915_vm_put(&ppgtt->vm);
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 14:39:58 +01:00
|
|
|
out_free:
|
2019-11-07 21:39:29 +00:00
|
|
|
fput(file);
|
2017-02-13 17:15:39 +00:00
|
|
|
return err;
|
|
|
|
}
|
|
|
|
|
|
|
|
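/* Thin wrappers binding each hole exerciser to a full ppGTT. */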
static int igt_ppgtt_fill(void *arg)
{
	return exercise_ppgtt(arg, fill_hole);
}

static int igt_ppgtt_walk(void *arg)
{
	return exercise_ppgtt(arg, walk_hole);
}

static int igt_ppgtt_pot(void *arg)
{
	return exercise_ppgtt(arg, pot_hole);
}

static int igt_ppgtt_drunk(void *arg)
{
	return exercise_ppgtt(arg, drunk_hole);
}

static int igt_ppgtt_lowlevel(void *arg)
{
	return exercise_ppgtt(arg, lowlevel_hole);
}

static int igt_ppgtt_shrink(void *arg)
{
	return exercise_ppgtt(arg, shrink_hole);
}

static int igt_ppgtt_shrink_boom(void *arg)
{
	return exercise_ppgtt(arg, shrink_boom);
}

static int igt_ppgtt_misaligned_pin(void *arg)
{
	return exercise_ppgtt(arg, misaligned_pin);
}

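/*
 * exercise_ggtt() walks the GGTT drm_mm holes in ascending address order
 * (sort_holes() orders the hole stack by start address) and runs the given
 * exerciser over each hole, restarting the walk after every call since the
 * exerciser may have manipulated the drm_mm behind our back.
 */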
static int sort_holes(void *priv, const struct list_head *A,
		      const struct list_head *B)
{
	struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack);
	struct drm_mm_node *b = list_entry(B, typeof(*b), hole_stack);

	if (a->start < b->start)
		return -1;
	else
		return 1;
}

static int exercise_ggtt(struct drm_i915_private *i915,
			 int (*func)(struct i915_address_space *vm,
				     u64 hole_start, u64 hole_end,
				     unsigned long end_time))
{
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
	u64 hole_start, hole_end, last = 0;
	struct drm_mm_node *node;
	IGT_TIMEOUT(end_time);
	int err = 0;

restart:
	list_sort(NULL, &ggtt->vm.mm.hole_stack, sort_holes);
	drm_mm_for_each_hole(node, &ggtt->vm.mm, hole_start, hole_end) {
		if (hole_start < last)
			continue;

		if (ggtt->vm.mm.color_adjust)
			ggtt->vm.mm.color_adjust(node, 0,
						 &hole_start, &hole_end);
		if (hole_start >= hole_end)
			continue;

		err = func(&ggtt->vm, hole_start, hole_end, end_time);
		if (err)
			break;

		/* As we have manipulated the drm_mm, the list may be corrupt */
		last = hole_end;
		goto restart;
	}

	return err;
}

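/* Thin wrappers binding each hole exerciser to the live GGTT. */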
static int igt_ggtt_fill(void *arg)
{
	return exercise_ggtt(arg, fill_hole);
}

static int igt_ggtt_walk(void *arg)
{
	return exercise_ggtt(arg, walk_hole);
}

static int igt_ggtt_pot(void *arg)
{
	return exercise_ggtt(arg, pot_hole);
}

static int igt_ggtt_drunk(void *arg)
{
	return exercise_ggtt(arg, drunk_hole);
}

static int igt_ggtt_lowlevel(void *arg)
{
	return exercise_ggtt(arg, lowlevel_hole);
}

static int igt_ggtt_misaligned_pin(void *arg)
{
	return exercise_ggtt(arg, misaligned_pin);
}

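/*
 * Reserve a scratch range in the mappable aperture, point every PTE at the
 * same dma page with insert_page(), then write a counter through each slot
 * and read it back in a different random order to check the mappings.
 */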
static int igt_ggtt_page(void *arg)
{
	const unsigned int count = PAGE_SIZE/sizeof(u32);
	I915_RND_STATE(prng);
	struct drm_i915_private *i915 = arg;
	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
	struct drm_i915_gem_object *obj;
	intel_wakeref_t wakeref;
	struct drm_mm_node tmp;
	unsigned int *order, n;
	int err;

	if (!i915_ggtt_has_aperture(ggtt))
		return 0;

	obj = i915_gem_object_create_internal(i915, PAGE_SIZE);
	if (IS_ERR(obj))
		return PTR_ERR(obj);

	err = i915_gem_object_pin_pages_unlocked(obj);
	if (err)
		goto out_free;

	memset(&tmp, 0, sizeof(tmp));
	mutex_lock(&ggtt->vm.mutex);
	err = drm_mm_insert_node_in_range(&ggtt->vm.mm, &tmp,
					  count * PAGE_SIZE, 0,
					  I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);
	mutex_unlock(&ggtt->vm.mutex);
	if (err)
		goto out_unpin;

	wakeref = intel_runtime_pm_get(&i915->runtime_pm);

	for (n = 0; n < count; n++) {
		u64 offset = tmp.start + n * PAGE_SIZE;

		ggtt->vm.insert_page(&ggtt->vm,
				     i915_gem_object_get_dma_address(obj, 0),
				     offset,
				     i915_gem_get_pat_index(i915,
							    I915_CACHE_NONE),
				     0);
	}

	order = i915_random_order(count, &prng);
	if (!order) {
		err = -ENOMEM;
		goto out_remove;
	}

	for (n = 0; n < count; n++) {
		u64 offset = tmp.start + order[n] * PAGE_SIZE;
		u32 __iomem *vaddr;

		vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset);
		iowrite32(n, vaddr + n);
		io_mapping_unmap_atomic(vaddr);
	}
	intel_gt_flush_ggtt_writes(ggtt->vm.gt);

	i915_random_reorder(order, count, &prng);
	for (n = 0; n < count; n++) {
		u64 offset = tmp.start + order[n] * PAGE_SIZE;
		u32 __iomem *vaddr;
		u32 val;

		vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset);
		val = ioread32(vaddr + n);
		io_mapping_unmap_atomic(vaddr);

		if (val != n) {
			pr_err("insert page failed: found %d, expected %d\n",
			       val, n);
			err = -EINVAL;
			break;
		}
	}

	kfree(order);
out_remove:
	ggtt->vm.clear_range(&ggtt->vm, tmp.start, tmp.size);
	intel_runtime_pm_put(&i915->runtime_pm, wakeref);
	mutex_lock(&ggtt->vm.mutex);
	drm_mm_remove_node(&tmp);
	mutex_unlock(&ggtt->vm.mutex);
out_unpin:
	i915_gem_object_unpin_pages(obj);
out_free:
	i915_gem_object_put(obj);
	return err;
}

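/*
 * Fake just enough of a binding for the mock tests: pin the backing pages,
 * mark the vma's pages as active and move it onto the vm's bound list,
 * without writing any PTEs.
 */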
static void track_vma_bind(struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	__i915_gem_object_pin_pages(obj);

	GEM_BUG_ON(atomic_read(&vma->pages_count));
	atomic_set(&vma->pages_count, I915_VMA_PAGES_ACTIVE);
	__i915_gem_object_pin_pages(obj);
	vma->pages = obj->mm.pages;
	vma->resource->bi.pages = vma->pages;

	mutex_lock(&vma->vm->mutex);
	list_move_tail(&vma->vm_link, &vma->vm->bound_list);
	mutex_unlock(&vma->vm->mutex);
}

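/*
 * Run a hole exerciser over the default vm of a mock context, clamped to
 * the amount of system RAM (totalram_pages).
 */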
static int exercise_mock(struct drm_i915_private *i915,
			 int (*func)(struct i915_address_space *vm,
				     u64 hole_start, u64 hole_end,
				     unsigned long end_time))
{
	const u64 limit = totalram_pages() << PAGE_SHIFT;
	struct i915_address_space *vm;
	struct i915_gem_context *ctx;
	IGT_TIMEOUT(end_time);
	int err;

	ctx = mock_context(i915, "mock");
	if (!ctx)
		return -ENOMEM;

	vm = i915_gem_context_get_eb_vm(ctx);
	err = func(vm, 0, min(vm->total, limit), end_time);
	i915_vm_put(vm);

	mock_context_close(ctx);
	return err;
}

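/* Thin wrappers running each hole exerciser against a mock context on the mock device. */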
static int igt_mock_fill(void *arg)
{
	struct i915_ggtt *ggtt = arg;

	return exercise_mock(ggtt->vm.i915, fill_hole);
}

static int igt_mock_walk(void *arg)
{
	struct i915_ggtt *ggtt = arg;

	return exercise_mock(ggtt->vm.i915, walk_hole);
}

static int igt_mock_pot(void *arg)
{
	struct i915_ggtt *ggtt = arg;

	return exercise_mock(ggtt->vm.i915, pot_hole);
}

static int igt_mock_drunk(void *arg)
{
	struct i915_ggtt *ggtt = arg;

	return exercise_mock(ggtt->vm.i915, drunk_hole);
}

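/*
 * Pair i915_gem_gtt_reserve() with a freshly allocated vma resource, taking
 * vm->mutex around the reservation and freeing the resource on failure.
 */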
static int reserve_gtt_with_resource(struct i915_vma *vma, u64 offset)
{
	struct i915_address_space *vm = vma->vm;
	struct i915_vma_resource *vma_res;
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	vma_res = i915_vma_resource_alloc();
	if (IS_ERR(vma_res))
		return PTR_ERR(vma_res);

	mutex_lock(&vm->mutex);
	err = i915_gem_gtt_reserve(vm, NULL, &vma->node, obj->base.size,
				   offset,
				   obj->pat_index,
				   0);
	if (!err) {
		i915_vma_resource_init_from_vma(vma_res, vma);
		vma->resource = vma_res;
	} else {
		kfree(vma_res);
	}
	mutex_unlock(&vm->mutex);

	return err;
}

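/*
 * Three passes: pack the GGTT back to back, then repeat offset by a page to
 * force evictions, then re-reserve each object at a random offset.
 */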
static int igt_gtt_reserve(void *arg)
{
	struct i915_ggtt *ggtt = arg;
	struct drm_i915_gem_object *obj, *on;
	I915_RND_STATE(prng);
	LIST_HEAD(objects);
	u64 total;
	int err = -ENODEV;

	/*
	 * i915_gem_gtt_reserve() tries to reserve the precise range
	 * for the node, and evicts if it has to. So our test checks that
	 * it can give us the requested space and prevent overlaps.
	 */

	/* Start by filling the GGTT */
	for (total = 0;
	     total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total;
	     total += 2 * I915_GTT_PAGE_SIZE) {
		struct i915_vma *vma;

		obj = i915_gem_object_create_internal(ggtt->vm.i915,
						      2 * PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			goto out;
		}

		list_add(&obj->st_link, &objects);
		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out;
		}

		err = reserve_gtt_with_resource(vma, total);
		if (err) {
			pr_err("i915_gem_gtt_reserve (pass 1) failed at %llu/%llu with err=%d\n",
			       total, ggtt->vm.total, err);
			goto out;
		}
		track_vma_bind(vma);

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		if (vma->node.start != total ||
		    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
			pr_err("i915_gem_gtt_reserve (pass 1) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
			       vma->node.start, vma->node.size,
			       total, 2*I915_GTT_PAGE_SIZE);
			err = -EINVAL;
			goto out;
		}
	}

	/* Now we start forcing evictions */
	for (total = I915_GTT_PAGE_SIZE;
	     total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total;
	     total += 2 * I915_GTT_PAGE_SIZE) {
		struct i915_vma *vma;

		obj = i915_gem_object_create_internal(ggtt->vm.i915,
						      2 * PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			goto out;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out;
		}

		err = reserve_gtt_with_resource(vma, total);
		if (err) {
			pr_err("i915_gem_gtt_reserve (pass 2) failed at %llu/%llu with err=%d\n",
			       total, ggtt->vm.total, err);
			goto out;
		}
		track_vma_bind(vma);

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		if (vma->node.start != total ||
		    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
			pr_err("i915_gem_gtt_reserve (pass 2) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
			       vma->node.start, vma->node.size,
			       total, 2*I915_GTT_PAGE_SIZE);
			err = -EINVAL;
			goto out;
		}
	}

	/* And then try at random */
	list_for_each_entry_safe(obj, on, &objects, st_link) {
		struct i915_vma *vma;
		u64 offset;

		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out;
		}

		err = i915_vma_unbind_unlocked(vma);
		if (err) {
			pr_err("i915_vma_unbind failed with err=%d!\n", err);
			goto out;
		}

		offset = igt_random_offset(&prng,
					   0, ggtt->vm.total,
					   2 * I915_GTT_PAGE_SIZE,
					   I915_GTT_MIN_ALIGNMENT);

		err = reserve_gtt_with_resource(vma, offset);
		if (err) {
			pr_err("i915_gem_gtt_reserve (pass 3) failed at %llu/%llu with err=%d\n",
			       total, ggtt->vm.total, err);
			goto out;
		}
		track_vma_bind(vma);

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		if (vma->node.start != offset ||
		    vma->node.size != 2*I915_GTT_PAGE_SIZE) {
			pr_err("i915_gem_gtt_reserve (pass 3) placement failed, found (%llx + %llx), expected (%llx + %llx)\n",
			       vma->node.start, vma->node.size,
			       offset, 2*I915_GTT_PAGE_SIZE);
			err = -EINVAL;
			goto out;
		}
	}

out:
	list_for_each_entry_safe(obj, on, &objects, st_link) {
		i915_gem_object_unpin_pages(obj);
		i915_gem_object_put(obj);
	}
	return err;
}

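/*
 * Pair i915_gem_gtt_insert() with a freshly allocated vma resource,
 * searching the whole vm under vm->mutex and freeing the resource on
 * failure.
 */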
static int insert_gtt_with_resource(struct i915_vma *vma)
{
	struct i915_address_space *vm = vma->vm;
	struct i915_vma_resource *vma_res;
	struct drm_i915_gem_object *obj = vma->obj;
	int err;

	vma_res = i915_vma_resource_alloc();
	if (IS_ERR(vma_res))
		return PTR_ERR(vma_res);

	mutex_lock(&vm->mutex);
	err = i915_gem_gtt_insert(vm, NULL, &vma->node, obj->base.size, 0,
				  obj->pat_index, 0, vm->total, 0);
	if (!err) {
		i915_vma_resource_init_from_vma(vma_res, vma);
		vma->resource = vma_res;
	} else {
		kfree(vma_res);
	}
	mutex_unlock(&vm->mutex);

	return err;
}

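/*
 * Passes: reject obviously invalid requests, fill the GGTT one page at a
 * time, verify nothing was evicted while pinned, unbind and reinsert each
 * node into the hole it previously occupied, then force evictions with
 * double-sized objects.
 */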
static int igt_gtt_insert(void *arg)
{
	struct i915_ggtt *ggtt = arg;
	struct drm_i915_gem_object *obj, *on;
	struct drm_mm_node tmp = {};
	const struct invalid_insert {
		u64 size;
		u64 alignment;
		u64 start, end;
	} invalid_insert[] = {
		{
			ggtt->vm.total + I915_GTT_PAGE_SIZE, 0,
			0, ggtt->vm.total,
		},
		{
			2*I915_GTT_PAGE_SIZE, 0,
			0, I915_GTT_PAGE_SIZE,
		},
		{
			-(u64)I915_GTT_PAGE_SIZE, 0,
			0, 4*I915_GTT_PAGE_SIZE,
		},
		{
			-(u64)2*I915_GTT_PAGE_SIZE, 2*I915_GTT_PAGE_SIZE,
			0, 4*I915_GTT_PAGE_SIZE,
		},
		{
			I915_GTT_PAGE_SIZE, I915_GTT_MIN_ALIGNMENT << 1,
			I915_GTT_MIN_ALIGNMENT, I915_GTT_MIN_ALIGNMENT << 1,
		},
		{}
	}, *ii;
	LIST_HEAD(objects);
	u64 total;
	int err = -ENODEV;

	/*
	 * i915_gem_gtt_insert() tries to allocate some free space in the GTT
	 * to the node, evicting if required.
	 */

	/* Check a couple of obviously invalid requests */
	for (ii = invalid_insert; ii->size; ii++) {
		mutex_lock(&ggtt->vm.mutex);
		err = i915_gem_gtt_insert(&ggtt->vm, NULL, &tmp,
					  ii->size, ii->alignment,
					  I915_COLOR_UNEVICTABLE,
					  ii->start, ii->end,
					  0);
		mutex_unlock(&ggtt->vm.mutex);
		if (err != -ENOSPC) {
			pr_err("Invalid i915_gem_gtt_insert(.size=%llx, .alignment=%llx, .start=%llx, .end=%llx) succeeded (err=%d)\n",
			       ii->size, ii->alignment, ii->start, ii->end,
			       err);
			return -EINVAL;
		}
	}

	/* Start by filling the GGTT */
	for (total = 0;
	     total + I915_GTT_PAGE_SIZE <= ggtt->vm.total;
	     total += I915_GTT_PAGE_SIZE) {
		struct i915_vma *vma;

		obj = i915_gem_object_create_internal(ggtt->vm.i915,
						      I915_GTT_PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			goto out;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out;
		}

		err = insert_gtt_with_resource(vma);
		if (err == -ENOSPC) {
			/* maxed out the GGTT space */
			i915_gem_object_put(obj);
			break;
		}
		if (err) {
			pr_err("i915_gem_gtt_insert (pass 1) failed at %llu/%llu with err=%d\n",
			       total, ggtt->vm.total, err);
			goto out;
		}
		track_vma_bind(vma);
		__i915_vma_pin(vma);

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	}

	list_for_each_entry(obj, &objects, st_link) {
		struct i915_vma *vma;

		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out;
		}

		if (!drm_mm_node_allocated(&vma->node)) {
			pr_err("VMA was unexpectedly evicted!\n");
			err = -EINVAL;
			goto out;
		}

		__i915_vma_unpin(vma);
	}

	/* If we then reinsert, we should find the same hole */
	list_for_each_entry_safe(obj, on, &objects, st_link) {
		struct i915_vma *vma;
		u64 offset;

		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out;
		}

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		offset = vma->node.start;

		err = i915_vma_unbind_unlocked(vma);
		if (err) {
			pr_err("i915_vma_unbind failed with err=%d!\n", err);
			goto out;
		}

		err = insert_gtt_with_resource(vma);
		if (err) {
			pr_err("i915_gem_gtt_insert (pass 2) failed at %llu/%llu with err=%d\n",
			       total, ggtt->vm.total, err);
			goto out;
		}
		track_vma_bind(vma);

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
		if (vma->node.start != offset) {
			pr_err("i915_gem_gtt_insert did not return node to its previous location (the only hole), expected address %llx, found %llx\n",
			       offset, vma->node.start);
			err = -EINVAL;
			goto out;
		}
	}

	/* And then force evictions */
	for (total = 0;
	     total + 2 * I915_GTT_PAGE_SIZE <= ggtt->vm.total;
	     total += 2 * I915_GTT_PAGE_SIZE) {
		struct i915_vma *vma;

		obj = i915_gem_object_create_internal(ggtt->vm.i915,
						      2 * I915_GTT_PAGE_SIZE);
		if (IS_ERR(obj)) {
			err = PTR_ERR(obj);
			goto out;
		}

		err = i915_gem_object_pin_pages_unlocked(obj);
		if (err) {
			i915_gem_object_put(obj);
			goto out;
		}

		list_add(&obj->st_link, &objects);

		vma = i915_vma_instance(obj, &ggtt->vm, NULL);
		if (IS_ERR(vma)) {
			err = PTR_ERR(vma);
			goto out;
		}

		err = insert_gtt_with_resource(vma);
		if (err) {
			pr_err("i915_gem_gtt_insert (pass 3) failed at %llu/%llu with err=%d\n",
			       total, ggtt->vm.total, err);
			goto out;
		}
		track_vma_bind(vma);

		GEM_BUG_ON(!drm_mm_node_allocated(&vma->node));
	}

out:
	list_for_each_entry_safe(obj, on, &objects, st_link) {
		i915_gem_object_unpin_pages(obj);
		i915_gem_object_put(obj);
	}
	return err;
}

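/*
 * Mock selftest entry point: spin up a mock device with a mock GGTT, run
 * the drm_mm-level subtests against it, then tear everything down.
 */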
int i915_gem_gtt_mock_selftests(void)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_mock_drunk),
		SUBTEST(igt_mock_walk),
		SUBTEST(igt_mock_pot),
		SUBTEST(igt_mock_fill),
		SUBTEST(igt_gtt_reserve),
		SUBTEST(igt_gtt_insert),
	};
	struct drm_i915_private *i915;
	struct intel_gt *gt;
	int err;

	i915 = mock_gem_device();
	if (!i915)
		return -ENOMEM;

	/* allocate the ggtt */
	err = intel_gt_assign_ggtt(to_gt(i915));
	if (err)
		goto out_put;

	gt = to_gt(i915);

	mock_init_ggtt(gt);

	err = i915_subtests(tests, gt->ggtt);

	mock_device_flush(i915);
	i915_gem_drain_freed_objects(i915);
	mock_fini_ggtt(gt->ggtt);

out_put:
	mock_destroy_device(i915);
	return err;
}

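/*
 * Live selftest entry point, run against the real device.
 */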
int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
{
	static const struct i915_subtest tests[] = {
		SUBTEST(igt_ppgtt_alloc),
		SUBTEST(igt_ppgtt_lowlevel),
		SUBTEST(igt_ppgtt_drunk),
		SUBTEST(igt_ppgtt_walk),
		SUBTEST(igt_ppgtt_pot),
		SUBTEST(igt_ppgtt_fill),
		SUBTEST(igt_ppgtt_shrink),
		SUBTEST(igt_ppgtt_shrink_boom),
		SUBTEST(igt_ppgtt_misaligned_pin),
		SUBTEST(igt_ggtt_lowlevel),
		SUBTEST(igt_ggtt_drunk),
		SUBTEST(igt_ggtt_walk),
		SUBTEST(igt_ggtt_pot),
		SUBTEST(igt_ggtt_fill),
		SUBTEST(igt_ggtt_page),
		SUBTEST(igt_ggtt_misaligned_pin),
	};

	GEM_BUG_ON(offset_in_page(to_gt(i915)->ggtt->vm.total));

	return i915_live_subtests(tests, i915);
}