2017-08-11 12:11:16 +01:00
|
|
|
/*
|
|
|
|
* Copyright © 2017 Intel Corporation
|
|
|
|
*
|
|
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
|
|
* to deal in the Software without restriction, including without limitation
|
|
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
|
|
*
|
|
|
|
* The above copyright notice and this permission notice (including the next
|
|
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
|
|
* Software.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
|
|
* IN THE SOFTWARE.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
drm/i915: Switch obj->mm.lock lockdep annotations on its head
The trouble with having a plain nesting flag for locks which do not
naturally nest (unlike block devices and their partitions, which is
the original motivation for nesting levels) is that lockdep will
never spot a true deadlock if you screw up.
This patch is an attempt at trying better, by highlighting a bit more
of the actual nature of the nesting that's going on. Essentially we
have two kinds of objects:
- objects without pages allocated, which cannot be on any lru and are
hence inaccessible to the shrinker.
- objects which have pages allocated, which are on an lru, and which
the shrinker can decide to throw out.
For the former type of object, memory allocations while holding
obj->mm.lock are permissible. For the latter they are not. And
get/put_pages transitions between the two types of objects.
This is still not entirely fool-proof since the rules might change.
But as long as we run such a code ever at runtime lockdep should be
able to observe the inconsistency and complain (like with any other
lockdep class that we've split up in multiple classes). But there are
a few clear benefits:
- We can drop the nesting flag parameter from
__i915_gem_object_put_pages, because that function by definition is
never going to allocate memory, and calling it on an object which
doesn't have its pages allocated would be a bug.
- We strictly catch more bugs, since there's not only one place in the
entire tree which is annotated with the special class. All the
other places that had explicit lockdep nesting annotations we're now
going to leave up to lockdep again.
- Specifically this catches stuff like calling get_pages from
put_pages (which isn't really a good idea, if we can call get_pages
so could the shrinker). I've seen patches do exactly that.
Of course I fully expect CI will show me for the fool I am with this
one here :-)
v2: There can only be one (lockdep only has a cache for the first
subclass, not for deeper ones, and we don't want to make these locks
even slower). Still separate enums for better documentation.
Real fix: don't forget about phys objs and pin_map(), and fix the
shrinker to have the right annotations ... silly me.
v3: Forgot userptr too ...
v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
and instead prime lockdep (Chris).
v5: Appease checkpatch, no double empty lines (Chris)
v6: More rebasing over selftest changes. Also somehow I forgot to
push this patch :-/
Also format comments consistently while at it.
v7: Fix typo in commit message (Joonas)
Also drop the priming, with the lmem merge we now have allocations
while holding the lmem lock, which wrecks the generic priming I've
done in earlier patches. Should probably be resurrected when lmem is
fixed. See
commit 232a6ebae419193f5b8da4fa869ae5089ab105c2
Author: Matthew Auld <matthew.auld@intel.com>
Date: Tue Oct 8 17:01:14 2019 +0100
drm/i915: introduce intel_memory_region
I'm keeping the priming patch locally so it won't get lost.
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Tang, CQ" <cq.tang@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v5)
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> (v6)
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191105090148.30269-1-daniel.vetter@ffwll.ch
[mlankhorst: Fix commit typos pointed out by Michael Ruhl]
2019-11-05 10:01:48 +01:00
|
|
|
#include <linux/sched/mm.h>
|
|
|
|
|
2019-06-13 11:44:16 +03:00
|
|
|
#include "display/intel_frontbuffer.h"
|
2019-06-21 08:08:01 +01:00
|
|
|
#include "gt/intel_gt.h"
|
2017-08-11 12:11:16 +01:00
|
|
|
#include "i915_drv.h"
|
2019-05-28 10:29:47 +01:00
|
|
|
#include "i915_gem_clflush.h"
|
2019-05-28 10:29:49 +01:00
|
|
|
#include "i915_gem_context.h"
|
2019-12-04 12:00:32 +00:00
|
|
|
#include "i915_gem_mman.h"
|
2019-05-28 10:29:49 +01:00
|
|
|
#include "i915_gem_object.h"
|
2019-03-05 21:38:30 +00:00
|
|
|
#include "i915_globals.h"
|
2019-08-06 13:07:28 +03:00
|
|
|
#include "i915_trace.h"
|
2017-08-11 12:11:16 +01:00
|
|
|
|
2019-02-28 10:20:34 +00:00
|
|
|
static struct i915_global_object {
|
2019-03-05 21:38:30 +00:00
|
|
|
struct i915_global base;
|
2019-02-28 10:20:34 +00:00
|
|
|
struct kmem_cache *slab_objects;
|
|
|
|
} global;
|
|
|
|
|
|
|
|
struct drm_i915_gem_object *i915_gem_object_alloc(void)
|
|
|
|
{
|
|
|
|
return kmem_cache_zalloc(global.slab_objects, GFP_KERNEL);
|
|
|
|
}
|
|
|
|
|
|
|
|
void i915_gem_object_free(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
return kmem_cache_free(global.slab_objects, obj);
|
|
|
|
}
|
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
void i915_gem_object_init(struct drm_i915_gem_object *obj,
|
2019-10-22 15:45:01 +01:00
|
|
|
const struct drm_i915_gem_object_ops *ops,
|
|
|
|
struct lock_class_key *key)
|
2019-05-28 10:29:45 +01:00
|
|
|
{
|
2020-05-29 19:32:04 +01:00
|
|
|
__mutex_init(&obj->mm.lock, ops->name ?: "obj->mm.lock", key);
|
2019-05-28 10:29:45 +01:00
|
|
|
|
|
|
|
spin_lock_init(&obj->vma.lock);
|
|
|
|
INIT_LIST_HEAD(&obj->vma.list);
|
|
|
|
|
2019-08-02 22:21:36 +01:00
|
|
|
INIT_LIST_HEAD(&obj->mm.link);
|
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
INIT_LIST_HEAD(&obj->lut_list);
|
2020-07-01 09:44:39 +01:00
|
|
|
spin_lock_init(&obj->lut_lock);
|
2019-05-28 10:29:45 +01:00
|
|
|
|
2019-12-04 12:00:32 +00:00
|
|
|
spin_lock_init(&obj->mmo.lock);
|
2020-01-20 10:49:22 +00:00
|
|
|
obj->mmo.offsets = RB_ROOT;
|
2019-12-04 12:00:32 +00:00
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
init_rcu_head(&obj->rcu);
|
|
|
|
|
|
|
|
obj->ops = ops;
|
|
|
|
|
|
|
|
obj->mm.madv = I915_MADV_WILLNEED;
|
|
|
|
INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN);
|
|
|
|
mutex_init(&obj->mm.get_page.lock);
|
2020-05-29 19:32:03 +01:00
|
|
|
|
|
|
|
if (IS_ENABLED(CONFIG_LOCKDEP) && i915_gem_object_is_shrinkable(obj))
|
|
|
|
i915_gem_shrinker_taints_mutex(to_i915(obj->base.dev),
|
|
|
|
&obj->mm.lock);
|
2019-05-28 10:29:45 +01:00
|
|
|
}
|
|
|
|
|
2017-08-11 12:11:16 +01:00
|
|
|
/**
|
|
|
|
* Mark up the object's coherency levels for a given cache_level
|
|
|
|
* @obj: #drm_i915_gem_object
|
|
|
|
* @cache_level: cache level
|
|
|
|
*/
|
|
|
|
void i915_gem_object_set_cache_coherency(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int cache_level)
|
|
|
|
{
|
|
|
|
obj->cache_level = cache_level;
|
|
|
|
|
|
|
|
if (cache_level != I915_CACHE_NONE)
|
|
|
|
obj->cache_coherent = (I915_BO_CACHE_COHERENT_FOR_READ |
|
|
|
|
I915_BO_CACHE_COHERENT_FOR_WRITE);
|
|
|
|
else if (HAS_LLC(to_i915(obj->base.dev)))
|
|
|
|
obj->cache_coherent = I915_BO_CACHE_COHERENT_FOR_READ;
|
|
|
|
else
|
|
|
|
obj->cache_coherent = 0;
|
|
|
|
|
|
|
|
obj->cache_dirty =
|
|
|
|
!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE);
|
|
|
|
}
|
2019-02-28 10:20:34 +00:00
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
void i915_gem_close_object(struct drm_gem_object *gem, struct drm_file *file)
|
|
|
|
{
|
|
|
|
struct drm_i915_gem_object *obj = to_intel_bo(gem);
|
|
|
|
struct drm_i915_file_private *fpriv = file->driver_priv;
|
2020-07-01 09:44:39 +01:00
|
|
|
struct i915_lut_handle bookmark = {};
|
2020-01-20 10:49:22 +00:00
|
|
|
struct i915_mmap_offset *mmo, *mn;
|
2019-05-28 10:29:45 +01:00
|
|
|
struct i915_lut_handle *lut, *ln;
|
2019-06-06 12:23:20 +01:00
|
|
|
LIST_HEAD(close);
|
2019-05-28 10:29:45 +01:00
|
|
|
|
2020-07-01 09:44:39 +01:00
|
|
|
spin_lock(&obj->lut_lock);
|
2019-05-28 10:29:45 +01:00
|
|
|
list_for_each_entry_safe(lut, ln, &obj->lut_list, obj_link) {
|
|
|
|
struct i915_gem_context *ctx = lut->ctx;
|
|
|
|
|
2020-07-01 09:44:39 +01:00
|
|
|
if (ctx && ctx->file_priv == fpriv) {
|
|
|
|
i915_gem_context_get(ctx);
|
|
|
|
list_move(&lut->obj_link, &close);
|
|
|
|
}
|
2019-05-28 10:29:45 +01:00
|
|
|
|
2020-07-01 09:44:39 +01:00
|
|
|
/* Break long locks, and carefully continue on from this spot */
|
|
|
|
if (&ln->obj_link != &obj->lut_list) {
|
|
|
|
list_add_tail(&bookmark.obj_link, &ln->obj_link);
|
|
|
|
if (cond_resched_lock(&obj->lut_lock))
|
|
|
|
list_safe_reset_next(&bookmark, ln, obj_link);
|
|
|
|
__list_del_entry(&bookmark.obj_link);
|
|
|
|
}
|
2019-06-06 12:23:20 +01:00
|
|
|
}
|
2020-07-01 09:44:39 +01:00
|
|
|
spin_unlock(&obj->lut_lock);
|
2019-06-06 12:23:20 +01:00
|
|
|
|
2019-12-04 12:00:32 +00:00
|
|
|
spin_lock(&obj->mmo.lock);
|
2020-01-20 10:49:22 +00:00
|
|
|
rbtree_postorder_for_each_entry_safe(mmo, mn, &obj->mmo.offsets, offset)
|
2019-12-04 12:00:32 +00:00
|
|
|
drm_vma_node_revoke(&mmo->vma_node, file);
|
|
|
|
spin_unlock(&obj->mmo.lock);
|
|
|
|
|
2019-06-06 12:23:20 +01:00
|
|
|
list_for_each_entry_safe(lut, ln, &close, obj_link) {
|
|
|
|
struct i915_gem_context *ctx = lut->ctx;
|
|
|
|
struct i915_vma *vma;
|
2019-05-28 10:29:45 +01:00
|
|
|
|
2019-06-06 12:23:20 +01:00
|
|
|
/*
|
|
|
|
* We allow the process to have multiple handles to the same
|
2019-05-28 10:29:45 +01:00
|
|
|
* vma, in the same fd namespace, by virtue of flink/open.
|
|
|
|
*/
|
|
|
|
|
2020-07-03 01:43:06 +01:00
|
|
|
mutex_lock(&ctx->lut_mutex);
|
2019-06-06 12:23:20 +01:00
|
|
|
vma = radix_tree_delete(&ctx->handles_vma, lut->handle);
|
|
|
|
if (vma) {
|
|
|
|
GEM_BUG_ON(vma->obj != obj);
|
|
|
|
GEM_BUG_ON(!atomic_read(&vma->open_count));
|
2020-04-22 20:05:58 +01:00
|
|
|
i915_vma_close(vma);
|
2019-06-06 12:23:20 +01:00
|
|
|
}
|
2020-07-03 01:43:06 +01:00
|
|
|
mutex_unlock(&ctx->lut_mutex);
|
2019-06-06 12:23:20 +01:00
|
|
|
|
|
|
|
i915_gem_context_put(lut->ctx);
|
2019-05-28 10:29:45 +01:00
|
|
|
i915_lut_handle_free(lut);
|
2019-05-28 10:29:56 +01:00
|
|
|
i915_gem_object_put(obj);
|
2019-05-28 10:29:45 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-07-03 10:17:17 +01:00
|
|
|
static void __i915_gem_free_object_rcu(struct rcu_head *head)
|
|
|
|
{
|
|
|
|
struct drm_i915_gem_object *obj =
|
|
|
|
container_of(head, typeof(*obj), rcu);
|
|
|
|
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
|
|
|
|
drm-misc-next for 5.4:
UAPI Changes:
Cross-subsystem Changes:
Core Changes:
- dma-buf: add reservation_object_fences helper, relax
reservation_object_add_shared_fence, remove
reservation_object seq number (and then
restored)
- dma-fence: Shrinkage of the dma_fence structure,
Merge dma_fence_signal and dma_fence_signal_locked,
Store the timestamp in struct dma_fence in a union with
cb_list
Driver Changes:
- More dt-bindings YAML conversions
- More removal of drmP.h includes
- dw-hdmi: Support get_eld and various i2s improvements
- gm12u320: Few fixes
- meson: Global cleanup
- panfrost: Few refactors, Support for GPU heap allocations
- sun4i: Support for DDC enable GPIO
- New panels: TI nspire, NEC NL8048HL11, LG Philips LB035Q02,
Sharp LS037V7DW01, Sony ACX565AKM, Toppoly TD028TTEC1
Toppoly TD043MTEA1
-----BEGIN PGP SIGNATURE-----
iHUEABYIAB0WIQRcEzekXsqa64kGDp7j7w1vZxhRxQUCXVqvpwAKCRDj7w1vZxhR
xa3RAQDzAnt5zeesAxX4XhRJzHoCEwj2PJj9Re6xMJ9PlcfcvwD+OS+bcB6jfiXV
Ug9IBd/DqjlmD9G9MxFxfSV946rksAw=
=8uv4
-----END PGP SIGNATURE-----
Merge tag 'drm-misc-next-2019-08-19' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
drm-misc-next for 5.4:
UAPI Changes:
Cross-subsystem Changes:
Core Changes:
- dma-buf: add reservation_object_fences helper, relax
reservation_object_add_shared_fence, remove
reservation_object seq number (and then
restored)
- dma-fence: Shrinkage of the dma_fence structure,
Merge dma_fence_signal and dma_fence_signal_locked,
Store the timestamp in struct dma_fence in a union with
cb_list
Driver Changes:
- More dt-bindings YAML conversions
- More removal of drmP.h includes
- dw-hdmi: Support get_eld and various i2s improvements
- gm12u320: Few fixes
- meson: Global cleanup
- panfrost: Few refactors, Support for GPU heap allocations
- sun4i: Support for DDC enable GPIO
- New panels: TI nspire, NEC NL8048HL11, LG Philips LB035Q02,
Sharp LS037V7DW01, Sony ACX565AKM, Toppoly TD028TTEC1
Toppoly TD043MTEA1
Signed-off-by: Dave Airlie <airlied@redhat.com>
[airlied: fixup dma_resv rename fallout]
From: Maxime Ripard <maxime.ripard@bootlin.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20190819141923.7l2adietcr2pioct@flea
2019-08-21 15:38:43 +10:00
|
|
|
dma_resv_fini(&obj->base._resv);
|
2019-07-03 10:17:17 +01:00
|
|
|
i915_gem_object_free(obj);
|
|
|
|
|
|
|
|
GEM_BUG_ON(!atomic_read(&i915->mm.free_count));
|
|
|
|
atomic_dec(&i915->mm.free_count);
|
|
|
|
}
|
|
|
|
|
2020-07-02 17:36:23 +01:00
|
|
|
static void __i915_gem_object_free_mmaps(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
/* Skip serialisation and waking the device if known to be not used. */
|
|
|
|
|
|
|
|
if (obj->userfault_count)
|
|
|
|
i915_gem_object_release_mmap_gtt(obj);
|
|
|
|
|
|
|
|
if (!RB_EMPTY_ROOT(&obj->mmo.offsets)) {
|
|
|
|
struct i915_mmap_offset *mmo, *mn;
|
|
|
|
|
|
|
|
i915_gem_object_release_mmap_offset(obj);
|
|
|
|
|
|
|
|
rbtree_postorder_for_each_entry_safe(mmo, mn,
|
|
|
|
&obj->mmo.offsets,
|
|
|
|
offset) {
|
|
|
|
drm_vma_offset_remove(obj->base.dev->vma_offset_manager,
|
|
|
|
&mmo->vma_node);
|
|
|
|
kfree(mmo);
|
|
|
|
}
|
|
|
|
obj->mmo.offsets = RB_ROOT;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
static void __i915_gem_free_objects(struct drm_i915_private *i915,
|
|
|
|
struct llist_node *freed)
|
|
|
|
{
|
|
|
|
struct drm_i915_gem_object *obj, *on;
|
|
|
|
|
|
|
|
llist_for_each_entry_safe(obj, on, freed, freed) {
|
|
|
|
trace_i915_gem_object_destroy(obj);
|
|
|
|
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 14:39:58 +01:00
|
|
|
if (!list_empty(&obj->vma.list)) {
|
|
|
|
struct i915_vma *vma;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Note that the vma keeps an object reference while
|
|
|
|
* it is active, so it *should* not sleep while we
|
|
|
|
* destroy it. Our debug code errs insits it *might*.
|
|
|
|
* For the moment, play along.
|
|
|
|
*/
|
|
|
|
spin_lock(&obj->vma.lock);
|
|
|
|
while ((vma = list_first_entry_or_null(&obj->vma.list,
|
|
|
|
struct i915_vma,
|
|
|
|
obj_link))) {
|
|
|
|
GEM_BUG_ON(vma->obj != obj);
|
|
|
|
spin_unlock(&obj->vma.lock);
|
|
|
|
|
2019-12-22 21:02:55 +00:00
|
|
|
__i915_vma_put(vma);
|
drm/i915: Pull i915_vma_pin under the vm->mutex
Replace the struct_mutex requirement for pinning the i915_vma with the
local vm->mutex instead. Note that the vm->mutex is tainted by the
shrinker (we require unbinding from inside fs-reclaim) and so we cannot
allocate while holding that mutex. Instead we have to preallocate
workers to do allocate and apply the PTE updates after we have we
reserved their slot in the drm_mm (using fences to order the PTE writes
with the GPU work and with later unbind).
In adding the asynchronous vma binding, one subtle requirement is to
avoid coupling the binding fence into the backing object->resv. That is
the asynchronous binding only applies to the vma timeline itself and not
to the pages as that is a more global timeline (the binding of one vma
does not need to be ordered with another vma, nor does the implicit GEM
fencing depend on a vma, only on writes to the backing store). Keeping
the vma binding distinct from the backing store timelines is verified by
a number of async gem_exec_fence and gem_exec_schedule tests. The way we
do this is quite simple, we keep the fence for the vma binding separate
and only wait on it as required, and never add it to the obj->resv
itself.
Another consequence in reducing the locking around the vma is the
destruction of the vma is no longer globally serialised by struct_mutex.
A natural solution would be to add a kref to i915_vma, but that requires
decoupling the reference cycles, possibly by introducing a new
i915_mm_pages object that is own by both obj->mm and vma->pages.
However, we have not taken that route due to the overshadowing lmem/ttm
discussions, and instead play a series of complicated games with
trylocks to (hopefully) ensure that only one destruction path is called!
v2: Add some commentary, and some helpers to reduce patch churn.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191004134015.13204-4-chris@chris-wilson.co.uk
2019-10-04 14:39:58 +01:00
|
|
|
|
|
|
|
spin_lock(&obj->vma.lock);
|
|
|
|
}
|
|
|
|
spin_unlock(&obj->vma.lock);
|
2019-05-28 10:29:45 +01:00
|
|
|
}
|
|
|
|
|
2020-07-02 17:36:23 +01:00
|
|
|
__i915_gem_object_free_mmaps(obj);
|
2019-12-04 12:00:32 +00:00
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
GEM_BUG_ON(!list_empty(&obj->lut_list));
|
|
|
|
|
2019-06-18 08:41:29 +01:00
|
|
|
atomic_set(&obj->mm.pages_pin_count, 0);
|
drm/i915: Switch obj->mm.lock lockdep annotations on its head
The trouble with having a plain nesting flag for locks which do not
naturally nest (unlike block devices and their partitions, which is
the original motivation for nesting levels) is that lockdep will
never spot a true deadlock if you screw up.
This patch is an attempt at trying better, by highlighting a bit more
of the actual nature of the nesting that's going on. Essentially we
have two kinds of objects:
- objects without pages allocated, which cannot be on any lru and are
hence inaccessible to the shrinker.
- objects which have pages allocated, which are on an lru, and which
the shrinker can decide to throw out.
For the former type of object, memory allocations while holding
obj->mm.lock are permissible. For the latter they are not. And
get/put_pages transitions between the two types of objects.
This is still not entirely fool-proof since the rules might change.
But as long as we run such a code ever at runtime lockdep should be
able to observe the inconsistency and complain (like with any other
lockdep class that we've split up in multiple classes). But there are
a few clear benefits:
- We can drop the nesting flag parameter from
__i915_gem_object_put_pages, because that function by definition is
never going allocate memory, and calling it on an object which
doesn't have its pages allocated would be a bug.
- We strictly catch more bugs, since there's not only one place in the
entire tree which is annotated with the special class. All the
other places that had explicit lockdep nesting annotations we're now
going to leave up to lockdep again.
- Specifically this catches stuff like calling get_pages from
put_pages (which isn't really a good idea, if we can call get_pages
so could the shrinker). I've seen patches do exactly that.
Of course I fully expect CI will show me for the fool I am with this
one here :-)
v2: There can only be one (lockdep only has a cache for the first
subclass, not for deeper ones, and we don't want to make these locks
even slower). Still separate enums for better documentation.
Real fix: don't forget about phys objs and pin_map(), and fix the
shrinker to have the right annotations ... silly me.
v3: Forgot usertptr too ...
v4: Improve comment for pages_pin_count, drop the IMPORTANT comment
and instead prime lockdep (Chris).
v5: Appease checkpatch, no double empty lines (Chris)
v6: More rebasing over selftest changes. Also somehow I forgot to
push this patch :-/
Also format comments consistently while at it.
v7: Fix typo in commit message (Joonas)
Also drop the priming, with the lmem merge we now have allocations
while holding the lmem lock, which wreaks the generic priming I've
done in earlier patches. Should probably be resurrected when lmem is
fixed. See
commit 232a6ebae419193f5b8da4fa869ae5089ab105c2
Author: Matthew Auld <matthew.auld@intel.com>
Date: Tue Oct 8 17:01:14 2019 +0100
drm/i915: introduce intel_memory_region
I'm keeping the priming patch locally so it wont get lost.
Cc: Matthew Auld <matthew.auld@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: "Tang, CQ" <cq.tang@intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Chris Wilson <chris@chris-wilson.co.uk> (v5)
Reviewed-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com> (v6)
Signed-off-by: Daniel Vetter <daniel.vetter@intel.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Signed-off-by: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20191105090148.30269-1-daniel.vetter@ffwll.ch
[mlankhorst: Fix commit typos pointed out by Michael Ruhl]
2019-11-05 10:01:48 +01:00
|
|
|
__i915_gem_object_put_pages(obj);
|
2019-05-28 10:29:45 +01:00
|
|
|
GEM_BUG_ON(i915_gem_object_has_pages(obj));
|
2019-07-03 10:17:17 +01:00
|
|
|
bitmap_free(obj->bit_17);
|
2019-05-28 10:29:45 +01:00
|
|
|
|
|
|
|
if (obj->base.import_attach)
|
|
|
|
drm_prime_gem_destroy(&obj->base, NULL);
|
|
|
|
|
2019-07-03 19:06:01 +01:00
|
|
|
drm_gem_free_mmap_offset(&obj->base);
|
|
|
|
|
|
|
|
if (obj->ops->release)
|
|
|
|
obj->ops->release(obj);
|
2019-05-28 10:29:45 +01:00
|
|
|
|
2019-07-03 10:17:17 +01:00
|
|
|
/* But keep the pointer alive for RCU-protected lookups */
|
|
|
|
call_rcu(&obj->rcu, __i915_gem_free_object_rcu);
|
2020-02-21 10:09:53 +00:00
|
|
|
cond_resched();
|
2019-05-28 10:29:45 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void i915_gem_flush_free_objects(struct drm_i915_private *i915)
|
|
|
|
{
|
2019-08-02 22:21:37 +01:00
|
|
|
struct llist_node *freed = llist_del_all(&i915->mm.free_list);
|
|
|
|
|
|
|
|
if (unlikely(freed))
|
2019-05-28 10:29:45 +01:00
|
|
|
__i915_gem_free_objects(i915, freed);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void __i915_gem_free_work(struct work_struct *work)
|
|
|
|
{
|
|
|
|
struct drm_i915_private *i915 =
|
|
|
|
container_of(work, struct drm_i915_private, mm.free_work);
|
|
|
|
|
2019-08-02 22:21:37 +01:00
|
|
|
i915_gem_flush_free_objects(i915);
|
2019-05-28 10:29:45 +01:00
|
|
|
}
|
|
|
|
|
2019-07-03 10:17:17 +01:00
|
|
|
void i915_gem_free_object(struct drm_gem_object *gem_obj)
|
2019-05-28 10:29:45 +01:00
|
|
|
{
|
2019-07-03 10:17:17 +01:00
|
|
|
struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
|
2019-05-28 10:29:45 +01:00
|
|
|
struct drm_i915_private *i915 = to_i915(obj->base.dev);
|
|
|
|
|
2019-08-16 08:46:35 +01:00
|
|
|
GEM_BUG_ON(i915_gem_object_is_framebuffer(obj));
|
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
/*
|
2019-07-03 10:17:17 +01:00
|
|
|
* Before we free the object, make sure any pure RCU-only
|
|
|
|
* read-side critical sections are complete, e.g.
|
|
|
|
* i915_gem_busy_ioctl(). For the corresponding synchronized
|
|
|
|
* lookup see i915_gem_object_lookup_rcu().
|
2019-05-28 10:29:45 +01:00
|
|
|
*/
|
2019-07-03 10:17:17 +01:00
|
|
|
atomic_inc(&i915->mm.free_count);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This serializes freeing with the shrinker. Since the free
|
|
|
|
* is delayed, first by RCU then by the workqueue, we want the
|
|
|
|
* shrinker to be able to free pages of unreferenced objects,
|
|
|
|
* or else we may oom whilst there are plenty of deferred
|
|
|
|
* freed objects.
|
|
|
|
*/
|
2019-08-02 22:21:36 +01:00
|
|
|
i915_gem_object_make_unshrinkable(obj);
|
2019-05-28 10:29:45 +01:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Since we require blocking on struct_mutex to unbind the freed
|
|
|
|
* object from the GPU before releasing resources back to the
|
|
|
|
* system, we can not do that directly from the RCU callback (which may
|
|
|
|
* be a softirq context), but must instead then defer that work onto a
|
|
|
|
* kthread. We use the RCU callback rather than move the freed object
|
|
|
|
* directly onto the work queue so that we can mix between using the
|
|
|
|
* worker and performing frees directly from subsequent allocations for
|
|
|
|
* crude but effective memory throttling.
|
|
|
|
*/
|
|
|
|
if (llist_add(&obj->freed, &i915->mm.free_list))
|
|
|
|
queue_work(i915->wq, &i915->mm.free_work);
|
|
|
|
}
|
|
|
|
|
2019-05-28 10:29:47 +01:00
|
|
|
static bool gpu_write_needs_clflush(struct drm_i915_gem_object *obj)
|
|
|
|
{
|
|
|
|
return !(obj->cache_level == I915_CACHE_NONE ||
|
|
|
|
obj->cache_level == I915_CACHE_WT);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
i915_gem_object_flush_write_domain(struct drm_i915_gem_object *obj,
|
|
|
|
unsigned int flush_domains)
|
|
|
|
{
|
|
|
|
struct i915_vma *vma;
|
|
|
|
|
2019-05-28 10:29:51 +01:00
|
|
|
assert_object_held(obj);
|
|
|
|
|
2019-05-28 10:29:47 +01:00
|
|
|
if (!(obj->write_domain & flush_domains))
|
|
|
|
return;
|
|
|
|
|
|
|
|
switch (obj->write_domain) {
|
|
|
|
case I915_GEM_DOMAIN_GTT:
|
2019-11-19 10:09:20 +00:00
|
|
|
spin_lock(&obj->vma.lock);
|
2019-05-28 10:29:47 +01:00
|
|
|
for_each_ggtt_vma(vma, obj) {
|
2019-11-19 10:09:18 +00:00
|
|
|
if (i915_vma_unset_ggtt_write(vma))
|
|
|
|
intel_gt_flush_ggtt_writes(vma->vm->gt);
|
2019-05-28 10:29:47 +01:00
|
|
|
}
|
2019-11-19 10:09:20 +00:00
|
|
|
spin_unlock(&obj->vma.lock);
|
2019-06-21 08:08:01 +01:00
|
|
|
|
2019-12-18 10:40:43 +00:00
|
|
|
i915_gem_object_flush_frontbuffer(obj, ORIGIN_CPU);
|
2019-05-28 10:29:47 +01:00
|
|
|
break;
|
|
|
|
|
|
|
|
case I915_GEM_DOMAIN_WC:
|
|
|
|
wmb();
|
|
|
|
break;
|
|
|
|
|
|
|
|
case I915_GEM_DOMAIN_CPU:
|
|
|
|
i915_gem_clflush_object(obj, I915_CLFLUSH_SYNC);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case I915_GEM_DOMAIN_RENDER:
|
|
|
|
if (gpu_write_needs_clflush(obj))
|
|
|
|
obj->cache_dirty = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
obj->write_domain = 0;
|
|
|
|
}
|
|
|
|
|
2019-12-18 10:40:43 +00:00
|
|
|
void __i915_gem_object_flush_frontbuffer(struct drm_i915_gem_object *obj,
|
|
|
|
enum fb_op_origin origin)
|
|
|
|
{
|
|
|
|
struct intel_frontbuffer *front;
|
|
|
|
|
|
|
|
front = __intel_frontbuffer_get(obj);
|
|
|
|
if (front) {
|
|
|
|
intel_frontbuffer_flush(front, origin);
|
|
|
|
intel_frontbuffer_put(front);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void __i915_gem_object_invalidate_frontbuffer(struct drm_i915_gem_object *obj,
|
|
|
|
enum fb_op_origin origin)
|
|
|
|
{
|
|
|
|
struct intel_frontbuffer *front;
|
|
|
|
|
|
|
|
front = __intel_frontbuffer_get(obj);
|
|
|
|
if (front) {
|
|
|
|
intel_frontbuffer_invalidate(front, origin);
|
|
|
|
intel_frontbuffer_put(front);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2019-05-28 10:29:45 +01:00
|
|
|
void i915_gem_init__objects(struct drm_i915_private *i915)
|
|
|
|
{
|
|
|
|
INIT_WORK(&i915->mm.free_work, __i915_gem_free_work);
|
|
|
|
}
|
|
|
|
|
2019-03-05 21:38:30 +00:00
|
|
|
static void i915_global_objects_shrink(void)
|
|
|
|
{
|
|
|
|
kmem_cache_shrink(global.slab_objects);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void i915_global_objects_exit(void)
|
|
|
|
{
|
|
|
|
kmem_cache_destroy(global.slab_objects);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct i915_global_object global = { {
|
|
|
|
.shrink = i915_global_objects_shrink,
|
|
|
|
.exit = i915_global_objects_exit,
|
|
|
|
} };
|
|
|
|
|
2019-02-28 10:20:34 +00:00
|
|
|
/**
 * i915_global_objects_init - create the GEM object slab cache
 *
 * Creates the cache for struct drm_i915_gem_object and registers the
 * global's hooks with i915_global_register().
 *
 * Return: 0 on success, -ENOMEM if the cache could not be created.
 */
int __init i915_global_objects_init(void)
{
	global.slab_objects = KMEM_CACHE(drm_i915_gem_object,
					 SLAB_HWCACHE_ALIGN);
	if (global.slab_objects == NULL)
		return -ENOMEM;

	i915_global_register(&global.base);

	return 0;
}
|
2019-05-28 10:29:49 +01:00
|
|
|
|
|
|
|
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
|
|
|
|
#include "selftests/huge_gem_object.c"
|
|
|
|
#include "selftests/huge_pages.c"
|
|
|
|
#include "selftests/i915_gem_object.c"
|
|
|
|
#include "selftests/i915_gem_coherency.c"
|
|
|
|
#endif
|