Mirror of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git, synced 2025-08-05 16:54:27 +00:00
drm/amdgpu: nuke amdgpu_vm_bo_split_mapping v2
Merge the functionality mostly into amdgpu_vm_bo_update_mapping. This way we can
even handle small contiguous system pages without too much extra CPU overhead.

v2: fix typo, keep the cursor as it is for now

Signed-off-by: Christian König <christian.koenig@amd.com>
Reviewed-by: Madhav Chauhan <madhav.chauhan@amd.com> (v1)
Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
This commit is contained in:
parent ce9a6cad3e
commit a39f2a8d70
1 changed file with 79 additions and 103 deletions
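The CPU-overhead win described in the commit message comes from the loop that now sits directly in amdgpu_vm_bo_update_mapping(): for each stretch of the mapping it checks whether the next run of system pages in pages_addr is physically contiguous. A contiguous run is written as a single linear update (params.pages_addr = NULL, base address taken from pages_addr[pfn]); a scattered run falls back to per-page translation through params.pages_addr. Below is a minimal, self-contained sketch of that run detection, kept outside the kernel for clarity; count_contiguous_run(), the fixed 4 KiB PAGE_SIZE and the demo main() are illustrative assumptions, not part of the patch.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL       /* assumed 4 KiB pages for this sketch */

/*
 * Return how many pages starting at 'pfn' share the same layout pattern:
 * either all physically contiguous or all scattered.  'contiguous' reports
 * which case applies.  This mirrors the check the patch adds to
 * amdgpu_vm_bo_update_mapping(), but as a stand-alone helper.
 */
static uint64_t count_contiguous_run(const uint64_t *pages_addr, uint64_t pfn,
                                     uint64_t num_pages, bool *contiguous)
{
        uint64_t count;

        if (num_pages < 2) {
                *contiguous = true;
                return num_pages;
        }

        /* Decide the pattern from the first two pages ... */
        *contiguous = pages_addr[pfn + 1] == pages_addr[pfn] + PAGE_SIZE;

        /* ... and extend the run for as long as that pattern holds. */
        for (count = 2; count < num_pages; ++count) {
                bool next = pages_addr[pfn + count] ==
                            pages_addr[pfn + count - 1] + PAGE_SIZE;

                if (next != *contiguous)
                        break;
        }
        return count;
}

int main(void)
{
        /* Three contiguous pages followed by one scattered page. */
        uint64_t pages_addr[] = { 0x1000, 0x2000, 0x3000, 0x9000 };
        bool contiguous;
        uint64_t run = count_contiguous_run(pages_addr, 0, 4, &contiguous);

        /* Prints: run of 3 pages, contiguous=1 */
        printf("run of %llu pages, contiguous=%d\n",
               (unsigned long long)run, contiguous);
        return 0;
}

In the patch itself the same test runs once per loop iteration, with the run length additionally clamped to the current drm_mm node and to AMDGPU_GPU_PAGES_IN_CPU_PAGE granularity before the range is handed to amdgpu_vm_update_ptes().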
@@ -1570,7 +1570,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
- * @adev: amdgpu_device pointer
+ * @adev: amdgpu_device pointer of the VM
+ * @bo_adev: amdgpu_device pointer of the mapped BO
  * @vm: requested vm
  * @immediate: immediate submission in a page fault
  * @unlocked: unlocked invalidation during MM callback
@@ -1578,7 +1579,8 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * @start: start of mapped range
  * @last: last mapped entry
  * @flags: flags for the entries
- * @addr: addr to set the area to
+ * @offset: offset into nodes and pages_addr
+ * @nodes: array of drm_mm_nodes with the MC addresses
  * @pages_addr: DMA addresses to use for mapping
  * @fence: optional resulting fence
  *
@@ -1588,15 +1590,18 @@ static int amdgpu_vm_update_ptes(struct amdgpu_vm_update_params *params,
  * 0 for success, -EINVAL for failure.
  */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+                                       struct amdgpu_device *bo_adev,
                                        struct amdgpu_vm *vm, bool immediate,
                                        bool unlocked, struct dma_resv *resv,
                                        uint64_t start, uint64_t last,
-                                       uint64_t flags, uint64_t addr,
+                                       uint64_t flags, uint64_t offset,
+                                       struct drm_mm_node *nodes,
                                        dma_addr_t *pages_addr,
                                        struct dma_fence **fence)
 {
         struct amdgpu_vm_update_params params;
         enum amdgpu_sync_mode sync_mode;
+        uint64_t pfn;
         int r;

         memset(&params, 0, sizeof(params));
@@ -1614,6 +1619,14 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
         else
                 sync_mode = AMDGPU_SYNC_EXPLICIT;

+        pfn = offset >> PAGE_SHIFT;
+        if (nodes) {
+                while (pfn >= nodes->size) {
+                        pfn -= nodes->size;
+                        ++nodes;
+                }
+        }
+
         amdgpu_vm_eviction_lock(vm);
         if (vm->evicting) {
                 r = -EBUSY;
@@ -1632,105 +1645,47 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
         if (r)
                 goto error_unlock;

-        r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
-        if (r)
-                goto error_unlock;
-
-        r = vm->update_funcs->commit(&params, fence);
-
-error_unlock:
-        amdgpu_vm_eviction_unlock(vm);
-        return r;
-}
-
-/**
- * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
- *
- * @adev: amdgpu_device pointer
- * @resv: fences we need to sync to
- * @pages_addr: DMA addresses to use for mapping
- * @vm: requested vm
- * @mapping: mapped range and flags to use for the update
- * @flags: HW flags for the mapping
- * @bo_adev: amdgpu_device pointer that bo actually been allocated
- * @nodes: array of drm_mm_nodes with the MC addresses
- * @fence: optional resulting fence
- *
- * Split the mapping into smaller chunks so that each update fits
- * into a SDMA IB.
- *
- * Returns:
- * 0 for success, -EINVAL for failure.
- */
-static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
-                                      struct dma_resv *resv,
-                                      dma_addr_t *pages_addr,
-                                      struct amdgpu_vm *vm,
-                                      struct amdgpu_bo_va_mapping *mapping,
-                                      uint64_t flags,
-                                      struct amdgpu_device *bo_adev,
-                                      struct drm_mm_node *nodes,
-                                      struct dma_fence **fence)
-{
-        unsigned min_linear_pages = 1 << adev->vm_manager.fragment_size;
-        uint64_t pfn, start = mapping->start;
-        int r;
-
-        /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
-         * but in case of something, we filter the flags in first place
-         */
-        if (!(mapping->flags & AMDGPU_PTE_READABLE))
-                flags &= ~AMDGPU_PTE_READABLE;
-        if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
-                flags &= ~AMDGPU_PTE_WRITEABLE;
-
-        /* Apply ASIC specific mapping flags */
-        amdgpu_gmc_get_vm_pte(adev, mapping, &flags);
-
-        trace_amdgpu_vm_bo_update(mapping);
-
-        pfn = mapping->offset >> PAGE_SHIFT;
-        if (nodes) {
-                while (pfn >= nodes->size) {
-                        pfn -= nodes->size;
-                        ++nodes;
-                }
-        }
-
         do {
-                dma_addr_t *dma_addr = NULL;
-                uint64_t max_entries;
-                uint64_t addr, last;
+                uint64_t tmp, num_entries, addr;

-                max_entries = mapping->last - start + 1;
+                num_entries = last - start + 1;
                 if (nodes) {
                         addr = nodes->start << PAGE_SHIFT;
-                        max_entries = min((nodes->size - pfn) *
-                                AMDGPU_GPU_PAGES_IN_CPU_PAGE, max_entries);
+                        num_entries = min((nodes->size - pfn) *
+                                AMDGPU_GPU_PAGES_IN_CPU_PAGE, num_entries);
                 } else {
                         addr = 0;
                 }

                 if (pages_addr) {
-                        uint64_t count;
+                        bool contiguous = true;

-                        for (count = 1;
-                             count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
-                             ++count) {
-                                uint64_t idx = pfn + count;
+                        if (num_entries > AMDGPU_GPU_PAGES_IN_CPU_PAGE) {
+                                uint64_t count;

-                                if (pages_addr[idx] !=
-                                    (pages_addr[idx - 1] + PAGE_SIZE))
-                                        break;
+                                contiguous = pages_addr[pfn + 1] ==
+                                        pages_addr[pfn] + PAGE_SIZE;
+
+                                tmp = num_entries /
+                                        AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+                                for (count = 2; count < tmp; ++count) {
+                                        uint64_t idx = pfn + count;
+
+                                        if (contiguous != (pages_addr[idx] ==
+                                            pages_addr[idx - 1] + PAGE_SIZE))
+                                                break;
+                                }
+                                num_entries = count *
+                                        AMDGPU_GPU_PAGES_IN_CPU_PAGE;
                         }

-                        if (count < min_linear_pages) {
+                        if (!contiguous) {
                                 addr = pfn << PAGE_SHIFT;
-                                dma_addr = pages_addr;
+                                params.pages_addr = pages_addr;
                         } else {
                                 addr = pages_addr[pfn];
-                                max_entries = count *
-                                        AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+                                params.pages_addr = NULL;
                         }

                 } else if (flags & (AMDGPU_PTE_VALID | AMDGPU_PTE_PRT)) {
@@ -1738,23 +1693,25 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
                         addr += pfn << PAGE_SHIFT;
                 }

-                last = start + max_entries - 1;
-                r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
-                                                start, last, flags, addr,
-                                                dma_addr, fence);
+                tmp = start + num_entries;
+                r = amdgpu_vm_update_ptes(&params, start, tmp, addr, flags);
                 if (r)
-                        return r;
+                        goto error_unlock;

-                pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
+                pfn += num_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
                 if (nodes && nodes->size == pfn) {
                         pfn = 0;
                         ++nodes;
                 }
-                start = last + 1;
+                start = tmp;

-        } while (unlikely(start != mapping->last + 1));
+        } while (unlikely(start != last + 1));

-        return 0;
+        r = vm->update_funcs->commit(&params, fence);
+
+error_unlock:
+        amdgpu_vm_eviction_unlock(vm);
+        return r;
 }

 /**
@@ -1835,9 +1792,26 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
         }

         list_for_each_entry(mapping, &bo_va->invalids, list) {
-                r = amdgpu_vm_bo_split_mapping(adev, resv, pages_addr, vm,
-                                               mapping, flags, bo_adev, nodes,
-                                               last_update);
+                uint64_t update_flags = flags;
+
+                /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
+                 * but in case of something, we filter the flags in first place
+                 */
+                if (!(mapping->flags & AMDGPU_PTE_READABLE))
+                        update_flags &= ~AMDGPU_PTE_READABLE;
+                if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+                        update_flags &= ~AMDGPU_PTE_WRITEABLE;
+
+                /* Apply ASIC specific mapping flags */
+                amdgpu_gmc_get_vm_pte(adev, mapping, &update_flags);
+
+                trace_amdgpu_vm_bo_update(mapping);
+
+                r = amdgpu_vm_bo_update_mapping(adev, bo_adev, vm, false, false,
+                                                resv, mapping->start,
+                                                mapping->last, update_flags,
+                                                mapping->offset, nodes,
+                                                pages_addr, last_update);
                 if (r)
                         return r;
         }
@@ -2045,9 +2019,10 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
                     mapping->start < AMDGPU_GMC_HOLE_START)
                         init_pte_value = AMDGPU_PTE_DEFAULT_ATC;

-                r = amdgpu_vm_bo_update_mapping(adev, vm, false, false, resv,
-                                                mapping->start, mapping->last,
-                                                init_pte_value, 0, NULL, &f);
+                r = amdgpu_vm_bo_update_mapping(adev, adev, vm, false, false,
+                                                resv, mapping->start,
+                                                mapping->last, init_pte_value,
+                                                0, NULL, NULL, &f);
                 amdgpu_vm_free_mapping(adev, vm, mapping, f);
                 if (r) {
                         dma_fence_put(f);
@@ -3375,8 +3350,9 @@ bool amdgpu_vm_handle_fault(struct amdgpu_device *adev, unsigned int pasid,
                 value = 0;
         }

-        r = amdgpu_vm_bo_update_mapping(adev, vm, true, false, NULL, addr,
-                                        addr + 1, flags, value, NULL, NULL);
+        r = amdgpu_vm_bo_update_mapping(adev, adev, vm, true, false, NULL, addr,
+                                        addr + 1, flags, value, NULL, NULL,
+                                        NULL);
         if (r)
                 goto error_unlock;
