drm/nouveau/gf100-: track chan progress with non-WFI semaphore release

From VOLTA_CHANNEL_GPFIFO_A onwards, HW no longer updates the GET/GP_GET
pointers in USERD following channel progress, but instead updates on a
timer for compatibility, and SW is expected to implement its own method
of tracking channel progress (typically via non-WFI semaphore release).

Nouveau has been making use of the compatibility mode up until now,
however, from BLACKWELL_CHANNEL_GPFIFO_A HW no longer supports USERD
writeback at all.

Allocate a per-channel buffer in system memory, and append a non-WFI
semaphore release to the end of each push buffer segment to simulate
the pointers previously read from USERD.

This change is implemented for Fermi (which is the first to support non-
WFI semaphore release) onwards, as readback from system memory is likely
faster than BAR1 reads.

Signed-off-by: Ben Skeggs <bskeggs@nvidia.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
Reviewed-by: Timur Tabi <ttabi@nvidia.com>
Tested-by: Timur Tabi <ttabi@nvidia.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
This commit is contained in:
Ben Skeggs 2024-06-19 14:23:04 +10:00 committed by Dave Airlie
parent d1fb887a08
commit 862450a85b
12 changed files with 245 additions and 18 deletions

View file

@ -17,7 +17,13 @@ struct nvif_chan {
void (*push)(struct nvif_chan *, bool main, u64 addr, u32 size,
bool no_prefetch);
void (*kick)(struct nvif_chan *);
int (*post)(struct nvif_chan *, u32 gpptr, u32 pbptr);
u32 post_size;
} gpfifo;
struct {
int (*release)(struct nvif_chan *, u64 addr, u32 data);
} sem;
} *func;
struct {
@ -31,6 +37,11 @@ struct nvif_chan {
int free;
} gpfifo;
struct {
struct nvif_map map;
u64 addr;
} sema;
struct nvif_push push;
struct nvif_user *usermode;
@ -43,14 +54,23 @@ void nvif_chan_gpfifo_ctor(const struct nvif_chan_func *, void *userd, void *gpf
void *push, u64 push_addr, u32 push_size, struct nvif_chan *);
int nvif_chan_gpfifo_wait(struct nvif_chan *, u32 gpfifo_nr, u32 push_nr);
void nvif_chan_gpfifo_push(struct nvif_chan *, u64 addr, u32 size, bool no_prefetch);
int nvif_chan_gpfifo_post(struct nvif_chan *);
void nvif_chan506f_gpfifo_push(struct nvif_chan *, bool main, u64 addr, u32 size, bool no_prefetch);
void nvif_chan506f_gpfifo_kick(struct nvif_chan *);
int nvif_chan906f_ctor_(const struct nvif_chan_func *, void *userd, void *gpfifo, u32 gpfifo_size,
void *push, u64 push_addr, u32 push_size, void *sema, u64 sema_addr,
struct nvif_chan *);
u32 nvif_chan906f_read_get(struct nvif_chan *);
u32 nvif_chan906f_gpfifo_read_get(struct nvif_chan *);
int nvif_chan906f_gpfifo_post(struct nvif_chan *, u32 gpptr, u32 pbptr);
int nvif_chan506f_ctor(struct nvif_chan *, void *userd, void *gpfifo, u32 gpfifo_size,
void *push, u64 push_addr, u32 push_size);
u32 nvif_chan506f_read_get(struct nvif_chan *);
u32 nvif_chan506f_gpfifo_read_get(struct nvif_chan *);
void nvif_chan506f_gpfifo_push(struct nvif_chan *, bool main, u64 addr, u32 size, bool no_prefetch);
int nvif_chan906f_ctor(struct nvif_chan *, void *userd, void *gpfifo, u32 gpfifo_size,
void *push, u64 push_addr, u32 push_size, void *sema, u64 sema_addr);
int nvif_chanc36f_ctor(struct nvif_chan *, void *userd, void *gpfifo, u32 gpfifo_size,
void *push, u64 push_addr, u32 push_size,
void *push, u64 push_addr, u32 push_size, void *sema, u64 sema_addr,
struct nvif_user *usermode, u32 doorbell_token);
#endif

View file

@ -456,6 +456,28 @@ nouveau_bo_new_map(struct nouveau_cli *cli, u32 domain, u32 size, struct nouveau
return 0;
}
/*
 * Create a buffer object with nouveau_bo_new_map() and additionally create
 * a VMA for it in the client's VMM (as returned by nouveau_cli_vmm()).
 *
 * On success, returns 0, stores the new buffer in *pnvbo and the VMA in
 * *pvma.  On failure, returns the error from whichever step failed; if the
 * VMA could not be created, the buffer is released again with
 * nouveau_bo_unpin_del() and *pnvbo is left untouched.
 */
int
nouveau_bo_new_map_gpu(struct nouveau_cli *cli, u32 domain, u32 size,
		       struct nouveau_bo **pnvbo, struct nouveau_vma **pvma)
{
	struct nouveau_vmm *vmm = nouveau_cli_vmm(cli);
	struct nouveau_bo *bo;
	int err;

	err = nouveau_bo_new_map(cli, domain, size, &bo);
	if (err)
		return err;

	err = nouveau_vma_new(bo, vmm, pvma);
	if (!err) {
		/* Only hand the buffer to the caller once fully set up. */
		*pnvbo = bo;
		return 0;
	}

	/* No VMA, so the buffer on its own is of no use to the caller. */
	nouveau_bo_unpin_del(&bo);
	return err;
}
static void
set_placement_range(struct nouveau_bo *nvbo, uint32_t domain)
{

View file

@ -92,6 +92,8 @@ void nouveau_bo_del_io_reserve_lru(struct ttm_buffer_object *bo);
int nouveau_bo_new_pin(struct nouveau_cli *, u32 domain, u32 size, struct nouveau_bo **);
int nouveau_bo_new_map(struct nouveau_cli *, u32 domain, u32 size, struct nouveau_bo **);
int nouveau_bo_new_map_gpu(struct nouveau_cli *, u32 domain, u32 size,
struct nouveau_bo **, struct nouveau_vma **);
void nouveau_bo_unpin_del(struct nouveau_bo **);
/* TODO: submit equivalent to TTM generic API upstream? */

View file

@ -103,6 +103,8 @@ nouveau_channel_del(struct nouveau_channel **pchan)
nvif_event_dtor(&chan->kill);
nvif_object_dtor(&chan->user);
nvif_mem_dtor(&chan->mem_userd);
nouveau_vma_del(&chan->sema.vma);
nouveau_bo_unpin_del(&chan->sema.bo);
nvif_object_dtor(&chan->push.ctxdma);
nouveau_vma_del(&chan->push.vma);
nouveau_bo_unpin_del(&chan->push.buffer);
@ -189,8 +191,10 @@ nouveau_channel_prep(struct nouveau_cli *cli,
chan->push.addr = chan->push.vma->addr;
if (device->info.family >= NV_DEVICE_INFO_V0_FERMI)
return 0;
if (device->info.family >= NV_DEVICE_INFO_V0_FERMI) {
return nouveau_bo_new_map_gpu(cli, NOUVEAU_GEM_DOMAIN_GART, PAGE_SIZE,
&chan->sema.bo, &chan->sema.vma);
}
args.target = NV_DMA_V0_TARGET_VM;
args.access = NV_DMA_V0_ACCESS_VM;
@ -429,16 +433,25 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
chan->user_get = 0x44;
chan->dma.max = (0x10000 / 4) - 2;
} else
if (chan->user.oclass < VOLTA_CHANNEL_GPFIFO_A) {
if (chan->user.oclass < FERMI_CHANNEL_GPFIFO) {
ret = nvif_chan506f_ctor(&chan->chan, chan->userd->map.ptr,
(u8*)chan->push.buffer->kmap.virtual + 0x10000, 0x2000,
chan->push.buffer->kmap.virtual, chan->push.addr, 0x10000);
if (ret)
return ret;
} else
if (chan->user.oclass < VOLTA_CHANNEL_GPFIFO_A) {
ret = nvif_chan906f_ctor(&chan->chan, chan->userd->map.ptr,
(u8*)chan->push.buffer->kmap.virtual + 0x10000, 0x2000,
chan->push.buffer->kmap.virtual, chan->push.addr, 0x10000,
chan->sema.bo->kmap.virtual, chan->sema.vma->addr);
if (ret)
return ret;
} else {
ret = nvif_chanc36f_ctor(&chan->chan, chan->userd->map.ptr,
(u8*)chan->push.buffer->kmap.virtual + 0x10000, 0x2000,
chan->push.buffer->kmap.virtual, chan->push.addr, 0x10000,
chan->sema.bo->kmap.virtual, chan->sema.vma->addr,
&drm->client.device.user, chan->token);
if (ret)
return ret;

View file

@ -43,6 +43,11 @@ struct nouveau_channel {
u32 user_get;
u32 user_put;
struct {
struct nouveau_bo *bo;
struct nouveau_vma *vma;
} sema;
struct nvif_object user;
struct nvif_object blit;

View file

@ -146,6 +146,8 @@ nouveau_exec_job_run(struct nouveau_job *job)
nvif_chan_gpfifo_push(&chan->chan, p->va, p->va_len, no_prefetch);
}
nvif_chan_gpfifo_post(&chan->chan);
ret = nouveau_fence_emit(fence);
if (ret) {
nouveau_fence_unref(&exec_job->fence);

View file

@ -866,6 +866,8 @@ revalidate:
nvif_chan_gpfifo_push(&chan->chan, addr, length, no_prefetch);
}
nvif_chan_gpfifo_post(&chan->chan);
} else
if (drm->client.device.info.chipset >= 0x25) {
ret = PUSH_WAIT(&chan->chan.push, req->nr_push * 2);

View file

@ -17,6 +17,7 @@ nvif-y += nvif/vmm.o
# Channel classes
nvif-y += nvif/chan.o
nvif-y += nvif/chan506f.o
nvif-y += nvif/chan906f.o
nvif-y += nvif/chanc36f.o
# Usermode classes

View file

@ -9,7 +9,16 @@ nvif_chan_gpfifo_push_kick(struct nvif_push *push)
{
struct nvif_chan *chan = container_of(push, typeof(*chan), push);
u32 put = push->bgn - (u32 *)chan->push.mem.object.map.ptr;
u32 cnt = push->cur - push->bgn;
u32 cnt;
if (chan->func->gpfifo.post) {
if (push->end - push->cur < chan->func->gpfifo.post_size)
push->end = push->cur + chan->func->gpfifo.post_size;
WARN_ON(nvif_chan_gpfifo_post(chan));
}
cnt = push->cur - push->bgn;
chan->func->gpfifo.push(chan, true, chan->push.addr + (put << 2), cnt << 2, false);
chan->func->gpfifo.kick(chan);
@ -23,6 +32,16 @@ nvif_chan_gpfifo_push_wait(struct nvif_push *push, u32 push_nr)
return nvif_chan_gpfifo_wait(chan, 1, push_nr);
}
/*
 * Invoke the channel's gpfifo.post() hook with the pointer values to be
 * reported for the current submission:
 *
 *  - pbptr: offset of push.cur from the start of the push buffer mapping
 *           (in u32 units), advanced by gpfifo.post_size.
 *  - gpptr: the GPFIFO index following gpfifo.cur, wrapped with gpfifo.max.
 *
 * The hook is called unconditionally; callers must only use this on
 * channels whose function table provides gpfifo.post.
 *
 * Returns whatever the hook returns.
 */
int
nvif_chan_gpfifo_post(struct nvif_chan *chan)
{
	const u32 *base = chan->push.mem.object.map.ptr;
	u32 next_pb, next_gp;

	next_pb = (chan->push.cur - base) + chan->func->gpfifo.post_size;
	next_gp = (chan->gpfifo.cur + 1) & chan->gpfifo.max;

	return chan->func->gpfifo.post(chan, next_gp, next_pb);
}
void
nvif_chan_gpfifo_push(struct nvif_chan *chan, u64 addr, u32 size, bool no_prefetch)
{
@ -35,6 +54,14 @@ nvif_chan_gpfifo_wait(struct nvif_chan *chan, u32 gpfifo_nr, u32 push_nr)
struct nvif_push *push = &chan->push;
int ret = 0, time = 1000000;
if (gpfifo_nr) {
/* Account for pushbuf space needed by nvif_chan_gpfifo_post(),
* if used after pushing userspace GPFIFO entries.
*/
if (chan->func->gpfifo.post)
push_nr += chan->func->gpfifo.post_size;
}
/* Account for the GPFIFO entry needed to submit pushbuf. */
if (push_nr)
gpfifo_nr++;
@ -89,6 +116,8 @@ nvif_chan_dma_wait(struct nvif_chan *chan, u32 nr)
u32 cur = push->cur - (u32 *)push->mem.object.map.ptr;
u32 free, time = 1000000;
nr += chan->func->gpfifo.post_size;
do {
u32 get = chan->func->push.read_get(chan);
@ -122,6 +151,6 @@ nvif_chan_dma_wait(struct nvif_chan *chan, u32 nr)
push->bgn = (u32 *)push->mem.object.map.ptr + cur;
push->cur = push->bgn;
push->end = push->bgn + free;
push->end = push->bgn + free - chan->func->gpfifo.post_size;
return 0;
}

View file

@ -4,7 +4,7 @@
*/
#include <nvif/chan.h>
static void
void
nvif_chan506f_gpfifo_kick(struct nvif_chan *chan)
{
wmb();
@ -31,13 +31,13 @@ nvif_chan506f_gpfifo_push(struct nvif_chan *chan, bool main, u64 addr, u32 size,
chan->push.end = chan->push.cur;
}
u32
static u32
nvif_chan506f_gpfifo_read_get(struct nvif_chan *chan)
{
return nvif_rd32(&chan->userd, 0x88);
}
u32
static u32
nvif_chan506f_read_get(struct nvif_chan *chan)
{
u32 tlgetlo = nvif_rd32(&chan->userd, 0x58);

View file

@ -0,0 +1,93 @@
/* SPDX-License-Identifier: MIT
*
* Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
*/
#include <nvif/chan.h>
#include <nvif/user.h>
#include <nvif/push906f.h>
#include <nvhw/class/cl906f.h>
/* Limits GPFIFO size to 1MiB, and "main" push buffer size to 64KiB. */
#define NVIF_CHAN906F_PBPTR_BITS 15
#define NVIF_CHAN906F_PBPTR_MASK ((1 << NVIF_CHAN906F_PBPTR_BITS) - 1)
#define NVIF_CHAN906F_GPPTR_SHIFT NVIF_CHAN906F_PBPTR_BITS
#define NVIF_CHAN906F_GPPTR_BITS (32 - NVIF_CHAN906F_PBPTR_BITS)
#define NVIF_CHAN906F_GPPTR_MASK ((1 << NVIF_CHAN906F_GPPTR_BITS) - 1)
#define NVIF_CHAN906F_SEM_RELEASE_SIZE 5
/*
 * Queue a semaphore release on the channel's push buffer that writes
 * 'data' to 'addr', with RELEASE_WFI disabled.
 *
 * Returns 0 on success, or the error from PUSH_WAIT() when
 * NVIF_CHAN906F_SEM_RELEASE_SIZE units of push buffer space could not be
 * reserved (in which case nothing is emitted).
 */
static int
nvif_chan906f_sem_release(struct nvif_chan *chan, u64 addr, u32 data)
{
	struct nvif_push *push = &chan->push;
	const u32 addr_hi = upper_32_bits(addr);
	const u32 addr_lo = lower_32_bits(addr);
	int err = PUSH_WAIT(push, NVIF_CHAN906F_SEM_RELEASE_SIZE);

	if (err)
		return err;

	PUSH_MTHD(push, NV906F, SEMAPHOREA,
		  NVVAL(NV906F, SEMAPHOREA, OFFSET_UPPER, addr_hi),

				SEMAPHOREB, addr_lo,

				SEMAPHOREC, data,

				SEMAPHORED,
		  NVDEF(NV906F, SEMAPHORED, OPERATION, RELEASE) |
		  NVDEF(NV906F, SEMAPHORED, RELEASE_WFI, DIS) |
		  NVDEF(NV906F, SEMAPHORED, RELEASE_SIZE, 16BYTE));

	return 0;
}
/*
 * Report channel progress by releasing a semaphore at chan->sema.addr.
 *
 * The released value carries both pointers: 'gpptr' shifted up by
 * NVIF_CHAN906F_GPPTR_SHIFT, OR'd with 'pbptr' in the low bits.  The
 * release itself is delegated to the channel's sem.release() hook, whose
 * return value is passed back to the caller.
 */
int
nvif_chan906f_gpfifo_post(struct nvif_chan *chan, u32 gpptr, u32 pbptr)
{
	const u32 payload = (gpptr << NVIF_CHAN906F_GPPTR_SHIFT) | pbptr;

	return chan->func->sem.release(chan, chan->sema.addr, payload);
}
/*
 * Return the GPFIFO pointer last written to the channel's semaphore
 * buffer, i.e. the bits above the push buffer pointer in the 32-bit
 * value at offset 0.
 */
u32
nvif_chan906f_gpfifo_read_get(struct nvif_chan *chan)
{
	const u32 sem = nvif_rd32(&chan->sema, 0);

	return sem >> NVIF_CHAN906F_GPPTR_SHIFT;
}
/*
 * Return the push buffer pointer last written to the channel's semaphore
 * buffer, i.e. the low NVIF_CHAN906F_PBPTR_BITS bits of the 32-bit value
 * at offset 0.
 */
u32
nvif_chan906f_read_get(struct nvif_chan *chan)
{
	const u32 sem = nvif_rd32(&chan->sema, 0);

	return sem & NVIF_CHAN906F_PBPTR_MASK;
}
static const struct nvif_chan_func
nvif_chan906f = {
.push.read_get = nvif_chan906f_read_get,
.gpfifo.read_get = nvif_chan906f_gpfifo_read_get,
.gpfifo.push = nvif_chan506f_gpfifo_push,
.gpfifo.kick = nvif_chan506f_gpfifo_kick,
.gpfifo.post = nvif_chan906f_gpfifo_post,
.gpfifo.post_size = NVIF_CHAN906F_SEM_RELEASE_SIZE,
.sem.release = nvif_chan906f_sem_release,
};
/*
 * Common constructor for channels that track progress via a semaphore
 * buffer.  Performs the generic GPFIFO channel setup with the supplied
 * function table, then records the semaphore buffer's CPU mapping
 * ('sema') and the address used for releases ('sema_addr').
 *
 * Always returns 0.
 */
int
nvif_chan906f_ctor_(const struct nvif_chan_func *func, void *userd, void *gpfifo, u32 gpfifo_size,
		    void *push, u64 push_addr, u32 push_size, void *sema, u64 sema_addr,
		    struct nvif_chan *chan)
{
	nvif_chan_gpfifo_ctor(func, userd, gpfifo, gpfifo_size,
			      push, push_addr, push_size, chan);

	/* Semaphore state is filled in only after the generic setup. */
	chan->sema.addr = sema_addr;
	chan->sema.map.ptr = sema;

	return 0;
}
/*
 * Construct a 906f-class channel: the common semaphore-tracking
 * constructor bound to the nvif_chan906f function table.
 *
 * Returns the result of nvif_chan906f_ctor_().
 */
int
nvif_chan906f_ctor(struct nvif_chan *chan, void *userd, void *gpfifo, u32 gpfifo_size,
		   void *push, u64 push_addr, u32 push_size, void *sema, u64 sema_addr)
{
	int ret;

	ret = nvif_chan906f_ctor_(&nvif_chan906f, userd, gpfifo, gpfifo_size,
				  push, push_addr, push_size, sema, sema_addr, chan);
	return ret;
}

View file

@ -5,6 +5,9 @@
#include <nvif/chan.h>
#include <nvif/user.h>
#include <nvif/push906f.h>
#include <nvhw/class/clc36f.h>
static void
nvif_chanc36f_gpfifo_kick(struct nvif_chan *chan)
{
@ -18,21 +21,56 @@ nvif_chanc36f_gpfifo_kick(struct nvif_chan *chan)
usermode->func->doorbell(usermode, chan->doorbell_token);
}
#define NVIF_CHANC36F_SEM_RELEASE_SIZE 6
/*
 * Queue a semaphore release on the channel's push buffer that writes the
 * 32-bit 'data' payload to 'addr', with RELEASE_WFI and the release
 * timestamp both disabled.
 *
 * Returns 0 on success, or the error from PUSH_WAIT() when
 * NVIF_CHANC36F_SEM_RELEASE_SIZE units of push buffer space could not be
 * reserved (in which case nothing is emitted).
 */
static int
nvif_chanc36f_sem_release(struct nvif_chan *chan, u64 addr, u32 data)
{
	struct nvif_push *push = &chan->push;
	const u32 addr_lo = lower_32_bits(addr);
	const u32 addr_hi = upper_32_bits(addr);
	int err = PUSH_WAIT(push, NVIF_CHANC36F_SEM_RELEASE_SIZE);

	if (err)
		return err;

	/* Address and payload first, then the method that triggers it. */
	PUSH_MTHD(push, NVC36F, SEM_ADDR_LO, addr_lo,
				SEM_ADDR_HI, addr_hi,
				SEM_PAYLOAD_LO, data);

	PUSH_MTHD(push, NVC36F, SEM_EXECUTE,
		  NVDEF(NVC36F, SEM_EXECUTE, OPERATION, RELEASE) |
		  NVDEF(NVC36F, SEM_EXECUTE, RELEASE_WFI, DIS) |
		  NVDEF(NVC36F, SEM_EXECUTE, PAYLOAD_SIZE, 32BIT) |
		  NVDEF(NVC36F, SEM_EXECUTE, RELEASE_TIMESTAMP, DIS));

	return 0;
}
static const struct nvif_chan_func
nvif_chanc36f = {
.push.read_get = nvif_chan506f_read_get,
.gpfifo.read_get = nvif_chan506f_gpfifo_read_get,
.push.read_get = nvif_chan906f_read_get,
.gpfifo.read_get = nvif_chan906f_gpfifo_read_get,
.gpfifo.push = nvif_chan506f_gpfifo_push,
.gpfifo.kick = nvif_chanc36f_gpfifo_kick,
.gpfifo.post = nvif_chan906f_gpfifo_post,
.gpfifo.post_size = NVIF_CHANC36F_SEM_RELEASE_SIZE,
.sem.release = nvif_chanc36f_sem_release,
};
int
nvif_chanc36f_ctor(struct nvif_chan *chan, void *userd, void *gpfifo, u32 gpfifo_size,
void *push, u64 push_addr, u32 push_size,
void *push, u64 push_addr, u32 push_size, void *sema, u64 sema_addr,
struct nvif_user *usermode, u32 doorbell_token)
{
nvif_chan_gpfifo_ctor(&nvif_chanc36f, userd, gpfifo, gpfifo_size,
push, push_addr, push_size, chan);
int ret;
ret = nvif_chan906f_ctor_(&nvif_chanc36f, userd, gpfifo, gpfifo_size,
push, push_addr, push_size, sema, sema_addr, chan);
if (ret)
return ret;
chan->usermode = usermode;
chan->doorbell_token = doorbell_token;
return 0;