mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
accel/amdxdna: Add command execution
Add interfaces for user application to submit command and wait for its completion. Co-developed-by: Min Ma <min.ma@amd.com> Signed-off-by: Min Ma <min.ma@amd.com> Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com> Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Link: https://patchwork.freedesktop.org/patch/msgid/20241118172942.2014541-8-lizhi.hou@amd.com
This commit is contained in:
parent
ac49797c18
commit
aac243092b
19 changed files with 1531 additions and 9 deletions
|
@ -8,8 +8,12 @@
|
|||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/drm_syncobj.h>
|
||||
#include <linux/hmm.h>
|
||||
#include <linux/types.h>
|
||||
#include <trace/events/amdxdna.h>
|
||||
|
||||
#include "aie2_msg_priv.h"
|
||||
#include "aie2_pci.h"
|
||||
#include "aie2_solver.h"
|
||||
#include "amdxdna_ctx.h"
|
||||
|
@ -17,6 +21,321 @@
|
|||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
bool force_cmdlist;
|
||||
module_param(force_cmdlist, bool, 0600);
|
||||
MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
|
||||
|
||||
#define HWCTX_MAX_TIMEOUT 60000 /* milliseconds */
|
||||
|
||||
static void aie2_job_release(struct kref *ref)
|
||||
{
|
||||
struct amdxdna_sched_job *job;
|
||||
|
||||
job = container_of(ref, struct amdxdna_sched_job, refcnt);
|
||||
amdxdna_sched_job_cleanup(job);
|
||||
if (job->out_fence)
|
||||
dma_fence_put(job->out_fence);
|
||||
kfree(job);
|
||||
}
|
||||
|
||||
static void aie2_job_put(struct amdxdna_sched_job *job)
|
||||
{
|
||||
kref_put(&job->refcnt, aie2_job_release);
|
||||
}
|
||||
|
||||
/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
|
||||
static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
|
||||
struct drm_sched_job *bad_job)
|
||||
{
|
||||
drm_sched_stop(&hwctx->priv->sched, bad_job);
|
||||
aie2_destroy_context(xdna->dev_handle, hwctx);
|
||||
}
|
||||
|
||||
static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_gem_obj *heap = hwctx->priv->heap;
|
||||
int ret;
|
||||
|
||||
ret = aie2_create_context(xdna->dev_handle, hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
|
||||
heap->mem.userptr, heap->mem.size);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (hwctx->status != HWCTX_STAT_READY) {
|
||||
XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = aie2_config_cu(hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
drm_sched_start(&hwctx->priv->sched, 0);
|
||||
XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void aie2_restart_ctx(struct amdxdna_client *client)
|
||||
{
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct amdxdna_hwctx *hwctx;
|
||||
int next = 0;
|
||||
|
||||
drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
|
||||
mutex_lock(&client->hwctx_lock);
|
||||
idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
|
||||
if (hwctx->status != HWCTX_STAT_STOP)
|
||||
continue;
|
||||
|
||||
hwctx->status = hwctx->old_status;
|
||||
XDNA_DBG(xdna, "Resetting %s", hwctx->name);
|
||||
aie2_hwctx_restart(xdna, hwctx);
|
||||
}
|
||||
mutex_unlock(&client->hwctx_lock);
|
||||
}
|
||||
|
||||
/*
 * Look up the fence for sequence number @seq on the context's syncobj.
 *
 * Return: a new reference to the fence contained in the matching chain
 * node, or NULL if the syncobj has no fence or the lookup fails. The
 * caller must dma_fence_put() a non-NULL result.
 */
static struct dma_fence *aie2_cmd_get_out_fence(struct amdxdna_hwctx *hwctx, u64 seq)
{
	struct dma_fence *chain_fence, *result = NULL;

	chain_fence = drm_syncobj_fence_get(hwctx->priv->syncobj);
	if (!chain_fence)
		return NULL;

	if (!dma_fence_chain_find_seqno(&chain_fence, seq))
		result = dma_fence_get(dma_fence_chain_contained(chain_fence));

	dma_fence_put(chain_fence);
	return result;
}
|
||||
|
||||
static void aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct dma_fence *fence;
|
||||
|
||||
fence = aie2_cmd_get_out_fence(hwctx, hwctx->priv->seq - 1);
|
||||
if (!fence)
|
||||
return;
|
||||
|
||||
dma_fence_wait(fence, false);
|
||||
dma_fence_put(fence);
|
||||
}
|
||||
|
||||
static void
|
||||
aie2_sched_notify(struct amdxdna_sched_job *job)
|
||||
{
|
||||
struct dma_fence *fence = job->fence;
|
||||
|
||||
trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
|
||||
job->hwctx->priv->completed++;
|
||||
dma_fence_signal(fence);
|
||||
|
||||
up(&job->hwctx->priv->job_sem);
|
||||
job->job_done = true;
|
||||
dma_fence_put(fence);
|
||||
mmput(job->mm);
|
||||
aie2_job_put(job);
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
|
||||
{
|
||||
struct amdxdna_sched_job *job = handle;
|
||||
struct amdxdna_gem_obj *cmd_abo;
|
||||
u32 ret = 0;
|
||||
u32 status;
|
||||
|
||||
cmd_abo = job->cmd_bo;
|
||||
|
||||
if (unlikely(!data))
|
||||
goto out;
|
||||
|
||||
if (unlikely(size != sizeof(u32))) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = *data;
|
||||
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
|
||||
if (status == AIE2_STATUS_SUCCESS)
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
|
||||
else
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
|
||||
|
||||
out:
|
||||
aie2_sched_notify(job);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
|
||||
{
|
||||
struct amdxdna_sched_job *job = handle;
|
||||
u32 ret = 0;
|
||||
u32 status;
|
||||
|
||||
if (unlikely(!data))
|
||||
goto out;
|
||||
|
||||
if (unlikely(size != sizeof(u32))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = *data;
|
||||
XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
|
||||
|
||||
out:
|
||||
aie2_sched_notify(job);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
|
||||
{
|
||||
struct amdxdna_sched_job *job = handle;
|
||||
struct amdxdna_gem_obj *cmd_abo;
|
||||
struct cmd_chain_resp *resp;
|
||||
struct amdxdna_dev *xdna;
|
||||
u32 fail_cmd_status;
|
||||
u32 fail_cmd_idx;
|
||||
u32 ret = 0;
|
||||
|
||||
cmd_abo = job->cmd_bo;
|
||||
if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
resp = (struct cmd_chain_resp *)data;
|
||||
xdna = job->hwctx->client->xdna;
|
||||
XDNA_DBG(xdna, "Status 0x%x", resp->status);
|
||||
if (resp->status == AIE2_STATUS_SUCCESS) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Slow path to handle error, read from ringbuf on BAR */
|
||||
fail_cmd_idx = resp->fail_cmd_idx;
|
||||
fail_cmd_status = resp->fail_cmd_status;
|
||||
XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
|
||||
fail_cmd_idx, fail_cmd_status);
|
||||
|
||||
if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
|
||||
|
||||
if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
|
||||
struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
|
||||
|
||||
cc->error_index = fail_cmd_idx;
|
||||
if (cc->error_index >= cc->command_count)
|
||||
cc->error_index = 0;
|
||||
}
|
||||
out:
|
||||
aie2_sched_notify(job);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct dma_fence *
|
||||
aie2_sched_job_run(struct drm_sched_job *sched_job)
|
||||
{
|
||||
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
struct amdxdna_hwctx *hwctx = job->hwctx;
|
||||
struct dma_fence *fence;
|
||||
int ret;
|
||||
|
||||
if (!mmget_not_zero(job->mm))
|
||||
return ERR_PTR(-ESRCH);
|
||||
|
||||
kref_get(&job->refcnt);
|
||||
fence = dma_fence_get(job->fence);
|
||||
|
||||
if (unlikely(!cmd_abo)) {
|
||||
ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
|
||||
goto out;
|
||||
}
|
||||
|
||||
amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
|
||||
|
||||
if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
|
||||
ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
|
||||
else if (force_cmdlist)
|
||||
ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
|
||||
else
|
||||
ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
|
||||
|
||||
out:
|
||||
if (ret) {
|
||||
dma_fence_put(job->fence);
|
||||
aie2_job_put(job);
|
||||
mmput(job->mm);
|
||||
fence = ERR_PTR(ret);
|
||||
}
|
||||
trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
|
||||
|
||||
return fence;
|
||||
}
|
||||
|
||||
static void aie2_sched_job_free(struct drm_sched_job *sched_job)
|
||||
{
|
||||
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
|
||||
struct amdxdna_hwctx *hwctx = job->hwctx;
|
||||
|
||||
trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
|
||||
if (!job->job_done)
|
||||
up(&hwctx->priv->job_sem);
|
||||
|
||||
drm_sched_job_cleanup(sched_job);
|
||||
aie2_job_put(job);
|
||||
}
|
||||
|
||||
static enum drm_gpu_sched_stat
|
||||
aie2_sched_job_timedout(struct drm_sched_job *sched_job)
|
||||
{
|
||||
struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
|
||||
struct amdxdna_hwctx *hwctx = job->hwctx;
|
||||
struct amdxdna_dev *xdna;
|
||||
|
||||
xdna = hwctx->client->xdna;
|
||||
trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
|
||||
mutex_lock(&xdna->dev_lock);
|
||||
aie2_hwctx_stop(xdna, hwctx, sched_job);
|
||||
|
||||
aie2_hwctx_restart(xdna, hwctx);
|
||||
mutex_unlock(&xdna->dev_lock);
|
||||
|
||||
return DRM_GPU_SCHED_STAT_NOMINAL;
|
||||
}
|
||||
|
||||
const struct drm_sched_backend_ops sched_ops = {
|
||||
.run_job = aie2_sched_job_run,
|
||||
.free_job = aie2_sched_job_free,
|
||||
.timedout_job = aie2_sched_job_timedout,
|
||||
};
|
||||
|
||||
static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
|
@ -126,13 +445,50 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
|
|||
XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
|
||||
}
|
||||
|
||||
static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct drm_file *filp = hwctx->client->filp;
|
||||
struct drm_syncobj *syncobj;
|
||||
u32 hdl;
|
||||
int ret;
|
||||
|
||||
hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
|
||||
|
||||
ret = drm_syncobj_create(&syncobj, 0, NULL);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
|
||||
return ret;
|
||||
}
|
||||
ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
|
||||
if (ret) {
|
||||
drm_syncobj_put(syncobj);
|
||||
XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
|
||||
return ret;
|
||||
}
|
||||
hwctx->priv->syncobj = syncobj;
|
||||
hwctx->syncobj_hdl = hdl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
/*
|
||||
* The syncobj_hdl is owned by user space and will be cleaned up
|
||||
* separately.
|
||||
*/
|
||||
drm_syncobj_put(hwctx->priv->syncobj);
|
||||
}
|
||||
|
||||
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_client *client = hwctx->client;
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
struct drm_gpu_scheduler *sched;
|
||||
struct amdxdna_hwctx_priv *priv;
|
||||
struct amdxdna_gem_obj *heap;
|
||||
int ret;
|
||||
int i, ret;
|
||||
|
||||
priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
|
||||
if (!priv)
|
||||
|
@ -150,6 +506,7 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
|
|||
drm_gem_object_get(to_gobj(heap));
|
||||
mutex_unlock(&client->mm_lock);
|
||||
priv->heap = heap;
|
||||
sema_init(&priv->job_sem, HWCTX_MAX_CMDS);
|
||||
|
||||
ret = amdxdna_gem_pin(heap);
|
||||
if (ret) {
|
||||
|
@ -157,10 +514,52 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
|
|||
goto put_heap;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
|
||||
struct amdxdna_gem_obj *abo;
|
||||
struct amdxdna_drm_create_bo args = {
|
||||
.flags = 0,
|
||||
.type = AMDXDNA_BO_DEV,
|
||||
.vaddr = 0,
|
||||
.size = MAX_CHAIN_CMDBUF_SIZE,
|
||||
};
|
||||
|
||||
abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
|
||||
if (IS_ERR(abo)) {
|
||||
ret = PTR_ERR(abo);
|
||||
goto free_cmd_bufs;
|
||||
}
|
||||
|
||||
XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
|
||||
i, abo->mem.dev_addr, abo->mem.size);
|
||||
priv->cmd_buf[i] = abo;
|
||||
}
|
||||
|
||||
sched = &priv->sched;
|
||||
mutex_init(&priv->io_lock);
|
||||
|
||||
fs_reclaim_acquire(GFP_KERNEL);
|
||||
might_lock(&priv->io_lock);
|
||||
fs_reclaim_release(GFP_KERNEL);
|
||||
|
||||
ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
|
||||
HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
|
||||
NULL, NULL, hwctx->name, xdna->ddev.dev);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
|
||||
goto free_cmd_bufs;
|
||||
}
|
||||
|
||||
ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
|
||||
&sched, 1, NULL);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
|
||||
goto free_sched;
|
||||
}
|
||||
|
||||
ret = aie2_hwctx_col_list(hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
|
||||
goto unpin;
|
||||
goto free_entity;
|
||||
}
|
||||
|
||||
ret = aie2_alloc_resource(hwctx);
|
||||
|
@ -175,6 +574,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
|
|||
XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
|
||||
goto release_resource;
|
||||
}
|
||||
|
||||
ret = aie2_ctx_syncobj_create(hwctx);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
|
||||
goto release_resource;
|
||||
}
|
||||
|
||||
hwctx->status = HWCTX_STAT_INIT;
|
||||
|
||||
XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
|
||||
|
@ -185,7 +591,16 @@ release_resource:
|
|||
aie2_release_resource(hwctx);
|
||||
free_col_list:
|
||||
kfree(hwctx->col_list);
|
||||
unpin:
|
||||
free_entity:
|
||||
drm_sched_entity_destroy(&priv->entity);
|
||||
free_sched:
|
||||
drm_sched_fini(&priv->sched);
|
||||
free_cmd_bufs:
|
||||
for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
|
||||
if (!priv->cmd_buf[i])
|
||||
continue;
|
||||
drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
|
||||
}
|
||||
amdxdna_gem_unpin(heap);
|
||||
put_heap:
|
||||
drm_gem_object_put(to_gobj(heap));
|
||||
|
@ -196,11 +611,35 @@ free_priv:
|
|||
|
||||
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_dev *xdna;
|
||||
int idx;
|
||||
|
||||
xdna = hwctx->client->xdna;
|
||||
drm_sched_wqueue_stop(&hwctx->priv->sched);
|
||||
|
||||
/* Now, scheduler will not send command to device. */
|
||||
aie2_release_resource(hwctx);
|
||||
|
||||
/*
|
||||
* All submitted commands are aborted.
|
||||
* Restart scheduler queues to cleanup jobs. The aie2_sched_job_run()
|
||||
* will return NODEV if it is called.
|
||||
*/
|
||||
drm_sched_wqueue_start(&hwctx->priv->sched);
|
||||
|
||||
aie2_hwctx_wait_for_idle(hwctx);
|
||||
drm_sched_entity_destroy(&hwctx->priv->entity);
|
||||
drm_sched_fini(&hwctx->priv->sched);
|
||||
aie2_ctx_syncobj_destroy(hwctx);
|
||||
|
||||
XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
|
||||
|
||||
for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
|
||||
drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
|
||||
amdxdna_gem_unpin(hwctx->priv->heap);
|
||||
drm_gem_object_put(to_gobj(hwctx->priv->heap));
|
||||
|
||||
mutex_destroy(&hwctx->priv->io_lock);
|
||||
kfree(hwctx->col_list);
|
||||
kfree(hwctx->priv);
|
||||
kfree(hwctx->cus);
|
||||
|
@ -267,3 +706,165 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
|
|||
return -EOPNOTSUPP;
|
||||
}
|
||||
}
|
||||
|
||||
static int aie2_populate_range(struct amdxdna_gem_obj *abo)
|
||||
{
|
||||
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
|
||||
struct mm_struct *mm = abo->mem.notifier.mm;
|
||||
struct hmm_range range = { 0 };
|
||||
unsigned long timeout;
|
||||
int ret;
|
||||
|
||||
XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
|
||||
abo->mem.userptr, abo->mem.size);
|
||||
range.notifier = &abo->mem.notifier;
|
||||
range.start = abo->mem.userptr;
|
||||
range.end = abo->mem.userptr + abo->mem.size;
|
||||
range.hmm_pfns = abo->mem.pfns;
|
||||
range.default_flags = HMM_PFN_REQ_FAULT;
|
||||
|
||||
if (!mmget_not_zero(mm))
|
||||
return -EFAULT;
|
||||
|
||||
timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
|
||||
again:
|
||||
range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
|
||||
mmap_read_lock(mm);
|
||||
ret = hmm_range_fault(&range);
|
||||
mmap_read_unlock(mm);
|
||||
if (ret) {
|
||||
if (time_after(jiffies, timeout)) {
|
||||
ret = -ETIME;
|
||||
goto put_mm;
|
||||
}
|
||||
|
||||
if (ret == -EBUSY)
|
||||
goto again;
|
||||
|
||||
goto put_mm;
|
||||
}
|
||||
|
||||
down_read(&xdna->notifier_lock);
|
||||
if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
|
||||
up_read(&xdna->notifier_lock);
|
||||
goto again;
|
||||
}
|
||||
abo->mem.map_invalid = false;
|
||||
up_read(&xdna->notifier_lock);
|
||||
|
||||
put_mm:
|
||||
mmput(mm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Submit @job on @hwctx and report its sequence number through @seq.
 *
 * Acquires a job slot, locks the reservations of all job BOs, makes sure
 * every BO mapping is valid (re-populating and retrying if an MMU
 * invalidation hit, bounded by HMM_RANGE_DEFAULT_TIMEOUT), then arms the
 * scheduler job, attaches its finished fence to all BOs and to the
 * context syncobj, and pushes it to the scheduler entity.
 *
 * Return: 0 on success or a negative error code; on failure the job slot
 * is released and job->job_done is set.
 */
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
{
	struct amdxdna_dev *xdna = hwctx->client->xdna;
	struct ww_acquire_ctx resv_ctx;
	struct dma_fence_chain *chain;
	struct amdxdna_gem_obj *abo;
	unsigned long deadline = 0;
	int ret, idx;

	ret = down_interruptible(&hwctx->priv->job_sem);
	if (ret) {
		XDNA_ERR(xdna, "Grab job sem failed, ret %d", ret);
		return ret;
	}

	chain = dma_fence_chain_alloc();
	if (!chain) {
		XDNA_ERR(xdna, "Alloc fence chain failed");
		ret = -ENOMEM;
		goto err_up_sem;
	}

	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
	if (ret) {
		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
		goto err_free_chain;
	}

retry:
	ret = drm_gem_lock_reservations(job->bos, job->bo_cnt, &resv_ctx);
	if (ret) {
		XDNA_WARN(xdna, "Failed to lock BOs, ret %d", ret);
		goto err_cleanup_job;
	}

	for (idx = 0; idx < job->bo_cnt; idx++) {
		ret = dma_resv_reserve_fences(job->bos[idx]->resv, 1);
		if (ret) {
			XDNA_WARN(xdna, "Failed to reserve fences %d", ret);
			drm_gem_unlock_reservations(job->bos, job->bo_cnt, &resv_ctx);
			goto err_cleanup_job;
		}
	}

	down_read(&xdna->notifier_lock);
	for (idx = 0; idx < job->bo_cnt; idx++) {
		abo = to_xdna_obj(job->bos[idx]);
		if (!abo->mem.map_invalid)
			continue;

		/* Drop all locks before faulting the range back in */
		up_read(&xdna->notifier_lock);
		drm_gem_unlock_reservations(job->bos, job->bo_cnt, &resv_ctx);
		if (!deadline) {
			deadline = jiffies +
				msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
		} else if (time_after(jiffies, deadline)) {
			ret = -ETIME;
			goto err_cleanup_job;
		}

		ret = aie2_populate_range(abo);
		if (ret)
			goto err_cleanup_job;
		goto retry;
	}

	mutex_lock(&hwctx->priv->io_lock);
	drm_sched_job_arm(&job->base);
	job->out_fence = dma_fence_get(&job->base.s_fence->finished);
	for (idx = 0; idx < job->bo_cnt; idx++)
		dma_resv_add_fence(job->bos[idx]->resv, job->out_fence, DMA_RESV_USAGE_WRITE);
	job->seq = hwctx->priv->seq++;
	/* Keep the job alive across the push; dropped at the end of this function */
	kref_get(&job->refcnt);
	drm_sched_entity_push_job(&job->base);

	*seq = job->seq;
	drm_syncobj_add_point(hwctx->priv->syncobj, chain, job->out_fence, *seq);
	mutex_unlock(&hwctx->priv->io_lock);

	up_read(&xdna->notifier_lock);
	drm_gem_unlock_reservations(job->bos, job->bo_cnt, &resv_ctx);

	aie2_job_put(job);

	return 0;

err_cleanup_job:
	drm_sched_job_cleanup(&job->base);
err_free_chain:
	dma_fence_chain_free(chain);
err_up_sem:
	up(&hwctx->priv->job_sem);
	job->job_done = true;
	return ret;
}
|
||||
|
||||
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo,
|
||||
unsigned long cur_seq)
|
||||
{
|
||||
struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
|
||||
struct drm_gem_object *gobj = to_gobj(abo);
|
||||
long ret;
|
||||
|
||||
down_write(&xdna->notifier_lock);
|
||||
abo->mem.map_invalid = true;
|
||||
mmu_interval_set_seq(&abo->mem.notifier, cur_seq);
|
||||
up_write(&xdna->notifier_lock);
|
||||
ret = dma_resv_wait_timeout(gobj->resv, DMA_RESV_USAGE_BOOKKEEP,
|
||||
true, MAX_SCHEDULE_TIMEOUT);
|
||||
if (!ret || ret == -ERESTARTSYS)
|
||||
XDNA_ERR(xdna, "Failed to wait for bo, ret %ld", ret);
|
||||
}
|
||||
|
|
|
@ -4,10 +4,12 @@
|
|||
*/
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_cache.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/pci.h>
|
||||
|
@ -362,3 +364,344 @@ int aie2_config_cu(struct amdxdna_hwctx *hwctx)
|
|||
msg.opcode, resp.status, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
union {
|
||||
struct execute_buffer_req ebuf;
|
||||
struct exec_dpu_req dpu;
|
||||
} req;
|
||||
struct xdna_mailbox_msg msg;
|
||||
u32 payload_len;
|
||||
void *payload;
|
||||
int cu_idx;
|
||||
int ret;
|
||||
u32 op;
|
||||
|
||||
if (!chann)
|
||||
return -ENODEV;
|
||||
|
||||
payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
|
||||
if (!payload) {
|
||||
XDNA_ERR(xdna, "Invalid command, cannot get payload");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cu_idx = amdxdna_cmd_get_cu_idx(cmd_abo);
|
||||
if (cu_idx < 0) {
|
||||
XDNA_DBG(xdna, "Invalid cu idx");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
op = amdxdna_cmd_get_op(cmd_abo);
|
||||
switch (op) {
|
||||
case ERT_START_CU:
|
||||
if (unlikely(payload_len > sizeof(req.ebuf.payload)))
|
||||
XDNA_DBG(xdna, "Invalid ebuf payload len: %d", payload_len);
|
||||
req.ebuf.cu_idx = cu_idx;
|
||||
memcpy(req.ebuf.payload, payload, sizeof(req.ebuf.payload));
|
||||
msg.send_size = sizeof(req.ebuf);
|
||||
msg.opcode = MSG_OP_EXECUTE_BUFFER_CF;
|
||||
break;
|
||||
case ERT_START_NPU: {
|
||||
struct amdxdna_cmd_start_npu *sn = payload;
|
||||
|
||||
if (unlikely(payload_len - sizeof(*sn) > sizeof(req.dpu.payload)))
|
||||
XDNA_DBG(xdna, "Invalid dpu payload len: %d", payload_len);
|
||||
req.dpu.inst_buf_addr = sn->buffer;
|
||||
req.dpu.inst_size = sn->buffer_size;
|
||||
req.dpu.inst_prop_cnt = sn->prop_count;
|
||||
req.dpu.cu_idx = cu_idx;
|
||||
memcpy(req.dpu.payload, sn->prop_args, sizeof(req.dpu.payload));
|
||||
msg.send_size = sizeof(req.dpu);
|
||||
msg.opcode = MSG_OP_EXEC_DPU;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
XDNA_DBG(xdna, "Invalid ERT cmd op code: %d", op);
|
||||
return -EINVAL;
|
||||
}
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
print_hex_dump_debug("cmd: ", DUMP_PREFIX_OFFSET, 16, 4, &req,
|
||||
0x40, false);
|
||||
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_cmdlist_fill_one_slot_cf(void *cmd_buf, u32 offset,
|
||||
struct amdxdna_gem_obj *abo, u32 *size)
|
||||
{
|
||||
struct cmd_chain_slot_execbuf_cf *buf = cmd_buf + offset;
|
||||
int cu_idx = amdxdna_cmd_get_cu_idx(abo);
|
||||
u32 payload_len;
|
||||
void *payload;
|
||||
|
||||
if (cu_idx < 0)
|
||||
return -EINVAL;
|
||||
|
||||
payload = amdxdna_cmd_get_payload(abo, &payload_len);
|
||||
if (!payload)
|
||||
return -EINVAL;
|
||||
|
||||
if (!slot_cf_has_space(offset, payload_len))
|
||||
return -ENOSPC;
|
||||
|
||||
buf->cu_idx = cu_idx;
|
||||
buf->arg_cnt = payload_len / sizeof(u32);
|
||||
memcpy(buf->args, payload, payload_len);
|
||||
/* Accurate buf size to hint firmware to do necessary copy */
|
||||
*size = sizeof(*buf) + payload_len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_cmdlist_fill_one_slot_dpu(void *cmd_buf, u32 offset,
|
||||
struct amdxdna_gem_obj *abo, u32 *size)
|
||||
{
|
||||
struct cmd_chain_slot_dpu *buf = cmd_buf + offset;
|
||||
int cu_idx = amdxdna_cmd_get_cu_idx(abo);
|
||||
struct amdxdna_cmd_start_npu *sn;
|
||||
u32 payload_len;
|
||||
void *payload;
|
||||
u32 arg_sz;
|
||||
|
||||
if (cu_idx < 0)
|
||||
return -EINVAL;
|
||||
|
||||
payload = amdxdna_cmd_get_payload(abo, &payload_len);
|
||||
if (!payload)
|
||||
return -EINVAL;
|
||||
sn = payload;
|
||||
arg_sz = payload_len - sizeof(*sn);
|
||||
if (payload_len < sizeof(*sn) || arg_sz > MAX_DPU_ARGS_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
if (!slot_dpu_has_space(offset, arg_sz))
|
||||
return -ENOSPC;
|
||||
|
||||
buf->inst_buf_addr = sn->buffer;
|
||||
buf->inst_size = sn->buffer_size;
|
||||
buf->inst_prop_cnt = sn->prop_count;
|
||||
buf->cu_idx = cu_idx;
|
||||
buf->arg_cnt = arg_sz / sizeof(u32);
|
||||
memcpy(buf->args, sn->prop_args, arg_sz);
|
||||
|
||||
/* Accurate buf size to hint firmware to do necessary copy */
|
||||
*size += sizeof(*buf) + arg_sz;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
aie2_cmdlist_fill_one_slot(u32 op, struct amdxdna_gem_obj *cmdbuf_abo, u32 offset,
|
||||
struct amdxdna_gem_obj *abo, u32 *size)
|
||||
{
|
||||
u32 this_op = amdxdna_cmd_get_op(abo);
|
||||
void *cmd_buf = cmdbuf_abo->mem.kva;
|
||||
int ret;
|
||||
|
||||
if (this_op != op) {
|
||||
ret = -EINVAL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
switch (op) {
|
||||
case ERT_START_CU:
|
||||
ret = aie2_cmdlist_fill_one_slot_cf(cmd_buf, offset, abo, size);
|
||||
break;
|
||||
case ERT_START_NPU:
|
||||
ret = aie2_cmdlist_fill_one_slot_dpu(cmd_buf, offset, abo, size);
|
||||
break;
|
||||
default:
|
||||
ret = -EOPNOTSUPP;
|
||||
}
|
||||
|
||||
done:
|
||||
if (ret) {
|
||||
XDNA_ERR(abo->client->xdna, "Can't fill slot for cmd op %d ret %d",
|
||||
op, ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline struct amdxdna_gem_obj *
|
||||
aie2_cmdlist_get_cmd_buf(struct amdxdna_sched_job *job)
|
||||
{
|
||||
int idx = get_job_idx(job->seq);
|
||||
|
||||
return job->hwctx->priv->cmd_buf[idx];
|
||||
}
|
||||
|
||||
static void
|
||||
aie2_cmdlist_prepare_request(struct cmd_chain_req *req,
|
||||
struct amdxdna_gem_obj *cmdbuf_abo, u32 size, u32 cnt)
|
||||
{
|
||||
req->buf_addr = cmdbuf_abo->mem.dev_addr;
|
||||
req->buf_size = size;
|
||||
req->count = cnt;
|
||||
drm_clflush_virt_range(cmdbuf_abo->mem.kva, size);
|
||||
XDNA_DBG(cmdbuf_abo->client->xdna, "Command buf addr 0x%llx size 0x%x count %d",
|
||||
req->buf_addr, size, cnt);
|
||||
}
|
||||
|
||||
static inline u32
|
||||
aie2_cmd_op_to_msg_op(u32 op)
|
||||
{
|
||||
switch (op) {
|
||||
case ERT_START_CU:
|
||||
return MSG_OP_CHAIN_EXEC_BUFFER_CF;
|
||||
case ERT_START_NPU:
|
||||
return MSG_OP_CHAIN_EXEC_DPU;
|
||||
default:
|
||||
return MSG_OP_MAX_OPCODE;
|
||||
}
|
||||
}
|
||||
|
||||
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_client *client = hwctx->client;
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
struct amdxdna_cmd_chain *payload;
|
||||
struct xdna_mailbox_msg msg;
|
||||
struct cmd_chain_req req;
|
||||
u32 payload_len;
|
||||
u32 offset = 0;
|
||||
u32 size;
|
||||
int ret;
|
||||
u32 op;
|
||||
u32 i;
|
||||
|
||||
op = amdxdna_cmd_get_op(cmd_abo);
|
||||
payload = amdxdna_cmd_get_payload(cmd_abo, &payload_len);
|
||||
if (op != ERT_CMD_CHAIN || !payload ||
|
||||
payload_len < struct_size(payload, data, payload->command_count))
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < payload->command_count; i++) {
|
||||
u32 boh = (u32)(payload->data[i]);
|
||||
struct amdxdna_gem_obj *abo;
|
||||
|
||||
abo = amdxdna_gem_get_obj(client, boh, AMDXDNA_BO_CMD);
|
||||
if (!abo) {
|
||||
XDNA_ERR(client->xdna, "Failed to find cmd BO %d", boh);
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
/* All sub-cmd should have same op, use the first one. */
|
||||
if (i == 0)
|
||||
op = amdxdna_cmd_get_op(abo);
|
||||
|
||||
ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, offset, abo, &size);
|
||||
amdxdna_gem_put_obj(abo);
|
||||
if (ret)
|
||||
return -EINVAL;
|
||||
|
||||
offset += size;
|
||||
}
|
||||
|
||||
/* The offset is the accumulated total size of the cmd buffer */
|
||||
aie2_cmdlist_prepare_request(&req, cmdbuf_abo, offset, payload->command_count);
|
||||
|
||||
msg.opcode = aie2_cmd_op_to_msg_op(op);
|
||||
if (msg.opcode == MSG_OP_MAX_OPCODE)
|
||||
return -EOPNOTSUPP;
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
msg.send_size = sizeof(req);
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(hwctx->client->xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct amdxdna_gem_obj *cmdbuf_abo = aie2_cmdlist_get_cmd_buf(job);
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
|
||||
struct xdna_mailbox_msg msg;
|
||||
struct cmd_chain_req req;
|
||||
u32 size;
|
||||
int ret;
|
||||
u32 op;
|
||||
|
||||
op = amdxdna_cmd_get_op(cmd_abo);
|
||||
ret = aie2_cmdlist_fill_one_slot(op, cmdbuf_abo, 0, cmd_abo, &size);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
aie2_cmdlist_prepare_request(&req, cmdbuf_abo, size, 1);
|
||||
|
||||
msg.opcode = aie2_cmd_op_to_msg_op(op);
|
||||
if (msg.opcode == MSG_OP_MAX_OPCODE)
|
||||
return -EOPNOTSUPP;
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
msg.send_size = sizeof(req);
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(hwctx->client->xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t))
|
||||
{
|
||||
struct mailbox_channel *chann = hwctx->priv->mbox_chann;
|
||||
struct amdxdna_gem_obj *abo = to_xdna_obj(job->bos[0]);
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
struct xdna_mailbox_msg msg;
|
||||
struct sync_bo_req req;
|
||||
int ret = 0;
|
||||
|
||||
req.src_addr = 0;
|
||||
req.dst_addr = abo->mem.dev_addr - hwctx->client->dev_heap->mem.dev_addr;
|
||||
req.size = abo->mem.size;
|
||||
|
||||
/* Device to Host */
|
||||
req.type = FIELD_PREP(AIE2_MSG_SYNC_BO_SRC_TYPE, SYNC_BO_DEV_MEM) |
|
||||
FIELD_PREP(AIE2_MSG_SYNC_BO_DST_TYPE, SYNC_BO_HOST_MEM);
|
||||
|
||||
XDNA_DBG(xdna, "sync %d bytes src(0x%llx) to dst(0x%llx) completed",
|
||||
req.size, req.src_addr, req.dst_addr);
|
||||
|
||||
msg.handle = job;
|
||||
msg.notify_cb = notify_cb;
|
||||
msg.send_data = (u8 *)&req;
|
||||
msg.send_size = sizeof(req);
|
||||
msg.opcode = MSG_OP_SYNC_BO;
|
||||
|
||||
ret = xdna_mailbox_send_msg(chann, &msg, TX_TIMEOUT);
|
||||
if (ret) {
|
||||
XDNA_ERR(xdna, "Send message failed");
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -5,8 +5,10 @@
|
|||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/errno.h>
|
||||
#include <linux/firmware.h>
|
||||
#include <linux/iommu.h>
|
||||
|
@ -17,6 +19,7 @@
|
|||
#include "aie2_pci.h"
|
||||
#include "aie2_solver.h"
|
||||
#include "amdxdna_ctx.h"
|
||||
#include "amdxdna_gem.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
|
@ -496,4 +499,6 @@ const struct amdxdna_dev_ops aie2_ops = {
|
|||
.hwctx_init = aie2_hwctx_init,
|
||||
.hwctx_fini = aie2_hwctx_fini,
|
||||
.hwctx_config = aie2_hwctx_config,
|
||||
.cmd_submit = aie2_cmd_submit,
|
||||
.hmm_invalidate = aie2_hmm_invalidate,
|
||||
};
|
||||
|
|
|
@ -6,6 +6,8 @@
|
|||
#ifndef _AIE2_PCI_H_
|
||||
#define _AIE2_PCI_H_
|
||||
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
#include "amdxdna_mailbox.h"
|
||||
|
||||
#define AIE2_INTERVAL 20000 /* us */
|
||||
|
@ -76,8 +78,10 @@ enum psp_reg_idx {
|
|||
PSP_MAX_REGS /* Keep this at the end */
|
||||
};
|
||||
|
||||
struct amdxdna_client;
|
||||
struct amdxdna_fw_ver;
|
||||
struct amdxdna_hwctx;
|
||||
struct amdxdna_sched_job;
|
||||
|
||||
struct psp_config {
|
||||
const void *fw_buf;
|
||||
|
@ -118,9 +122,31 @@ struct rt_config {
|
|||
u32 value;
|
||||
};
|
||||
|
||||
/*
 * Maximum number of pending commands in a hardware context.
 *
 * Must be a power of 2: get_job_idx() reduces a sequence number to a slot
 * index by masking with (HWCTX_MAX_CMDS - 1).
 */
#define HWCTX_MAX_CMDS		4
#define get_job_idx(seq)	((seq) & (HWCTX_MAX_CMDS - 1))
|
||||
struct amdxdna_hwctx_priv {
|
||||
struct amdxdna_gem_obj *heap;
|
||||
void *mbox_chann;
|
||||
|
||||
struct drm_gpu_scheduler sched;
|
||||
struct drm_sched_entity entity;
|
||||
|
||||
struct mutex io_lock; /* protect seq and cmd order */
|
||||
struct wait_queue_head job_free_wq;
|
||||
u32 num_pending;
|
||||
u64 seq;
|
||||
struct semaphore job_sem;
|
||||
bool job_done;
|
||||
|
||||
/* Completed job counter */
|
||||
u64 completed;
|
||||
|
||||
struct amdxdna_gem_obj *cmd_buf[HWCTX_MAX_CMDS];
|
||||
struct drm_syncobj *syncobj;
|
||||
};
|
||||
|
||||
struct amdxdna_dev_hdl {
|
||||
|
@ -199,10 +225,23 @@ int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwct
|
|||
int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
|
||||
int aie2_map_host_buf(struct amdxdna_dev_hdl *ndev, u32 context_id, u64 addr, u64 size);
|
||||
int aie2_config_cu(struct amdxdna_hwctx *hwctx);
|
||||
int aie2_execbuf(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
int aie2_cmdlist_single_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
int aie2_cmdlist_multi_execbuf(struct amdxdna_hwctx *hwctx,
|
||||
struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
int aie2_sync_bo(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job,
|
||||
int (*notify_cb)(void *, const u32 *, size_t));
|
||||
|
||||
/* aie2_hwctx.c */
|
||||
int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
|
||||
void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
|
||||
int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
|
||||
int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
|
||||
void aie2_hmm_invalidate(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
|
||||
void aie2_restart_ctx(struct amdxdna_client *client);
|
||||
|
||||
#endif /* _AIE2_PCI_H_ */
|
||||
|
|
|
@ -4,8 +4,10 @@
|
|||
*/
|
||||
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/bitfield.h>
|
||||
#include <linux/iopoll.h>
|
||||
|
||||
|
|
|
@ -4,7 +4,9 @@
|
|||
*/
|
||||
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/iopoll.h>
|
||||
|
||||
#include "aie2_pci.h"
|
||||
|
|
|
@ -7,17 +7,65 @@
|
|||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_drv.h>
|
||||
#include <drm/drm_file.h>
|
||||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <trace/events/amdxdna.h>
|
||||
|
||||
#include "amdxdna_ctx.h"
|
||||
#include "amdxdna_gem.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
#define MAX_HWCTX_ID 255
|
||||
#define MAX_ARG_COUNT 4095
|
||||
|
||||
static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx)
|
||||
struct amdxdna_fence {
|
||||
struct dma_fence base;
|
||||
spinlock_t lock; /* for base */
|
||||
struct amdxdna_hwctx *hwctx;
|
||||
};
|
||||
|
||||
static const char *amdxdna_fence_get_driver_name(struct dma_fence *fence)
|
||||
{
|
||||
return KBUILD_MODNAME;
|
||||
}
|
||||
|
||||
static const char *amdxdna_fence_get_timeline_name(struct dma_fence *fence)
|
||||
{
|
||||
struct amdxdna_fence *xdna_fence;
|
||||
|
||||
xdna_fence = container_of(fence, struct amdxdna_fence, base);
|
||||
|
||||
return xdna_fence->hwctx->name;
|
||||
}
|
||||
|
||||
static const struct dma_fence_ops fence_ops = {
|
||||
.get_driver_name = amdxdna_fence_get_driver_name,
|
||||
.get_timeline_name = amdxdna_fence_get_timeline_name,
|
||||
};
|
||||
|
||||
static struct dma_fence *amdxdna_fence_create(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
struct amdxdna_fence *fence;
|
||||
|
||||
fence = kzalloc(sizeof(*fence), GFP_KERNEL);
|
||||
if (!fence)
|
||||
return NULL;
|
||||
|
||||
fence->hwctx = hwctx;
|
||||
spin_lock_init(&fence->lock);
|
||||
dma_fence_init(&fence->base, &fence_ops, &fence->lock, hwctx->id, 0);
|
||||
return &fence->base;
|
||||
}
|
||||
|
||||
static void amdxdna_hwctx_destroy_rcu(struct amdxdna_hwctx *hwctx,
|
||||
struct srcu_struct *ss)
|
||||
{
|
||||
struct amdxdna_dev *xdna = hwctx->client->xdna;
|
||||
|
||||
synchronize_srcu(ss);
|
||||
|
||||
/* At this point, user is not able to submit new commands */
|
||||
mutex_lock(&xdna->dev_lock);
|
||||
xdna->dev_info->ops->hwctx_fini(hwctx);
|
||||
|
@ -27,6 +75,46 @@ static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx)
|
|||
kfree(hwctx);
|
||||
}
|
||||
|
||||
/*
 * Locate the payload of the command stored in @abo.
 *
 * The payload starts after the CU mask words in cmd->data. An ERT_CMD_CHAIN
 * command has no mask words; any other command has one mask word plus the
 * number encoded in the AMDXDNA_CMD_EXTRA_CU_MASK header field.
 *
 * @size is optional. When given, it receives the payload size in bytes,
 * derived from the AMDXDNA_CMD_COUNT header field. If that count does not
 * exceed the number of mask words, *size is set to 0 and NULL is returned.
 * When @size is NULL the count is not checked.
 *
 * Return: pointer to the first payload word, or NULL as described above.
 */
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size)
{
	struct amdxdna_cmd *cmd = abo->mem.kva;
	u32 num_masks = 0;
	u32 count;

	if (amdxdna_cmd_get_op(abo) != ERT_CMD_CHAIN)
		num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);

	/* Caller does not want the size: skip the count validation. */
	if (!size)
		return &cmd->data[num_masks];

	count = FIELD_GET(AMDXDNA_CMD_COUNT, cmd->header);
	if (unlikely(count <= num_masks)) {
		*size = 0;
		return NULL;
	}

	*size = (count - num_masks) * sizeof(u32);
	return &cmd->data[num_masks];
}
|
||||
|
||||
int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo)
|
||||
{
|
||||
struct amdxdna_cmd *cmd = abo->mem.kva;
|
||||
u32 num_masks, i;
|
||||
u32 *cu_mask;
|
||||
|
||||
if (amdxdna_cmd_get_op(abo) == ERT_CMD_CHAIN)
|
||||
return -1;
|
||||
|
||||
num_masks = 1 + FIELD_GET(AMDXDNA_CMD_EXTRA_CU_MASK, cmd->header);
|
||||
cu_mask = cmd->data;
|
||||
for (i = 0; i < num_masks; i++) {
|
||||
if (cu_mask[i])
|
||||
return ffs(cu_mask[i]) - 1;
|
||||
}
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This should be called in close() and remove(). DO NOT call in other syscalls.
|
||||
* This guarantee that when hwctx and resources will be released, if user
|
||||
|
@ -43,7 +131,7 @@ void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
|
|||
client->pid, hwctx->id);
|
||||
idr_remove(&client->hwctx_idr, hwctx->id);
|
||||
mutex_unlock(&client->hwctx_lock);
|
||||
amdxdna_hwctx_destroy(hwctx);
|
||||
amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
|
||||
mutex_lock(&client->hwctx_lock);
|
||||
}
|
||||
mutex_unlock(&client->hwctx_lock);
|
||||
|
@ -135,6 +223,12 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
|
|||
if (!drm_dev_enter(dev, &idx))
|
||||
return -ENODEV;
|
||||
|
||||
/*
|
||||
* Use hwctx_lock to achieve exclusion with other hwctx writers,
|
||||
* SRCU to synchronize with exec/wait command ioctls.
|
||||
*
|
||||
* The pushed jobs are handled by DRM scheduler during destroy.
|
||||
*/
|
||||
mutex_lock(&client->hwctx_lock);
|
||||
hwctx = idr_find(&client->hwctx_idr, args->handle);
|
||||
if (!hwctx) {
|
||||
|
@ -147,7 +241,7 @@ int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct d
|
|||
idr_remove(&client->hwctx_idr, hwctx->id);
|
||||
mutex_unlock(&client->hwctx_lock);
|
||||
|
||||
amdxdna_hwctx_destroy(hwctx);
|
||||
amdxdna_hwctx_destroy_rcu(hwctx, &client->hwctx_srcu);
|
||||
|
||||
XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
|
||||
out:
|
||||
|
@ -161,10 +255,10 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
|
|||
struct amdxdna_drm_config_hwctx *args = data;
|
||||
struct amdxdna_dev *xdna = to_xdna_dev(dev);
|
||||
struct amdxdna_hwctx *hwctx;
|
||||
int ret, idx;
|
||||
u32 buf_size;
|
||||
void *buf;
|
||||
u64 val;
|
||||
int ret;
|
||||
|
||||
if (!xdna->dev_info->ops->hwctx_config)
|
||||
return -EOPNOTSUPP;
|
||||
|
@ -203,17 +297,231 @@ int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct dr
|
|||
}
|
||||
|
||||
mutex_lock(&xdna->dev_lock);
|
||||
idx = srcu_read_lock(&client->hwctx_srcu);
|
||||
hwctx = idr_find(&client->hwctx_idr, args->handle);
|
||||
if (!hwctx) {
|
||||
XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
goto unlock_srcu;
|
||||
}
|
||||
|
||||
ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
|
||||
|
||||
unlock:
|
||||
unlock_srcu:
|
||||
srcu_read_unlock(&client->hwctx_srcu, idx);
|
||||
mutex_unlock(&xdna->dev_lock);
|
||||
kfree(buf);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void
|
||||
amdxdna_arg_bos_put(struct amdxdna_sched_job *job)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < job->bo_cnt; i++) {
|
||||
if (!job->bos[i])
|
||||
break;
|
||||
drm_gem_object_put(job->bos[i]);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
amdxdna_arg_bos_lookup(struct amdxdna_client *client,
|
||||
struct amdxdna_sched_job *job,
|
||||
u32 *bo_hdls, u32 bo_cnt)
|
||||
{
|
||||
struct drm_gem_object *gobj;
|
||||
int i, ret;
|
||||
|
||||
job->bo_cnt = bo_cnt;
|
||||
for (i = 0; i < job->bo_cnt; i++) {
|
||||
struct amdxdna_gem_obj *abo;
|
||||
|
||||
gobj = drm_gem_object_lookup(client->filp, bo_hdls[i]);
|
||||
if (!gobj) {
|
||||
ret = -ENOENT;
|
||||
goto put_shmem_bo;
|
||||
}
|
||||
abo = to_xdna_obj(gobj);
|
||||
|
||||
mutex_lock(&abo->lock);
|
||||
if (abo->pinned) {
|
||||
mutex_unlock(&abo->lock);
|
||||
job->bos[i] = gobj;
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = amdxdna_gem_pin_nolock(abo);
|
||||
if (ret) {
|
||||
mutex_unlock(&abo->lock);
|
||||
drm_gem_object_put(gobj);
|
||||
goto put_shmem_bo;
|
||||
}
|
||||
abo->pinned = true;
|
||||
mutex_unlock(&abo->lock);
|
||||
|
||||
job->bos[i] = gobj;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
put_shmem_bo:
|
||||
amdxdna_arg_bos_put(job);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job)
|
||||
{
|
||||
trace_amdxdna_debug_point(job->hwctx->name, job->seq, "job release");
|
||||
amdxdna_arg_bos_put(job);
|
||||
amdxdna_gem_put_obj(job->cmd_bo);
|
||||
}
|
||||
|
||||
/*
 * Build a job from a command BO and its argument BOs and hand it to the
 * device layer for execution on a hardware context.
 *
 * @client:      submitting client
 * @cmd_bo_hdl:  command BO handle, or AMDXDNA_INVALID_BO_HANDLE for a job
 *               without a command BO
 * @arg_bo_hdls: array of argument BO handles
 * @arg_bo_cnt:  number of entries in @arg_bo_hdls
 * @hwctx_hdl:   handle of the target hardware context
 * @seq:         filled in by the device layer's cmd_submit callback
 *
 * The hardware context is looked up and used inside an SRCU read-side
 * section so that it cannot be destroyed while the job is being submitted.
 *
 * Return: 0 on success. -ENOMEM if the job or its fence cannot be allocated,
 * -EINVAL if the command BO or hardware context cannot be found or the
 * context is not ready, otherwise the error from the argument BO lookup or
 * from the device layer's cmd_submit callback. On failure all references
 * taken here are dropped and the job is freed.
 */
int amdxdna_cmd_submit(struct amdxdna_client *client,
		       u32 cmd_bo_hdl, u32 *arg_bo_hdls, u32 arg_bo_cnt,
		       u32 hwctx_hdl, u64 *seq)
{
	struct amdxdna_dev *xdna = client->xdna;
	struct amdxdna_sched_job *job;
	struct amdxdna_hwctx *hwctx;
	int ret, idx;

	XDNA_DBG(xdna, "Command BO hdl %d, Arg BO count %d", cmd_bo_hdl, arg_bo_cnt);
	job = kzalloc(struct_size(job, bos, arg_bo_cnt), GFP_KERNEL);
	if (!job)
		return -ENOMEM;

	if (cmd_bo_hdl != AMDXDNA_INVALID_BO_HANDLE) {
		job->cmd_bo = amdxdna_gem_get_obj(client, cmd_bo_hdl, AMDXDNA_BO_CMD);
		if (!job->cmd_bo) {
			XDNA_ERR(xdna, "Failed to get cmd bo from %d", cmd_bo_hdl);
			ret = -EINVAL;
			goto free_job;
		}
	} else {
		job->cmd_bo = NULL;
	}

	ret = amdxdna_arg_bos_lookup(client, job, arg_bo_hdls, arg_bo_cnt);
	if (ret) {
		XDNA_ERR(xdna, "Argument BOs lookup failed, ret %d", ret);
		goto cmd_put;
	}

	idx = srcu_read_lock(&client->hwctx_srcu);
	hwctx = idr_find(&client->hwctx_idr, hwctx_hdl);
	if (!hwctx) {
		XDNA_DBG(xdna, "PID %d failed to get hwctx %d",
			 client->pid, hwctx_hdl);
		ret = -EINVAL;
		goto unlock_srcu;
	}

	if (hwctx->status != HWCTX_STAT_READY) {
		XDNA_ERR(xdna, "HW Context is not ready");
		ret = -EINVAL;
		goto unlock_srcu;
	}

	job->hwctx = hwctx;
	job->mm = current->mm;

	job->fence = amdxdna_fence_create(hwctx);
	if (!job->fence) {
		XDNA_ERR(xdna, "Failed to create fence");
		ret = -ENOMEM;
		goto unlock_srcu;
	}
	kref_init(&job->refcnt);

	ret = xdna->dev_info->ops->cmd_submit(hwctx, job, seq);
	if (ret)
		goto put_fence;

	/*
	 * Emit the tracepoint while still inside the SRCU read-side section:
	 * it reads hwctx->name, and hwctx may be freed by
	 * amdxdna_hwctx_destroy_rcu() as soon as the section is left.
	 */
	trace_amdxdna_debug_point(hwctx->name, *seq, "job pushed");

	/*
	 * The amdxdna_hwctx_destroy_rcu() will release hwctx and associated
	 * resource after synchronize_srcu(). The submitted jobs should be
	 * handled by the queue, for example DRM scheduler, in device layer.
	 * For here we can unlock SRCU.
	 */
	srcu_read_unlock(&client->hwctx_srcu, idx);

	return 0;

put_fence:
	dma_fence_put(job->fence);
unlock_srcu:
	srcu_read_unlock(&client->hwctx_srcu, idx);
	amdxdna_arg_bos_put(job);
cmd_put:
	amdxdna_gem_put_obj(job->cmd_bo);
free_job:
	kfree(job);
	return ret;
}
|
||||
|
||||
/*
|
||||
* The submit command ioctl submits a command to firmware. One firmware command
|
||||
* may contain multiple command BOs for processing as a whole.
|
||||
* The command sequence number is returned which can be used for wait command ioctl.
|
||||
*/
|
||||
static int amdxdna_drm_submit_execbuf(struct amdxdna_client *client,
|
||||
struct amdxdna_drm_exec_cmd *args)
|
||||
{
|
||||
struct amdxdna_dev *xdna = client->xdna;
|
||||
u32 *arg_bo_hdls;
|
||||
u32 cmd_bo_hdl;
|
||||
int ret;
|
||||
|
||||
if (!args->arg_count || args->arg_count > MAX_ARG_COUNT) {
|
||||
XDNA_ERR(xdna, "Invalid arg bo count %d", args->arg_count);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* Only support single command for now. */
|
||||
if (args->cmd_count != 1) {
|
||||
XDNA_ERR(xdna, "Invalid cmd bo count %d", args->cmd_count);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cmd_bo_hdl = (u32)args->cmd_handles;
|
||||
arg_bo_hdls = kcalloc(args->arg_count, sizeof(u32), GFP_KERNEL);
|
||||
if (!arg_bo_hdls)
|
||||
return -ENOMEM;
|
||||
ret = copy_from_user(arg_bo_hdls, u64_to_user_ptr(args->args),
|
||||
args->arg_count * sizeof(u32));
|
||||
if (ret) {
|
||||
ret = -EFAULT;
|
||||
goto free_cmd_bo_hdls;
|
||||
}
|
||||
|
||||
ret = amdxdna_cmd_submit(client, cmd_bo_hdl, arg_bo_hdls,
|
||||
args->arg_count, args->hwctx, &args->seq);
|
||||
if (ret)
|
||||
XDNA_DBG(xdna, "Submit cmds failed, ret %d", ret);
|
||||
|
||||
free_cmd_bo_hdls:
|
||||
kfree(arg_bo_hdls);
|
||||
if (!ret)
|
||||
XDNA_DBG(xdna, "Pushed cmd %lld to scheduler", args->seq);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
|
||||
{
|
||||
struct amdxdna_client *client = filp->driver_priv;
|
||||
struct amdxdna_drm_exec_cmd *args = data;
|
||||
|
||||
if (args->ext || args->ext_flags)
|
||||
return -EINVAL;
|
||||
|
||||
switch (args->type) {
|
||||
case AMDXDNA_CMD_SUBMIT_EXEC_BUF:
|
||||
return amdxdna_drm_submit_execbuf(client, args);
|
||||
}
|
||||
|
||||
XDNA_ERR(client->xdna, "Invalid command type %d", args->type);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
|
|
@ -6,6 +6,54 @@
|
|||
#ifndef _AMDXDNA_CTX_H_
|
||||
#define _AMDXDNA_CTX_H_
|
||||
|
||||
#include <linux/bitfield.h>
|
||||
|
||||
#include "amdxdna_gem.h"
|
||||
|
||||
struct amdxdna_hwctx_priv;
|
||||
|
||||
/* Opcodes carried in the command header, see amdxdna_cmd_get_op(). */
enum ert_cmd_opcode {
	ERT_START_CU	= 0,
	ERT_CMD_CHAIN	= 19,
	ERT_START_NPU	= 20,
};
|
||||
|
||||
/* Command states stored in the AMDXDNA_CMD_STATE field of the command header. */
enum ert_cmd_state {
	ERT_CMD_STATE_INVALID,
	ERT_CMD_STATE_NEW,
	ERT_CMD_STATE_QUEUED,
	ERT_CMD_STATE_RUNNING,
	ERT_CMD_STATE_COMPLETED,
	ERT_CMD_STATE_ERROR,
	ERT_CMD_STATE_ABORT,
	ERT_CMD_STATE_SUBMITTED,
	ERT_CMD_STATE_TIMEOUT,
	ERT_CMD_STATE_NORESPONSE,
};
|
||||
|
||||
/*
|
||||
* Interpretation of the beginning of data payload for ERT_START_NPU in
|
||||
* amdxdna_cmd. The rest of the payload in amdxdna_cmd is regular kernel args.
|
||||
*/
|
||||
struct amdxdna_cmd_start_npu {
|
||||
u64 buffer; /* instruction buffer address */
|
||||
u32 buffer_size; /* size of buffer in bytes */
|
||||
u32 prop_count; /* properties count */
|
||||
u32 prop_args[]; /* properties and regular kernel arguments */
|
||||
};
|
||||
|
||||
/*
|
||||
* Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
|
||||
* amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
|
||||
*/
|
||||
struct amdxdna_cmd_chain {
|
||||
u32 command_count;
|
||||
u32 submit_index;
|
||||
u32 error_index;
|
||||
u32 reserved[3];
|
||||
u64 data[] __counted_by(command_count);
|
||||
};
|
||||
|
||||
/* Exec buffer command header format */
|
||||
#define AMDXDNA_CMD_STATE GENMASK(3, 0)
|
||||
#define AMDXDNA_CMD_EXTRA_CU_MASK GENMASK(11, 10)
|
||||
|
@ -41,9 +89,72 @@ struct amdxdna_hwctx {
|
|||
u32 syncobj_hdl;
|
||||
};
|
||||
|
||||
/* Map a DRM scheduler job back to the amdxdna job that embeds it. */
#define drm_job_to_xdna_job(j) \
	container_of(j, struct amdxdna_sched_job, base)
|
||||
|
||||
struct amdxdna_sched_job {
|
||||
struct drm_sched_job base;
|
||||
struct kref refcnt;
|
||||
struct amdxdna_hwctx *hwctx;
|
||||
struct mm_struct *mm;
|
||||
/* The fence to notice DRM scheduler that job is done by hardware */
|
||||
struct dma_fence *fence;
|
||||
/* user can wait on this fence */
|
||||
struct dma_fence *out_fence;
|
||||
bool job_done;
|
||||
u64 seq;
|
||||
struct amdxdna_gem_obj *cmd_bo;
|
||||
size_t bo_cnt;
|
||||
struct drm_gem_object *bos[] __counted_by(bo_cnt);
|
||||
};
|
||||
|
||||
static inline u32
|
||||
amdxdna_cmd_get_op(struct amdxdna_gem_obj *abo)
|
||||
{
|
||||
struct amdxdna_cmd *cmd = abo->mem.kva;
|
||||
|
||||
return FIELD_GET(AMDXDNA_CMD_OPCODE, cmd->header);
|
||||
}
|
||||
|
||||
static inline void
|
||||
amdxdna_cmd_set_state(struct amdxdna_gem_obj *abo, enum ert_cmd_state s)
|
||||
{
|
||||
struct amdxdna_cmd *cmd = abo->mem.kva;
|
||||
|
||||
cmd->header &= ~AMDXDNA_CMD_STATE;
|
||||
cmd->header |= FIELD_PREP(AMDXDNA_CMD_STATE, s);
|
||||
}
|
||||
|
||||
static inline enum ert_cmd_state
|
||||
amdxdna_cmd_get_state(struct amdxdna_gem_obj *abo)
|
||||
{
|
||||
struct amdxdna_cmd *cmd = abo->mem.kva;
|
||||
|
||||
return FIELD_GET(AMDXDNA_CMD_STATE, cmd->header);
|
||||
}
|
||||
|
||||
void *amdxdna_cmd_get_payload(struct amdxdna_gem_obj *abo, u32 *size);
|
||||
int amdxdna_cmd_get_cu_idx(struct amdxdna_gem_obj *abo);
|
||||
|
||||
static inline u32 amdxdna_hwctx_col_map(struct amdxdna_hwctx *hwctx)
|
||||
{
|
||||
return GENMASK(hwctx->start_col + hwctx->num_col - 1,
|
||||
hwctx->start_col);
|
||||
}
|
||||
|
||||
void amdxdna_sched_job_cleanup(struct amdxdna_sched_job *job);
|
||||
void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
|
||||
|
||||
int amdxdna_cmd_submit(struct amdxdna_client *client,
|
||||
u32 cmd_bo_hdls, u32 *arg_bo_hdls, u32 arg_bo_cnt,
|
||||
u32 hwctx_hdl, u64 *seq);
|
||||
|
||||
int amdxdna_cmd_wait(struct amdxdna_client *client, u32 hwctx_hdl,
|
||||
u64 seq, u32 timeout);
|
||||
|
||||
int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
|
||||
int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
|
||||
int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
|
||||
int amdxdna_drm_submit_cmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
|
||||
|
||||
#endif /* _AMDXDNA_CTX_H_ */
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/iosys-map.h>
|
||||
#include <linux/vmalloc.h>
|
||||
|
||||
|
|
|
@ -3,10 +3,15 @@
|
|||
* Copyright (C) 2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/drm_gem.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/completion.h>
|
||||
|
||||
#include "amdxdna_gem.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_mailbox_helper.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_ioctl.h>
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/iommu.h>
|
||||
#include <linux/pci.h>
|
||||
|
||||
|
@ -64,6 +65,7 @@ static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
|
|||
goto unbind_sva;
|
||||
}
|
||||
mutex_init(&client->hwctx_lock);
|
||||
init_srcu_struct(&client->hwctx_srcu);
|
||||
idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1);
|
||||
mutex_init(&client->mm_lock);
|
||||
|
||||
|
@ -93,6 +95,7 @@ static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
|
|||
XDNA_DBG(xdna, "closing pid %d", client->pid);
|
||||
|
||||
idr_destroy(&client->hwctx_idr);
|
||||
cleanup_srcu_struct(&client->hwctx_srcu);
|
||||
mutex_destroy(&client->hwctx_lock);
|
||||
mutex_destroy(&client->mm_lock);
|
||||
if (client->dev_heap)
|
||||
|
@ -133,6 +136,8 @@ static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
|
|||
DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_BO, amdxdna_drm_create_bo_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(AMDXDNA_GET_BO_INFO, amdxdna_drm_get_bo_info_ioctl, 0),
|
||||
DRM_IOCTL_DEF_DRV(AMDXDNA_SYNC_BO, amdxdna_drm_sync_bo_ioctl, 0),
|
||||
/* Execution */
|
||||
DRM_IOCTL_DEF_DRV(AMDXDNA_EXEC_CMD, amdxdna_drm_submit_cmd_ioctl, 0),
|
||||
};
|
||||
|
||||
static const struct file_operations amdxdna_fops = {
|
||||
|
@ -190,9 +195,16 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
|
|||
return -ENODEV;
|
||||
|
||||
drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
|
||||
init_rwsem(&xdna->notifier_lock);
|
||||
INIT_LIST_HEAD(&xdna->client_list);
|
||||
pci_set_drvdata(pdev, xdna);
|
||||
|
||||
if (IS_ENABLED(CONFIG_LOCKDEP)) {
|
||||
fs_reclaim_acquire(GFP_KERNEL);
|
||||
might_lock(&xdna->notifier_lock);
|
||||
fs_reclaim_release(GFP_KERNEL);
|
||||
}
|
||||
|
||||
mutex_lock(&xdna->dev_lock);
|
||||
ret = xdna->dev_info->ops->init(xdna);
|
||||
mutex_unlock(&xdna->dev_lock);
|
||||
|
|
|
@ -20,6 +20,7 @@ extern const struct drm_driver amdxdna_drm_drv;
|
|||
struct amdxdna_dev;
|
||||
struct amdxdna_gem_obj;
|
||||
struct amdxdna_hwctx;
|
||||
struct amdxdna_sched_job;
|
||||
|
||||
/*
|
||||
* struct amdxdna_dev_ops - Device hardware operation callbacks
|
||||
|
@ -31,6 +32,7 @@ struct amdxdna_dev_ops {
|
|||
void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
|
||||
int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
|
||||
void (*hmm_invalidate)(struct amdxdna_gem_obj *abo, unsigned long cur_seq);
|
||||
int (*cmd_submit)(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq);
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -69,6 +71,7 @@ struct amdxdna_dev {
|
|||
struct mutex dev_lock; /* per device lock */
|
||||
struct list_head client_list;
|
||||
struct amdxdna_fw_ver fw_ver;
|
||||
struct rw_semaphore notifier_lock; /* for mmu notifier*/
|
||||
};
|
||||
|
||||
/*
|
||||
|
@ -88,6 +91,8 @@ struct amdxdna_client {
|
|||
struct list_head node;
|
||||
pid_t pid;
|
||||
struct mutex hwctx_lock; /* protect hwctx */
|
||||
/* do NOT wait this srcu when hwctx_lock is held */
|
||||
struct srcu_struct hwctx_srcu;
|
||||
struct idr hwctx_idr;
|
||||
struct amdxdna_dev *xdna;
|
||||
struct drm_file *filp;
|
||||
|
|
|
@ -3,9 +3,14 @@
|
|||
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_gem_shmem_helper.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include "amdxdna_gem.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
static ssize_t vbnv_show(struct device *dev, struct device_attribute *attr, char *buf)
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include "aie2_pci.h"
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include "aie2_pci.h"
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include "aie2_pci.h"
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
|
||||
#include <drm/amdxdna_accel.h>
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/sizes.h>
|
||||
|
||||
#include "aie2_pci.h"
|
||||
|
|
|
@ -9,8 +9,49 @@
|
|||
#if !defined(_TRACE_AMDXDNA_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||
#define _TRACE_AMDXDNA_H
|
||||
|
||||
#include <drm/gpu_scheduler.h>
|
||||
#include <linux/tracepoint.h>
|
||||
|
||||
TRACE_EVENT(amdxdna_debug_point,
|
||||
TP_PROTO(const char *name, u64 number, const char *str),
|
||||
|
||||
TP_ARGS(name, number, str),
|
||||
|
||||
TP_STRUCT__entry(__string(name, name)
|
||||
__field(u64, number)
|
||||
__string(str, str)),
|
||||
|
||||
TP_fast_assign(__assign_str(name);
|
||||
__entry->number = number;
|
||||
__assign_str(str);),
|
||||
|
||||
TP_printk("%s:%llu %s", __get_str(name), __entry->number,
|
||||
__get_str(str))
|
||||
);
|
||||
|
||||
TRACE_EVENT(xdna_job,
|
||||
TP_PROTO(struct drm_sched_job *sched_job, const char *name, const char *str, u64 seq),
|
||||
|
||||
TP_ARGS(sched_job, name, str, seq),
|
||||
|
||||
TP_STRUCT__entry(__string(name, name)
|
||||
__string(str, str)
|
||||
__field(u64, fence_context)
|
||||
__field(u64, fence_seqno)
|
||||
__field(u64, seq)),
|
||||
|
||||
TP_fast_assign(__assign_str(name);
|
||||
__assign_str(str);
|
||||
__entry->fence_context = sched_job->s_fence->finished.context;
|
||||
__entry->fence_seqno = sched_job->s_fence->finished.seqno;
|
||||
__entry->seq = seq;),
|
||||
|
||||
TP_printk("fence=(context:%llu, seqno:%lld), %s seq#:%lld %s",
|
||||
__entry->fence_context, __entry->fence_seqno,
|
||||
__get_str(name), __entry->seq,
|
||||
__get_str(str))
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(xdna_mbox_msg,
|
||||
TP_PROTO(char *name, u8 chann_id, u32 opcode, u32 msg_id),
|
||||
|
||||
|
|
|
@ -13,9 +13,11 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define AMDXDNA_INVALID_CMD_HANDLE (~0UL)
|
||||
#define AMDXDNA_INVALID_ADDR (~0UL)
|
||||
#define AMDXDNA_INVALID_CTX_HANDLE 0
|
||||
#define AMDXDNA_INVALID_BO_HANDLE 0
|
||||
#define AMDXDNA_INVALID_FENCE_HANDLE 0
|
||||
|
||||
enum amdxdna_device_type {
|
||||
AMDXDNA_DEV_TYPE_UNKNOWN = -1,
|
||||
|
@ -29,6 +31,7 @@ enum amdxdna_drm_ioctl_id {
|
|||
DRM_AMDXDNA_CREATE_BO,
|
||||
DRM_AMDXDNA_GET_BO_INFO,
|
||||
DRM_AMDXDNA_SYNC_BO,
|
||||
DRM_AMDXDNA_EXEC_CMD,
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -201,6 +204,37 @@ struct amdxdna_drm_sync_bo {
|
|||
__u64 size;
|
||||
};
|
||||
|
||||
/* Command types accepted in amdxdna_drm_exec_cmd.type. */
enum amdxdna_cmd_type {
	AMDXDNA_CMD_SUBMIT_EXEC_BUF = 0,
	AMDXDNA_CMD_SUBMIT_DEPENDENCY,
	AMDXDNA_CMD_SUBMIT_SIGNAL,
};
|
||||
|
||||
/**
|
||||
* struct amdxdna_drm_exec_cmd - Execute command.
|
||||
* @ext: MBZ.
|
||||
* @ext_flags: MBZ.
|
||||
* @hwctx: Hardware context handle.
|
||||
* @type: One of command type in enum amdxdna_cmd_type.
|
||||
* @cmd_handles: Array of command handles or the command handle itself
|
||||
* in case of just one.
|
||||
* @args: Array of arguments for all command handles.
|
||||
* @cmd_count: Number of command handles in the cmd_handles array.
|
||||
* @arg_count: Number of arguments in the args array.
|
||||
* @seq: Returned sequence number for this command.
|
||||
*/
|
||||
struct amdxdna_drm_exec_cmd {
|
||||
__u64 ext;
|
||||
__u64 ext_flags;
|
||||
__u32 hwctx;
|
||||
__u32 type;
|
||||
__u64 cmd_handles;
|
||||
__u64 args;
|
||||
__u32 cmd_count;
|
||||
__u32 arg_count;
|
||||
__u64 seq;
|
||||
};
|
||||
|
||||
#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
|
||||
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
|
||||
struct amdxdna_drm_create_hwctx)
|
||||
|
@ -225,6 +259,10 @@ struct amdxdna_drm_sync_bo {
|
|||
DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_SYNC_BO, \
|
||||
struct amdxdna_drm_sync_bo)
|
||||
|
||||
/* ioctl number for command submission, see struct amdxdna_drm_exec_cmd. */
#define DRM_IOCTL_AMDXDNA_EXEC_CMD \
	DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_EXEC_CMD, \
		 struct amdxdna_drm_exec_cmd)
|
||||
|
||||
#if defined(__cplusplus)
|
||||
} /* extern c end */
|
||||
#endif
|
||||
|
|
Loading…
Add table
Reference in a new issue