mirror of
git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
synced 2025-08-05 16:54:27 +00:00
accel/amdxdna: Add hardware resource solver
The AI Engine consists of 2D array of tiles arranged as columns. Provides the basic column allocation and release functions for the tile columns. Co-developed-by: Min Ma <min.ma@amd.com> Signed-off-by: Min Ma <min.ma@amd.com> Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Signed-off-by: Lizhi Hou <lizhi.hou@amd.com> Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Link: https://patchwork.freedesktop.org/patch/msgid/20241118172942.2014541-5-lizhi.hou@amd.com
This commit is contained in:
parent
b87f920b93
commit
c88d3325ae
5 changed files with 508 additions and 1 deletions
|
@ -5,6 +5,7 @@ amdxdna-y := \
|
|||
aie2_pci.o \
|
||||
aie2_psp.o \
|
||||
aie2_smu.o \
|
||||
aie2_solver.o \
|
||||
amdxdna_mailbox.o \
|
||||
amdxdna_mailbox_helper.o \
|
||||
amdxdna_pci_drv.o \
|
||||
|
|
|
@ -14,9 +14,14 @@
|
|||
|
||||
#include "aie2_msg_priv.h"
|
||||
#include "aie2_pci.h"
|
||||
#include "aie2_solver.h"
|
||||
#include "amdxdna_mailbox.h"
|
||||
#include "amdxdna_pci_drv.h"
|
||||
|
||||
int aie2_max_col = XRS_MAX_COL;
|
||||
module_param(aie2_max_col, uint, 0600);
|
||||
MODULE_PARM_DESC(aie2_max_col, "Maximum column could be used");
|
||||
|
||||
/*
|
||||
* The management mailbox channel is allocated by firmware.
|
||||
* The related register and ring buffer information is on SRAM BAR.
|
||||
|
@ -307,6 +312,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
|
|||
{
|
||||
struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
|
||||
void __iomem *tbl[PCI_NUM_RESOURCES] = {0};
|
||||
struct init_config xrs_cfg = { 0 };
|
||||
struct amdxdna_dev_hdl *ndev;
|
||||
struct psp_config psp_conf;
|
||||
const struct firmware *fw;
|
||||
|
@ -403,7 +409,22 @@ static int aie2_init(struct amdxdna_dev *xdna)
|
|||
XDNA_ERR(xdna, "Query firmware failed, ret %d", ret);
|
||||
goto stop_hw;
|
||||
}
|
||||
ndev->total_col = ndev->metadata.cols;
|
||||
ndev->total_col = min(aie2_max_col, ndev->metadata.cols);
|
||||
|
||||
xrs_cfg.clk_list.num_levels = 3;
|
||||
xrs_cfg.clk_list.cu_clk_list[0] = 0;
|
||||
xrs_cfg.clk_list.cu_clk_list[1] = 800;
|
||||
xrs_cfg.clk_list.cu_clk_list[2] = 1000;
|
||||
xrs_cfg.sys_eff_factor = 1;
|
||||
xrs_cfg.ddev = &xdna->ddev;
|
||||
xrs_cfg.total_col = ndev->total_col;
|
||||
|
||||
xdna->xrs_hdl = xrsm_init(&xrs_cfg);
|
||||
if (!xdna->xrs_hdl) {
|
||||
XDNA_ERR(xdna, "Initialize resolver failed");
|
||||
ret = -EINVAL;
|
||||
goto stop_hw;
|
||||
}
|
||||
|
||||
release_firmware(fw);
|
||||
return 0;
|
||||
|
|
330
drivers/accel/amdxdna/aie2_solver.c
Normal file
330
drivers/accel/amdxdna/aie2_solver.c
Normal file
|
@ -0,0 +1,330 @@
|
|||
// SPDX-License-Identifier: GPL-2.0
|
||||
/*
|
||||
* Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#include <drm/drm_device.h>
|
||||
#include <drm/drm_managed.h>
|
||||
#include <drm/drm_print.h>
|
||||
#include <linux/bitops.h>
|
||||
#include <linux/bitmap.h>
|
||||
|
||||
#include "aie2_solver.h"
|
||||
|
||||
struct partition_node {
|
||||
struct list_head list;
|
||||
u32 nshared; /* # shared requests */
|
||||
u32 start_col; /* start column */
|
||||
u32 ncols; /* # columns */
|
||||
bool exclusive; /* can not be shared if set */
|
||||
};
|
||||
|
||||
struct solver_node {
|
||||
struct list_head list;
|
||||
u64 rid; /* Request ID from consumer */
|
||||
|
||||
struct partition_node *pt_node;
|
||||
void *cb_arg;
|
||||
u32 cols_len;
|
||||
u32 start_cols[] __counted_by(cols_len);
|
||||
};
|
||||
|
||||
struct solver_rgroup {
|
||||
u32 rgid;
|
||||
u32 nnode;
|
||||
u32 npartition_node;
|
||||
|
||||
DECLARE_BITMAP(resbit, XRS_MAX_COL);
|
||||
struct list_head node_list;
|
||||
struct list_head pt_node_list;
|
||||
};
|
||||
|
||||
struct solver_state {
|
||||
struct solver_rgroup rgp;
|
||||
struct init_config cfg;
|
||||
struct xrs_action_ops *actions;
|
||||
};
|
||||
|
||||
static u32 calculate_gops(struct aie_qos *rqos)
|
||||
{
|
||||
u32 service_rate = 0;
|
||||
|
||||
if (rqos->latency)
|
||||
service_rate = (1000 / rqos->latency);
|
||||
|
||||
if (rqos->fps > service_rate)
|
||||
return rqos->fps * rqos->gops;
|
||||
|
||||
return service_rate * rqos->gops;
|
||||
}
|
||||
|
||||
/*
|
||||
* qos_meet() - Check the QOS request can be met.
|
||||
*/
|
||||
static int qos_meet(struct solver_state *xrs, struct aie_qos *rqos, u32 cgops)
|
||||
{
|
||||
u32 request_gops = calculate_gops(rqos) * xrs->cfg.sys_eff_factor;
|
||||
|
||||
if (request_gops <= cgops)
|
||||
return 0;
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* sanity_check() - Do a basic sanity check on allocation request.
|
||||
*/
|
||||
static int sanity_check(struct solver_state *xrs, struct alloc_requests *req)
|
||||
{
|
||||
struct cdo_parts *cdop = &req->cdo;
|
||||
struct aie_qos *rqos = &req->rqos;
|
||||
u32 cu_clk_freq;
|
||||
|
||||
if (cdop->ncols > xrs->cfg.total_col)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* We can find at least one CDOs groups that meet the
|
||||
* GOPs requirement.
|
||||
*/
|
||||
cu_clk_freq = xrs->cfg.clk_list.cu_clk_list[xrs->cfg.clk_list.num_levels - 1];
|
||||
|
||||
if (qos_meet(xrs, rqos, cdop->qos_cap.opc * cu_clk_freq / 1000))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * rg_search_node() - Look up the tracked request carrying a given ID.
 *
 * Return: the first node on rgp->node_list whose rid equals @rid, or NULL
 * when there is none.
 */
static struct solver_node *rg_search_node(struct solver_rgroup *rgp, u64 rid)
{
	struct solver_node *cur, *found = NULL;

	list_for_each_entry(cur, &rgp->node_list, list) {
		if (cur->rid != rid)
			continue;

		found = cur;
		break;
	}

	return found;
}
|
||||
|
||||
static void remove_partition_node(struct solver_rgroup *rgp,
|
||||
struct partition_node *pt_node)
|
||||
{
|
||||
pt_node->nshared--;
|
||||
if (pt_node->nshared > 0)
|
||||
return;
|
||||
|
||||
list_del(&pt_node->list);
|
||||
rgp->npartition_node--;
|
||||
|
||||
bitmap_clear(rgp->resbit, pt_node->start_col, pt_node->ncols);
|
||||
kfree(pt_node);
|
||||
}
|
||||
|
||||
static void remove_solver_node(struct solver_rgroup *rgp,
|
||||
struct solver_node *node)
|
||||
{
|
||||
list_del(&node->list);
|
||||
rgp->nnode--;
|
||||
|
||||
if (node->pt_node)
|
||||
remove_partition_node(rgp, node->pt_node);
|
||||
|
||||
kfree(node);
|
||||
}
|
||||
|
||||
static int get_free_partition(struct solver_state *xrs,
|
||||
struct solver_node *snode,
|
||||
struct alloc_requests *req)
|
||||
{
|
||||
struct partition_node *pt_node;
|
||||
u32 ncols = req->cdo.ncols;
|
||||
u32 col, i;
|
||||
|
||||
for (i = 0; i < snode->cols_len; i++) {
|
||||
col = snode->start_cols[i];
|
||||
if (find_next_bit(xrs->rgp.resbit, XRS_MAX_COL, col) >= col + ncols)
|
||||
break;
|
||||
}
|
||||
|
||||
if (i == snode->cols_len)
|
||||
return -ENODEV;
|
||||
|
||||
pt_node = kzalloc(sizeof(*pt_node), GFP_KERNEL);
|
||||
if (!pt_node)
|
||||
return -ENOMEM;
|
||||
|
||||
pt_node->nshared = 1;
|
||||
pt_node->start_col = col;
|
||||
pt_node->ncols = ncols;
|
||||
|
||||
/*
|
||||
* Before fully support latency in QoS, if a request
|
||||
* specifies a non-zero latency value, it will not share
|
||||
* the partition with other requests.
|
||||
*/
|
||||
if (req->rqos.latency)
|
||||
pt_node->exclusive = true;
|
||||
|
||||
list_add_tail(&pt_node->list, &xrs->rgp.pt_node_list);
|
||||
xrs->rgp.npartition_node++;
|
||||
bitmap_set(xrs->rgp.resbit, pt_node->start_col, pt_node->ncols);
|
||||
|
||||
snode->pt_node = pt_node;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int allocate_partition(struct solver_state *xrs,
|
||||
struct solver_node *snode,
|
||||
struct alloc_requests *req)
|
||||
{
|
||||
struct partition_node *pt_node, *rpt_node = NULL;
|
||||
int idx, ret;
|
||||
|
||||
ret = get_free_partition(xrs, snode, req);
|
||||
if (!ret)
|
||||
return ret;
|
||||
|
||||
/* try to get a share-able partition */
|
||||
list_for_each_entry(pt_node, &xrs->rgp.pt_node_list, list) {
|
||||
if (pt_node->exclusive)
|
||||
continue;
|
||||
|
||||
if (rpt_node && pt_node->nshared >= rpt_node->nshared)
|
||||
continue;
|
||||
|
||||
for (idx = 0; idx < snode->cols_len; idx++) {
|
||||
if (snode->start_cols[idx] != pt_node->start_col)
|
||||
continue;
|
||||
|
||||
if (req->cdo.ncols != pt_node->ncols)
|
||||
continue;
|
||||
|
||||
rpt_node = pt_node;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!rpt_node)
|
||||
return -ENODEV;
|
||||
|
||||
rpt_node->nshared++;
|
||||
snode->pt_node = rpt_node;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct solver_node *create_solver_node(struct solver_state *xrs,
|
||||
struct alloc_requests *req)
|
||||
{
|
||||
struct cdo_parts *cdop = &req->cdo;
|
||||
struct solver_node *node;
|
||||
int ret;
|
||||
|
||||
node = kzalloc(struct_size(node, start_cols, cdop->cols_len), GFP_KERNEL);
|
||||
if (!node)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
node->rid = req->rid;
|
||||
node->cols_len = cdop->cols_len;
|
||||
memcpy(node->start_cols, cdop->start_cols, cdop->cols_len * sizeof(u32));
|
||||
|
||||
ret = allocate_partition(xrs, node, req);
|
||||
if (ret)
|
||||
goto free_node;
|
||||
|
||||
list_add_tail(&node->list, &xrs->rgp.node_list);
|
||||
xrs->rgp.nnode++;
|
||||
return node;
|
||||
|
||||
free_node:
|
||||
kfree(node);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
static void fill_load_action(struct solver_state *xrs,
|
||||
struct solver_node *snode,
|
||||
struct xrs_action_load *action)
|
||||
{
|
||||
action->rid = snode->rid;
|
||||
action->part.start_col = snode->pt_node->start_col;
|
||||
action->part.ncols = snode->pt_node->ncols;
|
||||
}
|
||||
|
||||
int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg)
|
||||
{
|
||||
struct xrs_action_load load_act;
|
||||
struct solver_node *snode;
|
||||
struct solver_state *xrs;
|
||||
int ret;
|
||||
|
||||
xrs = (struct solver_state *)hdl;
|
||||
|
||||
ret = sanity_check(xrs, req);
|
||||
if (ret) {
|
||||
drm_err(xrs->cfg.ddev, "invalid request");
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (rg_search_node(&xrs->rgp, req->rid)) {
|
||||
drm_err(xrs->cfg.ddev, "rid %lld is in-use", req->rid);
|
||||
return -EEXIST;
|
||||
}
|
||||
|
||||
snode = create_solver_node(xrs, req);
|
||||
if (IS_ERR(snode))
|
||||
return PTR_ERR(snode);
|
||||
|
||||
fill_load_action(xrs, snode, &load_act);
|
||||
ret = xrs->cfg.actions->load(cb_arg, &load_act);
|
||||
if (ret)
|
||||
goto free_node;
|
||||
|
||||
snode->cb_arg = cb_arg;
|
||||
|
||||
drm_dbg(xrs->cfg.ddev, "start col %d ncols %d\n",
|
||||
snode->pt_node->start_col, snode->pt_node->ncols);
|
||||
|
||||
return 0;
|
||||
|
||||
free_node:
|
||||
remove_solver_node(&xrs->rgp, snode);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * xrs_release_resource() - Unload and stop tracking a request.
 *
 * @hdl: solver handle returned by xrsm_init()
 * @rid: ID of the request to release
 *
 * The unload callback is invoked with the cb_arg saved at allocation
 * time. A failing unload is logged but does not stop the release: the
 * node and its partition reference are dropped regardless.
 *
 * Return: 0 on success, -ENODEV when @rid is not tracked.
 */
int xrs_release_resource(void *hdl, u64 rid)
{
	struct solver_state *xrs = hdl;
	struct solver_node *node;
	int ret;

	node = rg_search_node(&xrs->rgp, rid);
	if (!node) {
		drm_err(xrs->cfg.ddev, "node not exist\n");
		return -ENODEV;
	}

	ret = xrs->cfg.actions->unload(node->cb_arg);
	if (ret)
		drm_warn(xrs->cfg.ddev, "unload failed for rid %llu, ret %d\n",
			 rid, ret);

	remove_solver_node(&xrs->rgp, node);

	return 0;
}
|
||||
|
||||
void *xrsm_init(struct init_config *cfg)
|
||||
{
|
||||
struct solver_rgroup *rgp;
|
||||
struct solver_state *xrs;
|
||||
|
||||
xrs = drmm_kzalloc(cfg->ddev, sizeof(*xrs), GFP_KERNEL);
|
||||
if (!xrs)
|
||||
return NULL;
|
||||
|
||||
memcpy(&xrs->cfg, cfg, sizeof(*cfg));
|
||||
|
||||
rgp = &xrs->rgp;
|
||||
INIT_LIST_HEAD(&rgp->node_list);
|
||||
INIT_LIST_HEAD(&rgp->pt_node_list);
|
||||
|
||||
return xrs;
|
||||
}
|
154
drivers/accel/amdxdna/aie2_solver.h
Normal file
154
drivers/accel/amdxdna/aie2_solver.h
Normal file
|
@ -0,0 +1,154 @@
|
|||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
/*
|
||||
* Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
|
||||
*/
|
||||
|
||||
#ifndef _AIE2_SOLVER_H
|
||||
#define _AIE2_SOLVER_H
|
||||
|
||||
#define XRS_MAX_COL 128
|
||||
|
||||
/*
|
||||
* Structure used to describe a partition. A partition is column based
|
||||
* allocation unit described by its start column and number of columns.
|
||||
*/
|
||||
struct aie_part {
|
||||
u32 start_col;
|
||||
u32 ncols;
|
||||
};
|
||||
|
||||
/*
|
||||
* The QoS capabilities of a given AIE partition.
|
||||
*/
|
||||
struct aie_qos_cap {
|
||||
u32 opc; /* operations per cycle */
|
||||
u32 dma_bw; /* DMA bandwidth */
|
||||
};
|
||||
|
||||
/*
|
||||
* QoS requirement of a resource allocation.
|
||||
*/
|
||||
struct aie_qos {
|
||||
u32 gops; /* Giga operations */
|
||||
u32 fps; /* Frames per second */
|
||||
u32 dma_bw; /* DMA bandwidth */
|
||||
u32 latency; /* Frame response latency */
|
||||
u32 exec_time; /* Frame execution time */
|
||||
u32 priority; /* Request priority */
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure used to describe a relocatable CDO (Configuration Data Object).
|
||||
*/
|
||||
struct cdo_parts {
|
||||
u32 *start_cols; /* Start column array */
|
||||
u32 cols_len; /* Length of start column array */
|
||||
u32 ncols; /* # of column */
|
||||
struct aie_qos_cap qos_cap; /* CDO QoS capabilities */
|
||||
};
|
||||
|
||||
/*
|
||||
* Structure used to describe a request to allocate.
|
||||
*/
|
||||
struct alloc_requests {
|
||||
u64 rid;
|
||||
struct cdo_parts cdo;
|
||||
struct aie_qos rqos; /* Requested QoS */
|
||||
};
|
||||
|
||||
/*
|
||||
* Load callback argument
|
||||
*/
|
||||
struct xrs_action_load {
|
||||
u32 rid;
|
||||
struct aie_part part;
|
||||
};
|
||||
|
||||
/*
 * Available power levels.
 *
 * POWER_LEVEL_MIN:
 *     Lowest power level. Usually set when all actions are unloaded.
 *
 * POWER_LEVEL_n:
 *     Power levels 0 - n, each a step increase in system frequencies.
 *
 * POWER_LEVEL_NUM:
 *     Number of levels defined above; used to size per-level tables.
 */
enum power_level {
	POWER_LEVEL_MIN = 0,
	POWER_LEVEL_0,		/* 1 */
	POWER_LEVEL_1,		/* 2 */
	POWER_LEVEL_2,		/* 3 */
	POWER_LEVEL_3,		/* 4 */
	POWER_LEVEL_4,		/* 5 */
	POWER_LEVEL_5,		/* 6 */
	POWER_LEVEL_6,		/* 7 */
	POWER_LEVEL_7,		/* 8 */
	POWER_LEVEL_NUM,
};
|
||||
|
||||
/*
|
||||
* Structure used to describe the frequency table.
|
||||
* Resource solver chooses the frequency from the table
|
||||
* to meet the QOS requirements.
|
||||
*/
|
||||
struct clk_list_info {
|
||||
u32 num_levels; /* available power levels */
|
||||
u32 cu_clk_list[POWER_LEVEL_NUM]; /* available aie clock frequencies in Mhz*/
|
||||
};
|
||||
|
||||
/*
 * Callbacks supplied by the solver's client.
 *
 * load:   called by xrs_allocate_resource() once a partition has been
 *         assigned; a non-zero return aborts the allocation.
 * unload: called by xrs_release_resource() with the cb_arg that was given
 *         at allocation time.
 */
struct xrs_action_ops {
	int (*load)(void *cb_arg, struct xrs_action_load *action);
	int (*unload)(void *cb_arg);
};
|
||||
|
||||
/*
|
||||
* Structure used to describe information for solver during initialization.
|
||||
*/
|
||||
struct init_config {
|
||||
u32 total_col;
|
||||
u32 sys_eff_factor; /* system efficiency factor */
|
||||
u32 latency_adj; /* latency adjustment in ms */
|
||||
struct clk_list_info clk_list; /* List of frequencies available in system */
|
||||
struct drm_device *ddev;
|
||||
struct xrs_action_ops *actions;
|
||||
};
|
||||
|
||||
/*
|
||||
* xrsm_init() - Register resource solver. Resource solver client needs
|
||||
* to call this function to register itself.
|
||||
*
|
||||
* @cfg: The system metrics for resource solver to use
|
||||
*
|
||||
* Return: A resource solver handle
|
||||
*
|
||||
* Note: We should only create one handle per AIE array to be managed.
|
||||
*/
|
||||
void *xrsm_init(struct init_config *cfg);
|
||||
|
||||
/*
|
||||
* xrs_allocate_resource() - Request to allocate resources for a given context
|
||||
* and its partition metadata. (See struct alloc_requests)
|
||||
*
|
||||
* @hdl: Resource solver handle obtained from xrsm_init()
|
||||
* @req: Input to the Resource solver including request id
|
||||
* and partition metadata.
|
||||
* @cb_arg: callback argument pointer
|
||||
*
|
||||
* Return: 0 when successful.
|
||||
* Or standard error number when failing
|
||||
*
|
||||
* Note:
|
||||
* There is no lock mechanism inside resource solver. So it is
|
||||
* the caller's responsibility to lock down XCLBINs and grab
|
||||
* necessary lock.
|
||||
*/
|
||||
int xrs_allocate_resource(void *hdl, struct alloc_requests *req, void *cb_arg);
|
||||
|
||||
/*
|
||||
* xrs_release_resource() - Request to free resources for a given context.
|
||||
*
|
||||
* @hdl: Resource solver handle obtained from xrsm_init()
|
||||
* @rid: The Request ID to identify the requesting context
|
||||
*/
|
||||
int xrs_release_resource(void *hdl, u64 rid);
|
||||
#endif /* _AIE2_SOLVER_H */
|
|
@ -58,6 +58,7 @@ struct amdxdna_dev {
|
|||
struct drm_device ddev;
|
||||
struct amdxdna_dev_hdl *dev_handle;
|
||||
const struct amdxdna_dev_info *dev_info;
|
||||
void *xrs_hdl;
|
||||
|
||||
struct mutex dev_lock; /* per device lock */
|
||||
struct amdxdna_fw_ver fw_ver;
|
||||
|
|
Loading…
Add table
Reference in a new issue