From 244fe1666364865154930f34d8df5489df1922b6 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 21 Aug 2024 15:38:32 -0400 Subject: [PATCH] drm/xe: Introduce GGTT documentation Document xe_ggtt and ensure it is part of the built kernel docs. v2: - Accepted all Michal's suggestions - Rebased on top of new set_pte per platform/wa function pointer v3: - Typos and other acronym fixes (Michal) Cc: Matthew Brost Cc: Michal Wajdeczko Reviewed-by: Himal Prasad Ghimiray #v1 Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240821193842.352557-2-rodrigo.vivi@intel.com Signed-off-by: Rodrigo Vivi --- Documentation/gpu/xe/xe_mm.rst | 15 ++++ drivers/gpu/drm/xe/xe_ggtt.c | 136 +++++++++++++++++++++++------ drivers/gpu/drm/xe/xe_ggtt_types.h | 34 ++++++-- 3 files changed, 150 insertions(+), 35 deletions(-) diff --git a/Documentation/gpu/xe/xe_mm.rst b/Documentation/gpu/xe/xe_mm.rst index 6c8fd8b4a466..95864a4502dd 100644 --- a/Documentation/gpu/xe/xe_mm.rst +++ b/Documentation/gpu/xe/xe_mm.rst @@ -7,6 +7,21 @@ Memory Management .. kernel-doc:: drivers/gpu/drm/xe/xe_bo_doc.h :doc: Buffer Objects (BO) +GGTT +==== + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c + :doc: Global Graphics Translation Table (GGTT) + +GGTT Internal API +----------------- + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt_types.h + :internal: + +.. kernel-doc:: drivers/gpu/drm/xe/xe_ggtt.c + :internal: + Pagetable building ================== diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c index add14f3dea1f..dd5cd0df705c 100644 --- a/drivers/gpu/drm/xe/xe_ggtt.c +++ b/drivers/gpu/drm/xe/xe_ggtt.c @@ -30,6 +30,39 @@ #include "xe_wa.h" #include "xe_wopcm.h" +/** + * DOC: Global Graphics Translation Table (GGTT) + * + * Xe GGTT implements the support for a Global Virtual Address space that is used + * for resources that are accessible to privileged (i.e. kernel-mode) processes, + * and not tied to a specific user-level process. For example, the Graphics + * micro-Controller (GuC) and Display Engine (if present) utilize this Global + * address space. + * + * The Global GTT (GGTT) translates from the Global virtual address to a physical + * address that can be accessed by HW. The GGTT is a flat, single-level table. + * + * Xe implements a simplified version of the GGTT specifically managing only a + * certain range of it that goes from the Write Once Protected Content Memory (WOPCM) + * Layout to a predefined GUC_GGTT_TOP. This approach avoids complications related to + * the GuC (Graphics Microcontroller) hardware limitations. The GuC address space + * is limited on both ends of the GGTT, because the GuC shim HW redirects + * accesses to those addresses to other HW areas instead of going through the + * GGTT. On the bottom end, the GuC can't access offsets below the WOPCM size, + * while on the top side the limit is fixed at GUC_GGTT_TOP. To keep things + * simple, instead of checking each object to see if they are accessed by GuC or + * not, we just exclude those areas from the allocator. Additionally, to simplify + * the driver load, we use the maximum WOPCM size in this logic instead of the + * programmed one, so we don't need to wait until the actual size to be + * programmed is determined (which requires FW fetch) before initializing the + * GGTT. These simplifications might waste space in the GGTT (about 20-25 MBs + * depending on the platform) but we can live with this. Another benefit of this + * is the GuC bootrom can't access anything below the WOPCM max size so anything + * the bootrom needs to access (e.g. a RSA key) needs to be placed in the GGTT + * above the WOPCM max size. Starting the GGTT allocations above the WOPCM max + * give us the correct placement for free. + */ + static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset, u16 pat_index) { @@ -164,12 +197,16 @@ static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = { .ggtt_set_pte = xe_ggtt_set_pte_and_flush, }; -/* - * Early GGTT initialization, which allows to create new mappings usable by the - * GuC. - * Mappings are not usable by the HW engines, as it doesn't have scratch / +/** + * xe_ggtt_init_early - Early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * It allows to create new mappings usable by the GuC. + * Mappings are not usable by the HW engines, as it doesn't have scratch nor * initial clear done to it yet. That will happen in the regular, non-early - * GGTT init. + * GGTT initialization. + * + * Return: 0 on success or a negative error code on failure. */ int xe_ggtt_init_early(struct xe_ggtt *ggtt) { @@ -194,29 +231,6 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt) if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K) ggtt->flags |= XE_GGTT_FLAGS_64K; - /* - * 8B per entry, each points to a 4KB page. - * - * The GuC address space is limited on both ends of the GGTT, because - * the GuC shim HW redirects accesses to those addresses to other HW - * areas instead of going through the GGTT. On the bottom end, the GuC - * can't access offsets below the WOPCM size, while on the top side the - * limit is fixed at GUC_GGTT_TOP. To keep things simple, instead of - * checking each object to see if they are accessed by GuC or not, we - * just exclude those areas from the allocator. Additionally, to - * simplify the driver load, we use the maximum WOPCM size in this logic - * instead of the programmed one, so we don't need to wait until the - * actual size to be programmed is determined (which requires FW fetch) - * before initializing the GGTT. These simplifications might waste space - * in the GGTT (about 20-25 MBs depending on the platform) but we can - * live with this. - * - * Another benifit of this is the GuC bootrom can't access anything - * below the WOPCM max size so anything the bootom needs to access (e.g. - * a RSA key) needs to be placed in the GGTT above the WOPCM max size. - * Starting the GGTT allocations above the WOPCM max give us the correct - * placement for free. - */ if (ggtt->size > GUC_GGTT_TOP) ggtt->size = GUC_GGTT_TOP; @@ -262,6 +276,12 @@ static void xe_ggtt_initial_clear(struct xe_ggtt *ggtt) mutex_unlock(&ggtt->lock); } +/** + * xe_ggtt_init - Regular non-early GGTT initialization + * @ggtt: the &xe_ggtt to be initialized + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_init(struct xe_ggtt *ggtt) { struct xe_device *xe = tile_to_xe(ggtt->tile); @@ -382,6 +402,18 @@ void xe_ggtt_deballoon(struct xe_ggtt *ggtt, struct drm_mm_node *node) mutex_unlock(&ggtt->lock); } +/** + * xe_ggtt_insert_special_node_locked - Locked version to insert a &drm_mm_node into the GGTT + * @ggtt: the &xe_ggtt where node will be inserted + * @node: the &drm_mm_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * @mm_flags: flags to control the node behavior + * + * To be used in cases where ggtt->lock is already taken. + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align, u32 mm_flags) { @@ -389,6 +421,15 @@ int xe_ggtt_insert_special_node_locked(struct xe_ggtt *ggtt, struct drm_mm_node mm_flags); } +/** + * xe_ggtt_insert_special_node - Insert a &drm_mm_node into the GGTT + * @ggtt: the &xe_ggtt where node will be inserted + * @node: the &drm_mm_node to be inserted + * @size: size of the node + * @align: alignment constrain of the node + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, u32 size, u32 align) { @@ -402,6 +443,11 @@ int xe_ggtt_insert_special_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, return ret; } +/** + * xe_ggtt_map_bo - Map the BO into GGTT + * @ggtt: the &xe_ggtt where node will be mapped + * @bo: the &xe_bo to be mapped + */ void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB; @@ -449,17 +495,39 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, return err; } +/** + * xe_ggtt_insert_bo_at - Insert BO at a specific GGTT space + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * @start: address where it will be inserted + * @end: end of the range where it will be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo, u64 start, u64 end) { return __xe_ggtt_insert_bo_at(ggtt, bo, start, end); } +/** + * xe_ggtt_insert_bo - Insert BO into GGTT + * @ggtt: the &xe_ggtt where bo will be inserted + * @bo: the &xe_bo to be inserted + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX); } +/** + * xe_ggtt_remove_node - Remove a &drm_mm_node from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @node: the &drm_mm_node to be removed + * @invalidate: if node needs invalidation upon removal + */ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, bool invalidate) { @@ -488,6 +556,11 @@ void xe_ggtt_remove_node(struct xe_ggtt *ggtt, struct drm_mm_node *node, drm_dev_exit(idx); } +/** + * xe_ggtt_remove_bo - Remove a BO from the GGTT + * @ggtt: the &xe_ggtt where node will be removed + * @bo: the &xe_bo to be removed + */ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo) { if (XE_WARN_ON(!bo->ggtt_node.size)) @@ -544,6 +617,13 @@ void xe_ggtt_assign(struct xe_ggtt *ggtt, const struct drm_mm_node *node, u16 vf } #endif +/** + * xe_ggtt_dump - Dump GGTT for debug + * @ggtt: the &xe_ggtt to be dumped + * @p: the &drm_mm_printer helper handle to be used to dump the information + * + * Return: 0 on success or a negative error code on failure. + */ int xe_ggtt_dump(struct xe_ggtt *ggtt, struct drm_printer *p) { int err; diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h index 2245d88d8f39..154de298a4e3 100644 --- a/drivers/gpu/drm/xe/xe_ggtt_types.h +++ b/drivers/gpu/drm/xe/xe_ggtt_types.h @@ -13,30 +13,50 @@ struct xe_bo; struct xe_gt; +/** + * struct xe_ggtt - Main GGTT struct + * + * In general, each tile can contains its own Global Graphics Translation Table + * (GGTT) instance. + */ struct xe_ggtt { + /** @tile: Back pointer to tile where this GGTT belongs */ struct xe_tile *tile; - + /** @size: Total size of this GGTT */ u64 size; #define XE_GGTT_FLAGS_64K BIT(0) + /** + * @flags: Flags for this GGTT + * Acceptable flags: + * - %XE_GGTT_FLAGS_64K - if PTE size is 64K. Otherwise, regular is 4K. + */ unsigned int flags; - + /** @scratch: Internal object allocation used as a scratch page */ struct xe_bo *scratch; - + /** @lock: Mutex lock to protect GGTT data */ struct mutex lock; - + /** + * @gsm: The iomem pointer to the actual location of the translation + * table located in the GSM for easy PTE manipulation + */ u64 __iomem *gsm; - + /** @pt_ops: Page Table operations per platform */ const struct xe_ggtt_pt_ops *pt_ops; - + /** @mm: The memory manager used to manage individual GGTT allocations */ struct drm_mm mm; - /** @access_count: counts GGTT writes */ unsigned int access_count; }; +/** + * struct xe_ggtt_pt_ops - GGTT Page table operations + * Which can vary from platform to platform. + */ struct xe_ggtt_pt_ops { + /** @pte_encode_bo: Encode PTE address for a given BO */ u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index); + /** @ggtt_set_pte: Directly write into GGTT's PTE */ void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte); };