author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 18:50:03 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 18:50:03 +0000 |
commit | 01a69402cf9d38ff180345d55c2ee51c7e89fbc7 (patch) | |
tree | b406c5242a088c4f59c6e4b719b783f43aca6ae9 /drivers/gpu/drm/xe/xe_query.c | |
parent | Adding upstream version 6.7.12. (diff) | |
download | linux-01a69402cf9d38ff180345d55c2ee51c7e89fbc7.tar.xz linux-01a69402cf9d38ff180345d55c2ee51c7e89fbc7.zip |
Adding upstream version 6.8.9.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'drivers/gpu/drm/xe/xe_query.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_query.c | 552 |
1 file changed, 552 insertions, 0 deletions
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
new file mode 100644
index 0000000000..1a6f2f51ba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "xe_query.h"
+
+#include <linux/nospec.h>
+#include <linux/sched/clock.h>
+
+#include <drm/ttm/ttm_placement.h>
+#include <drm/xe_drm.h>
+
+#include "regs/xe_engine_regs.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_guc_hwconfig.h"
+#include "xe_macros.h"
+#include "xe_mmio.h"
+#include "xe_ttm_vram_mgr.h"
+
+static const u16 xe_to_user_engine_class[] = {
+        [XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
+        [XE_ENGINE_CLASS_COPY] = DRM_XE_ENGINE_CLASS_COPY,
+        [XE_ENGINE_CLASS_VIDEO_DECODE] = DRM_XE_ENGINE_CLASS_VIDEO_DECODE,
+        [XE_ENGINE_CLASS_VIDEO_ENHANCE] = DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE,
+        [XE_ENGINE_CLASS_COMPUTE] = DRM_XE_ENGINE_CLASS_COMPUTE,
+};
+
+static const enum xe_engine_class user_to_xe_engine_class[] = {
+        [DRM_XE_ENGINE_CLASS_RENDER] = XE_ENGINE_CLASS_RENDER,
+        [DRM_XE_ENGINE_CLASS_COPY] = XE_ENGINE_CLASS_COPY,
+        [DRM_XE_ENGINE_CLASS_VIDEO_DECODE] = XE_ENGINE_CLASS_VIDEO_DECODE,
+        [DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE] = XE_ENGINE_CLASS_VIDEO_ENHANCE,
+        [DRM_XE_ENGINE_CLASS_COMPUTE] = XE_ENGINE_CLASS_COMPUTE,
+};
+
+static size_t calc_hw_engine_info_size(struct xe_device *xe)
+{
+        struct xe_hw_engine *hwe;
+        enum xe_hw_engine_id id;
+        struct xe_gt *gt;
+        u8 gt_id;
+        int i = 0;
+
+        for_each_gt(gt, xe, gt_id)
+                for_each_hw_engine(hwe, gt, id) {
+                        if (xe_hw_engine_is_reserved(hwe))
+                                continue;
+                        i++;
+                }
+
+        return sizeof(struct drm_xe_query_engines) +
+                i * sizeof(struct drm_xe_engine);
+}
+
+typedef u64 (*__ktime_func_t)(void);
+static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
+{
+        /*
+         * Use logic same as the perf subsystem to allow user to select the
+         * reference clock id to be used for timestamps.
+         */
+        switch (clk_id) {
+        case CLOCK_MONOTONIC:
+                return &ktime_get_ns;
+        case CLOCK_MONOTONIC_RAW:
+                return &ktime_get_raw_ns;
+        case CLOCK_REALTIME:
+                return &ktime_get_real_ns;
+        case CLOCK_BOOTTIME:
+                return &ktime_get_boottime_ns;
+        case CLOCK_TAI:
+                return &ktime_get_clocktai_ns;
+        default:
+                return NULL;
+        }
+}
+
+static void
+__read_timestamps(struct xe_gt *gt,
+                  struct xe_reg lower_reg,
+                  struct xe_reg upper_reg,
+                  u64 *engine_ts,
+                  u64 *cpu_ts,
+                  u64 *cpu_delta,
+                  __ktime_func_t cpu_clock)
+{
+        u32 upper, lower, old_upper, loop = 0;
+
+        upper = xe_mmio_read32(gt, upper_reg);
+        do {
+                *cpu_delta = local_clock();
+                *cpu_ts = cpu_clock();
+                lower = xe_mmio_read32(gt, lower_reg);
+                *cpu_delta = local_clock() - *cpu_delta;
+                old_upper = upper;
+                upper = xe_mmio_read32(gt, upper_reg);
+        } while (upper != old_upper && loop++ < 2);
+
+        *engine_ts = (u64)upper << 32 | lower;
+}
+
+static int
+query_engine_cycles(struct xe_device *xe,
+                    struct drm_xe_device_query *query)
+{
+        struct drm_xe_query_engine_cycles __user *query_ptr;
+        struct drm_xe_engine_class_instance *eci;
+        struct drm_xe_query_engine_cycles resp;
+        size_t size = sizeof(resp);
+        __ktime_func_t cpu_clock;
+        struct xe_hw_engine *hwe;
+        struct xe_gt *gt;
+
+        if (query->size == 0) {
+                query->size = size;
+                return 0;
+        } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+                return -EINVAL;
+        }
+
+        query_ptr = u64_to_user_ptr(query->data);
+        if (copy_from_user(&resp, query_ptr, size))
+                return -EFAULT;
+
+        cpu_clock = __clock_id_to_func(resp.clockid);
+        if (!cpu_clock)
+                return -EINVAL;
+
+        eci = &resp.eci;
+        if (eci->gt_id >= XE_MAX_GT_PER_TILE)
+                return -EINVAL;
+
+        gt = xe_device_get_gt(xe, eci->gt_id);
+        if (!gt)
+                return -EINVAL;
+
+        if (eci->engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
+                return -EINVAL;
+
+        hwe = xe_gt_hw_engine(gt, user_to_xe_engine_class[eci->engine_class],
+                              eci->engine_instance, true);
+        if (!hwe)
+                return -EINVAL;
+
+        xe_device_mem_access_get(xe);
+        xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+
+        __read_timestamps(gt,
+                          RING_TIMESTAMP(hwe->mmio_base),
+                          RING_TIMESTAMP_UDW(hwe->mmio_base),
+                          &resp.engine_cycles,
+                          &resp.cpu_timestamp,
+                          &resp.cpu_delta,
+                          cpu_clock);
+
+        xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+        xe_device_mem_access_put(xe);
+        resp.width = 36;
+
+        /* Only write to the output fields of user query */
+        if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
+                return -EFAULT;
+
+        if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
+                return -EFAULT;
+
+        if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
+                return -EFAULT;
+
+        if (put_user(resp.width, &query_ptr->width))
+                return -EFAULT;
+
+        return 0;
+}
+
+static int query_engines(struct xe_device *xe,
+                         struct drm_xe_device_query *query)
+{
+        size_t size = calc_hw_engine_info_size(xe);
+        struct drm_xe_query_engines __user *query_ptr =
+                u64_to_user_ptr(query->data);
+        struct drm_xe_query_engines *engines;
+        struct xe_hw_engine *hwe;
+        enum xe_hw_engine_id id;
+        struct xe_gt *gt;
+        u8 gt_id;
+        int i = 0;
+
+        if (query->size == 0) {
+                query->size = size;
+                return 0;
+        } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+                return -EINVAL;
+        }
+
+        engines = kmalloc(size, GFP_KERNEL);
+        if (!engines)
+                return -ENOMEM;
+
+        for_each_gt(gt, xe, gt_id)
+                for_each_hw_engine(hwe, gt, id) {
+                        if (xe_hw_engine_is_reserved(hwe))
+                                continue;
+
+                        engines->engines[i].instance.engine_class =
+                                xe_to_user_engine_class[hwe->class];
+                        engines->engines[i].instance.engine_instance =
+                                hwe->logical_instance;
+                        engines->engines[i].instance.gt_id = gt->info.id;
+                        engines->engines[i].instance.pad = 0;
+                        memset(engines->engines[i].reserved, 0,
+                               sizeof(engines->engines[i].reserved));
+
+                        i++;
+                }
+
+        engines->pad = 0;
+        engines->num_engines = i;
+
+        if (copy_to_user(query_ptr, engines, size)) {
+                kfree(engines);
+                return -EFAULT;
+        }
+        kfree(engines);
+
+        return 0;
+}
+
+static size_t calc_mem_regions_size(struct xe_device *xe)
+{
+        u32 num_managers = 1;
+        int i;
+
+        for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i)
+                if (ttm_manager_type(&xe->ttm, i))
+                        num_managers++;
+
+        return offsetof(struct drm_xe_query_mem_regions, mem_regions[num_managers]);
+}
+
+static int query_mem_regions(struct xe_device *xe,
+                             struct drm_xe_device_query *query)
+{
+        size_t size = calc_mem_regions_size(xe);
+        struct drm_xe_query_mem_regions *mem_regions;
+        struct drm_xe_query_mem_regions __user *query_ptr =
+                u64_to_user_ptr(query->data);
+        struct ttm_resource_manager *man;
+        int ret, i;
+
+        if (query->size == 0) {
+                query->size = size;
+                return 0;
+        } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+                return -EINVAL;
+        }
+
+        mem_regions = kzalloc(size, GFP_KERNEL);
+        if (XE_IOCTL_DBG(xe, !mem_regions))
+                return -ENOMEM;
+
+        man = ttm_manager_type(&xe->ttm, XE_PL_TT);
+        mem_regions->mem_regions[0].mem_class = DRM_XE_MEM_REGION_CLASS_SYSMEM;
+        /*
+         * The instance needs to be a unique number that represents the index
+         * in the placement mask used at xe_gem_create_ioctl() for the
+         * xe_bo_create() placement.
+         */
+        mem_regions->mem_regions[0].instance = 0;
+        mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
+        mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
+        if (perfmon_capable())
+                mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+        mem_regions->num_mem_regions = 1;
+
+        for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
+                man = ttm_manager_type(&xe->ttm, i);
+                if (man) {
+                        mem_regions->mem_regions[mem_regions->num_mem_regions].mem_class =
+                                DRM_XE_MEM_REGION_CLASS_VRAM;
+                        mem_regions->mem_regions[mem_regions->num_mem_regions].instance =
+                                mem_regions->num_mem_regions;
+                        mem_regions->mem_regions[mem_regions->num_mem_regions].min_page_size =
+                                xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ?
+                                SZ_64K : PAGE_SIZE;
+                        mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
+                                man->size;
+
+                        if (perfmon_capable()) {
+                                xe_ttm_vram_get_used(man,
+                                        &mem_regions->mem_regions
+                                        [mem_regions->num_mem_regions].used,
+                                        &mem_regions->mem_regions
+                                        [mem_regions->num_mem_regions].cpu_visible_used);
+                        }
+
+                        mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
+                                xe_ttm_vram_get_cpu_visible_size(man);
+                        mem_regions->num_mem_regions++;
+                }
+        }
+
+        if (!copy_to_user(query_ptr, mem_regions, size))
+                ret = 0;
+        else
+                ret = -ENOSPC;
+
+        kfree(mem_regions);
+        return ret;
+}
+
+static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+        const u32 num_params = DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY + 1;
+        size_t size =
+                sizeof(struct drm_xe_query_config) + num_params * sizeof(u64);
+        struct drm_xe_query_config __user *query_ptr =
+                u64_to_user_ptr(query->data);
+        struct drm_xe_query_config *config;
+
+        if (query->size == 0) {
+                query->size = size;
+                return 0;
+        } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+                return -EINVAL;
+        }
+
+        config = kzalloc(size, GFP_KERNEL);
+        if (!config)
+                return -ENOMEM;
+
+        config->num_params = num_params;
+        config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
+                xe->info.devid | (xe->info.revid << 16);
+        if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
+                config->info[DRM_XE_QUERY_CONFIG_FLAGS] =
+                        DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
+        config->info[DRM_XE_QUERY_CONFIG_MIN_ALIGNMENT] =
+                xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K;
+        config->info[DRM_XE_QUERY_CONFIG_VA_BITS] = xe->info.va_bits;
+        config->info[DRM_XE_QUERY_CONFIG_MAX_EXEC_QUEUE_PRIORITY] =
+                xe_exec_queue_device_get_max_priority(xe);
+
+        if (copy_to_user(query_ptr, config, size)) {
+                kfree(config);
+                return -EFAULT;
+        }
+        kfree(config);
+
+        return 0;
+}
+
+static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query)
+{
+        struct xe_gt *gt;
+        size_t size = sizeof(struct drm_xe_query_gt_list) +
+                xe->info.gt_count * sizeof(struct drm_xe_gt);
+        struct drm_xe_query_gt_list __user *query_ptr =
+                u64_to_user_ptr(query->data);
+        struct drm_xe_query_gt_list *gt_list;
+        u8 id;
+
+        if (query->size == 0) {
+                query->size = size;
+                return 0;
+        } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+                return -EINVAL;
+        }
+
+        gt_list = kzalloc(size, GFP_KERNEL);
+        if (!gt_list)
+                return -ENOMEM;
+
+        gt_list->num_gt = xe->info.gt_count;
+
+        for_each_gt(gt, xe, id) {
+                if (xe_gt_is_media_type(gt))
+                        gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
+                else
+                        gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
+                gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id;
+                gt_list->gt_list[id].gt_id = gt->info.id;
+                gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
+                /*
+                 * The mem_regions indexes in the mask below need to
+                 * directly identify the struct
+                 * drm_xe_query_mem_regions' instance constructed at
+                 * query_mem_regions()
+                 *
+                 * For our current platforms:
+                 * Bit 0 -> System Memory
+                 * Bit 1 -> VRAM0 on Tile0
+                 * Bit 2 -> VRAM1 on Tile1
+                 * However the uAPI is generic and it's userspace's
+                 * responsibility to check the mem_class, without any
+                 * assumption.
+                 */
+                if (!IS_DGFX(xe))
+                        gt_list->gt_list[id].near_mem_regions = 0x1;
+                else
+                        gt_list->gt_list[id].near_mem_regions =
+                                BIT(gt_to_tile(gt)->id) << 1;
+                gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
+                        gt_list->gt_list[id].near_mem_regions;
+        }
+
+        if (copy_to_user(query_ptr, gt_list, size)) {
+                kfree(gt_list);
+                return -EFAULT;
+        }
+        kfree(gt_list);
+
+        return 0;
+}
+
+static int query_hwconfig(struct xe_device *xe,
+                          struct drm_xe_device_query *query)
+{
+        struct xe_gt *gt = xe_root_mmio_gt(xe);
+        size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+        void __user *query_ptr = u64_to_user_ptr(query->data);
+        void *hwconfig;
+
+        if (query->size == 0) {
+                query->size = size;
+                return 0;
+        } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+                return -EINVAL;
+        }
+
+        hwconfig = kzalloc(size, GFP_KERNEL);
+        if (!hwconfig)
+                return -ENOMEM;
+
+        xe_device_mem_access_get(xe);
+        xe_guc_hwconfig_copy(&gt->uc.guc, hwconfig);
+        xe_device_mem_access_put(xe);
+
+        if (copy_to_user(query_ptr, hwconfig, size)) {
+                kfree(hwconfig);
+                return -EFAULT;
+        }
+        kfree(hwconfig);
+
+        return 0;
+}
+
+static size_t calc_topo_query_size(struct xe_device *xe)
+{
+        return xe->info.gt_count *
+                (3 * sizeof(struct drm_xe_query_topology_mask) +
+                 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+                 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+                 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+}
+
+static int copy_mask(void __user **ptr,
+                     struct drm_xe_query_topology_mask *topo,
+                     void *mask, size_t mask_size)
+{
+        topo->num_bytes = mask_size;
+
+        if (copy_to_user(*ptr, topo, sizeof(*topo)))
+                return -EFAULT;
+        *ptr += sizeof(topo);
+
+        if (copy_to_user(*ptr, mask, mask_size))
+                return -EFAULT;
+        *ptr += mask_size;
+
+        return 0;
+}
+
+static int query_gt_topology(struct xe_device *xe,
+                             struct drm_xe_device_query *query)
+{
+        void __user *query_ptr = u64_to_user_ptr(query->data);
+        size_t size = calc_topo_query_size(xe);
+        struct drm_xe_query_topology_mask topo;
+        struct xe_gt *gt;
+        int id;
+
+        if (query->size == 0) {
+                query->size = size;
+                return 0;
+        } else if (XE_IOCTL_DBG(xe, query->size != size)) {
+                return -EINVAL;
+        }
+
+        for_each_gt(gt, xe, id) {
+                int err;
+
+                topo.gt_id = id;
+
+                topo.type = DRM_XE_TOPO_DSS_GEOMETRY;
+                err = copy_mask(&query_ptr, &topo, gt->fuse_topo.g_dss_mask,
+                                sizeof(gt->fuse_topo.g_dss_mask));
+                if (err)
+                        return err;
+
+                topo.type = DRM_XE_TOPO_DSS_COMPUTE;
+                err = copy_mask(&query_ptr, &topo, gt->fuse_topo.c_dss_mask,
+                                sizeof(gt->fuse_topo.c_dss_mask));
+                if (err)
+                        return err;
+
+                topo.type = DRM_XE_TOPO_EU_PER_DSS;
+                err = copy_mask(&query_ptr, &topo,
+                                gt->fuse_topo.eu_mask_per_dss,
+                                sizeof(gt->fuse_topo.eu_mask_per_dss));
+                if (err)
+                        return err;
+        }
+
+        return 0;
+}
+
+static int (* const xe_query_funcs[])(struct xe_device *xe,
+                                      struct drm_xe_device_query *query) = {
+        query_engines,
+        query_mem_regions,
+        query_config,
+        query_gt_list,
+        query_hwconfig,
+        query_gt_topology,
+        query_engine_cycles,
+};
+
+int xe_query_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+        struct xe_device *xe = to_xe_device(dev);
+        struct drm_xe_device_query *query = data;
+        u32 idx;
+
+        if (XE_IOCTL_DBG(xe, query->extensions) ||
+            XE_IOCTL_DBG(xe, query->reserved[0] || query->reserved[1]))
+                return -EINVAL;
+
+        if (XE_IOCTL_DBG(xe, query->query >= ARRAY_SIZE(xe_query_funcs)))
+                return -EINVAL;
+
+        idx = array_index_nospec(query->query, ARRAY_SIZE(xe_query_funcs));
+        if (XE_IOCTL_DBG(xe, !xe_query_funcs[idx]))
+                return -EINVAL;
+
+        return xe_query_funcs[idx](xe, query);
+}
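
For reference, every query handler above implements the same two-call contract: a first ioctl with size == 0 makes the kernel write the required buffer size back into the query struct, and a second call must then pass a buffer of exactly that size (anything else returns -EINVAL). The following is a minimal userspace sketch of that pattern, not part of this commit; it assumes the uAPI names from the matching <drm/xe_drm.h> (DRM_IOCTL_XE_DEVICE_QUERY, DRM_XE_DEVICE_QUERY_ENGINES, struct drm_xe_device_query, struct drm_xe_query_engines) and an already-open xe render-node file descriptor.

#include <stdint.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <drm/xe_drm.h>

/* Query the engine list using the size-probe / data-fetch pattern. */
static struct drm_xe_query_engines *xe_query_engines_example(int fd)
{
        struct drm_xe_device_query q = {
                .query = DRM_XE_DEVICE_QUERY_ENGINES,
                .size = 0,      /* first call: kernel fills in the required size */
        };
        struct drm_xe_query_engines *engines;

        if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q))
                return NULL;

        engines = calloc(1, q.size);
        if (!engines)
                return NULL;

        /* second call: size must match exactly, otherwise the kernel returns -EINVAL */
        q.data = (uint64_t)(uintptr_t)engines;
        if (ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &q)) {
                free(engines);
                return NULL;
        }

        return engines; /* caller frees; engines->num_engines entries follow the header */
}

The same pattern applies to the other DRM_XE_DEVICE_QUERY_* IDs dispatched through xe_query_funcs[].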