diff --git a/src/CMake/dkms-edge.cmake b/src/CMake/dkms-edge.cmake index 72dac2d37af..8a96603d38d 100644 --- a/src/CMake/dkms-edge.cmake +++ b/src/CMake/dkms-edge.cmake @@ -56,6 +56,7 @@ SET (XRT_DKMS_DRIVER_SRCS zocl/edge/zocl_edge_kds.c zocl/edge/zocl_error.c zocl/edge/zocl_mailbox.c + zocl/edge/zocl_hwctx.c zocl/include/zocl_aie.h zocl/include/zocl_bo.h @@ -76,6 +77,7 @@ SET (XRT_DKMS_DRIVER_SRCS zocl/include/zocl_xclbin.h zocl/include/zocl_xgq.h zocl/include/zocl_xgq_plat.h + zocl/include/zocl_hwctx.h zocl/zert/cu_scu.c zocl/zert/scu.c diff --git a/src/runtime_src/core/edge/drm/zocl/Makefile b/src/runtime_src/core/edge/drm/zocl/Makefile index 500a795191d..4503e165e99 100644 --- a/src/runtime_src/core/edge/drm/zocl/Makefile +++ b/src/runtime_src/core/edge/drm/zocl/Makefile @@ -63,7 +63,8 @@ zocl_edge-y := $(zocl_edge_dir)/zocl_dma.o \ $(zocl_edge_dir)/zocl_edge_xclbin.o \ $(zocl_edge_dir)/zocl_edge_kds.o \ $(zocl_edge_dir)/zocl_error.o \ - $(zocl_edge_dir)/zocl_aie.o + $(zocl_edge_dir)/zocl_aie.o \ + $(zocl_edge_dir)/zocl_hwctx.o zocl_zert_dir := $(make_dir)/zert zocl_zert-y := $(zocl_zert_dir)/zocl_ert.o \ diff --git a/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c b/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c index 1aefb7b0b7c..4d683099ea7 100644 --- a/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c +++ b/src/runtime_src/core/edge/drm/zocl/common/zocl_drv.c @@ -976,8 +976,18 @@ static const struct drm_ioctl_desc zocl_ioctls[] = { DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(ZOCL_EXECBUF, zocl_execbuf_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(ZOCL_HW_CTX_EXECBUF, zocl_hw_ctx_execbuf_ioctl, + DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(ZOCL_READ_AXLF, zocl_read_axlf_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(ZOCL_CREATE_HW_CTX, zocl_create_hw_ctx_ioctl, + DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(ZOCL_DESTROY_HW_CTX, zocl_destroy_hw_ctx_ioctl, + 
DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(ZOCL_OPEN_CU_CTX, zocl_open_cu_ctx_ioctl, + DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), + DRM_IOCTL_DEF_DRV(ZOCL_CLOSE_CU_CTX, zocl_close_cu_ctx_ioctl, + DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(ZOCL_SK_GETCMD, zocl_sk_getcmd_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW), DRM_IOCTL_DEF_DRV(ZOCL_SK_CREATE, zocl_sk_create_ioctl, diff --git a/src/runtime_src/core/edge/drm/zocl/common/zocl_ioctl.c b/src/runtime_src/core/edge/drm/zocl/common/zocl_ioctl.c index 96305dab7a5..b4559f652e4 100644 --- a/src/runtime_src/core/edge/drm/zocl/common/zocl_ioctl.c +++ b/src/runtime_src/core/edge/drm/zocl/common/zocl_ioctl.c @@ -17,6 +17,7 @@ #include "zocl_drv.h" #include "zocl_xclbin.h" #include "zocl_error.h" +#include "zocl_hwctx.h" /* * read_axlf and ctx should be protected by slot_xclbin_lock exclusively. @@ -27,8 +28,68 @@ zocl_read_axlf_ioctl(struct drm_device *ddev, void *data, struct drm_file *filp) struct drm_zocl_axlf *axlf_obj = data; struct drm_zocl_dev *zdev = ZOCL_GET_ZDEV(ddev); struct kds_client *client = filp->driver_priv; + int slot_id = -1; - return zocl_xclbin_read_axlf(zdev, axlf_obj, client); + return zocl_xclbin_read_axlf(zdev, axlf_obj, client, &slot_id); +} + +/* + * IOCTL to create hw context on a slot on device for a xclbin. 
+ */ +int zocl_create_hw_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct drm_zocl_dev *zdev = ZOCL_GET_ZDEV(dev); + struct drm_zocl_create_hw_ctx *drm_hw_ctx = data; + struct kds_client *client = filp->driver_priv; + struct drm_zocl_axlf axlf_obj = {}; + int slot_id = -1; + int ret = 0; + + if (copy_from_user(&axlf_obj, drm_hw_ctx->axlf_ptr, sizeof(struct drm_zocl_axlf))) { + DRM_WARN("copy_from_user failed for axlf_ptr"); + return -EFAULT; + } + + ret = zocl_xclbin_read_axlf(zdev, &axlf_obj, client, &slot_id); + if (ret) { + DRM_WARN("xclbin download FAILED."); + return ret; + } + + return zocl_create_hw_ctx(zdev, drm_hw_ctx, filp, slot_id); +} + +/* + * IOCTL to destroy hw context on a slot on device + */ +int zocl_destroy_hw_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct drm_zocl_dev *zdev = ZOCL_GET_ZDEV(dev); + struct drm_zocl_destroy_hw_ctx *drm_hw_ctx = (struct drm_zocl_destroy_hw_ctx *)data; + + return zocl_destroy_hw_ctx(zdev, drm_hw_ctx, filp); +} + +/* + * IOCTL to open a cu context under the given hw context + */ +int zocl_open_cu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct drm_zocl_dev *zdev = ZOCL_GET_ZDEV(dev); + struct drm_zocl_open_cu_ctx *drm_cu_ctx = (struct drm_zocl_open_cu_ctx *)data; + + return zocl_open_cu_ctx(zdev, drm_cu_ctx, filp); +} + +/* + * IOCTL to close a opened cu context under the given hw context + */ +int zocl_close_cu_ctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct drm_zocl_dev *zdev = ZOCL_GET_ZDEV(dev); + struct drm_zocl_close_cu_ctx *drm_cu_ctx = (struct drm_zocl_close_cu_ctx *)data; + + return zocl_close_cu_ctx(zdev, drm_cu_ctx, filp); } /* @@ -99,6 +160,15 @@ zocl_execbuf_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) return zocl_command_ioctl(zdev, data, filp); } +int +zocl_hw_ctx_execbuf_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) +{ + struct 
drm_zocl_dev *zdev = ZOCL_GET_ZDEV(dev); + struct drm_zocl_hw_ctx_execbuf *drm_hw_ctx_execbuf = (struct drm_zocl_hw_ctx_execbuf *)data; + + return zocl_hw_ctx_execbuf(zdev, drm_hw_ctx_execbuf, filp); +} + int zocl_error_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { diff --git a/src/runtime_src/core/edge/drm/zocl/edge/zocl_edge_xclbin.c b/src/runtime_src/core/edge/drm/zocl/edge/zocl_edge_xclbin.c index 44139150689..2691f2547e1 100644 --- a/src/runtime_src/core/edge/drm/zocl/edge/zocl_edge_xclbin.c +++ b/src/runtime_src/core/edge/drm/zocl/edge/zocl_edge_xclbin.c @@ -172,6 +172,7 @@ zocl_resolver(struct drm_zocl_dev *zdev, struct axlf *axlf, xuid_t *xclbin_id, // option "true" in xrt.ini under [Runtime] section DRM_WARN("%s Force xclbin download", __func__); } else { + *slot_id = s_id; DRM_INFO("Exists xclbin %pUb to slot %d", xclbin_id, s_id); mutex_unlock(&slot->slot_xclbin_lock); @@ -200,7 +201,7 @@ zocl_resolver(struct drm_zocl_dev *zdev, struct axlf *axlf, xuid_t *xclbin_id, */ int zocl_xclbin_read_axlf(struct drm_zocl_dev *zdev, struct drm_zocl_axlf *axlf_obj, - struct kds_client *client) + struct kds_client *client, int *slot_idx) { struct axlf axlf_head; struct axlf *axlf = NULL; @@ -257,6 +258,8 @@ zocl_xclbin_read_axlf(struct drm_zocl_dev *zdev, struct drm_zocl_axlf *axlf_obj, ret = zocl_resolver(zdev, axlf, &axlf_head.m_header.uuid, qos, &slot_id); if (ret) { if (ret == -EEXIST) { + DRM_INFO("xclbin already downloaded to slot=%d", slot_id); + *slot_idx = slot_id; vfree(axlf); return 0; } @@ -266,6 +269,7 @@ zocl_xclbin_read_axlf(struct drm_zocl_dev *zdev, struct drm_zocl_axlf *axlf_obj, goto out0; } + *slot_idx = slot_id; slot = zdev->pr_slot[slot_id]; mutex_lock(&slot->slot_xclbin_lock); /* diff --git a/src/runtime_src/core/edge/drm/zocl/edge/zocl_hwctx.c b/src/runtime_src/core/edge/drm/zocl/edge/zocl_hwctx.c new file mode 100644 index 00000000000..86da9bc8034 --- /dev/null +++ b/src/runtime_src/core/edge/drm/zocl/edge/zocl_hwctx.c @@ 
-0,0 +1,484 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + +#include "zocl_drv.h" +#include "kds_ert_table.h" +#include "zocl_util.h" +#include "zocl_hwctx.h" +#include + +int zocl_create_hw_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_create_hw_ctx *drm_hw_ctx, struct drm_file *filp, int slot_id) +{ + struct kds_client_hw_ctx *kds_hw_ctx = NULL; + struct drm_zocl_slot *slot = NULL; + struct kds_client *client = filp->driver_priv; + int ret = 0; + + if (!client) { + DRM_ERROR("%s: Invalid client", __func__); + return -EINVAL; + } + + if (slot_id < 0) { + DRM_ERROR("%s: Invalid slot id =%d", __func__, slot_id); + return -EINVAL; + } + slot = zdev->pr_slot[slot_id]; + + mutex_lock(&client->lock); + kds_hw_ctx = kds_alloc_hw_ctx(client, slot->slot_xclbin->zx_uuid, slot->slot_idx); + if (!kds_hw_ctx) { + DRM_ERROR("%s: Failed to allocate memory for new hw ctx", __func__); + ret = -EINVAL; + goto error_out; + } + + //lock the bitstream. 
Unloack the bitstream for destory hw ctx + ret = zocl_lock_bitstream(slot, slot->slot_xclbin->zx_uuid); + if (ret) { + DRM_ERROR("%s: Locking the bistream failed", __func__); + kds_free_hw_ctx(client, kds_hw_ctx); + ret = -EINVAL; + goto error_out; + } + drm_hw_ctx->hw_context = kds_hw_ctx->hw_ctx_idx; + +error_out: + mutex_unlock(&client->lock); + return ret; +} + +int zocl_destroy_hw_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_destroy_hw_ctx *drm_hw_ctx, struct drm_file *filp) +{ + struct kds_client_hw_ctx *kds_hw_ctx = NULL; + struct drm_zocl_slot *slot = NULL; + struct kds_client *client = filp->driver_priv; + int ret = 0; + + if (!client) { + DRM_ERROR("%s: Invalid client", __func__); + return -EINVAL; + } + + mutex_lock(&client->lock); + kds_hw_ctx = kds_get_hw_ctx_by_id(client, drm_hw_ctx->hw_context); + if (!kds_hw_ctx) { + DRM_ERROR("%s: No valid hw context is open", __func__); + mutex_unlock(&client->lock); + return -EINVAL; + } + + slot = zdev->pr_slot[kds_hw_ctx->slot_idx]; + ret = zocl_unlock_bitstream(slot, slot->slot_xclbin->zx_uuid); + if (ret) { + DRM_ERROR("%s: Unlocking the bistream failed", __func__); + mutex_unlock(&client->lock); + return -EINVAL; + } + ret = kds_free_hw_ctx(client, kds_hw_ctx); + mutex_unlock(&client->lock); + return ret; +} + +static int zocl_cu_ctx_to_info(struct drm_zocl_dev *zdev, struct drm_zocl_open_cu_ctx *drm_cu_ctx, + struct kds_client_hw_ctx *kds_hw_ctx, struct kds_client_cu_info *kds_cu_info) +{ + uint32_t slot_hndl = kds_hw_ctx->slot_idx; + struct kds_sched *kds = &zdev->kds; + char *kname_p = drm_cu_ctx->cu_name; + struct xrt_cu *xcu = NULL; + char iname[CU_NAME_MAX_LEN]; + char kname[CU_NAME_MAX_LEN]; + int i = 0; + + strcpy(kname, strsep(&kname_p, ":")); + strcpy(iname, strsep(&kname_p, ":")); + + /* Retrive the CU index from the given slot */ + for (i = 0; i < MAX_CUS; i++) { + xcu = kds->cu_mgmt.xcus[i]; + if (!xcu) + continue; + + if ((xcu->info.slot_idx == slot_hndl) && (!strcmp(xcu->info.kname, kname)) 
&& (!strcmp(xcu->info.iname, iname))) { + kds_cu_info->cu_domain = DOMAIN_PL; + kds_cu_info->cu_idx = i; + goto done; + } + } + + /* Retrive the SCU index from the given slot */ + for (i = 0; i < MAX_CUS; i++) { + xcu = kds->scu_mgmt.xcus[i]; + if (!xcu) + continue; + + if ((xcu->info.slot_idx == slot_hndl) && (!strcmp(xcu->info.kname, kname)) && (!strcmp(xcu->info.iname, iname))) { + kds_cu_info->cu_domain = DOMAIN_PS; + kds_cu_info->cu_idx = i; + goto done; + } + } + return -EINVAL; + +done: + kds_cu_info->ctx = (void *)kds_hw_ctx; + if (drm_cu_ctx->flags == ZOCL_CTX_EXCLUSIVE) + kds_cu_info->flags = ZOCL_CTX_EXCLUSIVE; + else + kds_cu_info->flags = ZOCL_CTX_SHARED; + return 0; +} + +static inline void +zocl_close_cu_ctx_to_info(struct drm_zocl_close_cu_ctx *drm_cu_ctx, struct kds_client_cu_info *kds_cu_info) +{ + kds_cu_info->cu_domain = get_domain(drm_cu_ctx->cu_index); + kds_cu_info->cu_idx = get_domain_idx(drm_cu_ctx->cu_index); +} + +int zocl_open_cu_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_open_cu_ctx *drm_cu_ctx, struct drm_file *filp) +{ + struct kds_client_hw_ctx *kds_hw_ctx = NULL; + struct kds_client_cu_ctx *kds_cu_ctx = NULL; + struct kds_client *client = filp->driver_priv; + struct kds_client_cu_info kds_cu_info = {}; + int ret = 0; + + if (!client) { + DRM_ERROR("%s: Invalid client", __func__); + return -EINVAL; + } + + mutex_lock(&client->lock); + + kds_hw_ctx = kds_get_hw_ctx_by_id(client, drm_cu_ctx->hw_context); + if (!kds_hw_ctx) { + DRM_ERROR("%s: No valid hw context is open", __func__); + ret = -EINVAL; + goto out; + } + + ret = zocl_cu_ctx_to_info(zdev, drm_cu_ctx, kds_hw_ctx, &kds_cu_info); + if (ret) { + DRM_ERROR("%s: No valid CU context found for this hw context", __func__); + goto out; + } + + kds_cu_ctx = kds_alloc_cu_hw_ctx(client, kds_hw_ctx, &kds_cu_info); + if (ret) { + DRM_ERROR("%s: Allocation of CU context failed", __func__); + ret = -EINVAL; + goto out; + } + + ret = kds_add_context(&zdev->kds, client, kds_cu_ctx); + if 
(ret) { + DRM_ERROR("%s: Failed to add kds context", __func__); + kds_free_cu_ctx(client, kds_cu_ctx); + goto out; + } + + drm_cu_ctx->cu_index = set_domain(kds_cu_ctx->cu_domain, kds_cu_ctx->cu_idx); + +out: + mutex_unlock(&client->lock); + return ret; +} + +int zocl_close_cu_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_close_cu_ctx *drm_cu_ctx, struct drm_file *filp) +{ + struct kds_client_hw_ctx *kds_hw_ctx = NULL; + struct kds_client_cu_ctx *kds_cu_ctx = NULL; + struct kds_client *client = filp->driver_priv; + struct kds_client_cu_info kds_cu_info = {}; + int ret = 0; + + if (!client) { + DRM_ERROR("%s: Invalid client", __func__); + return -EINVAL; + } + + mutex_lock(&client->lock); + + kds_hw_ctx = kds_get_hw_ctx_by_id(client, drm_cu_ctx->hw_context); + if (!kds_hw_ctx) { + DRM_ERROR("%s: No valid hw context is open", __func__); + ret = -EINVAL; + goto out; + } + + zocl_close_cu_ctx_to_info(drm_cu_ctx, &kds_cu_info); + + kds_cu_ctx = kds_get_cu_hw_ctx(client, kds_hw_ctx, &kds_cu_info); + if (!kds_cu_ctx) { + DRM_ERROR("%s: No cu context is open", __func__); + ret = -EINVAL; + goto out; + } + + ret = kds_del_context(&zdev->kds, client, kds_cu_ctx); + if (ret) + goto out; + + ret = kds_free_cu_ctx(client, kds_cu_ctx); + +out: + mutex_unlock(&client->lock); + return ret; +} + +/** + * Callback function for async dma operation. This will also clean the + * command memory. + * + * @arg: kds command pointer + * @ret: return value of the dma operation. + */ +static void zocl_hwctx_kds_dma_complete(void *arg, int ret) +{ + struct kds_command *xcmd = (struct kds_command *)arg; + zocl_dma_handle_t *dma_handle = (zocl_dma_handle_t *)xcmd->priv; + + xcmd->status = KDS_COMPLETED; + if (ret) + xcmd->status = KDS_ERROR; + xcmd->cb.notify_host(xcmd, xcmd->status); + xcmd->cb.free(xcmd); + + kfree(dma_handle); +} + +/** + * Copy the user space command to kds command. Also register the callback + * function for the DMA operation. 
+ * + * @zdev: zocl device structure + * @flip: DRM file private data + * @ecmd: ERT command structure + * @xcmd: KDS command structure + * + * @return 0 on success, Error code on failure. + */ +static int copybo_hwctx_ecmd2xcmd(struct drm_zocl_dev *zdev, struct drm_file *filp, + struct ert_start_copybo_cmd *ecmd, + struct kds_command *xcmd) +{ + struct drm_device *dev = zdev->ddev; + zocl_dma_handle_t *dma_handle; + struct drm_zocl_copy_bo args = { + .dst_handle = ecmd->dst_bo_hdl, + .src_handle = ecmd->src_bo_hdl, + .size = ert_copybo_size(ecmd), + .dst_offset = ert_copybo_dst_offset(ecmd), + .src_offset = ert_copybo_src_offset(ecmd), + }; + int ret = 0; + + dma_handle = kmalloc(sizeof(zocl_dma_handle_t), GFP_KERNEL); + if (!dma_handle) + return -ENOMEM; + + memset(dma_handle, 0, sizeof(zocl_dma_handle_t)); + + ret = zocl_dma_channel_instance(dma_handle, zdev); + if (ret) + return ret; + + /* We must set up callback for async dma operations. */ + dma_handle->dma_func = zocl_hwctx_kds_dma_complete; + dma_handle->dma_arg = xcmd; + xcmd->priv = dma_handle; + + return zocl_copy_bo_async(dev, filp, dma_handle, &args); +} + +static void notify_hwctx_execbuf(struct kds_command *xcmd, enum kds_status status) +{ + struct kds_client *client = xcmd->client; + struct ert_packet *ecmd = (struct ert_packet *)xcmd->execbuf; + + ecmd->state = kds_ert_table[status]; + + if (xcmd->timestamp_enabled) { + /* Only start kernel command supports timestamps */ + struct ert_start_kernel_cmd *scmd; + struct cu_cmd_state_timestamps *ts; + + scmd = (struct ert_start_kernel_cmd *)ecmd; + ts = ert_start_kernel_timestamps(scmd); + ts->skc_timestamps[ERT_CMD_STATE_NEW] = xcmd->timestamp[KDS_NEW]; + ts->skc_timestamps[ERT_CMD_STATE_QUEUED] = xcmd->timestamp[KDS_QUEUED]; + ts->skc_timestamps[ERT_CMD_STATE_RUNNING] = xcmd->timestamp[KDS_RUNNING]; + ts->skc_timestamps[ecmd->state] = xcmd->timestamp[status]; + } + + ZOCL_DRM_GEM_OBJECT_PUT_UNLOCKED(xcmd->gem_obj); + + if (xcmd->cu_idx >= 0) + 
client_stat_inc(client, xcmd->hw_ctx_id, c_cnt[xcmd->cu_idx]); + + atomic_inc(&client->event); + wake_up_interruptible(&client->waitq); +} + +static struct kds_client_cu_ctx * +zocl_get_hw_cu_ctx(struct kds_client_hw_ctx *kds_hw_ctx, int cu_idx) +{ + struct kds_client_cu_ctx *kds_cu_ctx = NULL; + bool found = false; + + list_for_each_entry(kds_cu_ctx, &kds_hw_ctx->cu_ctx_list, link) { + if (kds_cu_ctx->cu_idx == cu_idx) { + found = true; + break; + } + } + + if (found) + return kds_cu_ctx; + return NULL; +} + +static int +check_for_open_hw_cu_ctx(struct drm_zocl_dev *zdev, struct kds_client *client, struct kds_command *xcmd) +{ + struct kds_client_hw_ctx *kds_hw_ctx = NULL; + int first_cu_idx = -EINVAL; + u32 mask = 0; + int i, j; + int ret = 0; + + /* i for iterate masks, j for iterate bits */ + for (i = 0; i < xcmd->num_mask; ++i) { + if (xcmd->cu_mask[i] == 0) + continue; + + mask = xcmd->cu_mask[i]; + for (j = 0; mask > 0; ++j) { + if (!(mask & 0x1)) { + mask >>= 1; + continue; + } + + first_cu_idx = i * sizeof(u32) + j; + goto out; + } + } + +out: + if (first_cu_idx < 0) + return -EINVAL; + + mutex_lock(&client->lock); + kds_hw_ctx = kds_get_hw_ctx_by_id(client, xcmd->hw_ctx_id); + if (!kds_hw_ctx) { + mutex_unlock(&client->lock); + return -EINVAL; + } + + if (zocl_get_hw_cu_ctx(kds_hw_ctx, first_cu_idx) != NULL) + ret = 0; + else + ret = -EINVAL; + + mutex_unlock(&client->lock); + return ret; +} + +int zocl_hw_ctx_execbuf(struct drm_zocl_dev *zdev, struct drm_zocl_hw_ctx_execbuf *drm_hw_ctx_execbuf, struct drm_file *filp) +{ + struct drm_gem_object *gem_obj = NULL; + struct drm_device *dev = zdev->ddev; + struct kds_client *client = filp->driver_priv; + struct drm_zocl_bo *zocl_bo = NULL; + struct ert_packet *ecmd = NULL; + struct kds_command *xcmd = NULL; + int ret = 0; + + if (zdev->kds.bad_state) { + DRM_ERROR("%s: KDS is in bad state", __func__); + return -EDEADLK; + } + gem_obj = zocl_gem_object_lookup(dev, filp, drm_hw_ctx_execbuf->exec_bo_handle); + if 
(!gem_obj) { + DRM_ERROR("%s: Look up GEM BO %d failed", __func__, drm_hw_ctx_execbuf->exec_bo_handle); + ret = -EINVAL; + goto out; + } + + zocl_bo = to_zocl_bo(gem_obj); + if (!zocl_bo_execbuf(zocl_bo)) { + DRM_ERROR("%s: Command Buffer is not exec buf", __func__); + ret = -EINVAL; + goto out; + } + + ecmd = (struct ert_packet *)zocl_bo->cma_base.vaddr; + ecmd->state = ERT_CMD_STATE_NEW; + + xcmd = kds_alloc_command(client, ecmd->count * sizeof(u32)); + if (!xcmd) { + DRM_ERROR("%s: Failed to alloc xcmd", __func__); + ret = -ENOMEM; + goto out; + } + + xcmd->cb.free = kds_free_command; + xcmd->cb.notify_host = notify_hwctx_execbuf; + xcmd->execbuf = (u32 *)ecmd; + xcmd->gem_obj = gem_obj; + xcmd->exec_bo_handle = drm_hw_ctx_execbuf->exec_bo_handle; + xcmd->hw_ctx_id = drm_hw_ctx_execbuf->hw_ctx_id; + + switch (ecmd->opcode) { + case ERT_CONFIGURE: + xcmd->status = KDS_COMPLETED; + xcmd->cb.notify_host(xcmd, xcmd->status); + goto out1; + case ERT_START_CU: + start_krnl_ecmd2xcmd(to_start_krnl_pkg(ecmd), xcmd); + break; + case ERT_EXEC_WRITE: + DRM_WARN_ONCE("ERT_EXEC_WRITE is obsoleted, use ERT_START_KEY_VAL"); +#if KERNEL_VERSION(5, 4, 0) > LINUX_VERSION_CODE + __attribute__ ((fallthrough)); +#else + __attribute__ ((__fallthrough__)); +#endif + case ERT_START_KEY_VAL: + start_krnl_kv_ecmd2xcmd(to_start_krnl_pkg(ecmd), xcmd); + break; + case ERT_START_FA: + start_fa_ecmd2xcmd(to_start_krnl_pkg(ecmd), xcmd); + break; + case ERT_START_COPYBO: + ret = copybo_hwctx_ecmd2xcmd(zdev, filp, to_copybo_pkg(ecmd), xcmd); + if (ret) + goto out1; + goto out; + case ERT_ABORT: + abort_ecmd2xcmd(to_abort_pkg(ecmd), xcmd); + break; + default: + DRM_ERROR("%s: Unsupport command", __func__); + ret = -EINVAL; + goto out1; + } + + if (check_for_open_hw_cu_ctx(zdev, client, xcmd) < 0) { + DRM_ERROR("The client has no opening context\n"); + ret = -EINVAL; + goto out; + } + + ret = kds_add_command(&zdev->kds, xcmd); + goto out; + +out1: + xcmd->cb.free(xcmd); +out: + if (ret < 0) + 
ZOCL_DRM_GEM_OBJECT_PUT_UNLOCKED(gem_obj); + return ret; +} diff --git a/src/runtime_src/core/edge/drm/zocl/include/zocl_hwctx.h b/src/runtime_src/core/edge/drm/zocl/include/zocl_hwctx.h new file mode 100644 index 00000000000..426ebfb86da --- /dev/null +++ b/src/runtime_src/core/edge/drm/zocl/include/zocl_hwctx.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. + + +#ifndef _ZOCL_HWCTX_H_ +#define _ZOCL_HWCTX_H_ + +#include "zocl_xclbin.h" + + +int zocl_create_hw_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_create_hw_ctx *drm_hw_ctx, struct drm_file *filp, int slot_id); + +int zocl_destroy_hw_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_destroy_hw_ctx *drm_hw_ctx, struct drm_file *filp); + +int zocl_open_cu_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_open_cu_ctx *drm_cu_ctx, struct drm_file *filp); + +int zocl_close_cu_ctx(struct drm_zocl_dev *zdev, struct drm_zocl_close_cu_ctx *drm_cu_ctx, struct drm_file *filp); + +int zocl_hw_ctx_execbuf(struct drm_zocl_dev *zdev, struct drm_zocl_hw_ctx_execbuf *drm_execbuf, struct drm_file *filp); + +#endif diff --git a/src/runtime_src/core/edge/drm/zocl/include/zocl_ioctl.h b/src/runtime_src/core/edge/drm/zocl/include/zocl_ioctl.h index 0200eb8657e..4fd172c1c04 100644 --- a/src/runtime_src/core/edge/drm/zocl/include/zocl_ioctl.h +++ b/src/runtime_src/core/edge/drm/zocl/include/zocl_ioctl.h @@ -35,8 +35,18 @@ int zocl_pread_bo_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int zocl_execbuf_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int zocl_hw_ctx_execbuf_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int zocl_read_axlf_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); +int zocl_create_hw_ctx_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int zocl_destroy_hw_ctx_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int 
zocl_open_cu_ctx_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int zocl_close_cu_ctx_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); int zocl_sk_getcmd_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int zocl_sk_create_ioctl(struct drm_device *dev, void *data, diff --git a/src/runtime_src/core/edge/drm/zocl/include/zocl_xclbin.h b/src/runtime_src/core/edge/drm/zocl/include/zocl_xclbin.h index 4f1897099b5..f88f4bd7cb9 100644 --- a/src/runtime_src/core/edge/drm/zocl/include/zocl_xclbin.h +++ b/src/runtime_src/core/edge/drm/zocl/include/zocl_xclbin.h @@ -36,7 +36,7 @@ struct drm_zocl_slot *zocl_get_slot(struct drm_zocl_dev *zdev, int zocl_xclbin_refcount(struct drm_zocl_slot *slot); int zocl_xclbin_read_axlf(struct drm_zocl_dev *zdev, - struct drm_zocl_axlf *axlf_obj, struct kds_client *client); + struct drm_zocl_axlf *axlf_obj, struct kds_client *client, int *slot_idx); int zocl_xclbin_load_pdi(struct drm_zocl_dev *zdev, void *data, struct drm_zocl_slot *slot); int zocl_xclbin_load_pskernel(struct drm_zocl_dev *zdev, void *data, uint32_t slot_id); diff --git a/src/runtime_src/core/edge/include/zynq_ioctl.h b/src/runtime_src/core/edge/include/zynq_ioctl.h index dd95e333f68..b77f1db5ad0 100644 --- a/src/runtime_src/core/edge/include/zynq_ioctl.h +++ b/src/runtime_src/core/edge/include/zynq_ioctl.h @@ -59,6 +59,14 @@ * (experimental) * 15 Get Information about Compute Unit DRM_IOCTL_ZOCL_INFO_CU drm_zocl_info_cu * (experimental) + * 16 Create a hw context on a slot for DRM_IOCTL_ZOCL_CREATE_HW_CTX drm_zocl_create_hw_ctx + * a xclbin on the device + * 17 Destroy a hw context on a slot on DRM_IOCTL_ZOCL_DESTROY_HW_CTX drm_zocl_destroy_hw_ctx + * a device + * 18 Open cu context DRM_IOCTL_ZOCL_OPEN_CU_CTX drm_zocl_open_cu_ctx + * 19 Close cu context DRM_IOCTL_ZOCL_CLOSE_CU_CTX drm_zocl_close_cu_ctx + * 20 Send an execute job to a CU with hw DRM_IOCTL_ZOCL_HW_CTX_EXECBUF drm_zocl_hw_ctx_execbuf + * context * * ==== 
====================================== ============================== ================================== */ @@ -66,6 +74,8 @@ #ifndef __ZYNQ_IOCTL_H__ #define __ZYNQ_IOCTL_H__ +#define CU_NAME_MAX_LEN 64 + #ifndef __KERNEL__ #include #include @@ -98,8 +108,18 @@ enum drm_zocl_ops { DRM_ZOCL_PCAP_DOWNLOAD, /* Send an execute job to a compute unit */ DRM_ZOCL_EXECBUF, + /* Send an execute job to a CU with hw ctx */ + DRM_ZOCL_HW_CTX_EXECBUF, /* Read the xclbin and map CUs */ DRM_ZOCL_READ_AXLF, + /* Create a hw context for a xlbin on the device */ + DRM_ZOCL_CREATE_HW_CTX, + /* Destroy a hw context */ + DRM_ZOCL_DESTROY_HW_CTX, + /* Open CU context */ + DRM_ZOCL_OPEN_CU_CTX, + /* Close CU context */ + DRM_ZOCL_CLOSE_CU_CTX, /* Get the soft kernel command */ DRM_ZOCL_SK_GETCMD, /* Create the soft kernel */ @@ -374,6 +394,18 @@ struct drm_zocl_execbuf { uint32_t exec_bo_handle; }; +/** + * struct drm_zocl_hw_ctx_execbuf - Submit a command buffer for execution on a CU + * used with DRM_IOCTL_ZOCL_HW_CTX_EXECBUF ioctl + * + * @hw_ctx_id: pass the hw context id + * @exec_bo_handle: BO handle of command buffer formatted as ERT command + */ +struct drm_zocl_hw_ctx_execbuf { + uint32_t hw_ctx_id; + uint32_t exec_bo_handle; +}; + /* * enum drm_zocl_platform_flags - can be used for axlf bitstream */ @@ -453,6 +485,58 @@ struct drm_zocl_axlf { uint32_t partition_id; }; +/** + * struct drm_zocl_create_hw_ctx - Create a hw context on a slot on device + * used with DRM_IOCTL_ZOCL_CREATE_HW_CTX ioctl + * + * @axlf_ptr: axlf pointer which need to be downloaded + * @qos: QOS information + * @hw_context: Returns context handle + */ +struct drm_zocl_create_hw_ctx { + struct drm_zocl_axlf *axlf_ptr; + uint32_t qos; + uint32_t hw_context; +}; + +/** + * struct drm_zocl_destroy_hw_ctx - Destroy a hw context on a slot on device + * used with DRM_IOCTL_ZOCL_DESTROY_HW_CTX ioctl + * + * @hw_context: Context handle that needs to be closed + */ +struct drm_zocl_destroy_hw_ctx { + uint32_t 
/**
 * struct drm_zocl_open_cu_ctx - Opens a cu context under a hw context on the device
 * used with DRM_IOCTL_ZOCL_OPEN_CU_CTX
 *
 * @hw_context: Open a cu context under this hw context handle
 * @cu_name: Name of the cu on the device image for which the open context is
 *           being made, at most CU_NAME_MAX_LEN bytes. The driver splits it at
 *           ':' into a kernel name and an instance name
 *           ("<kernel>:<instance>").
 * @flags: Shared or Exclusive context (ZOCL_CTX_SHARED/ZOCL_CTX_EXCLUSIVE)
 * @cu_index: Returns the acquired cu index. This will be required for closing
 */
struct drm_zocl_open_cu_ctx {
	uint32_t hw_context;
	char cu_name[CU_NAME_MAX_LEN];
	uint32_t flags;
	uint32_t cu_index;
};
  // Releases the hw context through the owning shim when the object goes away.
  // Failures derived from std::exception are reported through
  // xrt_core::send_exception_message() instead of being propagated, so they
  // never leave the destructor.
  hwctx_object::~hwctx_object()
  {
    try {
      m_shim->destroy_hw_context(m_slotidx);
    }
    catch (const std::exception& ex) {
      xrt_core::send_exception_message(ex.what());
    }
  }
~hwctx_object(); + void update_access_mode(access_mode mode) override { diff --git a/src/runtime_src/core/edge/user/shim.cpp b/src/runtime_src/core/edge/user/shim.cpp index 94ccd8c7cb6..9d6195a963b 100644 --- a/src/runtime_src/core/edge/user/shim.cpp +++ b/src/runtime_src/core/edge/user/shim.cpp @@ -119,7 +119,7 @@ shim(unsigned index) , mCuMaps(128, {nullptr, 0}) { xclLog(XRT_INFO, "%s", __func__); - + hw_context_enable = xrt_core::config::get_hw_context_flag(); const std::string zocl_drm_device = "/dev/dri/" + get_render_devname(); mKernelFD = open(zocl_drm_device.c_str(), O_RDWR); // Validity of mKernelFD is checked using handleCheck in every shim function @@ -1124,34 +1124,286 @@ xrt_core::cuidx_type shim:: open_cu_context(const xrt_core::hwctx_handle* hwctx_hdl, const std::string& cuname) { - // Edge does not yet support multiple xclbins. Call - // regular flow. Default access mode to shared unless explicitly - // exclusive. auto hwctx = static_cast(hwctx_hdl); auto shared = (hwctx->get_mode() != xrt::hw_context::access_mode::exclusive); - auto cuidx = mCoreDevice->get_cuidx(hwctx->get_slotidx(), cuname); - xclOpenContext(hwctx->get_xclbin_uuid().get(), cuidx.index, shared); - return cuidx; + if (!hw_context_enable) { + // for legacy flow + auto cuidx = mCoreDevice->get_cuidx(0, cuname); + xclOpenContext(hwctx->get_xclbin_uuid().get(), cuidx.index, shared); + return cuidx; + } + else { + // This is for multi slot case + unsigned int flags = shared ? 
ZOCL_CTX_SHARED : ZOCL_CTX_EXCLUSIVE; + drm_zocl_open_cu_ctx cu_ctx = {}; + cu_ctx.flags = flags; + cu_ctx.hw_context = hwctx_hdl->get_slotidx(); + std:strncpy(cu_ctx.cu_name, cuname.c_str(), sizeof(cu_ctx.cu_name)); + cu_ctx.cu_name[sizeof(cu_ctx.cu_name) - 1] = 0; + if (ioctl(mKernelFD, DRM_IOCTL_ZOCL_OPEN_CU_CTX, &cu_ctx)) + throw xrt_core::error("Failed to open cu context"); + + return xrt_core::cuidx_type{cu_ctx.cu_index}; + } } void shim:: close_cu_context(const xrt_core::hwctx_handle* hwctx_hdl, xrt_core::cuidx_type cuidx) { - // To-be-implemented auto hwctx = static_cast(hwctx_hdl); - if (xclCloseContext(hwctx->get_xclbin_uuid().get(), cuidx.index)) - throw xrt_core::system_error(errno, "failed to close cu context (" + std::to_string(cuidx.index) + ")"); + + if (!hw_context_enable) { + // for legacy flow + if (xclCloseContext(hwctx->get_xclbin_uuid().get(), cuidx.index)) + throw xrt_core::system_error(errno, "failed to close cu context (" + std::to_string(cuidx.index) + ")"); + } + else { + // This is for multi slot case + drm_zocl_close_cu_ctx cu_ctx = {}; + cu_ctx.hw_context = hwctx_hdl->get_slotidx(); + cu_ctx.cu_index = cuidx.index; + if (ioctl(mKernelFD, DRM_IOCTL_ZOCL_CLOSE_CU_CTX, &cu_ctx)) + throw xrt_core::system_error(errno, "failed to close cu context (" + std::to_string(cuidx.index) + ")"); + } +} + +int shim::prepare_hw_axlf(const axlf *buffer, struct drm_zocl_axlf *axlf_obj) +{ + int ret = 0; + unsigned int flags = DRM_ZOCL_PLATFORM_BASE; + int off = 0; + std::string dtbo_path(""); + +#ifndef __HWEM__ + auto is_pr_platform = (buffer->m_header.m_mode == XCLBIN_PR ) ? 
true : false; + auto is_flat_enabled = xrt_core::config::get_enable_flat(); //default value is false + auto force_program = xrt_core::config::get_force_program_xclbin(); //default value is false + auto overlay_header = xclbin::get_axlf_section(buffer, axlf_section_kind::OVERLAY); + + if (is_pr_platform) + flags = DRM_ZOCL_PLATFORM_PR; + /* + * If its non-PR-platform and enable_flat=true in xrt.ini, download the full + * bitstream. But if OVERLAY section is present in xclbin, userspace apis are + * used to download full bitstream + */ + else if (is_flat_enabled && !overlay_header) { + if (!ZYNQ::shim::handleCheck(this)) { + xclLog(XRT_ERROR, "%s: No DRM render device found", __func__); + return -ENODEV; + } + flags = DRM_ZOCL_PLATFORM_FLAT; + } + + if (force_program) { + flags = flags | DRM_ZOCL_FORCE_PROGRAM; + } + +#if defined(XRT_ENABLE_LIBDFX) + // if OVERLAY section is present use libdfx apis to load bitstream and dtbo(overlay) + if(overlay_header) { + try { + // if xclbin is already loaded ret val is '1', dont call ioctl in this case + if (libdfx::libdfxLoadAxlf(this->mCoreDevice, buffer, overlay_header, mKernelFD, flags, dtbo_path)) + return 0; + } + catch(const std::exception& e){ + xclLog(XRT_ERROR, "%s: loading xclbin with OVERLAY section failed: %s", __func__,e.what()); + return -EPERM; + } + } +#endif //XRT_ENABLE_LIBDFX + +#endif //__HWEM__ + +/* Get the AIE_METADATA and get the hw_gen information */ + uint8_t hw_gen = xrt_core::edge::aie::get_hw_gen(mCoreDevice.get()); + uint32_t partition_id = xrt_core::edge::aie::get_partition_id(mCoreDevice.get()); + + axlf_obj->za_xclbin_ptr = const_cast(buffer), + axlf_obj->za_flags = flags, + axlf_obj->za_ksize = 0, + axlf_obj->za_kernels = NULL, + axlf_obj->za_slot_id = 0, // TODO Cleanup: Once uuid interface id available we need to remove this + axlf_obj->za_dtbo_path = const_cast(dtbo_path.c_str()), + axlf_obj->za_dtbo_path_len = static_cast(dtbo_path.length()), + axlf_obj->hw_gen = hw_gen, + 
axlf_obj->partition_id = partition_id, + axlf_obj->kds_cfg.polling = xrt_core::config::get_ert_polling(); + + std::vector krnl_binary; + if (!xrt_core::xclbin::is_pdi_only(buffer)) { + auto kernels = xrt_core::xclbin::get_kernels(buffer); + /* Calculate size of kernels */ + for (auto& kernel : kernels) { + axlf_obj->za_ksize += sizeof(kernel_info) + sizeof(argument_info) * kernel.args.size(); + } + + /* Check PCIe's shim.cpp for details of kernels binary */ + krnl_binary.resize(axlf_obj->za_ksize); + axlf_obj->za_kernels = krnl_binary.data(); + for (auto& kernel : kernels) { + auto krnl = reinterpret_cast(axlf_obj->za_kernels + off); + if (kernel.name.size() > sizeof(krnl->name)) + return -EINVAL; + std::strncpy(krnl->name, kernel.name.c_str(), sizeof(krnl->name)-1); + krnl->name[sizeof(krnl->name)-1] = '\0'; + krnl->range = kernel.range; + krnl->anums = kernel.args.size(); + + krnl->features = 0; + if (kernel.sw_reset) + krnl->features |= KRNL_SW_RESET; + + int ai = 0; + for (auto& arg : kernel.args) { + if (arg.name.size() > sizeof(krnl->args[ai].name)) { + xclLog(XRT_ERROR, "%s: Argument name length %d>%d", __func__, arg.name.size(), sizeof(krnl->args[ai].name)); + return -EINVAL; + } + std::strncpy(krnl->args[ai].name, arg.name.c_str(), sizeof(krnl->args[ai].name)-1); + krnl->args[ai].name[sizeof(krnl->args[ai].name)-1] = '\0'; + krnl->args[ai].offset = arg.offset; + krnl->args[ai].size = arg.size; + // XCLBIN doesn't define argument direction yet and it only support + // input arguments. + // Driver use 1 for input argument and 2 for output. + // Let's refine this line later. 
+ krnl->args[ai].dir = 1; + ai++; + } + off += sizeof(kernel_info) + sizeof(argument_info) * kernel.args.size(); + } + } + +#ifdef __HWEM__ + if (!secondXclbinLoadCheck(this->mCoreDevice, buffer)) { + return 0; // skipping to load the 2nd xclbin for hw_emu embedded designs + } +#endif //__HWEM__ + + return 0; +} + +int shim::load_hw_axlf(xclDeviceHandle handle, const xclBin *buffer, drm_zocl_create_hw_ctx *hw_ctx) +{ + drm_zocl_axlf axlf_obj = {}; + auto top = reinterpret_cast(buffer); + auto ret = prepare_hw_axlf(top, &axlf_obj); + if (ret) + return -errno; + + hw_ctx->axlf_ptr = &axlf_obj; + ret = ioctl(mKernelFD, DRM_IOCTL_ZOCL_CREATE_HW_CTX, hw_ctx); + if (ret) + return -errno; + + auto core_device = xrt_core::get_userpf_device(handle); + core_device->register_axlf(buffer); + + bool checkDrmFD = xrt_core::config::get_enable_flat() ? false : true; + ZYNQ::shim *drv = ZYNQ::shim::handleCheck(handle, checkDrmFD); + + #ifdef XRT_ENABLE_AIE + auto data = core_device->get_axlf_section(AIE_METADATA); + if(data.first && data.second) + drv->registerAieArray(); + #endif + + #ifndef __HWEM__ + xdp::hal::update_device(handle); + xdp::aie::update_device(handle); + #endif + xdp::aie::ctr::update_device(handle); + xdp::aie::sts::update_device(handle); + #ifndef __HWEM__ + START_DEVICE_PROFILING_CB(handle); + #else + xdp::hal::hw_emu::update_device(handle); + #endif + + return 0; } std::unique_ptr shim:: -create_hw_context(const xrt::uuid& xclbin_uuid, +create_hw_context(xclDeviceHandle handle, + const xrt::uuid& xclbin_uuid, const xrt::hw_context::cfg_param_type&, xrt::hw_context::access_mode mode) { - return std::make_unique(this, 0, xclbin_uuid, mode); + const static int qos_val = 0; + if (!hw_context_enable) { + //for legacy flow + return std::make_unique(this, 0, xclbin_uuid, mode); + } + else { + // This is for multi slot case + auto xclbin = mCoreDevice->get_xclbin(xclbin_uuid); + auto buffer = reinterpret_cast(xclbin.get_axlf()); + int rcode = 0; + 
drm_zocl_create_hw_ctx hw_ctx = {}; + hw_ctx.qos = qos_val; + auto shim = get_shim_object(handle); + + if(auto ret = shim->load_hw_axlf(handle, buffer, &hw_ctx)) { + if (ret) { + if (ret == -EOPNOTSUPP) { + xclLog(XRT_ERROR, "XCLBIN does not match shell on the card."); + } + xclLog(XRT_ERROR, "See dmesg log for details. Err = %d", ret); + throw xrt_core::error("Failed to create hardware context"); + } + } + //success + mCoreDevice->register_axlf(buffer); + return std::make_unique(this, hw_ctx.hw_context, xclbin_uuid, mode); + } +} + +void +shim:: +destroy_hw_context(xrt_core::hwctx_handle::slot_id slot) +{ + if (!hw_context_enable) { + //for legacy flow, nothing to be done. + return; + } + else { + // This is for multi slot case + drm_zocl_destroy_hw_ctx hw_ctx = {}; + hw_ctx.hw_context = slot; + + auto ret = ioctl(mKernelFD, DRM_IOCTL_ZOCL_DESTROY_HW_CTX, &hw_ctx); + if (ret) + throw xrt_core::system_error(errno, "Failed to destroy hardware context"); + } +} + +void +shim:: +register_xclbin(const xrt::xclbin&){ + xclLog(XRT_INFO, "%s: xclbin successfully registered for this device without loading the xclbin", __func__); +} + +void +shim:: +hwctx_exec_buf(const xrt_core::hwctx_handle* hwctx_hdl, xclBufferHandle boh) { + auto hwctx = static_cast(hwctx_hdl); + if (!hw_context_enable) { + //for legacy flow + xclExecBuf(boh); + } + else { + // This is for multi slot case + drm_zocl_hw_ctx_execbuf exec = {hwctx->get_slotidx(), boh}; + int result = ioctl(mKernelFD, DRM_IOCTL_ZOCL_HW_CTX_EXECBUF, &exec); + xclLog(XRT_DEBUG, "%s: cmdBO handle %d, ioctl return %d", __func__, boh, result); + if (result == -EDEADLK) + xclLog(XRT_ERROR, "CU might hang, please reset device"); + } } int @@ -1873,7 +2125,14 @@ create_hw_context(xclDeviceHandle handle, xrt::hw_context::access_mode mode) { auto shim = get_shim_object(handle); - return shim->create_hw_context(xclbin_uuid, cfg_param, mode); + return shim->create_hw_context(handle, xclbin_uuid, cfg_param, mode); +} + +void 
+register_xclbin(xclDeviceHandle handle, const xrt::xclbin& xclbin) +{ + auto shim = get_shim_object(handle); + shim->register_xclbin(xclbin); } std::unique_ptr diff --git a/src/runtime_src/core/edge/user/shim.h b/src/runtime_src/core/edge/user/shim.h index d0dafebb0fc..95cf7bcd9c5 100644 --- a/src/runtime_src/core/edge/user/shim.h +++ b/src/runtime_src/core/edge/user/shim.h @@ -39,6 +39,9 @@ class shim { static const int BUFFER_ALIGNMENT = 0x80; // TODO: UKP public: + void + register_xclbin(const xrt::xclbin&); + // Shim handle for shared objects, like buffer and sync objects class shared_object : public xrt_core::shared_handle { @@ -261,7 +264,13 @@ class shim { close_cu_context(const xrt_core::hwctx_handle* hwctx_hdl, xrt_core::cuidx_type cuidx); std::unique_ptr - create_hw_context(const xrt::uuid&, const xrt::hw_context::cfg_param_type&, xrt::hw_context::access_mode); + create_hw_context(xclDeviceHandle handle, const xrt::uuid&, const xrt::hw_context::cfg_param_type&, xrt::hw_context::access_mode); + + void + destroy_hw_context(xrt_core::hwctx_handle::slot_id slotidx); + + void + hwctx_exec_buf(const xrt_core::hwctx_handle* hwctx_hdl, xclBufferHandle boh); //////////////////////////////////////////////////////////////// int xclOpenContext(const uuid_t xclbinId, unsigned int ipIndex, bool shared); @@ -292,6 +301,8 @@ class shim { // Bitstream/bin download int xclLoadXclBin(const xclBin *buffer); int xclLoadAxlf(const axlf *buffer); + int prepare_hw_axlf(const axlf *buffer, struct drm_zocl_axlf *axlf_obj); + int load_hw_axlf(xclDeviceHandle handle, const xclBin *buffer, drm_zocl_create_hw_ctx *hw_ctx); int xclSyncBO(unsigned int boHandle, xclBOSyncDirection dir, size_t size, size_t offset); @@ -350,6 +361,7 @@ class shim { std::unique_ptr mCmdBOCache; zynq_device *mDev = nullptr; size_t mKernelClockFreq; + bool hw_context_enable = true; /* * Mapped CU register space for xclRegRead/Write(). We support at most