From 9a53f2645570048e320358df807b1be0966081f6 Mon Sep 17 00:00:00 2001 From: Piotr Balcer Date: Wed, 29 Jan 2025 10:40:50 +0100 Subject: [PATCH] [v2] add initial Command Buffers support (#2629) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [v2] add initial Command Buffers support --------- Co-authored-by: Mikołaj Komar Co-authored-by: Łukasz Ślusarczyk --- include/ur_api.h | 6 +- scripts/core/exp-command-buffer.yml | 6 +- scripts/templates/queue_api.cpp.mako | 2 + scripts/templates/queue_api.hpp.mako | 7 + source/adapters/cuda/command_buffer.cpp | 2 +- source/adapters/cuda/command_buffer.hpp | 2 +- source/adapters/hip/command_buffer.cpp | 2 +- source/adapters/hip/command_buffer.hpp | 2 +- source/adapters/level_zero/CMakeLists.txt | 4 + source/adapters/level_zero/command_buffer.cpp | 42 +-- source/adapters/level_zero/command_buffer.hpp | 8 +- .../level_zero/helpers/kernel_helpers.hpp | 2 +- source/adapters/level_zero/v2/api.cpp | 58 ---- .../adapters/level_zero/v2/command_buffer.cpp | 179 ++++++++++ .../adapters/level_zero/v2/command_buffer.hpp | 51 +++ .../level_zero/v2/command_list_manager.cpp | 115 +++++++ .../level_zero/v2/command_list_manager.hpp | 52 +++ source/adapters/level_zero/v2/queue_api.cpp | 3 + source/adapters/level_zero/v2/queue_api.hpp | 8 + .../v2/queue_immediate_in_order.cpp | 310 ++++++++---------- .../v2/queue_immediate_in_order.hpp | 31 +- source/adapters/opencl/common.hpp | 2 +- source/loader/ur_libapi.cpp | 6 +- source/ur_api.cpp | 6 +- .../update/invalid_update.cpp | 2 +- .../update/local_memory_update.cpp | 2 +- 26 files changed, 614 insertions(+), 296 deletions(-) create mode 100644 source/adapters/level_zero/v2/command_buffer.cpp create mode 100644 source/adapters/level_zero/v2/command_buffer.hpp create mode 100644 source/adapters/level_zero/v2/command_list_manager.cpp create mode 100644 source/adapters/level_zero/v2/command_list_manager.hpp diff --git a/include/ur_api.h b/include/ur_api.h 
index 31d384dfc1..81d7a92c01 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -11101,7 +11101,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// is not nullptr. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP @@ -11154,7 +11154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( /// supported by the device associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. /// + If no `phEvent` parameter was set on creation of the command @@ -11185,7 +11185,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( /// supported by the device associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. 
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 6abcb48695..bec96d99a5 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -1211,7 +1211,7 @@ returns: - "If $X_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET is not supported by the device but `pUpdateKernelLaunch->pNewGlobalWorkOffset` is not nullptr." - "If $X_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE is not supported by the device but `pUpdateKernelLaunch->hNewKernel` is not nullptr." - $X_RESULT_ERROR_INVALID_OPERATION: - - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command-buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP: - "If `hCommand` is not a kernel execution command." @@ -1244,7 +1244,7 @@ returns: - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: - "If $X_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_EVENTS is not supported by the device associated with `hCommand`." - $X_RESULT_ERROR_INVALID_OPERATION: - - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command-buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." - "If no `phEvent` parameter was set on creation of the command associated with `hCommand`." 
- $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP @@ -1270,7 +1270,7 @@ returns: - $X_RESULT_ERROR_UNSUPPORTED_FEATURE: - "If $X_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_EVENTS is not supported by the device associated with `hCommand`." - $X_RESULT_ERROR_INVALID_OPERATION: - - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." + - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command-buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP - $X_RESULT_ERROR_INVALID_EVENT_WAIT_LIST: diff --git a/scripts/templates/queue_api.cpp.mako b/scripts/templates/queue_api.cpp.mako index 89f857e007..14def952ac 100644 --- a/scripts/templates/queue_api.cpp.mako +++ b/scripts/templates/queue_api.cpp.mako @@ -20,6 +20,8 @@ from templates import helper as th * */ +// Do not edit. This file is auto generated from a template: scripts/templates/queue_api.cpp.mako + #include "queue_api.hpp" #include "ur_util.hpp" diff --git a/scripts/templates/queue_api.hpp.mako b/scripts/templates/queue_api.hpp.mako index 352abbeb43..b39226e798 100644 --- a/scripts/templates/queue_api.hpp.mako +++ b/scripts/templates/queue_api.hpp.mako @@ -20,9 +20,12 @@ from templates import helper as th * */ +// Do not edit. 
This file is auto generated from a template: scripts/templates/queue_api.hpp.mako + #pragma once #include +#include struct ur_queue_handle_t_ { virtual ~ur_queue_handle_t_(); @@ -32,4 +35,8 @@ struct ur_queue_handle_t_ { %for obj in th.get_queue_related_functions(specs, n, tags): virtual ${x}_result_t ${th.transform_queue_related_function_name(n, tags, obj, format=["type"])} = 0; %endfor + + virtual ur_result_t + enqueueCommandBuffer(ze_command_list_handle_t, ur_event_handle_t *, + uint32_t, const ur_event_handle_t *) = 0; }; diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 37018dde6c..545ef8f6e3 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -1313,7 +1313,7 @@ updateKernelArguments(kernel_command_handle *Command, } /** - * Updates the command buffer command with new values from the update + * Updates the command-buffer command with new values from the update * description. * @param[in] Command The command to be updated. * @param[in] UpdateCommandDesc The update command description. 
diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 3658f8c47d..9b40088f75 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -362,7 +362,7 @@ struct ur_exp_command_buffer_handle_t_ { // UR context associated with this command-buffer ur_context_handle_t Context; - // Device associated with this command buffer + // Device associated with this command-buffer ur_device_handle_t Device; // Whether commands in the command-buffer can be updated bool IsUpdatable; diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 887eb75287..8d22dc04a0 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -1024,7 +1024,7 @@ updateKernelArguments(ur_exp_command_buffer_command_handle_t Command, } /** - * Updates the command buffer command with new values from the update + * Updates the command-buffer command with new values from the update * description. * @param[in] Command The command to be updated. * @param[in] UpdateCommandDesc The update command description. 
diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index a236a32c24..b59cb30c97 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -168,7 +168,7 @@ struct ur_exp_command_buffer_handle_t_ { // UR context associated with this command-buffer ur_context_handle_t Context; - // Device associated with this command buffer + // Device associated with this command-buffer ur_device_handle_t Device; // Whether commands in the command-buffer can be updated bool IsUpdatable; diff --git a/source/adapters/level_zero/CMakeLists.txt b/source/adapters/level_zero/CMakeLists.txt index 8a5508c1b3..5bb5cf67fb 100644 --- a/source/adapters/level_zero/CMakeLists.txt +++ b/source/adapters/level_zero/CMakeLists.txt @@ -143,7 +143,9 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/virtual_mem.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../../ur/ur.cpp # v2-only sources + ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_manager.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.hpp @@ -157,7 +159,9 @@ if(UR_BUILD_ADAPTER_L0_V2) ${CMAKE_CURRENT_SOURCE_DIR}/v2/queue_immediate_in_order.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/usm.hpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/api.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_cache.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/v2/command_list_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/context.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool_cache.cpp ${CMAKE_CURRENT_SOURCE_DIR}/v2/event_pool.cpp diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 902da42d2c..22c31ad8e0 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -173,7 +173,7 
@@ ur_result_t getEventsFromSyncPoints( /** * If needed, creates a sync point for a given command and returns the L0 * events associated with the sync point. - * This operations is skipped if the command buffer is in order. + * This operations is skipped if the command-buffer is in order. * @param[in] CommandType The type of the command. * @param[in] CommandBuffer The CommandBuffer where the command is appended. * @param[in] NumSyncPointsInWaitList Number of sync points that are @@ -252,7 +252,7 @@ ur_result_t enqueueCommandBufferMemCopyHelper( } // Helper function for common code when enqueuing rectangular memory operations -// to a command buffer. +// to a command-buffer. ur_result_t enqueueCommandBufferMemCopyRectHelper( ur_command_t CommandType, ur_exp_command_buffer_handle_t CommandBuffer, void *Dst, const void *Src, ur_rect_offset_t SrcOrigin, @@ -584,10 +584,10 @@ ur_result_t createMainCommandList(ur_context_handle_t Context, } /** - * Checks whether the command buffer can be constructed using in order + * Checks whether the command-buffer can be constructed using in order * command-lists. - * @param[in] Context The Context associated with the command buffer. - * @param[in] CommandBufferDesc The description of the command buffer. + * @param[in] Context The Context associated with the command-buffer. + * @param[in] CommandBufferDesc The description of the command-buffer. * @return Returns true if in order command-lists can be enabled. */ bool canBeInOrder(ur_context_handle_t Context, @@ -810,7 +810,7 @@ finalizeImmediateAppendPath(ur_exp_command_buffer_handle_t CommandBuffer) { CommandBuffer->AllResetEvent->ZeEvent)); // All the events are reset by default. 
So signal the all reset event for - // the first run of the command buffer + // the first run of the command-buffer ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->AllResetEvent->ZeEvent)); } @@ -887,7 +887,7 @@ urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t CommandBuffer) { /** * Sets the kernel arguments for a kernel command that will be appended to the - * command buffer. + * command-buffer. * @param[in] Device The Device associated with the command-buffer where the * kernel command will be appended. * @param[in,out] Arguments stored in the ur_kernel_handle_t object to be set @@ -918,7 +918,7 @@ ur_result_t setKernelPendingArguments( } /** - * Creates a new command handle to use in future updates to the command buffer. + * Creates a new command handle to use in future updates to the command-buffer. * @param[in] CommandBuffer The CommandBuffer associated with the new command. * @param[in] Kernel The Kernel associated with the new command. * @param[in] WorkDim Dimensions of the kernel associated with the new command. @@ -1315,7 +1315,7 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp( std::ignore = Flags; if (CommandBuffer->IsInOrderCmdList) { - // Add the prefetch command to the command buffer. + // Add the prefetch command to the command-buffer. // Note that L0 does not handle migration flags. ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (CommandBuffer->ZeComputeCommandList, Mem, Size)); @@ -1332,7 +1332,7 @@ ur_result_t urCommandBufferAppendUSMPrefetchExp( ZeEventList.data())); } - // Add the prefetch command to the command buffer. + // Add the prefetch command to the command-buffer. // Note that L0 does not handle migration flags. ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, (CommandBuffer->ZeComputeCommandList, Mem, Size)); @@ -1463,7 +1463,7 @@ ur_result_t urCommandBufferAppendUSMFillExp( /** * Gets an L0 command queue that supports the chosen engine. - * @param[in] Queue The UR queue used to submit the command buffer. 
+ * @param[in] Queue The UR queue used to submit the command-buffer. * @param[in] UseCopyEngine Which engine to use. true for the copy engine and * false for the compute engine. * @param[out] ZeCommandQueue The L0 command queue. @@ -1478,9 +1478,9 @@ ur_result_t getZeCommandQueue(ur_queue_handle_t Queue, bool UseCopyEngine, } /** - * Waits for the all the dependencies of the command buffer - * @param[in] CommandBuffer The command buffer. - * @param[in] Queue The UR queue used to submit the command buffer. + * Waits for the all the dependencies of the command-buffer + * @param[in] CommandBuffer The command-buffer. + * @param[in] Queue The UR queue used to submit the command-buffer. * @param[in] NumEventsInWaitList The number of events to wait for. * @param[in] EventWaitList List of events to wait for. * @return UR_RESULT_SUCCESS or an error code on failure @@ -1546,10 +1546,10 @@ ur_result_t appendProfilingQueries(ur_exp_command_buffer_handle_t CommandBuffer, ur_event_handle_t SignalEvent, ur_event_handle_t WaitEvent, ur_event_handle_t ProfilingEvent) { - // Multiple submissions of a command buffer implies that we need to save - // the event timestamps before resubmiting the command buffer. We + // Multiple submissions of a command-buffer implies that we need to save + // the event timestamps before resubmiting the command-buffer. We // therefore copy these timestamps in a dedicated USM memory section - // before completing the command buffer execution, and then attach this + // before completing the command-buffer execution, and then attach this // memory to the event returned to users to allow the profiling // engine to recover these timestamps. 
command_buffer_profiling_t *Profiling = new command_buffer_profiling_t(); @@ -2129,9 +2129,9 @@ ur_result_t updateKernelCommand( */ ur_result_t waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) { - // Calling function has taken a lock for the command buffer so we can safely + // Calling function has taken a lock for the command-buffer so we can safely // check and modify this value here. - // If command buffer was recently synchronized we can return early. + // If command-buffer was recently synchronized we can return early. if (!CommandBuffer->NeedsUpdateSynchronization) { return UR_RESULT_SUCCESS; } @@ -2147,7 +2147,7 @@ waitForOngoingExecution(ur_exp_command_buffer_handle_t CommandBuffer) { } else if (ze_fence_handle_t &ZeFence = CommandBuffer->ZeActiveFence) { ZE2UR_CALL(zeFenceHostSynchronize, (ZeFence, UINT64_MAX)); } - // Mark that command buffer was recently synchronized + // Mark that command-buffer was recently synchronized CommandBuffer->NeedsUpdateSynchronization = false; return UR_RESULT_SUCCESS; } @@ -2162,7 +2162,7 @@ ur_result_t urCommandBufferUpdateKernelLaunchExp( UR_ASSERT(KernelCommandHandle->Kernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - // Lock command, kernel and command buffer for update. + // Lock command, kernel and command-buffer for update. 
std::scoped_lock Guard( Command->Mutex, Command->CommandBuffer->Mutex, KernelCommandHandle->Kernel->Mutex); diff --git a/source/adapters/level_zero/command_buffer.hpp b/source/adapters/level_zero/command_buffer.hpp index fee6f165ce..b6a3bb24a8 100644 --- a/source/adapters/level_zero/command_buffer.hpp +++ b/source/adapters/level_zero/command_buffer.hpp @@ -73,7 +73,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { // UR context associated with this command-buffer ur_context_handle_t Context; - // Device associated with this command buffer + // Device associated with this command-buffer ur_device_handle_t Device; // Level Zero command list handle that has the compute engine commands for // this command-buffer. @@ -129,7 +129,7 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { // Indicates if command-buffer commands can be updated after it is closed. bool IsUpdatable = false; - // Indicates if command buffer was finalized. + // Indicates if command-buffer was finalized. bool IsFinalized = false; // Command-buffer profiling is enabled. bool IsProfilingEnabled = false; @@ -141,8 +141,8 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object { // This list is needed to release all kernels retained by the // command_buffer. std::vector KernelsList; - // Track whether synchronization is required when updating the command buffer - // Set this value to true when a command buffer is enqueued, and false after + // Track whether synchronization is required when updating the command-buffer + // Set this value to true when a command-buffer is enqueued, and false after // any fence or event synchronization to avoid repeated calls to synchronize. 
bool NeedsUpdateSynchronization = false; }; diff --git a/source/adapters/level_zero/helpers/kernel_helpers.hpp b/source/adapters/level_zero/helpers/kernel_helpers.hpp index 93b4c7c14d..49345bb57e 100644 --- a/source/adapters/level_zero/helpers/kernel_helpers.hpp +++ b/source/adapters/level_zero/helpers/kernel_helpers.hpp @@ -33,7 +33,7 @@ ur_result_t calculateKernelWorkDimensions( /** * Sets the global offset for a kernel command that will be appended to the - * command buffer. + * command-buffer. * @param[in] Context Context associated with the queue. * @param[in] Kernel The handle to the kernel that will be appended. * @param[in] WorkDim The number of work dimensions. diff --git a/source/adapters/level_zero/v2/api.cpp b/source/adapters/level_zero/v2/api.cpp index 9ae9bddcb9..edd9687445 100644 --- a/source/adapters/level_zero/v2/api.cpp +++ b/source/adapters/level_zero/v2/api.cpp @@ -239,47 +239,6 @@ ur_result_t urBindlessImagesReleaseExternalSemaphoreExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t -urCommandBufferCreateExp(ur_context_handle_t hContext, - ur_device_handle_t hDevice, - const ur_exp_command_buffer_desc_t *pCommandBufferDesc, - ur_exp_command_buffer_handle_t *phCommandBuffer) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t -urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t -urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t -urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - -ur_result_t 
urCommandBufferAppendKernelLaunchExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_kernel_handle_t hKernel, - uint32_t workDim, const size_t *pGlobalWorkOffset, - const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, - uint32_t numKernelAlternatives, ur_kernel_handle_t *phKernelAlternatives, - uint32_t numSyncPointsInWaitList, - const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, - uint32_t NumEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_exp_command_buffer_sync_point_t *pSyncPoint, ur_event_handle_t *phEvent, - ur_exp_command_buffer_command_handle_t *phCommand) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t urCommandBufferAppendUSMMemcpyExp( ur_exp_command_buffer_handle_t hCommandBuffer, void *pDst, const void *pSrc, size_t size, uint32_t numSyncPointsInWaitList, @@ -415,14 +374,6 @@ ur_result_t urCommandBufferAppendUSMAdviseExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t urCommandBufferEnqueueExp( - ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, - uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, - ur_event_handle_t *phEvent) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t urCommandBufferRetainCommandExp( ur_exp_command_buffer_command_handle_t hCommand) { logger::error("{} function not implemented!", __FUNCTION__); @@ -443,15 +394,6 @@ ur_result_t urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -ur_result_t -urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, - ur_exp_command_buffer_info_t propName, - size_t propSize, void *pPropValue, - size_t *pPropSizeRet) { - logger::error("{} function not implemented!", __FUNCTION__); - return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; -} - ur_result_t urCommandBufferUpdateSignalEventExp( 
ur_exp_command_buffer_command_handle_t hCommand, ur_event_handle_t *phEvent) { diff --git a/source/adapters/level_zero/v2/command_buffer.cpp b/source/adapters/level_zero/v2/command_buffer.cpp new file mode 100644 index 0000000000..eace40918b --- /dev/null +++ b/source/adapters/level_zero/v2/command_buffer.cpp @@ -0,0 +1,179 @@ +//===--------- command_buffer.cpp - Level Zero Adapter ---------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "command_buffer.hpp" +#include "../helpers/kernel_helpers.hpp" +#include "../ur_interface_loader.hpp" +#include "logger/ur_logger.hpp" + +namespace { + +// Checks whether zeCommandListImmediateAppendCommandListsExp can be used for a +// given context. +void checkImmediateAppendSupport(ur_context_handle_t context) { + if (!context->getPlatform()->ZeCommandListImmediateAppendExt.Supported) { + logger::error("Adapter v2 is used but " + "the current driver does not support the " + "zeCommandListImmediateAppendCommandListsExp entrypoint."); + throw UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } +} + +} // namespace + +ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_( + ur_context_handle_t context, ur_device_handle_t device, + v2::raii::command_list_unique_handle &&commandList, + const ur_exp_command_buffer_desc_t *desc) + : commandListManager( + context, device, + std::forward(commandList)), + isUpdatable(desc ? desc->isUpdatable : false) {} + +ur_result_t ur_exp_command_buffer_handle_t_::finalizeCommandBuffer() { + // It is not allowed to append to command list from multiple threads. + std::scoped_lock guard(this->Mutex); + UR_ASSERT(!isFinalized, UR_RESULT_ERROR_INVALID_OPERATION); + // Close the command lists and have them ready for dispatch. 
+ ZE2UR_CALL(zeCommandListClose, (this->commandListManager.getZeCommandList())); + isFinalized = true; + return UR_RESULT_SUCCESS; +} + +namespace ur::level_zero { + +ur_result_t +urCommandBufferCreateExp(ur_context_handle_t context, ur_device_handle_t device, + const ur_exp_command_buffer_desc_t *commandBufferDesc, + ur_exp_command_buffer_handle_t *commandBuffer) try { + checkImmediateAppendSupport(context); + // Descriptor may be null (ctor treats that as non-updatable). + if (commandBufferDesc && commandBufferDesc->isUpdatable && + !context->getPlatform()->ZeMutableCmdListExt.Supported) { + throw UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + using queue_group_type = ur_device_handle_t_::queue_group_info_t::type; + uint32_t queueGroupOrdinal = + device->QueueGroup[queue_group_type::Compute].ZeOrdinal; + v2::raii::command_list_unique_handle zeCommandList = + context->commandListCache.getRegularCommandList(device->ZeDevice, true, + queueGroupOrdinal, true); + + *commandBuffer = new ur_exp_command_buffer_handle_t_( + context, device, std::move(zeCommandList), commandBufferDesc); + return UR_RESULT_SUCCESS; + +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t +urCommandBufferRetainExp(ur_exp_command_buffer_handle_t hCommandBuffer) try { + hCommandBuffer->RefCount.increment(); + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t +urCommandBufferReleaseExp(ur_exp_command_buffer_handle_t hCommandBuffer) try { + if (!hCommandBuffer->RefCount.decrementAndTest()) + return UR_RESULT_SUCCESS; + + delete hCommandBuffer; + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t +urCommandBufferFinalizeExp(ur_exp_command_buffer_handle_t hCommandBuffer) try { + UR_ASSERT(hCommandBuffer, UR_RESULT_ERROR_INVALID_NULL_POINTER); + UR_CALL(hCommandBuffer->finalizeCommandBuffer()); + return UR_RESULT_SUCCESS; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +ur_result_t urCommandBufferAppendKernelLaunchExp( + ur_exp_command_buffer_handle_t commandBuffer, ur_kernel_handle_t hKernel, + uint32_t workDim, const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, const size_t *pLocalWorkSize, + uint32_t numKernelAlternatives, ur_kernel_handle_t *kernelAlternatives, + uint32_t numSyncPointsInWaitList, + const ur_exp_command_buffer_sync_point_t *syncPointWaitList, + uint32_t numEventsInWaitList, const ur_event_handle_t *eventWaitList, + ur_exp_command_buffer_sync_point_t *retSyncPoint, ur_event_handle_t *event, + ur_exp_command_buffer_command_handle_t *command) try { + // TODO: These parameters aren't implemented in V1 yet, and are a fair amount + // of work. Need to know semantics: should they be checked before kernel + // execution (difficult) or before kernel appending to list (easy fix). + std::ignore = numEventsInWaitList; + std::ignore = eventWaitList; + std::ignore = event; + + // sync mechanic can be ignored, because all lists are in-order + std::ignore = numSyncPointsInWaitList; + std::ignore = syncPointWaitList; + std::ignore = retSyncPoint; + + // TODO + std::ignore = numKernelAlternatives; + std::ignore = kernelAlternatives; + std::ignore = command; + UR_CALL(commandBuffer->commandListManager.appendKernelLaunch( + hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, 0, + nullptr, nullptr)); + return UR_RESULT_SUCCESS; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +ur_result_t urCommandBufferEnqueueExp( + ur_exp_command_buffer_handle_t hCommandBuffer, ur_queue_handle_t hQueue, + uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent) try { + return hQueue->enqueueCommandBuffer( + hCommandBuffer->commandListManager.getZeCommandList(), phEvent, + numEventsInWaitList, phEventWaitList); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + +ur_result_t +urCommandBufferGetInfoExp(ur_exp_command_buffer_handle_t hCommandBuffer, + ur_exp_command_buffer_info_t propName, + size_t propSize, void *pPropValue, + size_t *pPropSizeRet) try { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + + switch (propName) { + case UR_EXP_COMMAND_BUFFER_INFO_REFERENCE_COUNT: + return ReturnValue(uint32_t{hCommandBuffer->RefCount.load()}); + case UR_EXP_COMMAND_BUFFER_INFO_DESCRIPTOR: { + ur_exp_command_buffer_desc_t Descriptor{}; + Descriptor.stype = UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_DESC; + Descriptor.pNext = nullptr; + Descriptor.isUpdatable = hCommandBuffer->isUpdatable; + Descriptor.isInOrder = true; + Descriptor.enableProfiling = hCommandBuffer->isProfilingEnabled; + + return ReturnValue(Descriptor); + } + default: + assert(!"Command-buffer info request not implemented"); + } + return UR_RESULT_ERROR_INVALID_ENUMERATION; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + +} // namespace ur::level_zero diff --git a/source/adapters/level_zero/v2/command_buffer.hpp b/source/adapters/level_zero/v2/command_buffer.hpp new file mode 100644 index 0000000000..2a1cf3c569 --- /dev/null +++ b/source/adapters/level_zero/v2/command_buffer.hpp @@ -0,0 +1,51 @@ +//===--------- command_buffer.hpp - Level Zero Adapter ---------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "command_list_manager.hpp" +#include "common.hpp" +#include "context.hpp" +#include "kernel.hpp" +#include "queue_api.hpp" +#include + +struct ur_exp_command_buffer_handle_t_ : public _ur_object { + ur_exp_command_buffer_handle_t_( + ur_context_handle_t context, ur_device_handle_t device, + v2::raii::command_list_unique_handle &&commandList, + const ur_exp_command_buffer_desc_t *desc); + + ~ur_exp_command_buffer_handle_t_() = default; + + ur_command_list_manager commandListManager; + + ur_result_t finalizeCommandBuffer(); + // Indicates if command-buffer commands can be updated after it is closed. + const bool isUpdatable = false; + // Command-buffer profiling is enabled. + const bool isProfilingEnabled = false; + +private: + // Indicates if command-buffer was finalized. + bool isFinalized = false; +}; + +struct ur_exp_command_buffer_command_handle_t_ : public _ur_object { + ur_exp_command_buffer_command_handle_t_(ur_exp_command_buffer_handle_t, + uint64_t); + +private: + ~ur_exp_command_buffer_command_handle_t_(); + + // Command-buffer of this command. + ur_exp_command_buffer_handle_t commandBuffer; + // L0 command ID identifying this command + uint64_t commandId; +}; diff --git a/source/adapters/level_zero/v2/command_list_manager.cpp b/source/adapters/level_zero/v2/command_list_manager.cpp new file mode 100644 index 0000000000..3592a1227e --- /dev/null +++ b/source/adapters/level_zero/v2/command_list_manager.cpp @@ -0,0 +1,115 @@ +//===--------- command_list_manager.cpp - Level Zero Adapter --------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. 
See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "command_list_manager.hpp" +#include "../helpers/kernel_helpers.hpp" +#include "../ur_interface_loader.hpp" +#include "context.hpp" +#include "kernel.hpp" + +ur_command_list_manager::ur_command_list_manager( + ur_context_handle_t context, ur_device_handle_t device, + v2::raii::command_list_unique_handle &&commandList, v2::event_flags_t flags, + ur_queue_handle_t queue) + : context(context), device(device), + eventPool(context->eventPoolCache.borrow(device->Id.value(), flags)), + zeCommandList(std::move(commandList)), queue(queue) { + UR_CALL_THROWS(ur::level_zero::urContextRetain(context)); /* released in the destructor */ + UR_CALL_THROWS(ur::level_zero::urDeviceRetain(device)); /* released in the destructor */ +} + +ur_command_list_manager::~ur_command_list_manager() { + ur::level_zero::urContextRelease(context); + ur::level_zero::urDeviceRelease(device); +} + +std::pair<ze_event_handle_t *, uint32_t> +ur_command_list_manager::getWaitListView(const ur_event_handle_t *phWaitEvents, + uint32_t numWaitEvents) { + /* Refills the member vector with the Level Zero handles of the given events; the returned pointer is only valid until the next call. */ + waitList.resize(numWaitEvents); + for (uint32_t i = 0; i < numWaitEvents; i++) { + waitList[i] = phWaitEvents[i]->getZeEvent(); + } + + return {waitList.data(), static_cast<uint32_t>(numWaitEvents)}; +} + +ze_event_handle_t +ur_command_list_manager::getSignalEvent(ur_event_handle_t *hUserEvent, + ur_command_t commandType) { + if (hUserEvent && queue) { + *hUserEvent = eventPool->allocate(); + (*hUserEvent)->resetQueueAndCommand(queue, commandType); + return (*hUserEvent)->getZeEvent(); + } else { /* no event requested, or no queue to attach it to: *hUserEvent is left untouched */ + return nullptr; + } +} + +ur_result_t ur_command_list_manager::appendKernelLaunch( + ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { +
TRACK_SCOPE_LATENCY("ur_command_list_manager::appendKernelLaunch"); + + UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); + UR_ASSERT(hKernel->getProgramHandle(), UR_RESULT_ERROR_INVALID_NULL_POINTER); + + UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + + ze_kernel_handle_t hZeKernel = hKernel->getZeHandle(device); + + std::scoped_lock Lock(this->Mutex, + hKernel->Mutex); + + ze_group_count_t zeThreadGroupDimensions{1, 1, 1}; + uint32_t WG[3]{}; + UR_CALL(calculateKernelWorkDimensions(hZeKernel, device, + zeThreadGroupDimensions, WG, workDim, + pGlobalWorkSize, pLocalWorkSize)); + + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); + + auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); + + bool memoryMigrated = false; + auto memoryMigrate = [&](void *src, void *dst, size_t size) { + ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, + (zeCommandList.get(), dst, src, size, nullptr, + waitList.second, waitList.first)); + memoryMigrated = true; + }; + + UR_CALL(hKernel->prepareForSubmission(context, device, pGlobalWorkOffset, + workDim, WG[0], WG[1], WG[2], + memoryMigrate)); + + if (memoryMigrated) { + // The migration copy already waited on the wait list, so there is no need + // to pass it to the kernel launch again.
+ waitList.first = nullptr; + waitList.second = 0; + } + + TRACK_SCOPE_LATENCY( + "ur_command_list_manager::zeCommandListAppendLaunchKernel"); + ZE2UR_CALL(zeCommandListAppendLaunchKernel, + (zeCommandList.get(), hZeKernel, &zeThreadGroupDimensions, + zeSignalEvent, waitList.second, waitList.first)); + + return UR_RESULT_SUCCESS; +} + +ze_command_list_handle_t ur_command_list_manager::getZeCommandList() { + return zeCommandList.get(); +} diff --git a/source/adapters/level_zero/v2/command_list_manager.hpp b/source/adapters/level_zero/v2/command_list_manager.hpp new file mode 100644 index 0000000000..b24433044a --- /dev/null +++ b/source/adapters/level_zero/v2/command_list_manager.hpp @@ -0,0 +1,52 @@ +//===--------- command_list_manager.hpp - Level Zero Adapter --------------===// +// +// Copyright (C) 2024 Intel Corporation +// +// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM +// Exceptions. See LICENSE.TXT +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#pragma once + +#include "command_list_cache.hpp" +#include "common.hpp" +#include "event_pool_cache.hpp" +#include "queue_api.hpp" +#include <ze_api.h> + +struct ur_command_list_manager : public _ur_object { + + ur_command_list_manager(ur_context_handle_t context, + ur_device_handle_t device, + v2::raii::command_list_unique_handle &&commandList, + v2::event_flags_t flags = v2::EVENT_FLAGS_COUNTER, + ur_queue_handle_t_ *queue = nullptr); + ~ur_command_list_manager(); + + ur_result_t appendKernelLaunch(ur_kernel_handle_t hKernel, uint32_t workDim, + const size_t *pGlobalWorkOffset, + const size_t *pGlobalWorkSize, + const size_t *pLocalWorkSize, + uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, + ur_event_handle_t *phEvent); + + ze_command_list_handle_t getZeCommandList(); + + std::pair<ze_event_handle_t *, uint32_t> + getWaitListView(const ur_event_handle_t *phWaitEvents, + uint32_t numWaitEvents); +
ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent, + ur_command_t commandType); + +private: + // UR context associated with this command list + ur_context_handle_t context; + // Device associated with this command list + ur_device_handle_t device; + v2::raii::cache_borrowed_event_pool eventPool; + v2::raii::command_list_unique_handle zeCommandList; + ur_queue_handle_t queue; /* may be null; getSignalEvent() then allocates no event */ + std::vector<ze_event_handle_t> waitList; /* scratch storage reused by getWaitListView() */ +}; diff --git a/source/adapters/level_zero/v2/queue_api.cpp b/source/adapters/level_zero/v2/queue_api.cpp index f4e2f47c09..28ff527413 100644 --- a/source/adapters/level_zero/v2/queue_api.cpp +++ b/source/adapters/level_zero/v2/queue_api.cpp @@ -11,6 +11,9 @@ * */ +// Do not edit. This file is auto generated from a template: +// scripts/templates/queue_api.cpp.mako + #include "queue_api.hpp" #include "ur_util.hpp" diff --git a/source/adapters/level_zero/v2/queue_api.hpp b/source/adapters/level_zero/v2/queue_api.hpp index e9e98874e8..88d812bbba 100644 --- a/source/adapters/level_zero/v2/queue_api.hpp +++ b/source/adapters/level_zero/v2/queue_api.hpp @@ -11,9 +11,13 @@ * */ +// Do not edit.
This file is auto generated from a template: +// scripts/templates/queue_api.hpp.mako + #pragma once #include +#include struct ur_queue_handle_t_ { virtual ~ur_queue_handle_t_(); @@ -158,4 +162,8 @@ struct ur_queue_handle_t_ { const ur_exp_enqueue_native_command_properties_t *, uint32_t, const ur_event_handle_t *, ur_event_handle_t *) = 0; + + virtual ur_result_t enqueueCommandBuffer(ze_command_list_handle_t, + ur_event_handle_t *, uint32_t, + const ur_event_handle_t *) = 0; }; diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp index af65df78a2..8da52fe6b6 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.cpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.cpp @@ -24,13 +24,7 @@ namespace v2 { std::pair ur_queue_immediate_in_order_t::getWaitListView( const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) { - - waitList.resize(numWaitEvents); - for (uint32_t i = 0; i < numWaitEvents; i++) { - waitList[i] = phWaitEvents[i]->getZeEvent(); - } - - return {waitList.data(), static_cast(numWaitEvents)}; + return commandListManager.getWaitListView(phWaitEvents, numWaitEvents); } static int32_t getZeOrdinal(ur_device_handle_t hDevice) { @@ -58,25 +52,6 @@ static ze_command_queue_priority_t getZePriority(ur_queue_flags_t flags) { return ZE_COMMAND_QUEUE_PRIORITY_NORMAL; } -ur_command_list_handler_t::ur_command_list_handler_t( - ur_context_handle_t hContext, ur_device_handle_t hDevice, - const ur_queue_properties_t *pProps) - : commandList(hContext->commandListCache.getImmediateCommandList( - hDevice->ZeDevice, true, getZeOrdinal(hDevice), - true /* always enable copy offload */, - ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, - getZePriority(pProps ? 
pProps->flags : ur_queue_flags_t{}), - getZeIndex(pProps))) {} - -ur_command_list_handler_t::ur_command_list_handler_t( - ze_command_list_handle_t hZeCommandList, bool ownZeHandle) - : commandList(hZeCommandList, - [ownZeHandle](ze_command_list_handle_t hZeCommandList) { - if (ownZeHandle) { - ZE_CALL_NOCHECK(zeCommandListDestroy, (hZeCommandList)); - } - }) {} - static event_flags_t eventFlagsFromQueueFlags(ur_queue_flags_t flags) { event_flags_t eventFlags = EVENT_FLAGS_COUNTER; if (flags & UR_QUEUE_FLAG_PROFILING_ENABLE) @@ -88,29 +63,35 @@ ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( ur_context_handle_t hContext, ur_device_handle_t hDevice, const ur_queue_properties_t *pProps) : hContext(hContext), hDevice(hDevice), flags(pProps ? pProps->flags : 0), - eventPool(hContext->eventPoolCache.borrow( - hDevice->Id.value(), eventFlagsFromQueueFlags(flags))), - handler(hContext, hDevice, pProps) {} + commandListManager( + hContext, hDevice, + hContext->commandListCache.getImmediateCommandList( + hDevice->ZeDevice, true, getZeOrdinal(hDevice), + true /* always enable copy offload */, + ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS, + getZePriority(pProps ? 
pProps->flags : ur_queue_flags_t{}), + getZeIndex(pProps)), + eventFlagsFromQueueFlags(flags), this) {} ur_queue_immediate_in_order_t::ur_queue_immediate_in_order_t( ur_context_handle_t hContext, ur_device_handle_t hDevice, ur_native_handle_t hNativeHandle, ur_queue_flags_t flags, bool ownZeQueue) : hContext(hContext), hDevice(hDevice), flags(flags), - eventPool(hContext->eventPoolCache.borrow( - hDevice->Id.value(), eventFlagsFromQueueFlags(flags))), - handler(reinterpret_cast<ze_command_list_handle_t>(hNativeHandle), - ownZeQueue) {} - -ur_event_handle_t + commandListManager( + hContext, hDevice, + raii::command_list_unique_handle( + reinterpret_cast<ze_command_list_handle_t>(hNativeHandle), + [ownZeQueue](ze_command_list_handle_t hZeCommandList) { + if (ownZeQueue) { /* only destroy the native command list when this queue owns it */ + ZE_CALL_NOCHECK(zeCommandListDestroy, (hZeCommandList)); + } + }), + eventFlagsFromQueueFlags(flags), /* the owning queue is required for getSignalEvent() to allocate events */ this) {} + +ze_event_handle_t ur_queue_immediate_in_order_t::getSignalEvent(ur_event_handle_t *hUserEvent, ur_command_t commandType) { - if (hUserEvent) { - *hUserEvent = eventPool->allocate(); - (*hUserEvent)->resetQueueAndCommand(this, commandType); - return *hUserEvent; - } else { - return nullptr; - } + return commandListManager.getSignalEvent(hUserEvent, commandType); } ur_result_t @@ -133,7 +114,7 @@ ur_queue_immediate_in_order_t::queueGetInfo(ur_queue_info_t propName, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_QUEUE_INFO_EMPTY: { auto status = ZE_CALL_NOCHECK(zeCommandListHostSynchronize, - (handler.commandList.get(), 0)); + (commandListManager.getZeCommandList(), 0)); if (status == ZE_RESULT_SUCCESS) { return ReturnValue(true); } else if (status == ZE_RESULT_NOT_READY) { @@ -175,8 +156,8 @@ void ur_queue_immediate_in_order_t::deferEventFree(ur_event_handle_t hEvent) { ur_result_t ur_queue_immediate_in_order_t::queueGetNativeHandle( ur_queue_native_desc_t *pDesc, ur_native_handle_t *phNativeQueue) { std::ignore = pDesc; - *phNativeQueue = - reinterpret_cast<ur_native_handle_t>(this->handler.commandList.get()); + *phNativeQueue = reinterpret_cast<ur_native_handle_t>( +
this->commandListManager.getZeCommandList()); return UR_RESULT_SUCCESS; } @@ -189,7 +170,7 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() { TRACK_SCOPE_LATENCY( "ur_queue_immediate_in_order_t::zeCommandListHostSynchronize"); ZE2UR_CALL(zeCommandListHostSynchronize, - (handler.commandList.get(), UINT64_MAX)); + (commandListManager.getZeCommandList(), UINT64_MAX)); // Free deferred events for (auto &hEvent : deferredEvents) { @@ -223,52 +204,9 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) { TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::enqueueKernelLaunch"); - UR_ASSERT(hKernel, UR_RESULT_ERROR_INVALID_NULL_HANDLE); - UR_ASSERT(hKernel->getProgramHandle(), UR_RESULT_ERROR_INVALID_NULL_POINTER); - - UR_ASSERT(workDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(workDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - - ze_kernel_handle_t hZeKernel = hKernel->getZeHandle(hDevice); - - std::scoped_lock Lock(this->Mutex, - hKernel->Mutex); - - ze_group_count_t zeThreadGroupDimensions{1, 1, 1}; - uint32_t WG[3]{}; - UR_CALL(calculateKernelWorkDimensions(hZeKernel, hDevice, - zeThreadGroupDimensions, WG, workDim, - pGlobalWorkSize, pLocalWorkSize)); - - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); - - auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); - - bool memoryMigrated = false; - auto memoryMigrate = [&](void *src, void *dst, size_t size) { - ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); - memoryMigrated = true; - }; - - UR_CALL(hKernel->prepareForSubmission(hContext, hDevice, pGlobalWorkOffset, - workDim, WG[0], WG[1], WG[2], - memoryMigrate)); - - if (memoryMigrated) { - // If memory was migrated, we don't need to pass the wait list to - // the copy command again. 
- waitList.first = nullptr; - waitList.second = 0; - } - - TRACK_SCOPE_LATENCY( - "ur_queue_immediate_in_order_t::zeCommandListAppendLaunchKernel"); - auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr; - ZE2UR_CALL(zeCommandListAppendLaunchKernel, - (handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions, - zeSignalEvent, waitList.second, waitList.first)); + UR_CALL(commandListManager.appendKernelLaunch( + hKernel, workDim, pGlobalWorkOffset, pGlobalWorkSize, pLocalWorkSize, + numEventsInWaitList, phEventWaitList, phEvent)); recordSubmittedKernel(hKernel); @@ -287,20 +225,20 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWait( return UR_RESULT_SUCCESS; } - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT); + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); if (numWaitEvents > 0) { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler.commandList.get(), numWaitEvents, pWaitEvents)); + ZE2UR_CALL( + zeCommandListAppendWaitOnEvents, + (commandListManager.getZeCommandList(), numWaitEvents, pWaitEvents)); } - if (signalEvent) { + if (zeSignalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler.commandList.get(), signalEvent->getZeEvent())); + (commandListManager.getZeCommandList(), zeSignalEvent)); } - return UR_RESULT_SUCCESS; } @@ -317,13 +255,13 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueEventsWaitWithBarrierImpl( return UR_RESULT_SUCCESS; } - auto signalEvent = + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); ZE2UR_CALL(zeCommandListAppendBarrier, - (handler.commandList.get(), signalEvent->getZeEvent(), + (commandListManager.getZeCommandList(), zeSignalEvent, numWaitEvents, pWaitEvents)); return UR_RESULT_SUCCESS; @@ -358,7 +296,7 @@ ur_result_t 
ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( size_t dstOffset, size_t size, uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, ur_command_t commandType) { - auto signalEvent = getSignalEvent(phEvent, commandType); + auto zeSignalEvent = getSignalEvent(phEvent, commandType); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -367,8 +305,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, srcOffset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), dst, src, + size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -376,8 +314,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, dstOffset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), dst, src, + size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -388,14 +326,13 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked( waitList.second = 0; } - auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), pDst, pSrc, size, zeSignalEvent, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), pDst, pSrc, size, + zeSignalEvent, waitList.second, waitList.first)); if (blocking) { ZE2UR_CALL(zeCommandListHostSynchronize, - (handler.commandList.get(), UINT64_MAX)); + (commandListManager.getZeCommandList(), UINT64_MAX)); } return UR_RESULT_SUCCESS; @@ -447,7 +384,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( auto zeParams = ur2zeRegionParams(srcOrigin, dstOrigin, region, srcRowPitch, dstRowPitch, srcSlicePitch, dstSlicePitch); - auto signalEvent = getSignalEvent(phEvent, commandType); + auto zeSignalEvent = getSignalEvent(phEvent, commandType); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -456,16 +393,16 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, 0, src->getSize(), [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), dst, src, + size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); auto pDst = ur_cast(dst->getDevicePtr( hDevice, ur_mem_handle_t_::device_access_mode_t::write_only, 0, dst->getSize(), [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), dst, src, + size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -476,16 +413,15 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked( waitList.second = 0; } - auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopyRegion, - (handler.commandList.get(), pDst, &zeParams.dstRegion, + (commandListManager.getZeCommandList(), pDst, &zeParams.dstRegion, zeParams.dstPitch, zeParams.dstSlicePitch, pSrc, &zeParams.srcRegion, zeParams.srcPitch, zeParams.srcSlicePitch, zeSignalEvent, waitList.second, waitList.first)); if (blocking) { ZE2UR_CALL(zeCommandListHostSynchronize, - (handler.commandList.get(), UINT64_MAX)); + (commandListManager.getZeCommandList(), UINT64_MAX)); } return UR_RESULT_SUCCESS; @@ -649,7 +585,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( std::scoped_lock lock(this->Mutex, hBuffer->getMutex()); - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_BUFFER_MAP); + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_BUFFER_MAP); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -657,8 +593,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( auto pDst = ur_cast(hBuffer->mapHostPtr( mapFlags, offset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), dst, src, + size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); *ppRetMap = pDst; @@ -666,17 +602,18 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap( if (!memoryMigrated && waitList.second) { // If memory was not migrated, we need to wait on the events here. 
ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler.commandList.get(), waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), waitList.second, + waitList.first)); } - if (signalEvent) { + if (zeSignalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler.commandList.get(), signalEvent->getZeEvent())); + (commandListManager.getZeCommandList(), zeSignalEvent)); } if (blockingMap) { ZE2UR_CALL(zeCommandListHostSynchronize, - (handler.commandList.get(), UINT64_MAX)); + (commandListManager.getZeCommandList(), UINT64_MAX)); } return UR_RESULT_SUCCESS; @@ -689,29 +626,28 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap( std::scoped_lock lock(this->Mutex); - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_UNMAP); + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_MEM_UNMAP); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); // TODO: currently unmapHostPtr deallocates memory immediately, // since the memory might be used by the user, we need to make sure // all dependencies are completed. 
- ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler.commandList.get(), waitList.second, waitList.first)); + ZE2UR_CALL( + zeCommandListAppendWaitOnEvents, + (commandListManager.getZeCommandList(), waitList.second, waitList.first)); bool memoryMigrated = false; hMem->unmapHostPtr(pMappedPtr, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), dst, src, size, + nullptr, waitList.second, waitList.first)); memoryMigrated = true; }); - - if (signalEvent) { + if (zeSignalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler.commandList.get(), signalEvent->getZeEvent())); + (commandListManager.getZeCommandList(), zeSignalEvent)); } - return UR_RESULT_SUCCESS; } @@ -721,7 +657,7 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, ur_command_t commandType) { - auto signalEvent = getSignalEvent(phEvent, commandType); + auto zeSignalEvent = getSignalEvent(phEvent, commandType); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); @@ -730,8 +666,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( hDevice, ur_mem_handle_t_::device_access_mode_t::read_only, offset, size, [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), dst, src, + size, nullptr, waitList.second, waitList.first)); memoryMigrated = true; })); @@ -746,10 +682,10 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked( // PatternSize must be a power of two for zeCommandListAppendMemoryFill. // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy. - auto zeSignalEvent = signalEvent ? 
signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryFill, - (handler.commandList.get(), pDst, pPattern, patternSize, size, - zeSignalEvent, waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), pDst, pPattern, + patternSize, size, zeSignalEvent, waitList.second, + waitList.first)); return UR_RESULT_SUCCESS; } @@ -777,19 +713,18 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMMemcpy( std::scoped_lock lock(this->Mutex); - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_MEMCPY); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); - auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), pDst, pSrc, size, zeSignalEvent, - numWaitEvents, pWaitEvents)); + (commandListManager.getZeCommandList(), pDst, pSrc, size, + zeSignalEvent, numWaitEvents, pWaitEvents)); if (blocking) { ZE2UR_CALL(zeCommandListHostSynchronize, - (handler.commandList.get(), UINT64_MAX)); + (commandListManager.getZeCommandList(), UINT64_MAX)); } return UR_RESULT_SUCCESS; @@ -805,22 +740,22 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueUSMPrefetch( std::scoped_lock lock(this->Mutex); - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH); + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_PREFETCH); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); if (pWaitEvents) { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler.commandList.get(), numWaitEvents, pWaitEvents)); + ZE2UR_CALL( + zeCommandListAppendWaitOnEvents, + (commandListManager.getZeCommandList(), numWaitEvents, pWaitEvents)); } // TODO: figure out how to translate "flags" ZE2UR_CALL(zeCommandListAppendMemoryPrefetch, - (handler.commandList.get(), pMem, size)); - - if (signalEvent) { + 
(commandListManager.getZeCommandList(), pMem, size)); + if (zeSignalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler.commandList.get(), signalEvent->getZeEvent())); + (commandListManager.getZeCommandList(), zeSignalEvent)); } return UR_RESULT_SUCCESS; @@ -838,25 +773,25 @@ ur_queue_immediate_in_order_t::enqueueUSMAdvise(const void *pMem, size_t size, auto zeAdvice = ur_cast(advice); - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE); + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_USM_ADVISE); auto [pWaitEvents, numWaitEvents] = getWaitListView(nullptr, 0); if (pWaitEvents) { - ZE2UR_CALL(zeCommandListAppendWaitOnEvents, - (handler.commandList.get(), numWaitEvents, pWaitEvents)); + ZE2UR_CALL( + zeCommandListAppendWaitOnEvents, + (commandListManager.getZeCommandList(), numWaitEvents, pWaitEvents)); } // TODO: figure out how to translate "flags" ZE2UR_CALL(zeCommandListAppendMemAdvise, - (handler.commandList.get(), this->hDevice->ZeDevice, pMem, size, - zeAdvice)); + (commandListManager.getZeCommandList(), this->hDevice->ZeDevice, + pMem, size, zeAdvice)); - if (signalEvent) { + if (zeSignalEvent) { ZE2UR_CALL(zeCommandListAppendSignalEvent, - (handler.commandList.get(), signalEvent->getZeEvent())); + (commandListManager.getZeCommandList(), zeSignalEvent)); } - return UR_RESULT_SUCCESS; } @@ -1058,15 +993,15 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp( zeThreadGroupDimensions, WG, workDim, pGlobalWorkSize, pLocalWorkSize)); - auto signalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); + auto zeSignalEvent = getSignalEvent(phEvent, UR_COMMAND_KERNEL_LAUNCH); auto waitList = getWaitListView(phEventWaitList, numEventsInWaitList); bool memoryMigrated = false; auto memoryMigrate = [&](void *src, void *dst, size_t size) { ZE2UR_CALL_THROWS(zeCommandListAppendMemoryCopy, - (handler.commandList.get(), dst, src, size, nullptr, - waitList.second, waitList.first)); + 
(commandListManager.getZeCommandList(), dst, src, size, + nullptr, waitList.second, waitList.first)); memoryMigrated = true; }; @@ -1083,10 +1018,10 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp( TRACK_SCOPE_LATENCY("ur_queue_immediate_in_order_t::" "zeCommandListAppendLaunchCooperativeKernel"); - auto zeSignalEvent = signalEvent ? signalEvent->getZeEvent() : nullptr; ZE2UR_CALL(zeCommandListAppendLaunchCooperativeKernel, - (handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions, - zeSignalEvent, waitList.second, waitList.first)); + (commandListManager.getZeCommandList(), hZeKernel, + &zeThreadGroupDimensions, zeSignalEvent, waitList.second, + waitList.first)); recordSubmittedKernel(hKernel); @@ -1101,33 +1036,58 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueTimestampRecordingExp( std::scoped_lock lock(this->Mutex); - auto signalEvent = - getSignalEvent(phEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP); - - if (!signalEvent) { + if (!phEvent) { /* phEvent is mandatory: the event stored through it below is dereferenced to record the timestamps */ return UR_RESULT_ERROR_INVALID_NULL_HANDLE; } - + getSignalEvent(phEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP); auto [pWaitEvents, numWaitEvents] = getWaitListView(phEventWaitList, numEventsInWaitList); - signalEvent->recordStartTimestamp(); + (*phEvent)->recordStartTimestamp(); auto [timestampPtr, zeSignalEvent] = - signalEvent->getEventEndTimestampAndHandle(); + (*phEvent)->getEventEndTimestampAndHandle(); ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, - (handler.commandList.get(), timestampPtr, zeSignalEvent, - numWaitEvents, pWaitEvents)); + (commandListManager.getZeCommandList(), timestampPtr, + zeSignalEvent, numWaitEvents, pWaitEvents)); if (blocking) { ZE2UR_CALL(zeCommandListHostSynchronize, - (handler.commandList.get(), UINT64_MAX)); + (commandListManager.getZeCommandList(), UINT64_MAX)); } return UR_RESULT_SUCCESS; } +ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp( + uint32_t numCommandLists, ze_command_list_handle_t
*phCommandLists, + ur_event_handle_t *phEvent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand) { + TRACK_SCOPE_LATENCY( + "ur_queue_immediate_in_order_t::enqueueGenericCommandListsExp"); + + std::scoped_lock Lock(this->Mutex); + auto zeSignalEvent = getSignalEvent(phEvent, callerCommand); + + auto [pWaitEvents, numWaitEvents] = + getWaitListView(phEventWaitList, numEventsInWaitList); + + ZE2UR_CALL(zeCommandListImmediateAppendCommandListsExp, + (commandListManager.getZeCommandList(), numCommandLists, + phCommandLists, zeSignalEvent, numWaitEvents, pWaitEvents)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t ur_queue_immediate_in_order_t::enqueueCommandBuffer( + ze_command_list_handle_t commandBufferCommandList, + ur_event_handle_t *phEvent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList) { + return enqueueGenericCommandListsExp(1, &commandBufferCommandList, phEvent, + numEventsInWaitList, phEventWaitList, + UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP); +} ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunchCustomExp( ur_kernel_handle_t hKernel, uint32_t workDim, const size_t *pGlobalWorkOffset, const size_t *pGlobalWorkSize, diff --git a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp index e0d7f747b3..6cf8b0c51c 100644 --- a/source/adapters/level_zero/v2/queue_immediate_in_order.hpp +++ b/source/adapters/level_zero/v2/queue_immediate_in_order.hpp @@ -19,33 +19,19 @@ #include "ur/ur.hpp" +#include "command_list_manager.hpp" + namespace v2 { using queue_group_type = ur_device_handle_t_::queue_group_info_t::type; -struct ur_command_list_handler_t { - ur_command_list_handler_t(ur_context_handle_t hContext, - ur_device_handle_t hDevice, - const ur_queue_properties_t *pProps); - - ur_command_list_handler_t(ze_command_list_handle_t hZeCommandList, - bool ownZeHandle); - - raii::command_list_unique_handle 
commandList; -}; - struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { private: ur_context_handle_t hContext; ur_device_handle_t hDevice; ur_queue_flags_t flags; - raii::cache_borrowed_event_pool eventPool; - - ur_command_list_handler_t handler; - - std::vector waitList; - + ur_command_list_manager commandListManager; std::vector deferredEvents; std::vector submittedKernels; @@ -53,7 +39,7 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { getWaitListView(const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents); - ur_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent, + ze_event_handle_t getSignalEvent(ur_event_handle_t *hUserEvent, ur_command_t commandType); void deferEventFree(ur_event_handle_t hEvent) override; @@ -78,6 +64,11 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent, ur_command_t commandType); + ur_result_t enqueueGenericCommandListsExp( + uint32_t numCommandLists, ze_command_list_handle_t *phCommandLists, + ur_event_handle_t *phEvent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList, ur_command_t callerCommand); + ur_result_t enqueueEventsWaitWithBarrierImpl(uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, @@ -277,6 +268,10 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ { uint32_t numEventsInWaitList, const ur_event_handle_t *phEventWaitList, ur_event_handle_t *phEvent) override; ur_result_t + enqueueCommandBuffer(ze_command_list_handle_t commandBufferCommandList, + ur_event_handle_t *phEvent, uint32_t numEventsInWaitList, + const ur_event_handle_t *phEventWaitList) override; + ur_result_t enqueueNativeCommandExp(ur_exp_enqueue_native_command_function_t, void *, uint32_t, const ur_mem_handle_t *, const ur_exp_enqueue_native_command_properties_t *, diff --git a/source/adapters/opencl/common.hpp 
b/source/adapters/opencl/common.hpp index 95fc57319d..6857220dc2 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -206,7 +206,7 @@ CONSTFIX char EnqueueReadGlobalVariableName[] = // Names of host pipe functions queried from OpenCL CONSTFIX char EnqueueReadHostPipeName[] = "clEnqueueReadHostPipeINTEL"; CONSTFIX char EnqueueWriteHostPipeName[] = "clEnqueueWriteHostPipeINTEL"; -// Names of command buffer functions queried from OpenCL +// Names of command-buffer functions queried from OpenCL CONSTFIX char CreateCommandBufferName[] = "clCreateCommandBufferKHR"; CONSTFIX char RetainCommandBufferName[] = "clRetainCommandBufferKHR"; CONSTFIX char ReleaseCommandBufferName[] = "clReleaseCommandBufferKHR"; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index ad50aee76c..031b4f3989 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -8838,7 +8838,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// is not nullptr. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP @@ -8901,7 +8901,7 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( /// supported by the device associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. 
/// + If no `phEvent` parameter was set on creation of the command @@ -8941,7 +8941,7 @@ ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( /// supported by the device associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/source/ur_api.cpp b/source/ur_api.cpp index f5357b8501..b9d0c3c390 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -7720,7 +7720,7 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// is not nullptr. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP @@ -7776,7 +7776,7 @@ ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( /// supported by the device associated with `hCommand`. /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. /// + If no `phEvent` parameter was set on creation of the command @@ -7810,7 +7810,7 @@ ur_result_t UR_APICALL urCommandBufferUpdateSignalEventExp( /// supported by the device associated with `hCommand`. 
/// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true -/// on creation of the command buffer `hCommand` belongs to. +/// on creation of the command-buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been /// finalized. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/test/conformance/exp_command_buffer/update/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp index 1717c194b7..d76416363d 100644 --- a/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -119,7 +119,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { // Append a kernel commands to command-buffer and close command-buffer // Should be an error because we are trying to get command handle but - // command buffer is not updatable. + // command-buffer is not updatable. ur_exp_command_buffer_command_handle_t test_command_handle = nullptr; ASSERT_EQ_RESULT(urCommandBufferAppendKernelLaunchExp( test_cmd_buf_handle, kernel, n_dimensions, diff --git a/test/conformance/exp_command_buffer/update/local_memory_update.cpp b/test/conformance/exp_command_buffer/update/local_memory_update.cpp index d55094a52c..5ed585b5a9 100644 --- a/test/conformance/exp_command_buffer/update/local_memory_update.cpp +++ b/test/conformance/exp_command_buffer/update/local_memory_update.cpp @@ -1310,4 +1310,4 @@ TEST_P(LocalMemoryUpdateTestOutOfOrder, UpdateAllParameters) { uint32_t *new_X = (uint32_t *)shared_ptrs[3]; uint32_t *new_Y = (uint32_t *)shared_ptrs[4]; Validate(new_output, new_X, new_Y, new_A, global_size, local_size); -} \ No newline at end of file +}