From ad277f0dc6598242c0f470080fdbfa93282a3469 Mon Sep 17 00:00:00 2001
From: "Zhang, Winston"
Date: Tue, 19 Nov 2024 10:00:44 -0800
Subject: [PATCH 1/6] [L0] Interrupt-based event implementation

To expose this functionality in UR, we want two ways of enabling
low-power events:

- Queue-wide enabling, so all events created on the queue are
  low-powered.
- As a property passed to urEnqueueEventsWaitWithBarrier, making the
  resulting event a low-power event. This will require the existing
  interface to be extended with properties, potentially through a new
  experimental function.

Signed-off-by: Zhang, Winston
---
 source/adapters/level_zero/command_buffer.cpp |  2 +-
 source/adapters/level_zero/context.cpp        | 11 ++++++-
 source/adapters/level_zero/context.hpp        | 21 ++++++++++++-
 source/adapters/level_zero/device.cpp         |  4 +--
 source/adapters/level_zero/event.cpp          | 16 ++++++----
 source/adapters/level_zero/event.hpp          |  5 +++-
 source/adapters/level_zero/queue.cpp          | 30 ++++++++++++-------
 source/adapters/level_zero/queue.hpp          |  7 ++++-
 8 files changed, 72 insertions(+), 24 deletions(-)

diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp
index 56c53b5331..230ff1c160 100644
--- a/source/adapters/level_zero/command_buffer.cpp
+++ b/source/adapters/level_zero/command_buffer.cpp
@@ -215,7 +215,7 @@ ur_result_t createSyncPointAndGetZeEvents(
   UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/,
                       false /*IsMultiDevice*/, HostVisible, &LaunchEvent,
                       false /*CounterBasedEventEnabled*/,
-                      !CommandBuffer->IsProfilingEnabled));
+                      !CommandBuffer->IsProfilingEnabled, false));
   LaunchEvent->CommandType = CommandType;
   ZeLaunchEvent = LaunchEvent->ZeEvent;

diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp
index 7c1c412ee4..58601dbc0d 100644
--- a/source/adapters/level_zero/context.cpp
+++ b/source/adapters/level_zero/context.cpp
@@ -478,7 +478,8 @@ static const uint32_t MaxNumEventsPerPool = [] {
 ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
     ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
     bool ProfilingEnabled, ur_device_handle_t Device,
-    bool CounterBasedEventEnabled, bool UsingImmCmdList) {
+    bool CounterBasedEventEnabled, bool UsingImmCmdList,
+    bool InterruptBasedEventEnabled) {
   // Lock while updating event pool machinery.
std::scoped_lock Lock(ZeEventPoolCacheMutex); @@ -537,6 +538,14 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( counterBasedExt.flags); ZeEventPoolDesc.pNext = &counterBasedExt; } + if (InterruptBasedEventEnabled) { + ze_intel_event_sync_mode_exp_desc_t eventSyncMode = { + ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, nullptr, 0}; + eventSyncMode.syncModeFlags = + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT | + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT; + ZeEventPoolDesc.pNext = &eventSyncMode; + } std::vector ZeDevices; if (ZeDevice) { diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 0d3b2846e2..023f0de09c 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -33,6 +33,24 @@ struct l0_command_list_cache_info { bool IsImmediate = false; }; +typedef uint32_t ze_intel_event_sync_mode_exp_flags_t; +typedef enum _ze_intel_event_sync_mode_exp_flag_t { + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT = ZE_BIT(0), + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT = ZE_BIT(1), + ZE_INTEL_EVENT_SYNC_MODE_EXP_EXP_FLAG_FORCE_UINT32 = 0x7fffffff + +} ze_intel_event_sync_mode_exp_flag_t; + +#define ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC \ + (ze_structure_type_t)0x00030016 + +typedef struct _ze_intel_event_sync_mode_exp_desc_t { + ze_structure_type_t stype; + const void *pNext; + + ze_intel_event_sync_mode_exp_flags_t syncModeFlags; +} ze_intel_event_sync_mode_exp_desc_t; + struct ur_context_handle_t_ : _ur_object { ur_context_handle_t_(ze_context_handle_t ZeContext, uint32_t NumDevices, const ur_device_handle_t *Devs, bool OwnZeContext) @@ -208,7 +226,8 @@ struct ur_context_handle_t_ : _ur_object { bool ProfilingEnabled, ur_device_handle_t Device, bool CounterBasedEventEnabled, - bool UsingImmCmdList); + bool UsingImmCmdList, + bool InterruptBasedEventEnabled); // Get ur_event_handle_t from cache. 
ur_event_handle_t getEventFromContextCache(bool HostVisible, diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 865edebc08..6ae9f8ec83 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -485,6 +485,8 @@ ur_result_t urDeviceGetInfo( case UR_DEVICE_INFO_BUILT_IN_KERNELS: // TODO: To find out correct value return ReturnValue(""); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP); case UR_DEVICE_INFO_QUEUE_PROPERTIES: return ReturnValue( ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | @@ -1151,8 +1153,6 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_USM_POOL_SUPPORT: return ReturnValue(true); - case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: - return ReturnValue(false); default: logger::error("Unsupported ParamName in urGetDeviceInfo"); logger::error("ParamNameParamName={}(0x{})", ParamName, diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 96da4be0fd..028a791bb7 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -422,7 +422,8 @@ ur_result_t urEnqueueEventsWaitWithBarrier( ur_result_t urEnqueueEventsWaitWithBarrierExt( ur_queue_handle_t Queue, ///< [in] handle of the queue object const ur_exp_enqueue_ext_properties_t - *, ///< [in][optional] pointer to the extended enqueue properties + *EnqueueExtProp, ///< [in][optional] pointer to the extended enqueue + ///< properties uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -913,7 +914,7 @@ ur_result_t urExtEventCreate( UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, true /*HostVisible*/, Event, false /*CounterBasedEventEnabled*/, - false /*ForceDisableProfiling*/)); + false /*ForceDisableProfiling*/, false)); (*Event)->RefCountExternal++; if (!(*Event)->CounterBasedEventsEnabled) @@ -935,7 +936,7 @@ ur_result_t urEventCreateWithNativeHandle( UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, true /*HostVisible*/, Event, false /*CounterBasedEventEnabled*/, - false /*ForceDisableProfiling*/)); + false /*ForceDisableProfiling*/, false)); (*Event)->RefCountExternal++; if (!(*Event)->CounterBasedEventsEnabled) @@ -1293,7 +1294,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled, - bool ForceDisableProfiling) { + bool ForceDisableProfiling, + bool InterruptBasedEventEnabled) { bool ProfilingEnabled = ForceDisableProfiling ? 
false : (!Queue || Queue->isProfilingEnabled()); bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists; @@ -1317,14 +1319,15 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, if (auto Res = Context->getFreeSlotInExistingOrNewPool( ZeEventPool, Index, HostVisible, ProfilingEnabled, Device, - CounterBasedEventEnabled, UsingImmediateCommandlists)) + CounterBasedEventEnabled, UsingImmediateCommandlists, + Queue->interruptBasedEventsEnabled())) return Res; ZeStruct ZeEventDesc; ZeEventDesc.index = Index; ZeEventDesc.wait = 0; - if (HostVisible || CounterBasedEventEnabled) { + if (HostVisible || CounterBasedEventEnabled || InterruptBasedEventEnabled) { ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; } else { // @@ -1350,6 +1353,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, return UR_RESULT_ERROR_UNKNOWN; } (*RetEvent)->CounterBasedEventsEnabled = CounterBasedEventEnabled; + (*RetEvent)->InterruptBasedEventsEnabled = InterruptBasedEventEnabled; if (HostVisible) (*RetEvent)->HostVisibleEvent = reinterpret_cast(*RetEvent); diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index 2c9e698e3c..de018e7060 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -33,7 +33,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled, - bool ForceDisableProfiling); + bool ForceDisableProfiling, + bool InterruptBasedEventEnabled); } // extern "C" // This is an experimental option that allows to disable caching of events in @@ -251,6 +252,8 @@ struct ur_event_handle_t_ : _ur_object { std::optional completionBatch; // Keeps track of whether we are using Counter-based Events. bool CounterBasedEventsEnabled = false; + // Keeps track of whether we are using Interrupt-based Events. + bool InterruptBasedEventsEnabled = false; }; // Helper function to implement zeHostSynchronize. 
diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index c4598f3472..e493dcc60a 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -104,10 +104,10 @@ ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, assert(st == ACCUMULATING); if (!barrierEvent) { - UR_CALL(EventCreate(queue->Context, queue, false /*IsMultiDevice*/, - true /*HostVisible*/, &barrierEvent, - false /*CounterBasedEventEnabled*/, - false /*ForceDisableProfiling*/)); + UR_CALL(EventCreate( + queue->Context, queue, false /*IsMultiDevice*/, true /*HostVisible*/, + &barrierEvent, false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/, false /*InterruptBasedEventEnabled*/)); } // Instead of collecting all the batched events, we simply issue a global @@ -1494,6 +1494,11 @@ bool ur_queue_handle_t_::doReuseDiscardedEvents() { return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents(); } +bool ur_queue_handle_t_::interruptBasedEventsEnabled() { + return isInOrderQueue() && Device->useDriverInOrderLists() && + isLowPowerEvents(); +} + ur_result_t ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) { if (LastCommandEvent && LastCommandEvent->IsDiscarded) { @@ -1654,6 +1659,10 @@ bool ur_queue_handle_t_::isInOrderQueue() const { 0); } +bool ur_queue_handle_t_::isLowPowerEvents() const { + return ((this->Properties & UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP) != 0); +} + // Helper function to perform the necessary cleanup of the events from reset cmd // list. ur_result_t CleanupEventListFromResetCmdList( @@ -1868,12 +1877,10 @@ ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine, // visible pool. // \param HostVisible tells if the event must be created in the // host-visible pool. If not set then this function will decide. -ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, - ur_event_handle_t *Event, - ur_command_t CommandType, - ur_command_list_ptr_t CommandList, - bool IsInternal, bool IsMultiDevice, - std::optional HostVisible) { +ur_result_t createEventAndAssociateQueue( + ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType, + ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice, + std::optional HostVisible, std::optional InterruptBasedEvents) { if (!HostVisible.has_value()) { // Internal/discarded events do not need host-scope visibility. @@ -1888,7 +1895,8 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, if (*Event == nullptr) UR_CALL(EventCreate( Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event, - Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/)); + Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/, + HostVisible.has_value() ? true : Queue->interruptBasedEventsEnabled())); (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 1108e4c268..786f1bdd51 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -533,6 +533,8 @@ struct ur_queue_handle_t_ : _ur_object { // queue. bool doReuseDiscardedEvents(); + bool interruptBasedEventsEnabled(); + // Append command to provided command list to wait and reset the last event if // it is discarded and create new ur_event_handle_t wrapper using the same // native event and put it to the cache. 
We call this method after each @@ -557,6 +559,9 @@ struct ur_queue_handle_t_ : _ur_object { // Returns true if the queue has discard events property. bool isDiscardEvents() const; + // Returns true if the queue has low power events property. + bool isLowPowerEvents() const; + // Returns true if the queue has explicit priority set by user. bool isPriorityLow() const; bool isPriorityHigh() const; @@ -708,7 +713,7 @@ struct ur_queue_handle_t_ : _ur_object { ur_result_t createEventAndAssociateQueue( ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType, ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice, - std::optional HostVisible = std::nullopt); + std::optional HostVisible = std::nullopt, std::optional InterruptBasedEvents = std::nullopt); // This helper function checks to see if an event for a command can be included // at the end of a command list batch. This will only be true if the event does From 933760b3f2ad62ca46880eeb80a27c8869e211fd Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Wed, 20 Nov 2024 17:44:04 -0800 Subject: [PATCH 2/6] [L0] Fix urEnqueueEventsWaitWithBarrier option1 Signed-off-by: Zhang, Winston --- source/adapters/level_zero/event.cpp | 295 ++++++++++++++++++++++++++- source/adapters/level_zero/queue.cpp | 4 +- 2 files changed, 296 insertions(+), 3 deletions(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 028a791bb7..4d7991b1a6 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -435,9 +435,300 @@ ur_result_t urEnqueueEventsWaitWithBarrierExt( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - return ur::level_zero::urEnqueueEventsWaitWithBarrier( - Queue, NumEventsInWaitList, EventWaitList, OutEvent); + bool InterruptBased = + EnqueueExtProp && + (EnqueueExtProp->flags & UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS); + if (!InterruptBased) { + return ur::level_zero::urEnqueueEventsWaitWithBarrier( + Queue, NumEventsInWaitList, EventWaitList, OutEvent); + } + // Lock automatically releases when this goes out of scope. + std::scoped_lock lock(Queue->Mutex); + + // Helper function for appending a barrier to a command list. + auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList, + _ur_ze_event_list_t &EventWaitList, + ur_event_handle_t &Event, + bool IsInternal) { + UR_CALL(createEventAndAssociateQueue( + Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, IsInternal, + false, std::nullopt, true)); + Event->WaitList = EventWaitList; + + // For in-order queue we don't need a real barrier, just wait for + // requested events in potentially different queues and add a "barrier" + // event signal because it is already guaranteed that previous commands + // in this queue are completed when the signal is started. + // + // Only consideration here is that when profiling is used, signalEvent + // cannot be used if EventWaitList.Length == 0. In those cases, we need + // to fallback directly to barrier to have correct timestamps. See here: + // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t + // + // TODO: this and other special handling of in-order queues to be + // updated when/if Level Zero adds native support for in-order queues. 
+ // + if (Queue->isInOrderQueue() && InOrderBarrierBySignal && + !Queue->isProfilingEnabled()) { + if (EventWaitList.Length) { + if (CmdList->second.IsInOrderList) { + for (unsigned i = EventWaitList.Length; i-- > 0;) { + // If the event is a multidevice event, then given driver in order + // lists, we cannot include this into the wait event list due to + // driver limitations. + if (EventWaitList.UrEventList[i]->IsMultiDevice) { + EventWaitList.Length--; + if (EventWaitList.Length != i) { + std::swap(EventWaitList.UrEventList[i], + EventWaitList.UrEventList[EventWaitList.Length]); + std::swap(EventWaitList.ZeEventList[i], + EventWaitList.ZeEventList[EventWaitList.Length]); + } + } + } + } + ZE2UR_CALL( + zeCommandListAppendWaitOnEvents, + (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); + } + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CmdList->first, Event->ZeEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CmdList->first, Event->ZeEvent, EventWaitList.Length, + EventWaitList.ZeEventList)); + } + + return UR_RESULT_SUCCESS; + }; + + // If the queue is in-order then each command in it effectively acts as a + // barrier, so we don't need to do anything except if we were requested + // a "barrier" event to be created. Or if we need to wait for events in + // potentially different queues. + // + if (Queue->isInOrderQueue() && NumEventsInWaitList == 0 && !OutEvent) { + return UR_RESULT_SUCCESS; + } + + ur_event_handle_t ResultEvent = nullptr; + bool IsInternal = OutEvent == nullptr; + // For in-order queue and wait-list which is empty or has events from + // the same queue just use the last command event as the barrier event. + // This optimization is disabled when profiling is enabled to ensure + // accurate profiling values & the overhead that profiling incurs. + if (Queue->isInOrderQueue() && !Queue->isProfilingEnabled() && + WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, + EventWaitList) && + Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { + UR_CALL(ur::level_zero::urEventRetain(Queue->LastCommandEvent)); + ResultEvent = Queue->LastCommandEvent; + if (OutEvent) { + *OutEvent = ResultEvent; + } + return UR_RESULT_SUCCESS; + } + + // Indicator for whether batching is allowed. This may be changed later in + // this function, but allow it by default. + bool OkToBatch = true; + + // If we have a list of events to make the barrier from, then we can create a + // barrier on these and use the resulting event as our future barrier. + // We use the same approach if + // UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a + // positive value. + // We use the same approach if we have in-order queue because every command + // depends on previous one, so we don't need to insert barrier to multiple + // command lists. + if (NumEventsInWaitList || !UseMultipleCmdlistBarriers || + Queue->isInOrderQueue()) { + // Retain the events as they will be owned by the result event. + _ur_ze_event_list_t TmpWaitList; + UR_CALL(TmpWaitList.createAndRetainUrZeEventList( + NumEventsInWaitList, EventWaitList, Queue, false /*UseCopyEngine=*/)); + + // Get an arbitrary command-list in the queue. + ur_command_list_ptr_t CmdList; + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, + EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); + + // Insert the barrier into the command-list and execute. 
+ UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, ResultEvent, + IsInternal)); + + UR_CALL( + Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); + + // Because of the dependency between commands in the in-order queue we don't + // need to keep track of any active barriers if we have in-order queue. + if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue()) { + auto UREvent = reinterpret_cast(ResultEvent); + Queue->ActiveBarriers.add(UREvent); + } + + if (OutEvent) { + *OutEvent = ResultEvent; + } + return UR_RESULT_SUCCESS; + } + + // Since there are no events to explicitly create a barrier for, we are + // inserting a queue-wide barrier. + + // Command list(s) for putting barriers. + std::vector CmdLists; + + // There must be at least one L0 queue. + auto &ComputeGroup = Queue->ComputeQueueGroupsByTID.get(); + auto &CopyGroup = Queue->CopyQueueGroupsByTID.get(); + UR_ASSERT(!ComputeGroup.ZeQueues.empty() || !CopyGroup.ZeQueues.empty(), + UR_RESULT_ERROR_INVALID_QUEUE); + + size_t NumQueues = 0; + for (auto &QueueMap : + {Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID}) + for (auto &QueueGroup : QueueMap) + NumQueues += QueueGroup.second.ZeQueues.size(); + + OkToBatch = true; + // Get an available command list tied to each command queue. We need + // these so a queue-wide barrier can be inserted into each command + // queue. + CmdLists.reserve(NumQueues); + for (auto &QueueMap : + {Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID}) + for (auto &QueueGroup : QueueMap) { + bool UseCopyEngine = + QueueGroup.second.Type != ur_queue_handle_t_::queue_type::Compute; + if (Queue->UsingImmCmdLists) { + // If immediate command lists are being used, each will act as their own + // queue, so we must insert a barrier into each. + for (auto &ImmCmdList : QueueGroup.second.ImmCmdLists) + if (ImmCmdList != Queue->CommandListMap.end()) + CmdLists.push_back(ImmCmdList); + } else { + for (auto ZeQueue : QueueGroup.second.ZeQueues) { + if (ZeQueue) { + ur_command_list_ptr_t CmdList; + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, CmdList, UseCopyEngine, NumEventsInWaitList, + EventWaitList, OkToBatch, &ZeQueue)); + CmdLists.push_back(CmdList); + } + } + } + } + + // If no activity has occurred on the queue then there will be no cmdlists. + // We need one for generating an Event, so create one. + if (CmdLists.size() == 0) { + // Get any available command list. + ur_command_list_ptr_t CmdList; + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, + EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); + CmdLists.push_back(CmdList); + } + + if (CmdLists.size() > 1) { + // Insert a barrier into each unique command queue using the available + // command-lists. + std::vector EventWaitVector(CmdLists.size()); + for (size_t I = 0; I < CmdLists.size(); ++I) { + _ur_ze_event_list_t waitlist; + UR_CALL(insertBarrierIntoCmdList( + CmdLists[I], waitlist, EventWaitVector[I], true /*IsInternal*/)); + } + // If there were multiple queues we need to create a "convergence" event to + // be our active barrier. This convergence event is signalled by a barrier + // on all the events from the barriers we have inserted into each queue. + // Use the first command list as our convergence command list. + ur_command_list_ptr_t &ConvergenceCmdList = CmdLists[0]; + + // Create an event list. It will take ownership over all relevant events so + // we relinquish ownership and let it keep all events it needs. 
+ _ur_ze_event_list_t BaseWaitList; + UR_CALL(BaseWaitList.createAndRetainUrZeEventList( + EventWaitVector.size(), + reinterpret_cast(EventWaitVector.data()), + Queue, ConvergenceCmdList->second.isCopy(Queue))); + + // Insert a barrier with the events from each command-queue into the + // convergence command list. The resulting event signals the convergence of + // all barriers. + UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList, + ResultEvent, IsInternal)); + } else { + // If there is only a single queue then insert a barrier and the single + // result event can be used as our active barrier and used as the return + // event. Take into account whether output event is discarded or not. + _ur_ze_event_list_t waitlist; + UR_CALL(insertBarrierIntoCmdList(CmdLists[0], waitlist, ResultEvent, + IsInternal)); + } + + // Execute each command list so the barriers can be encountered. + for (ur_command_list_ptr_t &CmdList : CmdLists) { + bool IsCopy = + CmdList->second.isCopy(reinterpret_cast(Queue)); + const auto &CommandBatch = + (IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch; + // Only batch if the matching CmdList is already open. + OkToBatch = CommandBatch.OpenCommandList == CmdList; + + UR_CALL( + Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); + } + + UR_CALL(Queue->ActiveBarriers.clear()); + Queue->ActiveBarriers.add(ResultEvent); + if (OutEvent) { + *OutEvent = ResultEvent; + } + return UR_RESULT_SUCCESS; } +/* +ur_result_t urEnqueueEventsWaitWithBarrierExt( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + const ur_exp_enqueue_ext_properties_t + *EnqueueExtProp, ///< [in][optional] pointer to the extended enqueue +properties uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating that + ///< all previously enqueued commands must be complete. + ur_event_handle_t + *OutEvent ///< [in,out][optional] return an event object that identifies + ///< this particular command instance. 
+) { + bool InterruptBased = EnqueueExtProp && (EnqueueExtProp->flags & +UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS); ur_event_handle_t ResultEvent = +nullptr; + + if (InterruptBased) { + // Create the event with interrupt-based properties + ur_command_list_ptr_t CmdList; + UR_CALL(Queue->Context->getAvailableCommandList(Queue, CmdList, false, +NumEventsInWaitList, EventWaitList, true, nullptr)); + UR_CALL(createEventAndAssociateQueue(Queue, &ResultEvent, +UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, true, false, std::nullopt, +InterruptBased)); + } + + ur_result_t result = ur::level_zero::urEnqueueEventsWaitWithBarrier( + Queue, NumEventsInWaitList, EventWaitList, OutEvent); + + if (InterruptBased && OutEvent) { + *OutEvent = ResultEvent; + } + return result; +} + +*/ ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index e493dcc60a..cae3d3d989 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1896,7 +1896,9 @@ ur_result_t createEventAndAssociateQueue( UR_CALL(EventCreate( Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event, Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/, - HostVisible.has_value() ? true : Queue->interruptBasedEventsEnabled())); + InterruptBasedEvents.has_value() + ? InterruptBasedEvents.value() + : Queue->interruptBasedEventsEnabled())); (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; From 28e7d38fc4a1f16db558ca1493ce3cad5007e4d1 Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Thu, 21 Nov 2024 18:05:44 -0800 Subject: [PATCH 3/6] [L0] Cleaned up urEnqueueEventsWaitWithBarrier(Ext) with helper option Signed-off-by: Zhang, Winston --- source/adapters/level_zero/event.cpp | 430 ++++++--------------------- source/adapters/level_zero/event.hpp | 6 + 2 files changed, 93 insertions(+), 343 deletions(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 4d7991b1a6..b9eb6bacd6 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -156,7 +156,7 @@ static const bool InOrderBarrierBySignal = [] { return (UrRet ? std::atoi(UrRet) : true); }(); -ur_result_t urEnqueueEventsWaitWithBarrier( +ur_result_t EnqueueEventsWaitWithBarrier( ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -166,69 +166,69 @@ ur_result_t urEnqueueEventsWaitWithBarrier( ///< the numEventsInWaitList must be 0, indicating that ///< all previously enqueued commands must be complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. -) { + *OutEvent, ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. + bool InterruptBasedEventsEnabled) { // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); // Helper function for appending a barrier to a command list. 
- auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList, - _ur_ze_event_list_t &EventWaitList, - ur_event_handle_t &Event, - bool IsInternal) { - UR_CALL(createEventAndAssociateQueue(Queue, &Event, - UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, - CmdList, IsInternal, false)); - - Event->WaitList = EventWaitList; - - // For in-order queue we don't need a real barrier, just wait for - // requested events in potentially different queues and add a "barrier" - // event signal because it is already guaranteed that previous commands - // in this queue are completed when the signal is started. - // - // Only consideration here is that when profiling is used, signalEvent - // cannot be used if EventWaitList.Lenght == 0. In those cases, we need - // to fallback directly to barrier to have correct timestamps. See here: - // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t - // - // TODO: this and other special handling of in-order queues to be - // updated when/if Level Zero adds native support for in-order queues. - // - if (Queue->isInOrderQueue() && InOrderBarrierBySignal && - !Queue->isProfilingEnabled()) { - if (EventWaitList.Length) { - if (CmdList->second.IsInOrderList) { - for (unsigned i = EventWaitList.Length; i-- > 0;) { - // If the event is a multidevice event, then given driver in order - // lists, we cannot include this into the wait event list due to - // driver limitations. - if (EventWaitList.UrEventList[i]->IsMultiDevice) { - EventWaitList.Length--; - if (EventWaitList.Length != i) { - std::swap(EventWaitList.UrEventList[i], - EventWaitList.UrEventList[EventWaitList.Length]); - std::swap(EventWaitList.ZeEventList[i], - EventWaitList.ZeEventList[EventWaitList.Length]); + auto insertBarrierIntoCmdList = + [&Queue](ur_command_list_ptr_t CmdList, + _ur_ze_event_list_t &EventWaitList, ur_event_handle_t &Event, + bool IsInternal, bool InterruptBasedEventsEnabled) { + UR_CALL(createEventAndAssociateQueue( + Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, + IsInternal, InterruptBasedEventsEnabled)); + + Event->WaitList = EventWaitList; + + // For in-order queue we don't need a real barrier, just wait for + // requested events in potentially different queues and add a "barrier" + // event signal because it is already guaranteed that previous commands + // in this queue are completed when the signal is started. + // + // Only consideration here is that when profiling is used, signalEvent + // cannot be used if EventWaitList.Lenght == 0. In those cases, we need + // to fallback directly to barrier to have correct timestamps. See here: + // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t + // + // TODO: this and other special handling of in-order queues to be + // updated when/if Level Zero adds native support for in-order queues. + // + if (Queue->isInOrderQueue() && InOrderBarrierBySignal && + !Queue->isProfilingEnabled()) { + if (EventWaitList.Length) { + if (CmdList->second.IsInOrderList) { + for (unsigned i = EventWaitList.Length; i-- > 0;) { + // If the event is a multidevice event, then given driver in + // order lists, we cannot include this into the wait event list + // due to driver limitations. 
+ if (EventWaitList.UrEventList[i]->IsMultiDevice) { + EventWaitList.Length--; + if (EventWaitList.Length != i) { + std::swap(EventWaitList.UrEventList[i], + EventWaitList.UrEventList[EventWaitList.Length]); + std::swap(EventWaitList.ZeEventList[i], + EventWaitList.ZeEventList[EventWaitList.Length]); + } + } } } + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CmdList->first, EventWaitList.Length, + EventWaitList.ZeEventList)); } + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CmdList->first, Event->ZeEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CmdList->first, Event->ZeEvent, EventWaitList.Length, + EventWaitList.ZeEventList)); } - ZE2UR_CALL( - zeCommandListAppendWaitOnEvents, - (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); - } - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CmdList->first, Event->ZeEvent)); - } else { - ZE2UR_CALL(zeCommandListAppendBarrier, - (CmdList->first, Event->ZeEvent, EventWaitList.Length, - EventWaitList.ZeEventList)); - } - return UR_RESULT_SUCCESS; - }; + return UR_RESULT_SUCCESS; + }; // If the queue is in-order then each command in it effectively acts as a // barrier, so we don't need to do anything except if we were requested @@ -285,7 +285,7 @@ ur_result_t urEnqueueEventsWaitWithBarrier( // Insert the barrier into the command-list and execute. UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, ResultEvent, - IsInternal)); + IsInternal, InterruptBasedEventsEnabled)); UR_CALL( Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); @@ -367,8 +367,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier( std::vector EventWaitVector(CmdLists.size()); for (size_t I = 0; I < CmdLists.size(); ++I) { _ur_ze_event_list_t waitlist; - UR_CALL(insertBarrierIntoCmdList( - CmdLists[I], waitlist, EventWaitVector[I], true /*IsInternal*/)); + UR_CALL(insertBarrierIntoCmdList(CmdLists[I], waitlist, + EventWaitVector[I], true /*IsInternal*/, + InterruptBasedEventsEnabled)); } // If there were multiple queues we need to create a "convergence" event to // be our active barrier. This convergence event is signalled by a barrier @@ -388,14 +389,15 @@ ur_result_t urEnqueueEventsWaitWithBarrier( // convergence command list. The resulting event signals the convergence of // all barriers. UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList, - ResultEvent, IsInternal)); + ResultEvent, IsInternal, + InterruptBasedEventsEnabled)); } else { // If there is only a single queue then insert a barrier and the single // result event can be used as our active barrier and used as the return // event. Take into account whether output event is discarded or not. _ur_ze_event_list_t waitlist; UR_CALL(insertBarrierIntoCmdList(CmdLists[0], waitlist, ResultEvent, - IsInternal)); + IsInternal, InterruptBasedEventsEnabled)); } // Execute each command list so the barriers can be encountered. 
@@ -419,11 +421,8 @@ ur_result_t urEnqueueEventsWaitWithBarrier( return UR_RESULT_SUCCESS; } -ur_result_t urEnqueueEventsWaitWithBarrierExt( - ur_queue_handle_t Queue, ///< [in] handle of the queue object - const ur_exp_enqueue_ext_properties_t - *EnqueueExtProp, ///< [in][optional] pointer to the extended enqueue - ///< properties +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -435,266 +434,18 @@ ur_result_t urEnqueueEventsWaitWithBarrierExt( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. ) { - bool InterruptBased = - EnqueueExtProp && - (EnqueueExtProp->flags & UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS); - if (!InterruptBased) { - return ur::level_zero::urEnqueueEventsWaitWithBarrier( - Queue, NumEventsInWaitList, EventWaitList, OutEvent); - } - // Lock automatically releases when this goes out of scope. - std::scoped_lock lock(Queue->Mutex); - - // Helper function for appending a barrier to a command list. - auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList, - _ur_ze_event_list_t &EventWaitList, - ur_event_handle_t &Event, - bool IsInternal) { - UR_CALL(createEventAndAssociateQueue( - Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, IsInternal, - false, std::nullopt, true)); - Event->WaitList = EventWaitList; - - // For in-order queue we don't need a real barrier, just wait for - // requested events in potentially different queues and add a "barrier" - // event signal because it is already guaranteed that previous commands - // in this queue are completed when the signal is started. - // - // Only consideration here is that when profiling is used, signalEvent - // cannot be used if EventWaitList.Length == 0. In those cases, we need - // to fallback directly to barrier to have correct timestamps. See here: - // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t - // - // TODO: this and other special handling of in-order queues to be - // updated when/if Level Zero adds native support for in-order queues. - // - if (Queue->isInOrderQueue() && InOrderBarrierBySignal && - !Queue->isProfilingEnabled()) { - if (EventWaitList.Length) { - if (CmdList->second.IsInOrderList) { - for (unsigned i = EventWaitList.Length; i-- > 0;) { - // If the event is a multidevice event, then given driver in order - // lists, we cannot include this into the wait event list due to - // driver limitations. 
- if (EventWaitList.UrEventList[i]->IsMultiDevice) { - EventWaitList.Length--; - if (EventWaitList.Length != i) { - std::swap(EventWaitList.UrEventList[i], - EventWaitList.UrEventList[EventWaitList.Length]); - std::swap(EventWaitList.ZeEventList[i], - EventWaitList.ZeEventList[EventWaitList.Length]); - } - } - } - } - ZE2UR_CALL( - zeCommandListAppendWaitOnEvents, - (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); - } - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CmdList->first, Event->ZeEvent)); - } else { - ZE2UR_CALL(zeCommandListAppendBarrier, - (CmdList->first, Event->ZeEvent, EventWaitList.Length, - EventWaitList.ZeEventList)); - } - - return UR_RESULT_SUCCESS; - }; - - // If the queue is in-order then each command in it effectively acts as a - // barrier, so we don't need to do anything except if we were requested - // a "barrier" event to be created. Or if we need to wait for events in - // potentially different queues. - // - if (Queue->isInOrderQueue() && NumEventsInWaitList == 0 && !OutEvent) { - return UR_RESULT_SUCCESS; - } - - ur_event_handle_t ResultEvent = nullptr; - bool IsInternal = OutEvent == nullptr; - // For in-order queue and wait-list which is empty or has events from - // the same queue just use the last command event as the barrier event. - // This optimization is disabled when profiling is enabled to ensure - // accurate profiling values & the overhead that profiling incurs. - if (Queue->isInOrderQueue() && !Queue->isProfilingEnabled() && - WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList, - EventWaitList) && - Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) { - UR_CALL(ur::level_zero::urEventRetain(Queue->LastCommandEvent)); - ResultEvent = Queue->LastCommandEvent; - if (OutEvent) { - *OutEvent = ResultEvent; - } - return UR_RESULT_SUCCESS; - } - - // Indicator for whether batching is allowed. This may be changed later in - // this function, but allow it by default. - bool OkToBatch = true; - - // If we have a list of events to make the barrier from, then we can create a - // barrier on these and use the resulting event as our future barrier. - // We use the same approach if - // UR_L0_USE_MULTIPLE_COMMANDLIST_BARRIERS is not set to a - // positive value. - // We use the same approach if we have in-order queue because every command - // depends on previous one, so we don't need to insert barrier to multiple - // command lists. - if (NumEventsInWaitList || !UseMultipleCmdlistBarriers || - Queue->isInOrderQueue()) { - // Retain the events as they will be owned by the result event. - _ur_ze_event_list_t TmpWaitList; - UR_CALL(TmpWaitList.createAndRetainUrZeEventList( - NumEventsInWaitList, EventWaitList, Queue, false /*UseCopyEngine=*/)); - - // Get an arbitrary command-list in the queue. - ur_command_list_ptr_t CmdList; - UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); - - // Insert the barrier into the command-list and execute. - UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, ResultEvent, - IsInternal)); - - UR_CALL( - Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); - - // Because of the dependency between commands in the in-order queue we don't - // need to keep track of any active barriers if we have in-order queue. 
- if (UseMultipleCmdlistBarriers && !Queue->isInOrderQueue()) { - auto UREvent = reinterpret_cast(ResultEvent); - Queue->ActiveBarriers.add(UREvent); - } - - if (OutEvent) { - *OutEvent = ResultEvent; - } - return UR_RESULT_SUCCESS; - } - - // Since there are no events to explicitly create a barrier for, we are - // inserting a queue-wide barrier. - - // Command list(s) for putting barriers. - std::vector CmdLists; - - // There must be at least one L0 queue. - auto &ComputeGroup = Queue->ComputeQueueGroupsByTID.get(); - auto &CopyGroup = Queue->CopyQueueGroupsByTID.get(); - UR_ASSERT(!ComputeGroup.ZeQueues.empty() || !CopyGroup.ZeQueues.empty(), - UR_RESULT_ERROR_INVALID_QUEUE); - - size_t NumQueues = 0; - for (auto &QueueMap : - {Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID}) - for (auto &QueueGroup : QueueMap) - NumQueues += QueueGroup.second.ZeQueues.size(); - - OkToBatch = true; - // Get an available command list tied to each command queue. We need - // these so a queue-wide barrier can be inserted into each command - // queue. - CmdLists.reserve(NumQueues); - for (auto &QueueMap : - {Queue->ComputeQueueGroupsByTID, Queue->CopyQueueGroupsByTID}) - for (auto &QueueGroup : QueueMap) { - bool UseCopyEngine = - QueueGroup.second.Type != ur_queue_handle_t_::queue_type::Compute; - if (Queue->UsingImmCmdLists) { - // If immediate command lists are being used, each will act as their own - // queue, so we must insert a barrier into each. - for (auto &ImmCmdList : QueueGroup.second.ImmCmdLists) - if (ImmCmdList != Queue->CommandListMap.end()) - CmdLists.push_back(ImmCmdList); - } else { - for (auto ZeQueue : QueueGroup.second.ZeQueues) { - if (ZeQueue) { - ur_command_list_ptr_t CmdList; - UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CmdList, UseCopyEngine, NumEventsInWaitList, - EventWaitList, OkToBatch, &ZeQueue)); - CmdLists.push_back(CmdList); - } - } - } - } - - // If no activity has occurred on the queue then there will be no cmdlists. - // We need one for generating an Event, so create one. - if (CmdLists.size() == 0) { - // Get any available command list. - ur_command_list_ptr_t CmdList; - UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); - CmdLists.push_back(CmdList); - } - - if (CmdLists.size() > 1) { - // Insert a barrier into each unique command queue using the available - // command-lists. - std::vector EventWaitVector(CmdLists.size()); - for (size_t I = 0; I < CmdLists.size(); ++I) { - _ur_ze_event_list_t waitlist; - UR_CALL(insertBarrierIntoCmdList( - CmdLists[I], waitlist, EventWaitVector[I], true /*IsInternal*/)); - } - // If there were multiple queues we need to create a "convergence" event to - // be our active barrier. This convergence event is signalled by a barrier - // on all the events from the barriers we have inserted into each queue. - // Use the first command list as our convergence command list. - ur_command_list_ptr_t &ConvergenceCmdList = CmdLists[0]; - - // Create an event list. It will take ownership over all relevant events so - // we relinquish ownership and let it keep all events it needs. - _ur_ze_event_list_t BaseWaitList; - UR_CALL(BaseWaitList.createAndRetainUrZeEventList( - EventWaitVector.size(), - reinterpret_cast(EventWaitVector.data()), - Queue, ConvergenceCmdList->second.isCopy(Queue))); - - // Insert a barrier with the events from each command-queue into the - // convergence command list. 
The resulting event signals the convergence of - // all barriers. - UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList, - ResultEvent, IsInternal)); - } else { - // If there is only a single queue then insert a barrier and the single - // result event can be used as our active barrier and used as the return - // event. Take into account whether output event is discarded or not. - _ur_ze_event_list_t waitlist; - UR_CALL(insertBarrierIntoCmdList(CmdLists[0], waitlist, ResultEvent, - IsInternal)); - } - - // Execute each command list so the barriers can be encountered. - for (ur_command_list_ptr_t &CmdList : CmdLists) { - bool IsCopy = - CmdList->second.isCopy(reinterpret_cast(Queue)); - const auto &CommandBatch = - (IsCopy) ? Queue->CopyCommandBatch : Queue->ComputeCommandBatch; - // Only batch if the matching CmdList is already open. - OkToBatch = CommandBatch.OpenCommandList == CmdList; - - UR_CALL( - Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); - } - - UR_CALL(Queue->ActiveBarriers.clear()); - Queue->ActiveBarriers.add(ResultEvent); - if (OutEvent) { - *OutEvent = ResultEvent; - } - return UR_RESULT_SUCCESS; + return static_cast(EnqueueEventsWaitWithBarrier)( + Queue, NumEventsInWaitList, EventWaitList, OutEvent, + Queue->interruptBasedEventsEnabled()); } -/* + ur_result_t urEnqueueEventsWaitWithBarrierExt( ur_queue_handle_t Queue, ///< [in] handle of the queue object const ur_exp_enqueue_ext_properties_t *EnqueueExtProp, ///< [in][optional] pointer to the extended enqueue -properties uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] ///< pointer to a list of events that must be complete @@ -705,31 +456,24 @@ properties uint32_t NumEventsInWaitList, ///< [in] size of the event wait list *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
) { - bool InterruptBased = EnqueueExtProp && (EnqueueExtProp->flags & -UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS); ur_event_handle_t ResultEvent = -nullptr; - - if (InterruptBased) { - // Create the event with interrupt-based properties - ur_command_list_ptr_t CmdList; - UR_CALL(Queue->Context->getAvailableCommandList(Queue, CmdList, false, -NumEventsInWaitList, EventWaitList, true, nullptr)); - UR_CALL(createEventAndAssociateQueue(Queue, &ResultEvent, -UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, true, false, std::nullopt, -InterruptBased)); - } - - ur_result_t result = ur::level_zero::urEnqueueEventsWaitWithBarrier( - Queue, NumEventsInWaitList, EventWaitList, OutEvent); - - if (InterruptBased && OutEvent) { - *OutEvent = ResultEvent; - } - return result; + bool InterruptBased = + EnqueueExtProp && + (EnqueueExtProp->flags & UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS); + if (InterruptBased) { + // Create the event with interrupt-based properties + return static_cast(EnqueueEventsWaitWithBarrier)( + Queue, NumEventsInWaitList, EventWaitList, OutEvent, true); + } else { + return static_cast(EnqueueEventsWaitWithBarrier)( + Queue, NumEventsInWaitList, EventWaitList, OutEvent, + Queue->interruptBasedEventsEnabled()); + } } -*/ - ur_result_t urEventGetInfo( ur_event_handle_t Event, ///< [in] handle of the event object ur_event_info_t PropName, ///< [in] the name of the event property to query diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index de018e7060..d894b2ef4e 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -279,6 +279,12 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle); ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, bool SetEventCompleted); +ur_result_t EnqueueEventsWaitWithBarrier(ur_queue_handle_t Queue, + uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventList, + ur_event_handle_t *OutEvent, + bool InterruptBasedEventsEnabled); + // Get value of device scope events env var setting or default setting static const EventsScope DeviceEventsSetting = [] { char *UrRet = std::getenv("UR_L0_DEVICE_SCOPE_EVENTS"); From d9576ca3ba96496f69c55ce8910f1f6f37875005 Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Thu, 21 Nov 2024 18:37:58 -0800 Subject: [PATCH 4/6] [L0] Rebased against top of main Signed-off-by: Zhang, Winston --- source/adapters/level_zero/command_buffer.cpp | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 230ff1c160..c573a1d5cb 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -215,7 +215,8 @@ ur_result_t createSyncPointAndGetZeEvents( UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/, false /*IsMultiDevice*/, HostVisible, &LaunchEvent, false /*CounterBasedEventEnabled*/, - !CommandBuffer->IsProfilingEnabled, false)); + !CommandBuffer->IsProfilingEnabled, + false /*InterruptBasedEventEnabled*/)); LaunchEvent->CommandType = CommandType; ZeLaunchEvent = LaunchEvent->ZeEvent; @@ -662,13 +663,15 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, if (Device->hasMainCopyEngine()) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false, false, &CopyFinishedEvent, UseCounterBasedEvents, - !EnableProfiling)); + !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); } if 
(EnableProfiling) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &ComputeFinishedEvent, - UseCounterBasedEvents, !EnableProfiling)); + UseCounterBasedEvents, !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); } } @@ -677,7 +680,8 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, if (WaitEventPath) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &WaitEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); + false /*CounterBasedEventEnabled*/, !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); } // Create ZeCommandListResetEvents only if counter-based events are not being @@ -689,7 +693,8 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, if (!UseCounterBasedEvents) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &AllResetEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); + false /*CounterBasedEventEnabled*/, !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); UR_CALL(createMainCommandList(Context, Device, false, false, false, ZeCommandListResetEvents)); @@ -697,7 +702,8 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, // The ExecutionFinishedEvent is only waited on by ZeCommandListResetEvents. UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &ExecutionFinishedEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); + false /*CounterBasedEventEnabled*/, !EnableProfiling, + false /*InterruptBased*/)); } try { From 085631310ec66f24a6d04055c7506a507b102488 Mon Sep 17 00:00:00 2001 From: "Zhang, Winston" Date: Mon, 25 Nov 2024 16:40:59 -0800 Subject: [PATCH 5/6] [L0] Interrupt-based event implementation Signed-off-by: Zhang, Winston --- source/adapters/level_zero/context.cpp | 14 +++-- source/adapters/level_zero/context.hpp | 85 ++++++++++++++++++++------ source/adapters/level_zero/device.cpp | 2 +- source/adapters/level_zero/event.cpp | 9 +-- source/adapters/level_zero/queue.cpp | 29 +++++---- source/adapters/level_zero/queue.hpp | 6 +- 6 files changed, 101 insertions(+), 44 deletions(-) diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 58601dbc0d..821a8a4a05 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -488,9 +488,9 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( if (Device) { ZeDevice = Device->ZeDevice; } - std::list *ZePoolCache = - getZeEventPoolCache(HostVisible, ProfilingEnabled, - CounterBasedEventEnabled, UsingImmCmdList, ZeDevice); + std::list *ZePoolCache = getZeEventPoolCache( + HostVisible, ProfilingEnabled, CounterBasedEventEnabled, UsingImmCmdList, + InterruptBasedEventEnabled, ZeDevice); if (!ZePoolCache->empty()) { if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) { @@ -572,7 +572,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( ur_event_handle_t ur_context_handle_t_::getEventFromContextCache( bool HostVisible, bool WithProfiling, ur_device_handle_t Device, - bool CounterBasedEventEnabled) { + bool CounterBasedEventEnabled, bool InterruptBasedEventEnabled) { std::scoped_lock Lock(EventCacheMutex); auto Cache = getEventCache(HostVisible, WithProfiling, Device); if (Cache->empty()) @@ -583,6 +583,9 @@ ur_event_handle_t ur_context_handle_t_::getEventFromContextCache( if 
(Event->CounterBasedEventsEnabled != CounterBasedEventEnabled) { return nullptr; } + if (Event->InterruptBasedEventsEnabled != InterruptBasedEventEnabled) { + return nullptr; + } Cache->erase(It); // We have to reset event before using it. Event->reset(); @@ -623,7 +626,8 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { std::list *ZePoolCache = getZeEventPoolCache( Event->isHostVisible(), Event->isProfilingEnabled(), - Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, ZeDevice); + Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, + Event->InterruptBasedEventsEnabled, ZeDevice); // Put the empty pool to the cache of the pools. if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0) diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 023f0de09c..815a325a46 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -168,9 +168,9 @@ struct ur_context_handle_t_ : _ur_object { // head. // // Cache of event pools to which host-visible events are added to. - std::vector> ZeEventPoolCache{12}; + std::vector> ZeEventPoolCache{30}; std::vector> - ZeEventPoolCacheDeviceMap{12}; + ZeEventPoolCacheDeviceMap{30}; // This map will be used to determine if a pool is full or not // by storing number of empty slots available in the pool. @@ -233,7 +233,8 @@ struct ur_context_handle_t_ : _ur_object { ur_event_handle_t getEventFromContextCache(bool HostVisible, bool WithProfiling, ur_device_handle_t Device, - bool CounterBasedEventEnabled); + bool CounterBasedEventEnabled, + bool InterruptBasedEventEnabled); // Add ur_event_handle_t to cache. void addEventToContextCache(ur_event_handle_t); @@ -244,17 +245,29 @@ struct ur_context_handle_t_ : _ur_object { HostVisibleCounterBasedRegularCacheType, HostInvisibleCounterBasedRegularCacheType, HostVisibleCounterBasedImmediateCacheType, - HostInvisibleCounterBasedImmediateCacheType + HostInvisibleCounterBasedImmediateCacheType, + + HostVisibleInterruptBasedRegularCacheType, + HostInvisibleInterruptBasedRegularCacheType, + HostVisibleInterruptBasedImmediateCacheType, + HostInvisibleInterruptBasedImmediateCacheType, + + HostVisibleInterruptAndCounterBasedRegularCacheType, + HostInvisibleInterruptAndCounterBasedRegularCacheType, + HostVisibleInterruptAndCounterBasedImmediateCacheType, + HostInvisibleInterruptAndCounterBasedImmediateCacheType }; std::list * getZeEventPoolCache(bool HostVisible, bool WithProfiling, bool CounterBasedEventEnabled, bool UsingImmediateCmdList, + bool InterruptBasedEventEnabled, ze_device_handle_t ZeDevice) { EventPoolCacheType CacheType; calculateCacheIndex(HostVisible, CounterBasedEventEnabled, - UsingImmediateCmdList, CacheType); + InterruptBasedEventEnabled, UsingImmediateCmdList, + CacheType); if (ZeDevice) { auto ZeEventPoolCacheMap = WithProfiling ? 
&ZeEventPoolCacheDeviceMap[CacheType * 2] @@ -274,23 +287,57 @@ struct ur_context_handle_t_ : _ur_object { ur_result_t calculateCacheIndex(bool HostVisible, bool CounterBasedEventEnabled, bool UsingImmediateCmdList, + bool InterruptBasedEventEnabled, EventPoolCacheType &CacheType) { - if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) { - CacheType = HostVisibleCounterBasedRegularCacheType; - } else if (CounterBasedEventEnabled && !HostVisible && - !UsingImmediateCmdList) { - CacheType = HostInvisibleCounterBasedRegularCacheType; - } else if (CounterBasedEventEnabled && HostVisible && - UsingImmediateCmdList) { - CacheType = HostVisibleCounterBasedImmediateCacheType; - } else if (CounterBasedEventEnabled && !HostVisible && - UsingImmediateCmdList) { - CacheType = HostInvisibleCounterBasedImmediateCacheType; - } else if (!CounterBasedEventEnabled && HostVisible) { - CacheType = HostVisibleCacheType; + if (InterruptBasedEventEnabled) { + if (CounterBasedEventEnabled) { + if (HostVisible) { + if (UsingImmediateCmdList) { + CacheType = HostVisibleInterruptAndCounterBasedImmediateCacheType; + } else { + CacheType = HostVisibleInterruptAndCounterBasedRegularCacheType; + } + } else { + if (UsingImmediateCmdList) { + CacheType = HostInvisibleInterruptAndCounterBasedImmediateCacheType; + } else { + CacheType = HostInvisibleInterruptAndCounterBasedRegularCacheType; + } + } + } else { + if (HostVisible) { + if (UsingImmediateCmdList) { + CacheType = HostVisibleInterruptBasedImmediateCacheType; + } else { + CacheType = HostVisibleInterruptBasedRegularCacheType; + } + } else { + if (UsingImmediateCmdList) { + CacheType = HostInvisibleInterruptBasedImmediateCacheType; + } else { + CacheType = HostInvisibleInterruptBasedRegularCacheType; + } + } + } } else { - CacheType = HostInvisibleCacheType; + if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) { + CacheType = HostVisibleCounterBasedRegularCacheType; + } else if (CounterBasedEventEnabled && !HostVisible && + !UsingImmediateCmdList) { + CacheType = HostInvisibleCounterBasedRegularCacheType; + } else if (CounterBasedEventEnabled && HostVisible && + UsingImmediateCmdList) { + CacheType = HostVisibleCounterBasedImmediateCacheType; + } else if (CounterBasedEventEnabled && !HostVisible && + UsingImmediateCmdList) { + CacheType = HostInvisibleCounterBasedImmediateCacheType; + } else if (!CounterBasedEventEnabled && HostVisible) { + CacheType = HostVisibleCacheType; + } else { + CacheType = HostInvisibleCacheType; + } } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 6ae9f8ec83..80f6ecbfb8 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -486,7 +486,7 @@ ur_result_t urDeviceGetInfo( // TODO: To find out correct value return ReturnValue(""); case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: - return ReturnValue(UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP); + return ReturnValue(static_cast(true)); case UR_DEVICE_INFO_QUEUE_PROPERTIES: return ReturnValue( ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index b9eb6bacd6..ab8580f833 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -438,7 +438,7 @@ ur_result_t urEnqueueEventsWaitWithBarrier( ur_queue_handle_t, uint32_t, const ur_event_handle_t *, ur_event_handle_t *, bool)>(EnqueueEventsWaitWithBarrier)( Queue, 
          NumEventsInWaitList, EventWaitList, OutEvent,
-      Queue->interruptBasedEventsEnabled());
+      Queue == nullptr ? false : Queue->InterruptBasedEventsEnabled);
 }
 
 ur_result_t urEnqueueEventsWaitWithBarrierExt(
@@ -470,7 +470,7 @@ ur_result_t urEnqueueEventsWaitWithBarrierExt(
                      ur_queue_handle_t, uint32_t, const ur_event_handle_t *,
                      ur_event_handle_t *, bool)>(EnqueueEventsWaitWithBarrier)(
         Queue, NumEventsInWaitList, EventWaitList, OutEvent,
-        Queue->interruptBasedEventsEnabled());
+        Queue ? Queue->InterruptBasedEventsEnabled : false);
   }
 }
@@ -1342,7 +1342,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
   }
 
   if (auto CachedEvent = Context->getEventFromContextCache(
-          HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled)) {
+          HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled,
+          InterruptBasedEventEnabled)) {
     *RetEvent = CachedEvent;
     return UR_RESULT_SUCCESS;
   }
@@ -1355,7 +1356,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
   if (auto Res = Context->getFreeSlotInExistingOrNewPool(
           ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
           CounterBasedEventEnabled, UsingImmediateCommandlists,
-          Queue->interruptBasedEventsEnabled()))
+          InterruptBasedEventEnabled))
     return Res;
 
   ZeStruct<ze_event_desc_t> ZeEventDesc;
diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp
index cae3d3d989..826f8887dd 100644
--- a/source/adapters/level_zero/queue.cpp
+++ b/source/adapters/level_zero/queue.cpp
@@ -1192,10 +1192,20 @@ ur_queue_handle_t_::ur_queue_handle_t_(
     }
     return std::atoi(UrRet) != 0;
   }();
+  static const bool useInterruptBasedEvents = [] {
+    const char *UrRet = std::getenv("UR_L0_USE_INTERRUPT_BASED_EVENTS");
+    if (!UrRet) {
+      return true;
+    }
+    return std::atoi(UrRet) != 0;
+  }();
   this->CounterBasedEventsEnabled =
       UsingImmCmdLists && isInOrderQueue() && Device->useDriverInOrderLists() &&
       useDriverCounterBasedEvents &&
       Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound;
+  this->InterruptBasedEventsEnabled = useInterruptBasedEvents &&
+                                      isLowPowerEvents() && isInOrderQueue() &&
+                                      Device->useDriverInOrderLists();
 }
 
 void ur_queue_handle_t_::adjustBatchSizeForFullBatch(bool IsCopy) {
@@ -1494,11 +1504,6 @@ bool ur_queue_handle_t_::doReuseDiscardedEvents() {
   return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents();
 }
 
-bool ur_queue_handle_t_::interruptBasedEventsEnabled() {
-  return isInOrderQueue() && Device->useDriverInOrderLists() &&
-         isLowPowerEvents();
-}
-
 ur_result_t
 ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
   if (LastCommandEvent && LastCommandEvent->IsDiscarded) {
@@ -1877,10 +1882,12 @@ ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine,
 // visible pool.
 // \param HostVisible tells if the event must be created in the
 // host-visible pool. If not set then this function will decide.
-ur_result_t createEventAndAssociateQueue(
-    ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
-    ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
-    std::optional<bool> HostVisible, std::optional<bool> InterruptBasedEvents) {
+ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
+                                         ur_event_handle_t *Event,
+                                         ur_command_t CommandType,
+                                         ur_command_list_ptr_t CommandList,
+                                         bool IsInternal, bool IsMultiDevice,
+                                         std::optional<bool> HostVisible) {
   if (!HostVisible.has_value()) {
     // Internal/discarded events do not need host-scope visibility.
@@ -1896,9 +1903,7 @@ ur_result_t createEventAndAssociateQueue(
   UR_CALL(EventCreate(
       Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event,
       Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/,
-      InterruptBasedEvents.has_value()
-          ? InterruptBasedEvents.value()
-          : Queue->interruptBasedEventsEnabled()));
+      Queue->InterruptBasedEventsEnabled));
 
   (*Event)->UrQueue = Queue;
   (*Event)->CommandType = CommandType;
diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp
index 786f1bdd51..90e95e2c2e 100644
--- a/source/adapters/level_zero/queue.hpp
+++ b/source/adapters/level_zero/queue.hpp
@@ -375,6 +375,8 @@ struct ur_queue_handle_t_ : _ur_object {
   // Keeps track of whether we are using Counter-based Events
   bool CounterBasedEventsEnabled = false;
 
+  bool InterruptBasedEventsEnabled = false;
+
   // Map of all command lists used in this queue.
   ur_command_list_map_t CommandListMap;
 
@@ -533,8 +535,6 @@ struct ur_queue_handle_t_ : _ur_object {
   // queue.
   bool doReuseDiscardedEvents();
 
-  bool interruptBasedEventsEnabled();
-
   // Append command to provided command list to wait and reset the last event if
   // it is discarded and create new ur_event_handle_t wrapper using the same
   // native event and put it to the cache. We call this method after each
@@ -713,7 +713,7 @@ struct ur_queue_handle_t_ : _ur_object {
 ur_result_t createEventAndAssociateQueue(
     ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
     ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
-    std::optional<bool> HostVisible = std::nullopt, std::optional<bool> InterruptBasedEvents = std::nullopt);
+    std::optional<bool> HostVisible = std::nullopt);
 
 // This helper function checks to see if an event for a command can be included
 // at the end of a command list batch. This will only be true if the event does

From f7fa43586d1f7f735e640c459f3482cef6ab4db0 Mon Sep 17 00:00:00 2001
From: "Zhang, Winston"
Date: Mon, 25 Nov 2024 17:06:34 -0800
Subject: [PATCH 6/6] [L0] Interrupt-based event implementation

Signed-off-by: Zhang, Winston
---
 source/adapters/level_zero/context.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp
index 815a325a46..c66df48c9a 100644
--- a/source/adapters/level_zero/context.hpp
+++ b/source/adapters/level_zero/context.hpp
@@ -266,7 +266,7 @@ struct ur_context_handle_t_ : _ur_object {
     EventPoolCacheType CacheType;
 
     calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
-                        InterruptBasedEventEnabled, UsingImmediateCmdList,
+                        UsingImmediateCmdList, InterruptBasedEventEnabled,
                         CacheType);
     if (ZeDevice) {
       auto ZeEventPoolCacheMap =
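
For reference, the enlarged event-pool cache introduced by this series amounts to 14 EventPoolCacheType values, each of which appears to occupy a pair of adjacent slots in the 30-element ZeEventPoolCache / ZeEventPoolCacheDeviceMap vectors (profiling-enabled at CacheType * 2 and, presumably, profiling-disabled at CacheType * 2 + 1). The following is a minimal standalone sketch of that indexing, not part of the patch: the flatIndex helper, the NumCacheTypes enumerator, and the "+ 1" offset for the profiling-disabled slot are illustrative assumptions inferred from the CacheType * 2 indexing visible in getZeEventPoolCache above.

// Standalone illustration only; not part of the patch. Mirrors the expanded
// EventPoolCacheType enum and the assumed CacheType * 2 slot layout.
#include <cstddef>
#include <iostream>

namespace sketch {

// 2 plain + 4 counter-based + 4 interrupt-based + 4 interrupt-and-counter
// cache types, matching the enum after this patch series.
enum EventPoolCacheType : int {
  HostVisibleCacheType,
  HostInvisibleCacheType,
  HostVisibleCounterBasedRegularCacheType,
  HostInvisibleCounterBasedRegularCacheType,
  HostVisibleCounterBasedImmediateCacheType,
  HostInvisibleCounterBasedImmediateCacheType,
  HostVisibleInterruptBasedRegularCacheType,
  HostInvisibleInterruptBasedRegularCacheType,
  HostVisibleInterruptBasedImmediateCacheType,
  HostInvisibleInterruptBasedImmediateCacheType,
  HostVisibleInterruptAndCounterBasedRegularCacheType,
  HostInvisibleInterruptAndCounterBasedRegularCacheType,
  HostVisibleInterruptAndCounterBasedImmediateCacheType,
  HostInvisibleInterruptAndCounterBasedImmediateCacheType,
  NumCacheTypes // 14 (illustrative enumerator, not in the patch)
};

// Assumed slot layout: each cache type owns two adjacent vector slots,
// profiling-enabled at CacheType * 2 and profiling-disabled at CacheType * 2 + 1.
constexpr std::size_t flatIndex(EventPoolCacheType Type, bool WithProfiling) {
  return static_cast<std::size_t>(Type) * 2 + (WithProfiling ? 0 : 1);
}

} // namespace sketch

int main() {
  using namespace sketch;
  constexpr std::size_t CacheVectorSize = 30; // ZeEventPoolCache{30}
  constexpr std::size_t MaxIndex = flatIndex(
      HostInvisibleInterruptAndCounterBasedImmediateCacheType, false); // 27
  static_assert(MaxIndex < CacheVectorSize,
                "14 cache types x 2 profiling variants fit in 30 slots");
  std::cout << "cache types: " << static_cast<int>(NumCacheTypes)
            << ", highest flat index: " << MaxIndex
            << ", vector size: " << CacheVectorSize << "\n";
  return 0;
}

Under these assumptions the 14 cache types consume 28 of the 30 slots, which is consistent with the ZeEventPoolCache{30} and ZeEventPoolCacheDeviceMap{30} sizing in the context.hpp hunk above.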