diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 56c53b5331..c573a1d5cb 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -215,7 +215,8 @@ ur_result_t createSyncPointAndGetZeEvents( UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/, false /*IsMultiDevice*/, HostVisible, &LaunchEvent, false /*CounterBasedEventEnabled*/, - !CommandBuffer->IsProfilingEnabled)); + !CommandBuffer->IsProfilingEnabled, + false /*InterruptBasedEventEnabled*/)); LaunchEvent->CommandType = CommandType; ZeLaunchEvent = LaunchEvent->ZeEvent; @@ -662,13 +663,15 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, if (Device->hasMainCopyEngine()) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false, false, &CopyFinishedEvent, UseCounterBasedEvents, - !EnableProfiling)); + !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); } if (EnableProfiling) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &ComputeFinishedEvent, - UseCounterBasedEvents, !EnableProfiling)); + UseCounterBasedEvents, !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); } } @@ -677,7 +680,8 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, if (WaitEventPath) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &WaitEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); + false /*CounterBasedEventEnabled*/, !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); } // Create ZeCommandListResetEvents only if counter-based events are not being @@ -689,7 +693,8 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, if (!UseCounterBasedEvents) { UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &AllResetEvent, - false 
/*CounterBasedEventEnabled*/, !EnableProfiling)); + false /*CounterBasedEventEnabled*/, !EnableProfiling, + false /*InterruptBasedEventEnabled*/)); UR_CALL(createMainCommandList(Context, Device, false, false, false, ZeCommandListResetEvents)); @@ -697,7 +702,8 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, // The ExecutionFinishedEvent is only waited on by ZeCommandListResetEvents. UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, false /*HostVisible*/, &ExecutionFinishedEvent, - false /*CounterBasedEventEnabled*/, !EnableProfiling)); + false /*CounterBasedEventEnabled*/, !EnableProfiling, + false /*InterruptBased*/)); } try { diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 7c1c412ee4..821a8a4a05 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -478,7 +478,8 @@ static const uint32_t MaxNumEventsPerPool = [] { ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible, bool ProfilingEnabled, ur_device_handle_t Device, - bool CounterBasedEventEnabled, bool UsingImmCmdList) { + bool CounterBasedEventEnabled, bool UsingImmCmdList, + bool InterruptBasedEventEnabled) { // Lock while updating event pool machinery. 
  std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
@@ -487,9 +488,9 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
   if (Device) {
     ZeDevice = Device->ZeDevice;
   }
-  std::list<ze_event_pool_handle_t> *ZePoolCache =
-      getZeEventPoolCache(HostVisible, ProfilingEnabled,
-                          CounterBasedEventEnabled, UsingImmCmdList, ZeDevice);
+  std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
+      HostVisible, ProfilingEnabled, CounterBasedEventEnabled, UsingImmCmdList,
+      InterruptBasedEventEnabled, ZeDevice);
 
   if (!ZePoolCache->empty()) {
     if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
@@ -537,6 +538,14 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
                 counterBasedExt.flags);
     ZeEventPoolDesc.pNext = &counterBasedExt;
   }
+  if (InterruptBasedEventEnabled) {
+    ze_intel_event_sync_mode_exp_desc_t eventSyncMode = {
+        ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, ZeEventPoolDesc.pNext, 0};
+    eventSyncMode.syncModeFlags =
+        ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT |
+        ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT;
+    ZeEventPoolDesc.pNext = &eventSyncMode;
+  }
 
   std::vector<ze_device_handle_t> ZeDevices;
   if (ZeDevice) {
@@ -563,7 +572,7 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
 
 ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
     bool HostVisible, bool WithProfiling, ur_device_handle_t Device,
-    bool CounterBasedEventEnabled) {
+    bool CounterBasedEventEnabled, bool InterruptBasedEventEnabled) {
   std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
   auto Cache = getEventCache(HostVisible, WithProfiling, Device);
   if (Cache->empty())
@@ -574,6 +583,9 @@ ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
   if (Event->CounterBasedEventsEnabled != CounterBasedEventEnabled) {
     return nullptr;
   }
+  if (Event->InterruptBasedEventsEnabled != InterruptBasedEventEnabled) {
+    return nullptr;
+  }
   Cache->erase(It);
   // We have to reset event before using it.
Event->reset(); @@ -614,7 +626,8 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) { std::list *ZePoolCache = getZeEventPoolCache( Event->isHostVisible(), Event->isProfilingEnabled(), - Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, ZeDevice); + Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, + Event->InterruptBasedEventsEnabled, ZeDevice); // Put the empty pool to the cache of the pools. if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0) diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index 0d3b2846e2..c66df48c9a 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -33,6 +33,24 @@ struct l0_command_list_cache_info { bool IsImmediate = false; }; +typedef uint32_t ze_intel_event_sync_mode_exp_flags_t; +typedef enum _ze_intel_event_sync_mode_exp_flag_t { + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT = ZE_BIT(0), + ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT = ZE_BIT(1), + ZE_INTEL_EVENT_SYNC_MODE_EXP_EXP_FLAG_FORCE_UINT32 = 0x7fffffff + +} ze_intel_event_sync_mode_exp_flag_t; + +#define ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC \ + (ze_structure_type_t)0x00030016 + +typedef struct _ze_intel_event_sync_mode_exp_desc_t { + ze_structure_type_t stype; + const void *pNext; + + ze_intel_event_sync_mode_exp_flags_t syncModeFlags; +} ze_intel_event_sync_mode_exp_desc_t; + struct ur_context_handle_t_ : _ur_object { ur_context_handle_t_(ze_context_handle_t ZeContext, uint32_t NumDevices, const ur_device_handle_t *Devs, bool OwnZeContext) @@ -150,9 +168,9 @@ struct ur_context_handle_t_ : _ur_object { // head. // // Cache of event pools to which host-visible events are added to. 
-  std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{12};
+  std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{30};
   std::vector<std::unordered_map<ze_device_handle_t, std::list<ze_event_pool_handle_t>>>
-      ZeEventPoolCacheDeviceMap{12};
+      ZeEventPoolCacheDeviceMap{30};
 
   // This map will be used to determine if a pool is full or not
   // by storing number of empty slots available in the pool.
@@ -208,13 +226,15 @@ struct ur_context_handle_t_ : _ur_object {
                                             bool ProfilingEnabled,
                                             ur_device_handle_t Device,
                                             bool CounterBasedEventEnabled,
-                                            bool UsingImmCmdList);
+                                            bool UsingImmCmdList,
+                                            bool InterruptBasedEventEnabled);
 
   // Get ur_event_handle_t from cache.
   ur_event_handle_t getEventFromContextCache(bool HostVisible,
                                              bool WithProfiling,
                                              ur_device_handle_t Device,
-                                             bool CounterBasedEventEnabled);
+                                             bool CounterBasedEventEnabled,
+                                             bool InterruptBasedEventEnabled);
 
   // Add ur_event_handle_t to cache.
   void addEventToContextCache(ur_event_handle_t);
@@ -225,17 +245,29 @@ struct ur_context_handle_t_ : _ur_object {
     HostVisibleCounterBasedRegularCacheType,
     HostInvisibleCounterBasedRegularCacheType,
     HostVisibleCounterBasedImmediateCacheType,
-    HostInvisibleCounterBasedImmediateCacheType
+    HostInvisibleCounterBasedImmediateCacheType,
+
+    HostVisibleInterruptBasedRegularCacheType,
+    HostInvisibleInterruptBasedRegularCacheType,
+    HostVisibleInterruptBasedImmediateCacheType,
+    HostInvisibleInterruptBasedImmediateCacheType,
+
+    HostVisibleInterruptAndCounterBasedRegularCacheType,
+    HostInvisibleInterruptAndCounterBasedRegularCacheType,
+    HostVisibleInterruptAndCounterBasedImmediateCacheType,
+    HostInvisibleInterruptAndCounterBasedImmediateCacheType
   };
 
   std::list<ze_event_pool_handle_t> *
   getZeEventPoolCache(bool HostVisible, bool WithProfiling,
                       bool CounterBasedEventEnabled, bool UsingImmediateCmdList,
+                      bool InterruptBasedEventEnabled,
                       ze_device_handle_t ZeDevice) {
     EventPoolCacheType CacheType;
 
     calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
-                        UsingImmediateCmdList, CacheType);
+                        UsingImmediateCmdList, InterruptBasedEventEnabled,
+                        CacheType);
     if (ZeDevice) {
       auto ZeEventPoolCacheMap = WithProfiling ?
&ZeEventPoolCacheDeviceMap[CacheType * 2] @@ -255,23 +287,57 @@ struct ur_context_handle_t_ : _ur_object { ur_result_t calculateCacheIndex(bool HostVisible, bool CounterBasedEventEnabled, bool UsingImmediateCmdList, + bool InterruptBasedEventEnabled, EventPoolCacheType &CacheType) { - if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) { - CacheType = HostVisibleCounterBasedRegularCacheType; - } else if (CounterBasedEventEnabled && !HostVisible && - !UsingImmediateCmdList) { - CacheType = HostInvisibleCounterBasedRegularCacheType; - } else if (CounterBasedEventEnabled && HostVisible && - UsingImmediateCmdList) { - CacheType = HostVisibleCounterBasedImmediateCacheType; - } else if (CounterBasedEventEnabled && !HostVisible && - UsingImmediateCmdList) { - CacheType = HostInvisibleCounterBasedImmediateCacheType; - } else if (!CounterBasedEventEnabled && HostVisible) { - CacheType = HostVisibleCacheType; + if (InterruptBasedEventEnabled) { + if (CounterBasedEventEnabled) { + if (HostVisible) { + if (UsingImmediateCmdList) { + CacheType = HostVisibleInterruptAndCounterBasedImmediateCacheType; + } else { + CacheType = HostVisibleInterruptAndCounterBasedRegularCacheType; + } + } else { + if (UsingImmediateCmdList) { + CacheType = HostInvisibleInterruptAndCounterBasedImmediateCacheType; + } else { + CacheType = HostInvisibleInterruptAndCounterBasedRegularCacheType; + } + } + } else { + if (HostVisible) { + if (UsingImmediateCmdList) { + CacheType = HostVisibleInterruptBasedImmediateCacheType; + } else { + CacheType = HostVisibleInterruptBasedRegularCacheType; + } + } else { + if (UsingImmediateCmdList) { + CacheType = HostInvisibleInterruptBasedImmediateCacheType; + } else { + CacheType = HostInvisibleInterruptBasedRegularCacheType; + } + } + } } else { - CacheType = HostInvisibleCacheType; + if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) { + CacheType = HostVisibleCounterBasedRegularCacheType; + } else if (CounterBasedEventEnabled 
&& !HostVisible && + !UsingImmediateCmdList) { + CacheType = HostInvisibleCounterBasedRegularCacheType; + } else if (CounterBasedEventEnabled && HostVisible && + UsingImmediateCmdList) { + CacheType = HostVisibleCounterBasedImmediateCacheType; + } else if (CounterBasedEventEnabled && !HostVisible && + UsingImmediateCmdList) { + CacheType = HostInvisibleCounterBasedImmediateCacheType; + } else if (!CounterBasedEventEnabled && HostVisible) { + CacheType = HostVisibleCacheType; + } else { + CacheType = HostInvisibleCacheType; + } } + return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index 865edebc08..80f6ecbfb8 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -485,6 +485,8 @@ ur_result_t urDeviceGetInfo( case UR_DEVICE_INFO_BUILT_IN_KERNELS: // TODO: To find out correct value return ReturnValue(""); + case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: + return ReturnValue(static_cast(true)); case UR_DEVICE_INFO_QUEUE_PROPERTIES: return ReturnValue( ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE | @@ -1151,8 +1153,6 @@ ur_result_t urDeviceGetInfo( return ReturnValue(true); case UR_DEVICE_INFO_USM_POOL_SUPPORT: return ReturnValue(true); - case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP: - return ReturnValue(false); default: logger::error("Unsupported ParamName in urGetDeviceInfo"); logger::error("ParamNameParamName={}(0x{})", ParamName, diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 96da4be0fd..ab8580f833 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -156,7 +156,7 @@ static const bool InOrderBarrierBySignal = [] { return (UrRet ? 
std::atoi(UrRet) : true); }(); -ur_result_t urEnqueueEventsWaitWithBarrier( +ur_result_t EnqueueEventsWaitWithBarrier( ur_queue_handle_t Queue, ///< [in] handle of the queue object uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t @@ -166,69 +166,69 @@ ur_result_t urEnqueueEventsWaitWithBarrier( ///< the numEventsInWaitList must be 0, indicating that ///< all previously enqueued commands must be complete. ur_event_handle_t - *OutEvent ///< [in,out][optional] return an event object that identifies - ///< this particular command instance. -) { + *OutEvent, ///< [in,out][optional] return an event object that + ///< identifies this particular command instance. + bool InterruptBasedEventsEnabled) { // Lock automatically releases when this goes out of scope. std::scoped_lock lock(Queue->Mutex); // Helper function for appending a barrier to a command list. - auto insertBarrierIntoCmdList = [&Queue](ur_command_list_ptr_t CmdList, - _ur_ze_event_list_t &EventWaitList, - ur_event_handle_t &Event, - bool IsInternal) { - UR_CALL(createEventAndAssociateQueue(Queue, &Event, - UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, - CmdList, IsInternal, false)); - - Event->WaitList = EventWaitList; - - // For in-order queue we don't need a real barrier, just wait for - // requested events in potentially different queues and add a "barrier" - // event signal because it is already guaranteed that previous commands - // in this queue are completed when the signal is started. - // - // Only consideration here is that when profiling is used, signalEvent - // cannot be used if EventWaitList.Lenght == 0. In those cases, we need - // to fallback directly to barrier to have correct timestamps. 
See here: - // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t - // - // TODO: this and other special handling of in-order queues to be - // updated when/if Level Zero adds native support for in-order queues. - // - if (Queue->isInOrderQueue() && InOrderBarrierBySignal && - !Queue->isProfilingEnabled()) { - if (EventWaitList.Length) { - if (CmdList->second.IsInOrderList) { - for (unsigned i = EventWaitList.Length; i-- > 0;) { - // If the event is a multidevice event, then given driver in order - // lists, we cannot include this into the wait event list due to - // driver limitations. - if (EventWaitList.UrEventList[i]->IsMultiDevice) { - EventWaitList.Length--; - if (EventWaitList.Length != i) { - std::swap(EventWaitList.UrEventList[i], - EventWaitList.UrEventList[EventWaitList.Length]); - std::swap(EventWaitList.ZeEventList[i], - EventWaitList.ZeEventList[EventWaitList.Length]); + auto insertBarrierIntoCmdList = + [&Queue](ur_command_list_ptr_t CmdList, + _ur_ze_event_list_t &EventWaitList, ur_event_handle_t &Event, + bool IsInternal, bool InterruptBasedEventsEnabled) { + UR_CALL(createEventAndAssociateQueue( + Queue, &Event, UR_COMMAND_EVENTS_WAIT_WITH_BARRIER, CmdList, + IsInternal, InterruptBasedEventsEnabled)); + + Event->WaitList = EventWaitList; + + // For in-order queue we don't need a real barrier, just wait for + // requested events in potentially different queues and add a "barrier" + // event signal because it is already guaranteed that previous commands + // in this queue are completed when the signal is started. + // + // Only consideration here is that when profiling is used, signalEvent + // cannot be used if EventWaitList.Lenght == 0. In those cases, we need + // to fallback directly to barrier to have correct timestamps. 
See here: + // https://spec.oneapi.io/level-zero/latest/core/api.html?highlight=appendsignalevent#_CPPv430zeCommandListAppendSignalEvent24ze_command_list_handle_t17ze_event_handle_t + // + // TODO: this and other special handling of in-order queues to be + // updated when/if Level Zero adds native support for in-order queues. + // + if (Queue->isInOrderQueue() && InOrderBarrierBySignal && + !Queue->isProfilingEnabled()) { + if (EventWaitList.Length) { + if (CmdList->second.IsInOrderList) { + for (unsigned i = EventWaitList.Length; i-- > 0;) { + // If the event is a multidevice event, then given driver in + // order lists, we cannot include this into the wait event list + // due to driver limitations. + if (EventWaitList.UrEventList[i]->IsMultiDevice) { + EventWaitList.Length--; + if (EventWaitList.Length != i) { + std::swap(EventWaitList.UrEventList[i], + EventWaitList.UrEventList[EventWaitList.Length]); + std::swap(EventWaitList.ZeEventList[i], + EventWaitList.ZeEventList[EventWaitList.Length]); + } + } } } + ZE2UR_CALL(zeCommandListAppendWaitOnEvents, + (CmdList->first, EventWaitList.Length, + EventWaitList.ZeEventList)); } + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (CmdList->first, Event->ZeEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendBarrier, + (CmdList->first, Event->ZeEvent, EventWaitList.Length, + EventWaitList.ZeEventList)); } - ZE2UR_CALL( - zeCommandListAppendWaitOnEvents, - (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); - } - ZE2UR_CALL(zeCommandListAppendSignalEvent, - (CmdList->first, Event->ZeEvent)); - } else { - ZE2UR_CALL(zeCommandListAppendBarrier, - (CmdList->first, Event->ZeEvent, EventWaitList.Length, - EventWaitList.ZeEventList)); - } - return UR_RESULT_SUCCESS; - }; + return UR_RESULT_SUCCESS; + }; // If the queue is in-order then each command in it effectively acts as a // barrier, so we don't need to do anything except if we were requested @@ -285,7 +285,7 @@ ur_result_t urEnqueueEventsWaitWithBarrier( // 
Insert the barrier into the command-list and execute. UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, ResultEvent, - IsInternal)); + IsInternal, InterruptBasedEventsEnabled)); UR_CALL( Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); @@ -367,8 +367,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier( std::vector EventWaitVector(CmdLists.size()); for (size_t I = 0; I < CmdLists.size(); ++I) { _ur_ze_event_list_t waitlist; - UR_CALL(insertBarrierIntoCmdList( - CmdLists[I], waitlist, EventWaitVector[I], true /*IsInternal*/)); + UR_CALL(insertBarrierIntoCmdList(CmdLists[I], waitlist, + EventWaitVector[I], true /*IsInternal*/, + InterruptBasedEventsEnabled)); } // If there were multiple queues we need to create a "convergence" event to // be our active barrier. This convergence event is signalled by a barrier @@ -388,14 +389,15 @@ ur_result_t urEnqueueEventsWaitWithBarrier( // convergence command list. The resulting event signals the convergence of // all barriers. UR_CALL(insertBarrierIntoCmdList(ConvergenceCmdList, BaseWaitList, - ResultEvent, IsInternal)); + ResultEvent, IsInternal, + InterruptBasedEventsEnabled)); } else { // If there is only a single queue then insert a barrier and the single // result event can be used as our active barrier and used as the return // event. Take into account whether output event is discarded or not. _ur_ze_event_list_t waitlist; UR_CALL(insertBarrierIntoCmdList(CmdLists[0], waitlist, ResultEvent, - IsInternal)); + IsInternal, InterruptBasedEventsEnabled)); } // Execute each command list so the barriers can be encountered. 
@@ -419,10 +421,30 @@ ur_result_t urEnqueueEventsWaitWithBarrier( return UR_RESULT_SUCCESS; } +ur_result_t urEnqueueEventsWaitWithBarrier( + ur_queue_handle_t Queue, ///< [in] handle of the queue object + uint32_t NumEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t + *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] + ///< pointer to a list of events that must be complete + ///< before this command can be executed. If nullptr, + ///< the numEventsInWaitList must be 0, indicating that + ///< all previously enqueued commands must be complete. + ur_event_handle_t + *OutEvent ///< [in,out][optional] return an event object that identifies + ///< this particular command instance. +) { + return static_cast(EnqueueEventsWaitWithBarrier)( + Queue, NumEventsInWaitList, EventWaitList, OutEvent, + Queue == nullptr ? false : Queue->InterruptBasedEventsEnabled); +} + ur_result_t urEnqueueEventsWaitWithBarrierExt( ur_queue_handle_t Queue, ///< [in] handle of the queue object const ur_exp_enqueue_ext_properties_t - *, ///< [in][optional] pointer to the extended enqueue properties + *EnqueueExtProp, ///< [in][optional] pointer to the extended enqueue uint32_t NumEventsInWaitList, ///< [in] size of the event wait list const ur_event_handle_t *EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] @@ -434,8 +456,22 @@ ur_result_t urEnqueueEventsWaitWithBarrierExt( *OutEvent ///< [in,out][optional] return an event object that identifies ///< this particular command instance. 
 ) {
-  return ur::level_zero::urEnqueueEventsWaitWithBarrier(
-      Queue, NumEventsInWaitList, EventWaitList, OutEvent);
+  bool InterruptBased =
+      EnqueueExtProp &&
+      (EnqueueExtProp->flags & UR_EXP_ENQUEUE_EXT_FLAG_LOW_POWER_EVENTS);
+  if (InterruptBased) {
+    // Create the event with interrupt-based properties
+    return EnqueueEventsWaitWithBarrier(
+        Queue, NumEventsInWaitList, EventWaitList, OutEvent, true);
+  } else {
+    return EnqueueEventsWaitWithBarrier(
+        Queue, NumEventsInWaitList, EventWaitList, OutEvent,
+        Queue ? Queue->InterruptBasedEventsEnabled : false);
+  }
 }
 
 ur_result_t urEventGetInfo(
@@ -913,7 +949,7 @@ ur_result_t urExtEventCreate(
   UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
                       true /*HostVisible*/, Event,
                       false /*CounterBasedEventEnabled*/,
-                      false /*ForceDisableProfiling*/));
+                      false /*ForceDisableProfiling*/, false /*InterruptBasedEventEnabled*/));
 
   (*Event)->RefCountExternal++;
   if (!(*Event)->CounterBasedEventsEnabled)
@@ -935,7 +971,7 @@ ur_result_t urEventCreateWithNativeHandle(
   UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
                       true /*HostVisible*/, Event,
                       false /*CounterBasedEventEnabled*/,
-                      false /*ForceDisableProfiling*/));
+                      false /*ForceDisableProfiling*/, false /*InterruptBasedEventEnabled*/));
 
   (*Event)->RefCountExternal++;
   if (!(*Event)->CounterBasedEventsEnabled)
@@ -1293,7 +1329,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
                         bool IsMultiDevice, bool HostVisible,
                         ur_event_handle_t *RetEvent,
                         bool CounterBasedEventEnabled,
-                        bool ForceDisableProfiling) {
+                        bool ForceDisableProfiling,
+                        bool InterruptBasedEventEnabled) {
   bool ProfilingEnabled = ForceDisableProfiling ?
false : (!Queue || Queue->isProfilingEnabled()); bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists; @@ -1305,7 +1342,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, } if (auto CachedEvent = Context->getEventFromContextCache( - HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled)) { + HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled, + InterruptBasedEventEnabled)) { *RetEvent = CachedEvent; return UR_RESULT_SUCCESS; } @@ -1317,14 +1355,15 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, if (auto Res = Context->getFreeSlotInExistingOrNewPool( ZeEventPool, Index, HostVisible, ProfilingEnabled, Device, - CounterBasedEventEnabled, UsingImmediateCommandlists)) + CounterBasedEventEnabled, UsingImmediateCommandlists, + InterruptBasedEventEnabled)) return Res; ZeStruct ZeEventDesc; ZeEventDesc.index = Index; ZeEventDesc.wait = 0; - if (HostVisible || CounterBasedEventEnabled) { + if (HostVisible || CounterBasedEventEnabled || InterruptBasedEventEnabled) { ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST; } else { // @@ -1350,6 +1389,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, return UR_RESULT_ERROR_UNKNOWN; } (*RetEvent)->CounterBasedEventsEnabled = CounterBasedEventEnabled; + (*RetEvent)->InterruptBasedEventsEnabled = InterruptBasedEventEnabled; if (HostVisible) (*RetEvent)->HostVisibleEvent = reinterpret_cast(*RetEvent); diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index 2c9e698e3c..d894b2ef4e 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -33,7 +33,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent, bool CounterBasedEventEnabled, - bool ForceDisableProfiling); + bool ForceDisableProfiling, + bool InterruptBasedEventEnabled); } // extern 
"C" // This is an experimental option that allows to disable caching of events in @@ -251,6 +252,8 @@ struct ur_event_handle_t_ : _ur_object { std::optional completionBatch; // Keeps track of whether we are using Counter-based Events. bool CounterBasedEventsEnabled = false; + // Keeps track of whether we are using Interrupt-based Events. + bool InterruptBasedEventsEnabled = false; }; // Helper function to implement zeHostSynchronize. @@ -276,6 +279,12 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle); ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, bool SetEventCompleted); +ur_result_t EnqueueEventsWaitWithBarrier(ur_queue_handle_t Queue, + uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventList, + ur_event_handle_t *OutEvent, + bool InterruptBasedEventsEnabled); + // Get value of device scope events env var setting or default setting static const EventsScope DeviceEventsSetting = [] { char *UrRet = std::getenv("UR_L0_DEVICE_SCOPE_EVENTS"); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index c4598f3472..826f8887dd 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -104,10 +104,10 @@ ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, assert(st == ACCUMULATING); if (!barrierEvent) { - UR_CALL(EventCreate(queue->Context, queue, false /*IsMultiDevice*/, - true /*HostVisible*/, &barrierEvent, - false /*CounterBasedEventEnabled*/, - false /*ForceDisableProfiling*/)); + UR_CALL(EventCreate( + queue->Context, queue, false /*IsMultiDevice*/, true /*HostVisible*/, + &barrierEvent, false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/, false /*InterruptBasedEventEnabled*/)); } // Instead of collecting all the batched events, we simply issue a global @@ -1192,10 +1192,20 @@ ur_queue_handle_t_::ur_queue_handle_t_( } return std::atoi(UrRet) != 0; }(); + static const bool useInterruptBasedEvents = 
[] { + const char *UrRet = std::getenv("UR_L0_USE_INTERRUPT_BASED_EVENTS"); + if (!UrRet) { + return true; + } + return std::atoi(UrRet) != 0; + }(); this->CounterBasedEventsEnabled = UsingImmCmdLists && isInOrderQueue() && Device->useDriverInOrderLists() && useDriverCounterBasedEvents && Device->Platform->ZeDriverEventPoolCountingEventsExtensionFound; + this->InterruptBasedEventsEnabled = useInterruptBasedEvents && + isLowPowerEvents() && isInOrderQueue() && + Device->useDriverInOrderLists(); } void ur_queue_handle_t_::adjustBatchSizeForFullBatch(bool IsCopy) { @@ -1654,6 +1664,10 @@ bool ur_queue_handle_t_::isInOrderQueue() const { 0); } +bool ur_queue_handle_t_::isLowPowerEvents() const { + return ((this->Properties & UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP) != 0); +} + // Helper function to perform the necessary cleanup of the events from reset cmd // list. ur_result_t CleanupEventListFromResetCmdList( @@ -1888,7 +1902,8 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, if (*Event == nullptr) UR_CALL(EventCreate( Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event, - Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/)); + Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/, + Queue->InterruptBasedEventsEnabled)); (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 1108e4c268..90e95e2c2e 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -375,6 +375,8 @@ struct ur_queue_handle_t_ : _ur_object { // Keeps track of whether we are using Counter-based Events bool CounterBasedEventsEnabled = false; + bool InterruptBasedEventsEnabled = false; + // Map of all command lists used in this queue. ur_command_list_map_t CommandListMap; @@ -557,6 +559,9 @@ struct ur_queue_handle_t_ : _ur_object { // Returns true if the queue has discard events property. 
bool isDiscardEvents() const; + // Returns true if the queue has low power events property. + bool isLowPowerEvents() const; + // Returns true if the queue has explicit priority set by user. bool isPriorityLow() const; bool isPriorityHigh() const;