Skip to content

Commit

Permalink
[L0] Interrupt-based event implementation
Browse files Browse the repository at this point in the history
To expose this functionality in UR, we want two ways of enabling low-power events:

1. Queue-wide enabling, so that all events created on the queue are low-power events.
2. A property passed to urEnqueueEventsWaitWithBarrier that makes the resulting event a low-power event. This will require the existing interface to be extended with properties, potentially through a new experimental function.

Signed-off-by: Zhang, Winston <[email protected]>
  • Loading branch information
winstonzhang-intel committed Nov 19, 2024
1 parent e3247c2 commit b063947
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 27 deletions.
11 changes: 7 additions & 4 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ ur_result_t createSyncPointAndGetZeEvents(
UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/,
false /*IsMultiDevice*/, HostVisible, &LaunchEvent,
false /*CounterBasedEventEnabled*/,
!CommandBuffer->IsProfilingEnabled));
!CommandBuffer->IsProfilingEnabled, false));
LaunchEvent->CommandType = CommandType;
ZeLaunchEvent = LaunchEvent->ZeEvent;

Expand Down Expand Up @@ -522,13 +522,16 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,

UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
false /*HostVisible*/, &SignalEvent,
false /*CounterBasedEventEnabled*/, !EnableProfiling));
false /*CounterBasedEventEnabled*/, !EnableProfiling,
false));
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
false /*HostVisible*/, &WaitEvent,
false /*CounterBasedEventEnabled*/, !EnableProfiling));
false /*CounterBasedEventEnabled*/, !EnableProfiling,
false));
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
false /*HostVisible*/, &AllResetEvent,
false /*CounterBasedEventEnabled*/, !EnableProfiling));
false /*CounterBasedEventEnabled*/, !EnableProfiling,
false));
std::vector<ze_event_handle_t> PrecondEvents = {WaitEvent->ZeEvent,
AllResetEvent->ZeEvent};

Expand Down
11 changes: 10 additions & 1 deletion source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,8 @@ static const uint32_t MaxNumEventsPerPool = [] {
ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
bool ProfilingEnabled, ur_device_handle_t Device,
bool CounterBasedEventEnabled, bool UsingImmCmdList) {
bool CounterBasedEventEnabled, bool UsingImmCmdList,
bool InterruptBasedEventEnabled) {
// Lock while updating event pool machinery.
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);

Expand Down Expand Up @@ -537,6 +538,14 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
counterBasedExt.flags);
ZeEventPoolDesc.pNext = &counterBasedExt;
}
if (InterruptBasedEventEnabled) {
ze_intel_event_sync_mode_exp_desc_t eventSyncMode = {
ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC, nullptr, 0};
eventSyncMode.syncModeFlags =
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT |
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT;
ZeEventPoolDesc.pNext = &eventSyncMode;
}

std::vector<ze_device_handle_t> ZeDevices;
if (ZeDevice) {
Expand Down
21 changes: 20 additions & 1 deletion source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,24 @@ struct l0_command_list_cache_info {
bool IsImmediate = false;
};

// Local declarations for the Intel experimental "event sync mode" descriptor.
// NOTE(review): presumably declared here because the Level Zero headers this
// adapter builds against do not provide them yet -- confirm, and drop these
// once an official header does.

// Bitmask type for the `syncModeFlags` field of the descriptor below; it
// holds an OR-combination of ze_intel_event_sync_mode_exp_flag_t values.
typedef uint32_t ze_intel_event_sync_mode_exp_flags_t;
// Individual sync-mode flag bits.
typedef enum _ze_intel_event_sync_mode_exp_flag_t {
// Flag defined as ZE_BIT(0); presumably requests a low-power host wait on the
// event -- confirm against the driver documentation.
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_LOW_POWER_WAIT = ZE_BIT(0),
// Flag defined as ZE_BIT(1); presumably requests an interrupt when the event
// is signalled -- confirm against the driver documentation.
ZE_INTEL_EVENT_SYNC_MODE_EXP_FLAG_SIGNAL_INTERRUPT = ZE_BIT(1),
// Not a real flag. Presumably the usual Level Zero sentinel that forces a
// 32-bit enum representation. NOTE(review): the doubled "EXP_EXP" in the name
// looks like a typo but may mirror the upstream driver header -- do not
// rename without checking.
ZE_INTEL_EVENT_SYNC_MODE_EXP_EXP_FLAG_FORCE_UINT32 = 0x7fffffff

} ze_intel_event_sync_mode_exp_flag_t;

// Structure-type tag to store in the `stype` field of
// ze_intel_event_sync_mode_exp_desc_t.
#define ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC \
(ze_structure_type_t)0x00030016

// Extension descriptor carrying the sync-mode flags. The adapter's event-pool
// creation code links it into the event pool descriptor through `pNext`.
// NOTE(review): whoever chains this descriptor must keep it alive until the
// Level Zero call that consumes the chain has returned -- confirm at each
// use site.
typedef struct _ze_intel_event_sync_mode_exp_desc_t {
// Must be ZE_INTEL_STRUCTURE_TYPE_EVENT_SYNC_MODE_EXP_DESC.
ze_structure_type_t stype;
// Next extension descriptor in the chain, or nullptr.
const void *pNext;

// OR-combination of ze_intel_event_sync_mode_exp_flag_t bits.
ze_intel_event_sync_mode_exp_flags_t syncModeFlags;
} ze_intel_event_sync_mode_exp_desc_t;

struct ur_context_handle_t_ : _ur_object {
ur_context_handle_t_(ze_context_handle_t ZeContext, uint32_t NumDevices,
const ur_device_handle_t *Devs, bool OwnZeContext)
Expand Down Expand Up @@ -208,7 +226,8 @@ struct ur_context_handle_t_ : _ur_object {
bool ProfilingEnabled,
ur_device_handle_t Device,
bool CounterBasedEventEnabled,
bool UsingImmCmdList);
bool UsingImmCmdList,
bool InterruptBasedEventEnabled);

// Get ur_event_handle_t from cache.
ur_event_handle_t getEventFromContextCache(bool HostVisible,
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,8 @@ ur_result_t urDeviceGetInfo(
case UR_DEVICE_INFO_BUILT_IN_KERNELS:
// TODO: To find out correct value
return ReturnValue("");
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
return ReturnValue(UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP);
case UR_DEVICE_INFO_QUEUE_PROPERTIES:
return ReturnValue(
ur_queue_flag_t(UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE |
Expand Down Expand Up @@ -1151,8 +1153,6 @@ ur_result_t urDeviceGetInfo(
return ReturnValue(true);
case UR_DEVICE_INFO_USM_POOL_SUPPORT:
return ReturnValue(true);
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
return ReturnValue(false);
default:
logger::error("Unsupported ParamName in urGetDeviceInfo");
logger::error("ParamNameParamName={}(0x{})", ParamName,
Expand Down
16 changes: 10 additions & 6 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,8 @@ ur_result_t urEnqueueEventsWaitWithBarrier(
ur_result_t urEnqueueEventsWaitWithBarrierExt(
ur_queue_handle_t Queue, ///< [in] handle of the queue object
const ur_exp_enqueue_ext_properties_t
*, ///< [in][optional] pointer to the extended enqueue properties
*EnqueueExtProp, ///< [in][optional] pointer to the extended enqueue
///< properties
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
const ur_event_handle_t
*EventWaitList, ///< [in][optional][range(0, numEventsInWaitList)]
Expand Down Expand Up @@ -912,7 +913,7 @@ ur_result_t urExtEventCreate(
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
true /*HostVisible*/, Event,
false /*CounterBasedEventEnabled*/,
false /*ForceDisableProfiling*/));
false /*ForceDisableProfiling*/, false));

(*Event)->RefCountExternal++;
if (!(*Event)->CounterBasedEventsEnabled)
Expand All @@ -934,7 +935,7 @@ ur_result_t urEventCreateWithNativeHandle(
UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/,
true /*HostVisible*/, Event,
false /*CounterBasedEventEnabled*/,
false /*ForceDisableProfiling*/));
false /*ForceDisableProfiling*/, false));

(*Event)->RefCountExternal++;
if (!(*Event)->CounterBasedEventsEnabled)
Expand Down Expand Up @@ -1292,7 +1293,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool IsMultiDevice, bool HostVisible,
ur_event_handle_t *RetEvent,
bool CounterBasedEventEnabled,
bool ForceDisableProfiling) {
bool ForceDisableProfiling,
bool InterruptBasedEventEnabled) {
bool ProfilingEnabled =
ForceDisableProfiling ? false : (!Queue || Queue->isProfilingEnabled());
bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists;
Expand All @@ -1316,14 +1318,15 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,

if (auto Res = Context->getFreeSlotInExistingOrNewPool(
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
CounterBasedEventEnabled, UsingImmediateCommandlists))
CounterBasedEventEnabled, UsingImmediateCommandlists,
Queue->interruptBasedEventsEnabled()))
return Res;

ZeStruct<ze_event_desc_t> ZeEventDesc;
ZeEventDesc.index = Index;
ZeEventDesc.wait = 0;

if (HostVisible || CounterBasedEventEnabled) {
if (HostVisible || CounterBasedEventEnabled || InterruptBasedEventEnabled) {
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
} else {
//
Expand All @@ -1349,6 +1352,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
return UR_RESULT_ERROR_UNKNOWN;
}
(*RetEvent)->CounterBasedEventsEnabled = CounterBasedEventEnabled;
(*RetEvent)->InterruptBasedEventsEnabled = InterruptBasedEventEnabled;
if (HostVisible)
(*RetEvent)->HostVisibleEvent =
reinterpret_cast<ur_event_handle_t>(*RetEvent);
Expand Down
5 changes: 4 additions & 1 deletion source/adapters/level_zero/event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool IsMultiDevice, bool HostVisible,
ur_event_handle_t *RetEvent,
bool CounterBasedEventEnabled,
bool ForceDisableProfiling);
bool ForceDisableProfiling,
bool InterruptBasedEventEnabled);
} // extern "C"

// This is an experimental option that allows to disable caching of events in
Expand Down Expand Up @@ -251,6 +252,8 @@ struct ur_event_handle_t_ : _ur_object {
std::optional<ur_completion_batch_it> completionBatch;
// Keeps track of whether we are using Counter-based Events.
bool CounterBasedEventsEnabled = false;
// Keeps track of whether we are using Interrupt-based Events.
bool InterruptBasedEventsEnabled = false;
};

// Helper function to implement zeHostSynchronize.
Expand Down
30 changes: 19 additions & 11 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,10 +104,10 @@ ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue,
assert(st == ACCUMULATING);

if (!barrierEvent) {
UR_CALL(EventCreate(queue->Context, queue, false /*IsMultiDevice*/,
true /*HostVisible*/, &barrierEvent,
false /*CounterBasedEventEnabled*/,
false /*ForceDisableProfiling*/));
UR_CALL(EventCreate(
queue->Context, queue, false /*IsMultiDevice*/, true /*HostVisible*/,
&barrierEvent, false /*CounterBasedEventEnabled*/,
false /*ForceDisableProfiling*/, false /*InterruptBasedEventEnabled*/));
}

// Instead of collecting all the batched events, we simply issue a global
Expand Down Expand Up @@ -1494,6 +1494,11 @@ bool ur_queue_handle_t_::doReuseDiscardedEvents() {
return ReuseDiscardedEvents && isInOrderQueue() && isDiscardEvents();
}

// Reports whether events for this queue should be interrupt-based. Three
// conditions must all hold, checked in this order with an early exit on the
// first failure: the queue is in-order, its device uses driver in-order
// lists, and the queue has the low-power events property.
bool ur_queue_handle_t_::interruptBasedEventsEnabled() {
  if (!isInOrderQueue())
    return false;
  if (!Device->useDriverInOrderLists())
    return false;
  return isLowPowerEvents();
}

ur_result_t
ur_queue_handle_t_::resetDiscardedEvent(ur_command_list_ptr_t CommandList) {
if (LastCommandEvent && LastCommandEvent->IsDiscarded) {
Expand Down Expand Up @@ -1654,6 +1659,10 @@ bool ur_queue_handle_t_::isInOrderQueue() const {
0);
}

// Tells whether the UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP bit is set in this
// queue's properties.
bool ur_queue_handle_t_::isLowPowerEvents() const {
  const auto LowPowerBits =
      this->Properties & UR_QUEUE_FLAG_LOW_POWER_EVENTS_EXP;
  return LowPowerBits != 0;
}

// Helper function to perform the necessary cleanup of the events from reset cmd
// list.
ur_result_t CleanupEventListFromResetCmdList(
Expand Down Expand Up @@ -1868,12 +1877,10 @@ ur_result_t setSignalEvent(ur_queue_handle_t Queue, bool UseCopyEngine,
// visible pool.
// \param HostVisible tells if the event must be created in the
// host-visible pool. If not set then this function will decide.
ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
ur_event_handle_t *Event,
ur_command_t CommandType,
ur_command_list_ptr_t CommandList,
bool IsInternal, bool IsMultiDevice,
std::optional<bool> HostVisible) {
ur_result_t createEventAndAssociateQueue(
ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
std::optional<bool> HostVisible, std::optional<bool> InterruptBasedEvents) {

if (!HostVisible.has_value()) {
// Internal/discarded events do not need host-scope visibility.
Expand All @@ -1888,7 +1895,8 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
if (*Event == nullptr)
UR_CALL(EventCreate(
Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event,
Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/));
Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/,
HostVisible.has_value() ? true : Queue->interruptBasedEventsEnabled()));

(*Event)->UrQueue = Queue;
(*Event)->CommandType = CommandType;
Expand Down
7 changes: 6 additions & 1 deletion source/adapters/level_zero/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,8 @@ struct ur_queue_handle_t_ : _ur_object {
// queue.
bool doReuseDiscardedEvents();

// Returns true if interrupt-based events are enabled for this queue, i.e. the
// queue is in-order, its device uses driver in-order lists, and the queue has
// the low power events property.
bool interruptBasedEventsEnabled();

// Append command to provided command list to wait and reset the last event if
// it is discarded and create new ur_event_handle_t wrapper using the same
// native event and put it to the cache. We call this method after each
Expand All @@ -557,6 +559,9 @@ struct ur_queue_handle_t_ : _ur_object {
// Returns true if the queue has discard events property.
bool isDiscardEvents() const;

// Returns true if the queue has low power events property.
bool isLowPowerEvents() const;

// Returns true if the queue has explicit priority set by user.
bool isPriorityLow() const;
bool isPriorityHigh() const;
Expand Down Expand Up @@ -708,7 +713,7 @@ struct ur_queue_handle_t_ : _ur_object {
ur_result_t createEventAndAssociateQueue(
ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
std::optional<bool> HostVisible = std::nullopt);
std::optional<bool> HostVisible = std::nullopt, std::optional<bool> InterruptBasedEvents = std::nullopt);

// This helper function checks to see if an event for a command can be included
// at the end of a command list batch. This will only be true if the event does
Expand Down

0 comments on commit b063947

Please sign in to comment.