Skip to content

Commit

Permalink
Merge branch 'main' into num_compute_units
Browse files Browse the repository at this point in the history
  • Loading branch information
dyniols authored Jan 17, 2025
2 parents 3e3282c + 30d183a commit cc8ae30
Show file tree
Hide file tree
Showing 22 changed files with 985 additions and 591 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build-hw-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ jobs:

- name: Test adapter specific
working-directory: ${{github.workspace}}/build
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" --timeout 180
run: ctest -C ${{matrix.build_type}} --output-on-failure -L "adapter-specific" -E "memcheck" --timeout 180
# Don't run adapter specific tests when building multiple adapters
if: ${{ matrix.adapter.other_name == '' }}

Expand Down
12 changes: 7 additions & 5 deletions scripts/benchmarks/benches/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@ def benchmarks(self) -> list[Benchmark]:

if options.ur is not None:
benches += [
SubmitKernelUR(self, 0),
SubmitKernelUR(self, 1),
SubmitKernelUR(self, 0, 0),
SubmitKernelUR(self, 1, 0),
SubmitKernelUR(self, 1, 1),
]

return benches
Expand Down Expand Up @@ -180,13 +181,14 @@ def bin_args(self) -> list[str]:
]

class SubmitKernelUR(ComputeBenchmark):
def __init__(self, bench, ioq):
def __init__(self, bench, ioq, measureCompletion):
self.ioq = ioq
self.measureCompletion = measureCompletion
super().__init__(bench, "api_overhead_benchmark_ur", "SubmitKernel")

def name(self):
order = "in order" if self.ioq else "out of order"
return f"api_overhead_benchmark_ur SubmitKernel {order}"
return f"api_overhead_benchmark_ur SubmitKernel {order}" + (" with measure completion" if self.measureCompletion else "")

def explicit_group(self):
return "SubmitKernel"
Expand All @@ -195,7 +197,7 @@ def bin_args(self) -> list[str]:
return [
f"--Ioq={self.ioq}",
"--DiscardEvents=0",
"--MeasureCompletion=0",
f"--MeasureCompletion={self.measureCompletion}",
"--iterations=100000",
"--Profiling=0",
"--NumKernels=10",
Expand Down
13 changes: 6 additions & 7 deletions source/adapters/level_zero/v2/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ ur_kernel_handle_t_::ur_kernel_handle_t_(
}

ur_result_t ur_kernel_handle_t_::release() {
if (!RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;

// manually release kernels to allow errors to be propagated
for (auto &singleDeviceKernelOpt : deviceKernels) {
if (singleDeviceKernelOpt.has_value()) {
Expand All @@ -104,6 +107,8 @@ ur_result_t ur_kernel_handle_t_::release() {

UR_CALL_THROWS(ur::level_zero::urProgramRelease(hProgram));

delete this;

return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -362,13 +367,7 @@ ur_result_t urKernelRetain(
ur_result_t urKernelRelease(
ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to release
) try {
if (!hKernel->RefCount.decrementAndTest())
return UR_RESULT_SUCCESS;

hKernel->release();
delete hKernel;

return UR_RESULT_SUCCESS;
return hKernel->release();
} catch (...) {
return exceptionToResult(std::current_exception());
}
Expand Down
18 changes: 17 additions & 1 deletion source/adapters/level_zero/v2/queue_immediate_in_order.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -186,13 +186,25 @@ ur_result_t ur_queue_immediate_in_order_t::queueFinish() {

// Free deferred events
for (auto &hEvent : deferredEvents) {
hEvent->releaseDeferred();
UR_CALL(hEvent->releaseDeferred());
}
deferredEvents.clear();

// Free deferred kernels
for (auto &hKernel : submittedKernels) {
UR_CALL(hKernel->release());
}
submittedKernels.clear();

return UR_RESULT_SUCCESS;
}

// Remembers a kernel that was just appended to this queue's command list and
// takes an extra reference on it. queueFinish() later calls release() on every
// entry of submittedKernels and clears the list, which drops that reference.
//
// The handle is stored first and the reference taken second, so a failed
// allocation in the vector leaves the kernel's reference count untouched.
//
// NOTE(review): one entry is added per launch and entries are only drained in
// queueFinish(), so the list grows with the number of launches between
// finishes - confirm this is acceptable for long-running submit loops.
void ur_queue_immediate_in_order_t::recordSubmittedKernel(
    ur_kernel_handle_t hKernel) {
    auto &trackedKernel = submittedKernels.emplace_back(hKernel);
    trackedKernel->RefCount.increment();
}

ur_result_t ur_queue_immediate_in_order_t::queueFlush() {
return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -251,6 +263,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueKernelLaunch(
(handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions,
zeSignalEvent, waitList.second, waitList.first));

recordSubmittedKernel(hKernel);

return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -1063,6 +1077,8 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp(
(handler.commandList.get(), hZeKernel, &zeThreadGroupDimensions,
zeSignalEvent, waitList.second, waitList.first));

recordSubmittedKernel(hKernel);

return UR_RESULT_SUCCESS;
}

Expand Down
3 changes: 3 additions & 0 deletions source/adapters/level_zero/v2/queue_immediate_in_order.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ {
std::vector<ze_event_handle_t> waitList;

std::vector<ur_event_handle_t> deferredEvents;
std::vector<ur_kernel_handle_t> submittedKernels;

std::pair<ze_event_handle_t *, uint32_t>
getWaitListView(const ur_event_handle_t *phWaitEvents,
Expand Down Expand Up @@ -82,6 +83,8 @@ struct ur_queue_immediate_in_order_t : _ur_object, public ur_queue_handle_t_ {
const ur_event_handle_t *phEventWaitList,
ur_event_handle_t *phEvent);

void recordSubmittedKernel(ur_kernel_handle_t hKernel);

public:
ur_queue_immediate_in_order_t(ur_context_handle_t, ur_device_handle_t,
const ur_queue_properties_t *);
Expand Down
24 changes: 24 additions & 0 deletions test/adapters/level_zero/v2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,3 +61,27 @@ add_adapter_test(level_zero_memory_residency
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero_v2>\""
"ZES_ENABLE_SYSMAN=1"
)

# Deferred kernel release tests for the Level Zero v2 adapter.
# They are registered on non-Windows platforms only.
if(NOT WIN32)
# Regular test run, forcing the loader to use the v2 Level Zero adapter.
add_adapter_test(level_zero_deferred_kernel
FIXTURE KERNELS
SOURCES
deferred_kernel.cpp
ENVIRONMENT
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero_v2>\""
)

# NOTE(review): `backend` is presumably read by add_adapter_memcheck_test
# (e.g. to locate the expected-output match file) - confirm against the macro.
set(backend level_zero)
# Same sources and environment, registered as a memcheck test.
# NOTE(review): presumably runs the test under a memory checker and compares
# its summary with a match file - confirm against the macro definition.
add_adapter_memcheck_test(level_zero_deferred_kernel
FIXTURE KERNELS
SOURCES
deferred_kernel.cpp
ENVIRONMENT
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero_v2>\""
)

# deferred_kernel.cpp includes <ze_api.h> and calls zeEventHostSignal directly,
# so the test binary needs the Level Zero loader and its headers.
target_link_libraries(test-adapter-level_zero_deferred_kernel PRIVATE
LevelZeroLoader
LevelZeroLoader-Headers
)
endif()
166 changes: 166 additions & 0 deletions test/adapters/level_zero/v2/deferred_kernel.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <ze_api.h>

#include "../../../conformance/enqueue/helpers.h"
#include "../ze_helpers.hpp"
#include "uur/fixtures.h"
#include "uur/raii.h"

struct urEnqueueKernelLaunchTest : uur::urKernelExecutionTest {
void SetUp() override {
program_name = "fill";
UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::SetUp());
}

uint32_t val = 42;
size_t global_size = 32;
size_t global_offset = 0;
size_t n_dimensions = 1;
};
UUR_INSTANTIATE_DEVICE_TEST_SUITE_P(urEnqueueKernelLaunchTest);

// Checks that releasing a kernel while a launch of it is still pending on a
// queue does not destroy the kernel: the handle must stay usable until the
// queue has finished.
//
// Sequence: make the queue wait on a host-signalled event, enqueue the kernel
// behind that wait, release the kernel, verify it can still be queried, then
// signal the event and finish the queue.
TEST_P(urEnqueueKernelLaunchTest, DeferredKernelRelease) {
ur_mem_handle_t buffer = nullptr;
AddBuffer1DArg(sizeof(val) * global_size, &buffer);
AddPodArg(val);

// Native Level Zero event that the test signals from the host further down.
// NOTE(review): presumably created unsignalled by createZeEvent - confirm in
// ze_helpers.hpp.
auto zeEvent = createZeEvent(context, device);

// Wrap the native event in a UR event so it can be used in a wait list.
ur_event_handle_t event;
ASSERT_SUCCESS(urEventCreateWithNativeHandle(
reinterpret_cast<ur_native_handle_t>(zeEvent.get()), context, nullptr,
&event));

// Make the queue wait on the event, then enqueue the launch behind that wait
// and drop the test's reference to the kernel.
ASSERT_SUCCESS(urEnqueueEventsWait(queue, 1, &event, nullptr));
ASSERT_SUCCESS(urEnqueueKernelLaunch(queue, kernel, n_dimensions,
&global_offset, &global_size, nullptr,
0, nullptr, nullptr));
ASSERT_SUCCESS(urKernelRelease(kernel));

// Kernel should still be alive since kernel launch is pending
ur_context_handle_t contextFromKernel;
ASSERT_SUCCESS(urKernelGetInfo(kernel, UR_KERNEL_INFO_CONTEXT,
sizeof(ur_context_handle_t),
&contextFromKernel, nullptr));

ASSERT_EQ(context, contextFromKernel);

// Fetch the native handle back from the UR event and signal it from the host
// so the queued work can proceed.
ze_event_handle_t ze_event = nullptr;
ASSERT_SUCCESS(urEventGetNativeHandle(
event, reinterpret_cast<ur_native_handle_t *>(&ze_event)));
ASSERT_EQ(zeEventHostSignal(ze_event), ZE_RESULT_SUCCESS);

ASSERT_SUCCESS(urQueueFinish(queue));

// NOTE(review): presumably cleared so the fixture's TearDown does not release
// the already-released kernel a second time - confirm in uur/fixtures.h.
kernel = nullptr;

ASSERT_SUCCESS(urEventRelease(event));
}

// Fixture for the multi-queue deferred kernel release test. On top of the
// multi-device/multi-queue base fixture it builds the "foo" program and
// creates a single kernel from it, which the test launches on two queues.
//
// Ownership: `program` is released in TearDown(); `kernel` is released by the
// test body itself.
struct urMultiQueueLaunchKernelDeferFreeTest
    : uur::urMultiQueueMultiDeviceTest<2> {
    // Entry point name of the kernel, looked up from the kernels environment.
    std::string KernelName;

    static constexpr char ProgramName[] = "foo";
    static constexpr size_t ArraySize = 100;
    static constexpr uint32_t InitialValue = 1;

    ur_program_handle_t program = nullptr;
    ur_kernel_handle_t kernel = nullptr;

    void SetUp() override {
        // Run the base SetUp() first. `devices`, `context` and `platform`
        // are members of the base fixture and are assumed to be initialised
        // there (the base class is not visible in this file), so inspecting
        // `devices` before this call would see an empty list and skip the
        // test unconditionally.
        UUR_RETURN_ON_FATAL_FAILURE(
            uur::urMultiQueueMultiDeviceTest<2>::SetUp());

        if (devices.size() < 2) {
            GTEST_SKIP() << "This test requires at least 2 devices";
        }

        KernelName = uur::KernelsEnvironment::instance->GetEntryPointNames(
            ProgramName)[0];

        std::shared_ptr<std::vector<char>> il_binary;
        std::vector<ur_program_metadata_t> metadatas{};

        uur::KernelsEnvironment::instance->LoadSource(ProgramName, platform,
                                                      il_binary);

        const ur_program_properties_t properties = {
            UR_STRUCTURE_TYPE_PROGRAM_PROPERTIES, nullptr,
            static_cast<uint32_t>(metadatas.size()),
            metadatas.empty() ? nullptr : metadatas.data()};

        ASSERT_SUCCESS(urProgramCreateWithIL(context, il_binary->data(),
                                             il_binary->size(), &properties,
                                             &program));

        UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(
            urProgramBuild(context, program, nullptr));
        ASSERT_SUCCESS(urKernelCreate(program, KernelName.data(), &kernel));
    }

    void TearDown() override {
        // The kernel is released in the actual test, so only the program is
        // cleaned up here. GoogleTest still runs TearDown() when SetUp()
        // skipped or failed early, in which case no program was created.
        if (program) {
            urProgramRelease(program);
        }
        UUR_RETURN_ON_FATAL_FAILURE(
            uur::urMultiQueueMultiDeviceTest<2>::TearDown());
    }
};

UUR_INSTANTIATE_PLATFORM_TEST_SUITE_P(urMultiQueueLaunchKernelDeferFreeTest);

// Launches the same kernel on two queues, each held back by its own
// host-signalled event, releases the kernel, and checks that the handle stays
// usable until the second of the two queues has finished.
TEST_P(urMultiQueueLaunchKernelDeferFreeTest, Success) {
// One native event per device; each is signalled from the host further down.
// NOTE(review): presumably created unsignalled by createZeEvent - confirm in
// ze_helpers.hpp.
auto zeEvent1 = createZeEvent(context, devices[0]);
auto zeEvent2 = createZeEvent(context, devices[1]);

// Wrap both native events in UR events so they can be used in wait lists.
ur_event_handle_t event1;
ASSERT_SUCCESS(urEventCreateWithNativeHandle(
reinterpret_cast<ur_native_handle_t>(zeEvent1.get()), context, nullptr,
&event1));
ur_event_handle_t event2;
ASSERT_SUCCESS(urEventCreateWithNativeHandle(
reinterpret_cast<ur_native_handle_t>(zeEvent2.get()), context, nullptr,
&event2));

size_t global_offset = 0;
size_t global_size = 1;

// Queue 0: wait on event1, then launch the kernel behind that wait.
ASSERT_SUCCESS(urEnqueueEventsWait(queues[0], 1, &event1, nullptr));
ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[0], kernel, 1, &global_offset,
&global_size, nullptr, 0, nullptr,
nullptr));

// Queue 1: wait on event2, then launch the same kernel behind that wait.
ASSERT_SUCCESS(urEnqueueEventsWait(queues[1], 1, &event2, nullptr));
ASSERT_SUCCESS(urEnqueueKernelLaunch(queues[1], kernel, 1, &global_offset,
&global_size, nullptr, 0, nullptr,
nullptr));

// Drop the test's own reference while both launches are outstanding.
ASSERT_SUCCESS(urKernelRelease(kernel));

// Kernel should still be alive since both kernel launches are still pending
ur_context_handle_t contextFromKernel;
ASSERT_SUCCESS(urKernelGetInfo(kernel, UR_KERNEL_INFO_CONTEXT,
sizeof(ur_context_handle_t),
&contextFromKernel, nullptr));
ASSERT_EQ(context, contextFromKernel);

// Signal event2 and finish queue 1 first.
ASSERT_EQ(zeEventHostSignal(zeEvent2.get()), ZE_RESULT_SUCCESS);
ASSERT_SUCCESS(urQueueFinish(queues[1]));

// Kernel should still be alive since the launch on queue 0 is still pending
ASSERT_SUCCESS(urKernelGetInfo(kernel, UR_KERNEL_INFO_CONTEXT,
sizeof(ur_context_handle_t),
&contextFromKernel, nullptr));
ASSERT_EQ(context, contextFromKernel);

// Signal event1 and finish queue 0; no launch of the kernel is pending after
// this point.
ASSERT_EQ(zeEventHostSignal(zeEvent1.get()), ZE_RESULT_SUCCESS);
ASSERT_SUCCESS(urQueueFinish(queues[0]));

ASSERT_SUCCESS(urEventRelease(event1));
ASSERT_SUCCESS(urEventRelease(event2));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
{{IGNORE}}
{{.*}} ERROR SUMMARY: 0 errors from 0 contexts {{.*}}
Loading

0 comments on commit cc8ae30

Please sign in to comment.