Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

End-to-end code generation example #610

Merged
merged 8 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ option(UR_BUILD_ADAPTER_L0 "build level 0 adapter from SYCL" OFF)
option(UR_BUILD_ADAPTER_OPENCL "build opencl adapter from SYCL" OFF)
option(UR_BUILD_ADAPTER_CUDA "build cuda adapter from SYCL" OFF)
option(UR_BUILD_ADAPTER_HIP "build hip adapter from SYCL" OFF)
option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)

set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
Expand Down
3 changes: 3 additions & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
include_directories(${CMAKE_CURRENT_SOURCE_DIR}/include)

add_subdirectory(hello_world)
if(UR_BUILD_EXAMPLE_CODEGEN)
add_subdirectory(codegen)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now that we have GPU-equipped runners, it should be possible to create a workflow where this example is built and run as part of tests. Otherwise, it will be easy to accidentally break it.

endif()
if(UR_ENABLE_TRACING)
add_subdirectory(collector)
endif()
47 changes: 47 additions & 0 deletions examples/codegen/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# Copyright (C) 2022 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

set(TARGET_NAME codegen)

# The example generates LLVM IR and translates it to SPIR-V, so it needs both
# LLVM and the LLVMSPIRVLib translator library (located through pkg-config).
find_package(LLVM CONFIG)
find_package(PkgConfig)

# pkg_check_modules() is only defined when the PkgConfig module was found.
if(PkgConfig_FOUND)
    pkg_check_modules(LLVMSPIRVLib IMPORTED_TARGET LLVMSPIRVLib)
endif()

if(LLVM_FOUND AND PkgConfig_FOUND AND LLVMSPIRVLib_FOUND)
    message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
    message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}")
    # NOTE(review): llvm_libs is computed here but the target below links the
    # monolithic `LLVM` library instead -- confirm which one is intended.
    llvm_map_components_to_libnames(llvm_libs support core irreader bitwriter)

    add_ur_executable(${TARGET_NAME}
        ${CMAKE_CURRENT_SOURCE_DIR}/codegen.cpp
        ${CMAKE_CURRENT_SOURCE_DIR}/helpers.cpp
    )

    target_include_directories(${TARGET_NAME} PRIVATE ${LLVM_INCLUDE_DIRS})
    target_compile_definitions(${TARGET_NAME} PRIVATE ${LLVM_DEFINITIONS})
    target_link_libraries(${TARGET_NAME}
        PRIVATE
        ${CMAKE_DL_LIBS}
        ${PROJECT_NAME}::headers
        ${PROJECT_NAME}::loader
        LLVM
        PkgConfig::LLVMSPIRVLib
    )
    # TODO: Depend on building adapters.

    if(MSVC)
        set_target_properties(${TARGET_NAME}
            PROPERTIES
            VS_DEBUGGER_COMMAND_ARGUMENTS ""
            VS_DEBUGGER_WORKING_DIRECTORY "$(OutDir)"
        )
    endif()
else()
    # Missing dependencies are not fatal: the example is simply not built.
    message(STATUS "The environment did not satisfy dependency requirements (LLVM, PkgConfig, LLVMSPIRVLib) for codegen example (skipping target).")
endif()
140 changes: 140 additions & 0 deletions examples/codegen/codegen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
/*
*
* Copyright (C) 2023 Intel Corporation
*
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
* See LICENSE.TXT
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*
* @file codegen.cpp
*
* @brief UR code generation and execution example for use with the Level Zero adapter.
*
* The codegen example demonstrates a complete flow for generating LLVM IR,
* translating it to SPIR-V, and submitting the kernel to Level Zero Runtime via UR API.
*/

#include <cstddef>
#include <cstdint>
#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include "helpers.h"
#include "ur_api.h"

/**
 * @brief Turns a failed Unified Runtime call into a C++ exception.
 *
 * Does nothing when @p r is UR_RESULT_SUCCESS. For any other value the
 * runtime is torn down via urTearDown() before the exception is thrown.
 *
 * @param r result code returned by a UR API call
 * @throws std::runtime_error carrying the numeric value of @p r
 */
void ur_check(const ur_result_t r) {
    if (r == UR_RESULT_SUCCESS) {
        return;
    }

    urTearDown(nullptr);
    const std::string message =
        "Unified runtime error: " + std::to_string(r);
    throw std::runtime_error(message);
}

std::vector<ur_adapter_handle_t> get_adapters() {
uint32_t adapterCount = 0;
ur_check(urAdapterGet(0, nullptr, &adapterCount));

if (!adapterCount) {
throw std::runtime_error("No adapters available.");
}

std::vector<ur_adapter_handle_t> adapters(adapterCount);
ur_check(urAdapterGet(adapterCount, adapters.data(), nullptr));
return adapters;
}

/**
 * @brief Collects the platform handles exposed by the given adapters.
 *
 * @param adapters adapters to query; the vector itself is not modified
 * @return one handle per reported platform (never empty)
 * @throws std::runtime_error when no platform is reported, or (through
 *         ur_check) when a UR call fails
 */
std::vector<ur_platform_handle_t>
get_platforms(std::vector<ur_adapter_handle_t> &adapters) {
    // The UR entry point takes a 32-bit adapter count; make the narrowing
    // from size_t explicit instead of implicit.
    const auto adapterCount = static_cast<uint32_t>(adapters.size());

    // Count query: no output array, hence zero entries requested (same
    // convention as the adapter and device queries in this file).
    uint32_t platformCount = 0;
    ur_check(urPlatformGet(adapters.data(), adapterCount, 0, nullptr,
                           &platformCount));

    if (!platformCount) {
        throw std::runtime_error("No platforms available.");
    }

    std::vector<ur_platform_handle_t> platforms(platformCount);
    ur_check(urPlatformGet(adapters.data(), adapterCount, platformCount,
                           platforms.data(), nullptr));
    return platforms;
}

/**
 * @brief Collects the GPU device handles of a platform.
 *
 * @param p platform to query for devices of type UR_DEVICE_TYPE_GPU
 * @return one handle per reported GPU (never empty)
 * @throws std::runtime_error when the platform reports no GPU, or (through
 *         ur_check) when a UR call fails
 */
std::vector<ur_device_handle_t> get_gpus(ur_platform_handle_t p) {
    // Ask for the number of GPUs first ...
    uint32_t gpuCount = 0;
    ur_check(urDeviceGet(p, UR_DEVICE_TYPE_GPU, 0, nullptr, &gpuCount));

    if (gpuCount == 0) {
        throw std::runtime_error("No GPUs available.");
    }

    // ... then fetch that many handles.
    std::vector<ur_device_handle_t> gpus(gpuCount);
    ur_check(
        urDeviceGet(p, UR_DEVICE_TYPE_GPU, gpuCount, gpus.data(), nullptr));
    return gpus;
}

/**
 * @brief Fixed-size C array wrapper whose storage is aligned to 4096 bytes.
 *
 * NOTE(review): 4096 looks like a page-size alignment; a reviewer asked
 * whether it should be derived from getpagesize() instead -- confirm intent.
 *
 * @tparam T element type
 * @tparam N number of elements
 */
template <class T, size_t N>
struct alignas(4096) AlignedArray {
    T data[N]; ///< Raw element storage, accessed directly by callers.
};

int main() {
ur_loader_config_handle_t loader_config = nullptr;
ur_check(urInit(UR_DEVICE_INIT_FLAG_GPU, loader_config));

auto adapters = get_adapters();
auto platforms = get_platforms(adapters);
auto gpus = get_gpus(platforms.front());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it make sense to filter out any platforms that aren't level-zero (or platforms that don't support spir64)? How would this behave on CUDA/HIP?

auto spv = generate_plus_one_spv();

constexpr int a_size = 32;
AlignedArray<int, a_size> a, b;
for (auto i = 0; i < a_size; ++i) {
a.data[i] = a_size - i;
b.data[i] = i;
}

auto current_device = gpus.front();

ur_context_handle_t hContext;
ur_check(urContextCreate(1, &current_device, nullptr, &hContext));

ur_program_handle_t hProgram;
ur_check(urProgramCreateWithIL(hContext, spv.data(), spv.size(), nullptr,
&hProgram));
ur_check(urProgramBuild(hContext, hProgram, nullptr));

ur_mem_handle_t dA, dB;
ur_check(urMemBufferCreate(hContext, UR_MEM_FLAG_READ_WRITE,
a_size * sizeof(int), nullptr, &dA));
ur_check(urMemBufferCreate(hContext, UR_MEM_FLAG_READ_WRITE,
a_size * sizeof(int), nullptr, &dB));

ur_kernel_handle_t hKernel;
ur_check(urKernelCreate(hProgram, "plus1", &hKernel));
ur_check(urKernelSetArgMemObj(hKernel, 0, nullptr, dA));
ur_check(urKernelSetArgMemObj(hKernel, 1, nullptr, dB));

ur_queue_handle_t queue;
ur_check(urQueueCreate(hContext, current_device, nullptr, &queue));

ur_check(urEnqueueMemBufferWrite(queue, dA, true, 0, a_size * sizeof(int),
a.data, 0, nullptr, nullptr));
ur_check(urEnqueueMemBufferWrite(queue, dB, true, 0, a_size * sizeof(int),
b.data, 0, nullptr, nullptr));

const size_t gWorkOffset[] = {0, 0, 0};
const size_t gWorkSize[] = {128, 1, 1};
const size_t lWorkSize[] = {1, 1, 1};
ur_event_handle_t event;
ur_check(urEnqueueKernelLaunch(queue, hKernel, 3, gWorkOffset, gWorkSize,
lWorkSize, 0, nullptr, &event));

ur_check(urEnqueueMemBufferRead(queue, dB, true, 0, a_size * sizeof(int),
b.data, 1, &event, nullptr));

ur_check(urQueueFinish(queue));

for (int i = 0; i < a_size; ++i) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This prints out the result but never prints out the initial array. We should either check for the expected value programmatically or be consistent with how the data is printed.

std::cout << b.data[i] << " ";
}
std::cout << std::endl;

return urTearDown(nullptr) == UR_RESULT_SUCCESS ? 0 : 1;
}
103 changes: 103 additions & 0 deletions examples/codegen/helpers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
/*
*
* Copyright (C) 2023 Intel Corporation
*
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
* See LICENSE.TXT
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "helpers.h"

#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"
#include <llvm/IR/LLVMContext.h>

#include <LLVMSPIRVLib/LLVMSPIRVLib.h>

#include <sstream>

std::string generate_plus_one_spv() {
using namespace llvm;
LLVMContext ctx;
std::unique_ptr<Module> module =
std::make_unique<Module>("code_generated", ctx);
module->setTargetTriple("spir64-unknown-unknown");
IRBuilder<> builder(ctx);

std::vector<Type *> args{Type::getInt32PtrTy(ctx, 1),
Type::getInt32PtrTy(ctx, 1)};
FunctionType *f_type = FunctionType::get(Type::getVoidTy(ctx), args, false);
Function *f =
Function::Create(f_type, GlobalValue::LinkageTypes::ExternalLinkage,
"plus1", module.get());
f->setCallingConv(CallingConv::SPIR_KERNEL);

// get_global_id
FunctionType *ggi_type = FunctionType::get(Type::getInt32Ty(ctx),
{Type::getInt32Ty(ctx)}, false);
Function *get_global_idj =
Function::Create(ggi_type, GlobalValue::LinkageTypes::ExternalLinkage,
"_Z13get_global_idj", module.get());
get_global_idj->setCallingConv(CallingConv::SPIR_FUNC);

BasicBlock *entry = BasicBlock::Create(ctx, "entry", f);

builder.SetInsertPoint(entry);
Constant *zero = ConstantInt::get(Type::getInt32Ty(ctx), 0);
Constant *onei = ConstantInt::get(Type::getInt32Ty(ctx), 1);
Value *idx = builder.CreateCall(get_global_idj, zero, "idx");
auto argit = f->args().begin();
#if LLVM_VERSION_MAJOR > 15
Value *firstElemSrc =
builder.CreateGEP(argit->getType(), argit, idx, "src.idx");
++argit;
Value *firstElemDst =
builder.CreateGEP(argit->getType(), argit, idx, "dst.idx");
#elif LLVM_VERSION_MAJOR > 12
Value *firstElemSrc = builder.CreateGEP(
argit->getType()->getPointerElementType(), argit, idx, "src.idx");
++argit;
Value *firstElemDst = builder.CreateGEP(
argit->getType()->getPointerElementType(), argit, idx, "dst.idx");
#else
Value *firstElemSrc = builder.CreateGEP(f->args().begin(), idx, "src.idx");
Value *firstElemDst = builder.CreateGEP(++argit, idx, "dst.idx");
#endif
Value *ldSrc =
builder.CreateLoad(Type::getInt32Ty(ctx), firstElemSrc, "ld");
Value *result = builder.CreateAdd(ldSrc, onei, "foo");
builder.CreateStore(result, firstElemDst);
builder.CreateRetVoid();

// set metadata -- pretend we're opencl (see
// https://github.com/KhronosGroup/SPIRV-LLVM-Translator/blob/master/docs/SPIRVRepresentationInLLVM.rst#spir-v-instructions-mapped-to-llvm-metadata)
Metadata *spirv_src_ops[] = {
ConstantAsMetadata::get(
ConstantInt::get(Type::getInt32Ty(ctx), 3 /*OpenCL_C*/)),
ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(ctx),
102000 /*OpenCL ver 1.2*/))};
NamedMDNode *spirv_src = module->getOrInsertNamedMetadata("spirv.Source");
spirv_src->addOperand(MDNode::get(ctx, spirv_src_ops));

module->print(errs(), nullptr);

SPIRV::TranslatorOpts opts;
opts.enableAllExtensions();
opts.setDesiredBIsRepresentation(SPIRV::BIsRepresentation::OpenCL12);
opts.setDebugInfoEIS(SPIRV::DebugInfoEIS::OpenCL_DebugInfo_100);

std::ostringstream ss;
std::string err;
auto success = writeSpirv(module.get(), opts, ss, err);
if (!success) {
errs() << "Spirv translation failed with error: " << err << "\n";
} else {
errs() << "Spirv tranlsation success.\n";
}
errs() << "Code size: " << ss.str().size() << "\n";

return ss.str();
}
14 changes: 14 additions & 0 deletions examples/codegen/helpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
*
* Copyright (C) 2023 Intel Corporation
*
* Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
* See LICENSE.TXT
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#pragma once

#include <string>

/**
 * @brief Generates the "plus1" kernel as LLVM IR and translates it to SPIR-V.
 *
 * @return the SPIR-V module produced by the translator, stored in a
 *         std::string used as a byte container.
 */
std::string generate_plus_one_spv();
37 changes: 37 additions & 0 deletions scripts/deps.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
name: examples
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

scripts is where we keep the specification and associated tooling. The third_party directory might be a better place for this (and a short README.md would be very useful about how to use this)

channels:
- conda-forge
dependencies:
- _libgcc_mutex=0.1
- _openmp_mutex=4.5
- bzip2=1.0.8
- c-ares=1.19.1
- ca-certificates=2023.5.7
- cmake=3.26.4
- expat=2.5.0
- keyutils=1.6.1
- krb5=1.20.1
- level-zero=1.11.0
- level-zero-devel=1.11.0
- libcurl=8.1.2
- libedit=3.1.20191231
- libev=4.33
- libexpat=2.5.0
- libgcc-ng=13.1.0
- libgomp=13.1.0
- libllvm14=14.0.6
- libnghttp2=1.52.0
- libssh2=1.11.0
- libstdcxx-ng=13.1.0
- libuv=1.44.2
- libzlib=1.2.13
- llvm-spirv=14.0.0
- llvm-tools=14.0.6
- llvmdev=14.0.6
- ncurses=6.4
- openssl=3.1.1
- pkg-config=0.29.2
- rhash=1.4.3
- xz=5.2.6
- zlib=1.2.13
- zstd=1.5.2