diff --git a/CMakeLists.txt b/CMakeLists.txt index bd41ba58b6..d77a2aedb4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -456,11 +456,6 @@ else() set(llvm_libs ${LLVM_LIB}) endif() -# Clang (UDF Compiler) -find_package(Clang REQUIRED) -include_directories(${CLANG_INCLUDE_DIRS}) -add_definitions(${CLANG_DEFINITIONS}) - # Boost find_package(Boost COMPONENTS log log_setup filesystem program_options regex system thread timer locale iostreams REQUIRED) @@ -692,6 +687,7 @@ add_subdirectory(TableArchiver) add_subdirectory(ThriftHandler) add_subdirectory(Geospatial) add_subdirectory(Distributed) +add_subdirectory(UdfCompiler) if(ENABLE_DBE) add_subdirectory(Embedded) diff --git a/QueryEngine/CMakeLists.txt b/QueryEngine/CMakeLists.txt index 2b55318eae..5748be40a5 100644 --- a/QueryEngine/CMakeLists.txt +++ b/QueryEngine/CMakeLists.txt @@ -114,7 +114,6 @@ set(query_engine_source_files TableGenerations.cpp TableOptimizer.cpp TargetExprBuilder.cpp - UDFCompiler.cpp Utils/DiamondCodegen.cpp StringFunctions.cpp StringOpsIR.cpp @@ -275,7 +274,6 @@ set(QUERY_ENGINE_LIBS ${Arrow_LIBRARIES} ) -list(APPEND QUERY_ENGINE_LIBS ${clang_libs}) list(APPEND QUERY_ENGINE_LIBS ${llvm_libs} ${ZLIB_LIBRARIES}) target_link_libraries(QueryEngine ${QUERY_ENGINE_LIBS}) diff --git a/QueryEngine/Execute.h b/QueryEngine/Execute.h index 7e880d8fcf..0198ab8875 100644 --- a/QueryEngine/Execute.h +++ b/QueryEngine/Execute.h @@ -148,9 +148,7 @@ class QuerySessionStatus { }; using QuerySessionMap = std::map>; -extern void read_udf_gpu_module(const std::string& udf_ir_filename); -extern void read_udf_cpu_module(const std::string& udf_ir_filename); -extern bool is_udf_module_present(bool cpu_only = false); + extern void read_rt_udf_gpu_module(const std::string& udf_ir); extern void read_rt_udf_cpu_module(const std::string& udf_ir); extern bool is_rt_udf_module_present(bool cpu_only = false); @@ -389,6 +387,8 @@ class Executor { static size_t getArenaBlockSize(); + static void addUdfIrToModule(const 
std::string& udf_ir_filename, const bool is_cuda_ir); + /** * Returns pointer to the intermediate tables vector currently stored by this executor. */ diff --git a/QueryEngine/ExtensionsIR.cpp b/QueryEngine/ExtensionsIR.cpp index 2eae9385f8..008218f968 100644 --- a/QueryEngine/ExtensionsIR.cpp +++ b/QueryEngine/ExtensionsIR.cpp @@ -725,7 +725,7 @@ llvm::StructType* CodeGenerator::createPointStructType(const std::string& udf_fu llvm::StructType* generated_struct_type = llvm::StructType::get(cgen_state_->context_, {llvm::Type::getInt8PtrTy(cgen_state_->context_), - llvm::Type::getInt64Ty(cgen_state_->context_), + llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_)}, @@ -813,7 +813,7 @@ llvm::StructType* CodeGenerator::createLineStringStructType( llvm::StructType* generated_struct_type = llvm::StructType::get(cgen_state_->context_, {llvm::Type::getInt8PtrTy(cgen_state_->context_), - llvm::Type::getInt64Ty(cgen_state_->context_), + llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_)}, @@ -903,9 +903,9 @@ llvm::StructType* CodeGenerator::createPolygonStructType(const std::string& udf_ llvm::StructType* generated_struct_type = llvm::StructType::get(cgen_state_->context_, {llvm::Type::getInt8PtrTy(cgen_state_->context_), - llvm::Type::getInt64Ty(cgen_state_->context_), - llvm::Type::getInt32PtrTy(cgen_state_->context_), - llvm::Type::getInt64Ty(cgen_state_->context_), + llvm::Type::getInt32Ty(cgen_state_->context_), + llvm::Type::getInt8PtrTy(cgen_state_->context_), + llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_)}, @@ -966,36 +966,39 @@ void 
CodeGenerator::codegenGeoPolygonArgs(const std::string& udf_func_name, CHECK(input_srid); CHECK(output_srid); + auto& builder = cgen_state_->ir_builder_; + auto polygon_abstraction = createPolygonStructType(udf_func_name, param_num); - auto alloc_mem = cgen_state_->ir_builder_.CreateAlloca(polygon_abstraction, nullptr); + auto alloc_mem = builder.CreateAlloca(polygon_abstraction, nullptr); - auto polygon_buf_ptr = - cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 0); - cgen_state_->ir_builder_.CreateStore(polygon_buf, polygon_buf_ptr); + const auto polygon_buf_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 0); + builder.CreateStore(polygon_buf, polygon_buf_ptr); - auto polygon_size_ptr = - cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 1); - cgen_state_->ir_builder_.CreateStore(polygon_size, polygon_size_ptr); + const auto polygon_size_ptr = + builder.CreateStructGEP(polygon_abstraction, alloc_mem, 1); + builder.CreateStore(polygon_size, polygon_size_ptr); - auto ring_sizes_buf_ptr = - cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 2); - cgen_state_->ir_builder_.CreateStore(ring_sizes_buf, ring_sizes_buf_ptr); + const auto ring_sizes_buf_ptr = + builder.CreateStructGEP(polygon_abstraction, alloc_mem, 2); + const auto ring_sizes_ptr_ty = + llvm::dyn_cast(ring_sizes_buf_ptr->getType()); + CHECK(ring_sizes_ptr_ty); + builder.CreateStore( + builder.CreateBitCast(ring_sizes_buf, ring_sizes_ptr_ty->getPointerElementType()), + ring_sizes_buf_ptr); - auto ring_size_ptr = - cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 3); - cgen_state_->ir_builder_.CreateStore(num_rings, ring_size_ptr); + const auto ring_size_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 3); + builder.CreateStore(num_rings, ring_size_ptr); - auto polygon_compression_ptr = - cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 4); - 
cgen_state_->ir_builder_.CreateStore(compression, polygon_compression_ptr); + const auto polygon_compression_ptr = + builder.CreateStructGEP(polygon_abstraction, alloc_mem, 4); + builder.CreateStore(compression, polygon_compression_ptr); - auto input_srid_ptr = - cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 5); - cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr); + const auto input_srid_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 5); + builder.CreateStore(input_srid, input_srid_ptr); - auto output_srid_ptr = - cgen_state_->ir_builder_.CreateStructGEP(polygon_abstraction, alloc_mem, 6); - cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr); + const auto output_srid_ptr = builder.CreateStructGEP(polygon_abstraction, alloc_mem, 6); + builder.CreateStore(output_srid, output_srid_ptr); output_args.push_back(alloc_mem); } @@ -1008,11 +1011,11 @@ llvm::StructType* CodeGenerator::createMultiPolygonStructType( llvm::StructType* generated_struct_type = llvm::StructType::get(cgen_state_->context_, {llvm::Type::getInt8PtrTy(cgen_state_->context_), - llvm::Type::getInt64Ty(cgen_state_->context_), - llvm::Type::getInt32PtrTy(cgen_state_->context_), - llvm::Type::getInt64Ty(cgen_state_->context_), - llvm::Type::getInt32PtrTy(cgen_state_->context_), - llvm::Type::getInt64Ty(cgen_state_->context_), + llvm::Type::getInt32Ty(cgen_state_->context_), + llvm::Type::getInt8PtrTy(cgen_state_->context_), + llvm::Type::getInt32Ty(cgen_state_->context_), + llvm::Type::getInt8PtrTy(cgen_state_->context_), + llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_), llvm::Type::getInt32Ty(cgen_state_->context_)}, @@ -1074,45 +1077,56 @@ void CodeGenerator::codegenGeoMultiPolygonArgs(const std::string& udf_func_name, CHECK(input_srid); CHECK(output_srid); - auto multi_polygon_abstraction = createMultiPolygonStructType(udf_func_name, 
param_num); - auto alloc_mem = - cgen_state_->ir_builder_.CreateAlloca(multi_polygon_abstraction, nullptr); + auto& builder = cgen_state_->ir_builder_; - auto polygon_coords_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 0); - cgen_state_->ir_builder_.CreateStore(polygon_coords, polygon_coords_ptr); - - auto polygon_coords_size_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 1); - cgen_state_->ir_builder_.CreateStore(polygon_coords_size, polygon_coords_size_ptr); - - auto ring_sizes_buf_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 2); - cgen_state_->ir_builder_.CreateStore(ring_sizes_buf, ring_sizes_buf_ptr); - - auto ring_sizes_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 3); - cgen_state_->ir_builder_.CreateStore(ring_sizes, ring_sizes_ptr); - - auto polygon_bounds_buf_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 4); - cgen_state_->ir_builder_.CreateStore(polygon_bounds, polygon_bounds_buf_ptr); - - auto polygon_bounds_sizes_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 5); - cgen_state_->ir_builder_.CreateStore(polygon_bounds_sizes, polygon_bounds_sizes_ptr); - - auto polygon_compression_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 6); - cgen_state_->ir_builder_.CreateStore(compression, polygon_compression_ptr); - - auto input_srid_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 7); - cgen_state_->ir_builder_.CreateStore(input_srid, input_srid_ptr); - - auto output_srid_ptr = - cgen_state_->ir_builder_.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 8); - cgen_state_->ir_builder_.CreateStore(output_srid, output_srid_ptr); + auto multi_polygon_abstraction = createMultiPolygonStructType(udf_func_name, param_num); + auto alloc_mem = 
builder.CreateAlloca(multi_polygon_abstraction, nullptr); + + const auto polygon_coords_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 0); + builder.CreateStore(polygon_coords, polygon_coords_ptr); + + const auto polygon_coords_size_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 1); + builder.CreateStore(polygon_coords_size, polygon_coords_size_ptr); + + const auto ring_sizes_buf_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 2); + const auto ring_sizes_ptr_ty = + llvm::dyn_cast(ring_sizes_buf_ptr->getType()); + CHECK(ring_sizes_ptr_ty); + builder.CreateStore( + builder.CreateBitCast(ring_sizes_buf, ring_sizes_ptr_ty->getPointerElementType()), + ring_sizes_buf_ptr); + + const auto ring_sizes_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 3); + builder.CreateStore(ring_sizes, ring_sizes_ptr); + + const auto polygon_bounds_buf_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 4); + const auto bounds_ptr_ty = + llvm::dyn_cast(polygon_bounds_buf_ptr->getType()); + CHECK(bounds_ptr_ty); + builder.CreateStore( + builder.CreateBitCast(polygon_bounds, bounds_ptr_ty->getPointerElementType()), + polygon_bounds_buf_ptr); + + const auto polygon_bounds_sizes_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 5); + builder.CreateStore(polygon_bounds_sizes, polygon_bounds_sizes_ptr); + + const auto polygon_compression_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 6); + builder.CreateStore(compression, polygon_compression_ptr); + + const auto input_srid_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 7); + builder.CreateStore(input_srid, input_srid_ptr); + + const auto output_srid_ptr = + builder.CreateStructGEP(multi_polygon_abstraction, alloc_mem, 8); + builder.CreateStore(output_srid, output_srid_ptr); output_args.push_back(alloc_mem); } @@ -1250,7 +1264,7 @@ std::vector 
CodeGenerator::codegenFunctionOperCastArgs( cgen_state_->emitExternalCall("fast_fixlen_array_buff", llvm::Type::getInt8PtrTy(cgen_state_->context_), {orig_arg_lvs[k], posArg(arg)}); - len_lv = cgen_state_->llInt(int64_t(fixlen)); + len_lv = cgen_state_->llInt(int32_t(fixlen)); } else { // TODO: remove const_arr and related code if it's not needed ptr_lv = (const_arr) ? orig_arg_lvs[k] @@ -1271,9 +1285,6 @@ std::vector CodeGenerator::codegenFunctionOperCastArgs( if (is_ext_arg_type_geo(ext_func_arg)) { if (arg_ti.get_type() == kPOINT || arg_ti.get_type() == kLINESTRING) { auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti); - auto& builder = cgen_state_->ir_builder_; - auto array_size_arg = - builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_)); auto compression_val = codegenCompression(arg_ti); auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid()); auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid()); @@ -1283,7 +1294,7 @@ std::vector CodeGenerator::codegenFunctionOperCastArgs( codegenGeoPointArgs(ext_func_sig->getName(), ij + dj, array_buf_arg, - array_size_arg, + len_lv, compression_val, input_srid_val, output_srid_val, @@ -1293,7 +1304,7 @@ std::vector CodeGenerator::codegenFunctionOperCastArgs( codegenGeoLineStringArgs(ext_func_sig->getName(), ij + dj, array_buf_arg, - array_size_arg, + len_lv, compression_val, input_srid_val, output_srid_val, @@ -1315,20 +1326,20 @@ std::vector CodeGenerator::codegenFunctionOperCastArgs( case kPOLYGON: { if (ext_func_arg == ExtArgumentType::GeoPolygon) { auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti); - auto& builder = cgen_state_->ir_builder_; - auto array_size_arg = - builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_)); auto compression_val = codegenCompression(arg_ti); auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid()); auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid()); auto [ring_size_buff, ring_size] = - 
codegenArrayBuff(orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true); + codegenArrayBuff(orig_arg_lvs[k + 1], + posArg(arg), + SQLTypes::kINT, + /*cast_and_extend=*/false); CHECK_EQ(k, ij); codegenGeoPolygonArgs(ext_func_sig->getName(), ij + dj, array_buf_arg, - array_size_arg, + len_lv, ring_size_buff, ring_size, compression_val, @@ -1342,8 +1353,10 @@ std::vector CodeGenerator::codegenFunctionOperCastArgs( auto [ring_size_buff, ring_size] = (const_arr) ? std::make_pair(orig_arg_lvs[k + 1], const_arr_size.at(orig_arg_lvs[k + 1])) - : codegenArrayBuff( - orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true); + : codegenArrayBuff(orig_arg_lvs[k + 1], + posArg(arg), + SQLTypes::kINT, + /*cast_and_extend=*/false); args.push_back(ring_size_buff); args.push_back(ring_size); j += 2; @@ -1353,23 +1366,26 @@ std::vector CodeGenerator::codegenFunctionOperCastArgs( case kMULTIPOLYGON: { if (ext_func_arg == ExtArgumentType::GeoMultiPolygon) { auto array_buf_arg = castArrayPointer(ptr_lv, elem_ti); - auto& builder = cgen_state_->ir_builder_; - auto array_size_arg = - builder.CreateZExt(len_lv, get_int_type(64, cgen_state_->context_)); auto compression_val = codegenCompression(arg_ti); auto input_srid_val = cgen_state_->llInt(arg_ti.get_input_srid()); auto output_srid_val = cgen_state_->llInt(arg_ti.get_output_srid()); auto [ring_size_buff, ring_size] = - codegenArrayBuff(orig_arg_lvs[k + 1], posArg(arg), SQLTypes::kINT, true); + codegenArrayBuff(orig_arg_lvs[k + 1], + posArg(arg), + SQLTypes::kINT, + /*cast_and_extend=*/false); auto [poly_bounds_buff, poly_bounds_size] = - codegenArrayBuff(orig_arg_lvs[k + 2], posArg(arg), SQLTypes::kINT, true); + codegenArrayBuff(orig_arg_lvs[k + 2], + posArg(arg), + SQLTypes::kINT, + /*cast_and_extend=*/false); CHECK_EQ(k, ij); codegenGeoMultiPolygonArgs(ext_func_sig->getName(), ij + dj, array_buf_arg, - array_size_arg, + len_lv, ring_size_buff, ring_size, poly_bounds_buff, diff --git a/QueryEngine/NativeCodegen.cpp 
b/QueryEngine/NativeCodegen.cpp index 03a86abca4..e5931aca6b 100644 --- a/QueryEngine/NativeCodegen.cpp +++ b/QueryEngine/NativeCodegen.cpp @@ -985,6 +985,14 @@ std::map get_device_parameters(bool cpu_only) { return result; } +namespace { + +bool is_udf_module_present(bool cpu_only = false) { + return (cpu_only || udf_gpu_module != nullptr) && (udf_cpu_module != nullptr); +} + +} // namespace + std::shared_ptr CodeGenerator::generateNativeGPUCode( llvm::Function* func, llvm::Function* wrapper_func, @@ -1725,14 +1733,12 @@ std::unique_ptr g_rt_libdevice_module( read_libdevice_module(getGlobalLLVMContext())); #endif -bool is_udf_module_present(bool cpu_only) { - return (cpu_only || udf_gpu_module != nullptr) && (udf_cpu_module != nullptr); -} - bool is_rt_udf_module_present(bool cpu_only) { return (cpu_only || rt_udf_gpu_module != nullptr) && (rt_udf_cpu_module != nullptr); } +namespace { + void read_udf_gpu_module(const std::string& udf_ir_filename) { llvm::SMDiagnostic parse_error; @@ -1763,6 +1769,17 @@ void read_udf_cpu_module(const std::string& udf_ir_filename) { } } +} // namespace + +void Executor::addUdfIrToModule(const std::string& udf_ir_filename, + const bool is_cuda_ir) { + if (is_cuda_ir) { + read_udf_gpu_module(udf_ir_filename); + } else { + read_udf_cpu_module(udf_ir_filename); + } +} + void read_rt_udf_gpu_module(const std::string& udf_ir_string) { llvm::SMDiagnostic parse_error; diff --git a/QueryEngine/OmniSciTypes.h b/QueryEngine/OmniSciTypes.h index 49b83487f1..93bcce4ee5 100644 --- a/QueryEngine/OmniSciTypes.h +++ b/QueryEngine/OmniSciTypes.h @@ -17,6 +17,7 @@ #pragma once #include +#include #include /* `../` is required for UDFCompiler */ @@ -69,9 +70,9 @@ struct Array { } }; -struct GeoLineString { +struct GeoPoint { int8_t* ptr; - int64_t sz; + int32_t sz; int32_t compression; int32_t input_srid; int32_t output_srid; @@ -85,14 +86,14 @@ struct GeoLineString { DEVICE int32_t getOutputSrid() const { return output_srid; } }; -struct GeoPoint { 
+struct GeoLineString { int8_t* ptr; - int64_t sz; + int32_t sz; int32_t compression; int32_t input_srid; int32_t output_srid; - DEVICE int64_t getSize() const { return sz; } + DEVICE int32_t getSize() const { return sz; } DEVICE int32_t getCompression() const { return compression; } @@ -103,17 +104,17 @@ struct GeoPoint { struct GeoPolygon { int8_t* ptr_coords; - int64_t coords_size; - int32_t* ring_sizes; - int64_t num_rings; + int32_t coords_size; + int8_t* ring_sizes; + int32_t num_rings; int32_t compression; int32_t input_srid; int32_t output_srid; - DEVICE int32_t* getRingSizes() { return ring_sizes; } - DEVICE int64_t getCoordsSize() const { return coords_size; } + DEVICE int8_t* getRingSizes() { return ring_sizes; } + DEVICE int32_t getCoordsSize() const { return coords_size; } - DEVICE int64_t getNumRings() const { return num_rings; } + DEVICE int32_t getNumRings() const { return num_rings; } DEVICE int32_t getCompression() const { return compression; } @@ -124,23 +125,23 @@ struct GeoPolygon { struct GeoMultiPolygon { int8_t* ptr_coords; - int64_t coords_size; - int32_t* ring_sizes; - int64_t num_rings; - int32_t* poly_sizes; - int64_t num_polys; + int32_t coords_size; + int8_t* ring_sizes; + int32_t num_rings; + int8_t* poly_sizes; + int32_t num_polys; int32_t compression; int32_t input_srid; int32_t output_srid; - DEVICE int32_t* getRingSizes() { return ring_sizes; } - DEVICE int64_t getCoordsSize() const { return coords_size; } + DEVICE int8_t* getRingSizes() { return ring_sizes; } + DEVICE int32_t getCoordsSize() const { return coords_size; } - DEVICE int64_t getNumRings() const { return num_rings; } + DEVICE int32_t getNumRings() const { return num_rings; } - DEVICE int32_t* getPolygonSizes() { return poly_sizes; } + DEVICE int8_t* getPolygonSizes() { return poly_sizes; } - DEVICE int64_t getNumPolygons() const { return num_polys; } + DEVICE int32_t getNumPolygons() const { return num_polys; } DEVICE int32_t getCompression() const { return 
compression; } @@ -159,7 +160,7 @@ struct Column { #ifndef __CUDACC__ throw std::runtime_error("column buffer index is out of range"); #else - static T null_value; + static DEVICE T null_value; set_null(null_value); return null_value; #endif diff --git a/QueryEngine/UDFCompiler.h b/QueryEngine/UDFCompiler.h deleted file mode 100644 index 3262fc2999..0000000000 --- a/QueryEngine/UDFCompiler.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright 2019 OmniSci, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - * @file UDFCompiler.h - * @author Michael Collison - * @brief External interface for parsing AST and bitcode files - * - * Copyright (c) 2018 OmniSci, Inc. 
- */ - -#ifndef UDF_COMPILER_H -#define UDF_COMPILER_H - -#include -#include -#include -#include -#include - -#include "CudaMgr/CudaMgr.h" - -class UdfClangDriver { - public: - UdfClangDriver(const std::string&); - clang::driver::Driver* getClangDriver() { return &the_driver; } - std::tuple getClangVersion() const { return clang_version; } - - private: - llvm::IntrusiveRefCntPtr diag_options; - clang::DiagnosticConsumer* diag_client; - llvm::IntrusiveRefCntPtr diag_id; - clang::DiagnosticsEngine diags; - std::unique_ptr diag_client_owner; - clang::driver::Driver the_driver; - std::tuple clang_version; -}; - -class UdfCompiler { - public: - UdfCompiler(const std::string& udf_file_name, - CudaMgr_Namespace::NvidiaDeviceArch target_arch, - const std::string& clang_path = ""); - UdfCompiler(const std::string& udf_file_name, - CudaMgr_Namespace::NvidiaDeviceArch target_arch, - const std::string& clang_path, - const std::vector clang_options); - int compileUdf(); - const std::string& getAstFileName() const; - - private: - void init(const std::string& clang_path); - std::string removeFileExtension(const std::string& path); - std::string getFileExt(std::string& s); - int parseToAst(const char* file_name); - std::string genGpuIrFilename(const char* udf_file_name); - std::string genCpuIrFilename(const char* udf_file_name); - int compileToGpuByteCode(const char* udf_file_name, bool cpu_mode); - int compileToCpuByteCode(const char* udf_file_name); - void replaceExtn(std::string& s, const std::string& new_ext); - int compileFromCommandLine(const std::vector& command_line); - void readCompiledModules(); - void readGpuCompiledModule(); - void readCpuCompiledModule(); - int compileForGpu(); - - private: - std::string udf_file_name_; - std::string udf_ast_file_name_; -#ifdef HAVE_CUDA - CudaMgr_Namespace::NvidiaDeviceArch target_arch_; -#endif - std::string clang_path_; - std::vector clang_options_; -}; -#endif diff --git a/Shared/InlineNullValues.h b/Shared/InlineNullValues.h 
index 4e25e9d6d7..08aef0748c 100644 --- a/Shared/InlineNullValues.h +++ b/Shared/InlineNullValues.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #define NULL_BOOLEAN INT8_MIN @@ -100,12 +101,12 @@ DEVICE T inline_fp_null_array_value() { } template <> -constexpr inline float inline_fp_null_array_value() { +DEVICE inline float inline_fp_null_array_value() { return NULL_ARRAY_FLOAT; } template <> -constexpr inline double inline_fp_null_array_value() { +DEVICE inline double inline_fp_null_array_value() { return NULL_ARRAY_DOUBLE; } diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index a1e7815233..3abf6c7446 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -203,7 +203,7 @@ target_link_libraries(LoadTableTest ${THRIFT_HANDLER_TEST_LIBRARIES}) target_link_libraries(JSONTest gtest Logger Shared) if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin") - target_link_libraries(UdfTest gtest ${EXECUTE_TEST_LIBS}) + target_link_libraries(UdfTest gtest UdfCompiler ${EXECUTE_TEST_LIBS}) endif() target_link_libraries(TableUpdateDeleteBenchmark benchmark ${EXECUTE_TEST_LIBS}) diff --git a/Tests/CachedHashTableTest.cpp b/Tests/CachedHashTableTest.cpp index 70dcd7742a..e7858f0433 100644 --- a/Tests/CachedHashTableTest.cpp +++ b/Tests/CachedHashTableTest.cpp @@ -31,7 +31,6 @@ #include "QueryEngine/Execute.h" #include "QueryEngine/MurmurHash1Inl.h" #include "QueryEngine/ResultSet.h" -#include "QueryEngine/UDFCompiler.h" #include "QueryRunner/QueryRunner.h" #include "Shared/SystemParameters.h" #include "TestHelpers.h" diff --git a/Tests/JoinHashTableTest.cpp b/Tests/JoinHashTableTest.cpp index edc15a2690..2e1f78cbac 100644 --- a/Tests/JoinHashTableTest.cpp +++ b/Tests/JoinHashTableTest.cpp @@ -33,7 +33,6 @@ #include "QueryEngine/ExternalCacheInvalidators.h" #include "QueryEngine/JoinHashTable/OverlapsJoinHashTable.h" #include "QueryEngine/ResultSet.h" -#include "QueryEngine/UDFCompiler.h" #include "QueryRunner/QueryRunner.h" #include 
"Shared/thread_count.h" #include "TestHelpers.h" diff --git a/Tests/Udf/udf_sample.cpp b/Tests/Udf/udf_sample.cpp index ba3fd73eaf..b1bfe8e46d 100644 --- a/Tests/Udf/udf_sample.cpp +++ b/Tests/Udf/udf_sample.cpp @@ -94,58 +94,49 @@ double ST_Y_Point(int8_t* p, int64_t psize, int32_t ic, int32_t isr, int32_t osr EXTENSION_NOINLINE double ST_Perimeter_Polygon(int8_t* poly, - int64_t polysize, - int32_t* poly_ring_sizes, - int64_t poly_num_rings, + int32_t polysize, + int8_t* poly_ring_sizes, + int32_t poly_num_rings, int32_t ic, int32_t isr, int32_t osr); EXTENSION_NOINLINE double ST_Perimeter_Polygon_Geodesic(int8_t* poly, - int64_t polysize, - int32_t* poly_ring_sizes, - int64_t poly_num_rings, + int32_t polysize, + int8_t* poly_ring_sizes_in, + int32_t poly_num_rings, int32_t ic, int32_t isr, int32_t osr); EXTENSION_NOINLINE double ST_Perimeter_MultiPolygon(int8_t* mpoly_coords, - int64_t mpoly_coords_size, - int32_t* mpoly_ring_sizes, - int64_t mpoly_num_rings, - int32_t* mpoly_poly_sizes, - int64_t mpoly_num_polys, + int32_t mpoly_coords_size, + int8_t* mpoly_ring_sizes, + int32_t mpoly_num_rings, + int8_t* mpoly_poly_sizes, + int32_t mpoly_num_polys, int32_t ic, int32_t isr, int32_t osr); EXTENSION_NOINLINE double ST_Area_Polygon(int8_t* poly_coords, - int64_t poly_coords_size, - int32_t* poly_ring_sizes, - int64_t poly_num_rings, + int32_t poly_coords_size, + int8_t* poly_ring_sizes, + int32_t poly_num_rings, int32_t ic, int32_t isr, int32_t osr); -EXTENSION_NOINLINE -double ST_Area_Polygon_Geodesic(int8_t* poly_coords, - int64_t poly_coords_size, - int32_t* poly_ring_sizes, - int64_t poly_num_rings, - int32_t ic, - int32_t isr, - int32_t osr); - EXTENSION_NOINLINE double ST_Area_MultiPolygon(int8_t* mpoly_coords, - int64_t mpoly_coords_size, - int32_t* mpoly_ring_sizes, - int64_t mpoly_num_rings, - int32_t* mpoly_poly_sizes, - int64_t mpoly_num_polys, + int32_t mpoly_coords_size, + int8_t* mpoly_ring_sizes, + int32_t mpoly_num_rings, + int8_t* 
mpoly_poly_sizes_in, + int32_t mpoly_num_polys, int32_t ic, int32_t isr, int32_t osr); diff --git a/Tests/UdfTest.cpp b/Tests/UdfTest.cpp index a80404c009..b17b33c7bf 100644 --- a/Tests/UdfTest.cpp +++ b/Tests/UdfTest.cpp @@ -30,8 +30,9 @@ #include "QueryEngine/Execute.h" #include "QueryEngine/ExtensionFunctionsWhitelist.h" #include "QueryEngine/ResultSet.h" -#include "QueryEngine/UDFCompiler.h" #include "QueryRunner/QueryRunner.h" +#include "UdfCompiler/UdfCompiler.h" + #include "TestHelpers.h" #ifndef BASE_PATH @@ -124,12 +125,14 @@ class SQLTestEnv : public ::testing::Environment { } std::vector udf_compiler_options{std::string("-D UDF_COMPILER_OPTION")}; - UdfCompiler compiler( - udf_file.string(), g_device_arch, std::string(""), udf_compiler_options); - auto compile_result = compiler.compileUdf(); - EXPECT_EQ(compile_result, 0); + UdfCompiler compiler(g_device_arch, std::string(""), udf_compiler_options); + auto compile_result = compiler.compileUdf(udf_file.string()); + Executor::addUdfIrToModule(compile_result.first, /*is_cuda_ir=*/false); + if (!compile_result.second.empty()) { + Executor::addUdfIrToModule(compile_result.second, /*is_cuda_ir=*/true); + } - QR::init(BASE_PATH, compiler.getAstFileName()); + QR::init(BASE_PATH, compiler.getAstFileName(udf_file.string())); g_calcite = QR::get()->getCalcite(); } @@ -182,19 +185,25 @@ class UDFCompilerTest : public ::testing::Test { }; TEST_F(UDFCompilerTest, CompileTest) { - UdfCompiler compiler(getUdfFileName(), g_device_arch); - auto compile_result = compiler.compileUdf(); + UdfCompiler compiler(g_device_arch); + auto [cpu_ir_file, cuda_ir_file] = compiler.compileUdf(getUdfFileName()); + + EXPECT_TRUE(!cpu_ir_file.empty()); + if (QR::get()->gpusPresent()) { + EXPECT_TRUE(!cuda_ir_file.empty()); + } else { + EXPECT_TRUE(cuda_ir_file.empty()); + } +} - EXPECT_EQ(compile_result, 0); - // TODO cannot test invalid file path because the compileUdf function uses - // LOG(FATAL) which stops the process and does not 
return +TEST_F(UDFCompilerTest, InvalidPath) { + UdfCompiler compiler(g_device_arch); + EXPECT_ANY_THROW(compiler.compileUdf(getUdfFileName() + ".invalid")); } TEST_F(UDFCompilerTest, CompilerOptionTest) { - UdfCompiler compiler(getUdfFileName(), g_device_arch); - auto compile_result = compiler.compileUdf(); - - EXPECT_EQ(compile_result, 0); + UdfCompiler compiler(g_device_arch); + EXPECT_NO_THROW(compiler.compileUdf(getUdfFileName())); // This function signature is only visible via the -DUDF_COMPILER_OPTION // definition. This definition was passed to the UdfCompiler is Setup. @@ -206,18 +215,18 @@ TEST_F(UDFCompilerTest, CompilerOptionTest) { } TEST_F(UDFCompilerTest, CompilerPathTest) { - UdfCompiler compiler( - getUdfFileName(), g_device_arch, llvm::sys::findProgramByName("clang++").get()); - auto compile_result = compiler.compileUdf(); + UdfCompiler compiler(g_device_arch, llvm::sys::findProgramByName("clang++").get()); + EXPECT_NO_THROW(compiler.compileUdf(getUdfFileName())); +} - EXPECT_EQ(compile_result, 0); +TEST_F(UDFCompilerTest, BadClangPath) { + UdfCompiler compiler(g_device_arch, /*clang_path_override=*/get_udf_filename()); + EXPECT_ANY_THROW(compiler.compileUdf(getUdfFileName())); } TEST_F(UDFCompilerTest, CalciteRegistration) { - UdfCompiler compiler(getUdfFileName(), g_device_arch); - auto compile_result = compiler.compileUdf(); - - ASSERT_EQ(compile_result, 0); + UdfCompiler compiler(g_device_arch); + EXPECT_NO_THROW(compiler.compileUdf(getUdfFileName())); ASSERT_TRUE(g_calcite != nullptr); @@ -241,10 +250,8 @@ TEST_F(UDFCompilerTest, CalciteRegistration) { } TEST_F(UDFCompilerTest, UdfQuery) { - UdfCompiler compiler(getUdfFileName(), g_device_arch); - auto compile_result = compiler.compileUdf(); - - ASSERT_EQ(compile_result, 0); + UdfCompiler compiler(g_device_arch); + EXPECT_NO_THROW(compiler.compileUdf(getUdfFileName())); run_ddl_statement("DROP TABLE IF EXISTS stocks;"); run_ddl_statement("DROP TABLE IF EXISTS sal_emp;"); diff --git 
a/ThriftHandler/CMakeLists.txt b/ThriftHandler/CMakeLists.txt index d56b9ae87c..6389849076 100644 --- a/ThriftHandler/CMakeLists.txt +++ b/ThriftHandler/CMakeLists.txt @@ -20,4 +20,4 @@ target_link_libraries(token_completion_hints mapd_thrift) add_library(thrift_handler ${THRIFT_HANDLER_SOURCES}) add_dependencies(thrift_handler Parser) -target_link_libraries(thrift_handler token_completion_hints QueryState ${THRIFT_HANDLER_LIBS}) +target_link_libraries(thrift_handler token_completion_hints QueryState UdfCompiler ${THRIFT_HANDLER_LIBS}) diff --git a/ThriftHandler/DBHandler.cpp b/ThriftHandler/DBHandler.cpp index 807a94dc47..9812a4b1bd 100644 --- a/ThriftHandler/DBHandler.cpp +++ b/ThriftHandler/DBHandler.cpp @@ -23,7 +23,6 @@ #include "DBHandler.h" #include "DistributedLoader.h" -#include "QueryEngine/UDFCompiler.h" #include "TokenCompletionHints.h" #ifdef HAVE_PROFILER @@ -71,6 +70,7 @@ #include "Shared/mapd_shared_mutex.h" #include "Shared/measure.h" #include "Shared/scope.h" +#include "UdfCompiler/UdfCompiler.h" #ifdef HAVE_AWS_S3 #include @@ -365,12 +365,14 @@ void DBHandler::initialize(const bool is_new_db) { const CudaMgr_Namespace::NvidiaDeviceArch device_arch = cuda_mgr ? 
cuda_mgr->getDeviceArch() : CudaMgr_Namespace::NvidiaDeviceArch::Kepler; - UdfCompiler compiler(udf_filename_, device_arch, clang_path_, clang_options_); - int compile_result = compiler.compileUdf(); + UdfCompiler compiler(device_arch, clang_path_, clang_options_); - if (compile_result == 0) { - udf_ast_filename = compiler.getAstFileName(); + const auto [cpu_udf_ir_file, cuda_udf_ir_file] = compiler.compileUdf(udf_filename_); + Executor::addUdfIrToModule(cpu_udf_ir_file, /*is_cuda_ir=*/false); + if (!cuda_udf_ir_file.empty()) { + Executor::addUdfIrToModule(cuda_udf_ir_file, /*is_cuda_ir=*/true); } + udf_ast_filename = compiler.getAstFileName(udf_filename_); } } catch (const std::exception& e) { LOG(FATAL) << "Failed to initialize UDF compiler: " << e.what(); diff --git a/UdfCompiler/CMakeLists.txt b/UdfCompiler/CMakeLists.txt new file mode 100644 index 0000000000..ce0369c129 --- /dev/null +++ b/UdfCompiler/CMakeLists.txt @@ -0,0 +1,12 @@ +set(udf_compiler_source_files + UdfCompiler.cpp) + +add_library(UdfCompiler ${udf_compiler_source_files}) + +# Clang +find_package(Clang REQUIRED) +include_directories(${CLANG_INCLUDE_DIRS}) +add_definitions(${CLANG_DEFINITIONS}) + +target_link_libraries(UdfCompiler Logger ${clang_libs}) + diff --git a/QueryEngine/UDFCompiler.cpp b/UdfCompiler/UdfCompiler.cpp similarity index 74% rename from QueryEngine/UDFCompiler.cpp rename to UdfCompiler/UdfCompiler.cpp index 941aaddd4a..842a801901 100644 --- a/QueryEngine/UDFCompiler.cpp +++ b/UdfCompiler/UdfCompiler.cpp @@ -1,5 +1,5 @@ /* - * Copyright 2019 OmniSci, Inc. + * Copyright 2021 OmniSci, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ * limitations under the License. 
*/ -#include "UDFCompiler.h" -#include "CudaMgr/CudaMgr.h" +#include "UdfCompiler.h" #include #include @@ -39,7 +38,6 @@ #include #endif -#include "Execute.h" #include "Logger/Logger.h" using namespace clang; @@ -177,6 +175,11 @@ std::string exec_output(std::string cmd) { std::tuple get_clang_version(const std::string& clang_path) { std::string cmd = clang_path + " --version"; std::string result = exec_output(cmd); + if (result.empty()) { + throw std::runtime_error( + "Invalid clang binary path detected, cannot find clang binary. Is clang " + "installed?"); + } int major, minor, patchlevel; auto count = sscanf(result.substr(result.find("clang version")).c_str(), "clang version %d.%d.%d", @@ -190,7 +193,21 @@ std::tuple get_clang_version(const std::string& clang_path) { return {major, minor, patchlevel}; } -} // namespace +class UdfClangDriver { + public: + UdfClangDriver(const std::string&); + clang::driver::Driver* getClangDriver() { return &the_driver; } + std::tuple getClangVersion() const { return clang_version; } + + private: + llvm::IntrusiveRefCntPtr diag_options; + clang::DiagnosticConsumer* diag_client; + llvm::IntrusiveRefCntPtr diag_id; + clang::DiagnosticsEngine diags; + std::unique_ptr diag_client_owner; + clang::driver::Driver the_driver; + std::tuple clang_version; +}; UdfClangDriver::UdfClangDriver(const std::string& clang_path) : diag_options(new DiagnosticOptions()) @@ -226,7 +243,83 @@ UdfClangDriver::UdfClangDriver(const std::string& clang_path) } } -std::string UdfCompiler::removeFileExtension(const std::string& path) { +std::string get_clang_path(const std::string& clang_path_override) { + if (clang_path_override.empty()) { + const auto clang_path = (llvm::sys::findProgramByName("clang++").get()); + if (clang_path.empty()) { + throw std::runtime_error( + "Unable to find clang++ to compile user defined functions"); + } + return clang_path; + } else { + if (!boost::filesystem::exists(clang_path_override)) { + throw std::runtime_error("Path 
provided for udf compiler " + clang_path_override + + " does not exist."); + } + + if (boost::filesystem::is_directory(clang_path_override)) { + throw std::runtime_error("Path provided for udf compiler " + clang_path_override + + " is not to the clang++ executable."); + } + } + return clang_path_override; +} + +} // namespace + +UdfCompiler::UdfCompiler(CudaMgr_Namespace::NvidiaDeviceArch target_arch, + const std::string& clang_path_override) + : clang_path_(get_clang_path(clang_path_override)) +#ifdef HAVE_CUDA + , target_arch_(target_arch) +#endif +{ +} + +UdfCompiler::UdfCompiler(CudaMgr_Namespace::NvidiaDeviceArch target_arch, + const std::string& clang_path_override, + const std::vector clang_options) + : clang_path_(get_clang_path(clang_path_override)) + , clang_options_(clang_options) +#ifdef HAVE_CUDA + , target_arch_(target_arch) +#endif +{ +} + +std::pair UdfCompiler::compileUdf( + const std::string& udf_file_name) const { + LOG(INFO) << "UDFCompiler filename to compile: " << udf_file_name; + if (!boost::filesystem::exists(udf_file_name)) { + throw std::runtime_error("User defined function file " + udf_file_name + + " does not exist."); + } + + // create the AST file for the input function + generateAST(udf_file_name); + + // Compile udf file to generate cpu and gpu bytecode files + std::string cpu_file_name = ""; + std::string cuda_file_name = ""; + + cpu_file_name = compileToLLVMIR(udf_file_name); + +#ifdef HAVE_CUDA + try { + cuda_file_name = compileToCudaIR(udf_file_name); + } catch (const std::exception& e) { + LOG(WARNING) + << "Failed to generate GPU IR for UDF " + udf_file_name + + ", attempting to use CPU compiled IR for GPU.\nUDF Compiler exception: " + + e.what(); + } +#endif + return std::make_pair(cpu_file_name, cuda_file_name); +} + +namespace { + +std::string remove_file_extension(const std::string& path) { if (path == "." 
|| path == "..") { return path; } @@ -239,36 +332,33 @@ std::string UdfCompiler::removeFileExtension(const std::string& path) { return path; } -std::string UdfCompiler::getFileExt(std::string& s) { +std::string get_file_ext(const std::string& s) { size_t i = s.rfind('.', s.length()); if (1 != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } } -void UdfCompiler::replaceExtn(std::string& s, const std::string& new_ext) { +void replace_extension(std::string& s, const std::string& new_ext) { std::string::size_type i = s.rfind('.', s.length()); if (i != std::string::npos) { - s.replace(i + 1, getFileExt(s).length(), new_ext); + s.replace(i + 1, get_file_ext(s).length(), new_ext); } } -std::string UdfCompiler::genGpuIrFilename(const char* udf_file_name) { - std::string gpu_file_name(removeFileExtension(udf_file_name)); +} // namespace - gpu_file_name += "_gpu.bc"; - return gpu_file_name; +std::string UdfCompiler::genCUDAIRFilename(const std::string& udf_file_name) { + return remove_file_extension(udf_file_name) + "_gpu.bc"; } -std::string UdfCompiler::genCpuIrFilename(const char* udf_fileName) { - std::string cpu_file_name(removeFileExtension(udf_fileName)); - - cpu_file_name += "_cpu.bc"; - return cpu_file_name; +std::string UdfCompiler::genLLVMIRFilename(const std::string& udf_file_name) { + return remove_file_extension(udf_file_name) + "_cpu.bc"; } -int UdfCompiler::compileFromCommandLine(const std::vector& command_line) { +int UdfCompiler::compileFromCommandLine( + const std::vector& command_line) const { UdfClangDriver compiler_driver(clang_path_); auto the_driver(compiler_driver.getClangDriver()); @@ -291,7 +381,7 @@ int UdfCompiler::compileFromCommandLine(const std::vector& command_ std::unique_ptr compilation( the_driver->BuildCompilation(clang_command_opts)); if (!compilation) { - LOG(FATAL) << "failed to build compilation object!\n"; + throw std::runtime_error("failed to build compilation object!"); } auto [clang_version_major, 
clang_version_minor, clang_version_patchlevel] = compiler_driver.getClangVersion(); @@ -422,8 +512,9 @@ int UdfCompiler::compileFromCommandLine(const std::vector& command_ return res; } -int UdfCompiler::compileToGpuByteCode(const char* udf_file_name, bool cpu_mode) { - std::string gpu_out_filename(genGpuIrFilename(udf_file_name)); +#ifdef HAVE_CUDA +std::string UdfCompiler::compileToCudaIR(const std::string& udf_file_name) const { + const auto gpu_out_filename = genCUDAIRFilename(udf_file_name); std::vector command_line{clang_path_, "-c", @@ -434,22 +525,15 @@ int UdfCompiler::compileToGpuByteCode(const char* udf_file_name, bool cpu_mode) "-std=c++14", "-DNO_BOOST"}; - // If we are not compiling for cpu mode, then target the gpu - // Otherwise assume we can generic ir that will - // be translated to gpu code during target code generation -#ifdef HAVE_CUDA - if (!cpu_mode) { - command_line.emplace_back("--cuda-gpu-arch=" + - CudaMgr_Namespace::CudaMgr::deviceArchToSM(target_arch_)); - command_line.emplace_back("--cuda-device-only"); - command_line.emplace_back("-xcuda"); - command_line.emplace_back("--no-cuda-version-check"); - const auto cuda_path = get_cuda_home(); - if (cuda_path != "") { - command_line.emplace_back("--cuda-path=" + cuda_path); - } + command_line.emplace_back("--cuda-gpu-arch=" + + CudaMgr_Namespace::CudaMgr::deviceArchToSM(target_arch_)); + command_line.emplace_back("--cuda-device-only"); + command_line.emplace_back("-xcuda"); + command_line.emplace_back("--no-cuda-version-check"); + const auto cuda_path = get_cuda_home(); + if (cuda_path != "") { + command_line.emplace_back("--cuda-path=" + cuda_path); } -#endif command_line.emplace_back(udf_file_name); @@ -459,13 +543,16 @@ int UdfCompiler::compileToGpuByteCode(const char* udf_file_name, bool cpu_mode) // make sure that compilation actually succeeded by checking the // output file: if (!status && !boost::filesystem::exists(gpu_out_filename)) { - status = 2; + throw std::runtime_error( + 
"Failed to generate GPU UDF IR in CUDA mode with error code " + + std::to_string(status)); } - return status; + return gpu_out_filename; } +#endif -int UdfCompiler::compileToCpuByteCode(const char* udf_file_name) { - std::string cpu_out_filename(genCpuIrFilename(udf_file_name)); +std::string UdfCompiler::compileToLLVMIR(const std::string& udf_file_name) const { + std::string cpu_out_filename = genLLVMIRFilename(udf_file_name); std::vector command_line{clang_path_, "-c", @@ -477,14 +564,18 @@ int UdfCompiler::compileToCpuByteCode(const char* udf_file_name) { "-DNO_BOOST", udf_file_name}; auto res = compileFromCommandLine(command_line); + if (res != 0) { + throw std::runtime_error("Failed to compile CPU UDF (status code " + + std::to_string(res) + ")"); + } if (!boost::filesystem::exists(cpu_out_filename)) { - throw std::runtime_error("udf compile did not produce " + cpu_out_filename); + throw std::runtime_error("udf compile did not produce output file " + + cpu_out_filename); } - - return res; + return cpu_out_filename; } -int UdfCompiler::parseToAst(const char* file_name) { +void UdfCompiler::generateAST(const std::string& file_name) const { UdfClangDriver the_driver(clang_path_); std::string resource_path = the_driver.getClangDriver()->ResourceDir; std::string include_option = @@ -511,147 +602,23 @@ int UdfCompiler::parseToAst(const char* file_name) { std::string out_name(file_name); std::string file_ext("ast"); - replaceExtn(out_name, file_ext); + replace_extension(out_name, file_ext); std::error_code out_error_info; llvm::raw_fd_ostream out_file( llvm::StringRef(out_name), out_error_info, llvm::sys::fs::F_None); auto factory = std::make_unique(out_file); - return tool.run(factory.get()); -} - -const std::string& UdfCompiler::getAstFileName() const { - return udf_ast_file_name_; -} - -void UdfCompiler::init(const std::string& clang_path) { - replaceExtn(udf_ast_file_name_, "ast"); - - if (clang_path.empty()) { - 
clang_path_.assign(llvm::sys::findProgramByName("clang++").get()); - if (clang_path_.empty()) { - throw std::runtime_error( - "Unable to find clang++ to compile user defined functions"); - } - } else { - clang_path_.assign(clang_path); - - if (!boost::filesystem::exists(clang_path)) { - throw std::runtime_error("Path provided for udf compiler " + clang_path + - " does not exist."); - } - - if (boost::filesystem::is_directory(clang_path)) { - throw std::runtime_error("Path provided for udf compiler " + clang_path + - " is not to the clang++ executable."); - } - } -} - -UdfCompiler::UdfCompiler(const std::string& file_name, - CudaMgr_Namespace::NvidiaDeviceArch target_arch, - const std::string& clang_path) - : udf_file_name_(file_name) - , udf_ast_file_name_(file_name) -#ifdef HAVE_CUDA - , target_arch_(target_arch) -#endif -{ - init(clang_path); -} - -UdfCompiler::UdfCompiler(const std::string& file_name, - CudaMgr_Namespace::NvidiaDeviceArch target_arch, - const std::string& clang_path, - const std::vector clang_options) - : udf_file_name_(file_name) - , udf_ast_file_name_(file_name) -#ifdef HAVE_CUDA - , target_arch_(target_arch) -#endif - , clang_options_(clang_options) { - init(clang_path); -} - -void UdfCompiler::readCpuCompiledModule() { - std::string cpu_ir_file(genCpuIrFilename(udf_file_name_.c_str())); - - VLOG(1) << "UDFCompiler cpu bc file = " << cpu_ir_file; - - read_udf_cpu_module(cpu_ir_file); -} - -void UdfCompiler::readGpuCompiledModule() { - std::string gpu_ir_file(genGpuIrFilename(udf_file_name_.c_str())); - - VLOG(1) << "UDFCompiler gpu bc file = " << gpu_ir_file; - - read_udf_gpu_module(gpu_ir_file); -} - -void UdfCompiler::readCompiledModules() { - readCpuCompiledModule(); - readGpuCompiledModule(); -} - -int UdfCompiler::compileForGpu() { - int gpu_compile_result = 1; - - gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), false); - - // If gpu compilation fails but cpu compilation has succeeded, try compiling - // for the cpu 
with the assumption the user does not have the CUDA toolkit - // installed - // - // Update: while this approach may work for some cases, it will not - // work in general as evidenced by the current UdfTest using arrays: - // generation of PTX will fail. Hence, read_udf_gpu_module is now - // rejecting LLVM IR with a non-nvptx target triple. However, we - // will still try cpu compilation but with the aim of detecting any - // code errors. - if (gpu_compile_result != 0) { - gpu_compile_result = compileToGpuByteCode(udf_file_name_.c_str(), true); + const auto result = tool.run(factory.get()); + if (result != 0) { + throw std::runtime_error( + "Unable to create AST file for udf compilation (error code " + + std::to_string(result) + ")"); } - - return gpu_compile_result; } -int UdfCompiler::compileUdf() { - LOG(INFO) << "UDFCompiler filename to compile: " << udf_file_name_; - if (!boost::filesystem::exists(udf_file_name_)) { - LOG(FATAL) << "User defined function file " << udf_file_name_ << " does not exist."; - return 1; - } - - auto ast_result = parseToAst(udf_file_name_.c_str()); - if (ast_result == 0) { - // Compile udf file to generate cpu and gpu bytecode files - - int cpu_compile_result = compileToCpuByteCode(udf_file_name_.c_str()); -#ifdef HAVE_CUDA - int gpu_compile_result = 1; -#endif - - if (cpu_compile_result == 0) { - readCpuCompiledModule(); -#ifdef HAVE_CUDA - gpu_compile_result = compileForGpu(); - if (gpu_compile_result == 0) { - readGpuCompiledModule(); - } else { - LOG(FATAL) << "Unable to compile UDF file for gpu"; - return 1; - } -#endif - } else { - LOG(FATAL) << "Unable to compile UDF file for cpu"; - return 1; - } - } else { - LOG(FATAL) << "Unable to create AST file for udf compilation"; - return 1; - } - - return 0; +std::string UdfCompiler::getAstFileName(const std::string& udf_file_name) { + auto ast_file_name = udf_file_name; + replace_extension(ast_file_name, "ast"); + return ast_file_name; } diff --git a/UdfCompiler/UdfCompiler.h 
b/UdfCompiler/UdfCompiler.h new file mode 100644 index 0000000000..3b2ae4a184 --- /dev/null +++ b/UdfCompiler/UdfCompiler.h @@ -0,0 +1,79 @@ +/* + * Copyright 2021 OmniSci, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef UDF_COMPILER_H +#define UDF_COMPILER_H + +#include +#include + +#include "CudaMgr/CudaMgr.h" + +/** + * Driver for calling clang/clang++ to compile C++ programs to LLVM IR for use as a UDF. + * Default initialization will find Clang using the clang library invocations. An optional + * clang override and additional arguments to the clang binary can be added. Once + * initialized the class holds the state for calling clang until destruction. + */ +class UdfCompiler { + public: + UdfCompiler(CudaMgr_Namespace::NvidiaDeviceArch target_arch, + const std::string& clang_path_override = ""); + UdfCompiler(CudaMgr_Namespace::NvidiaDeviceArch target_arch, + const std::string& clang_path_override, + const std::vector clang_options); + + /** + * Compile a C++ file to LLVM IR, and generate an AST file. All artifacts exist as + * files on disk. Up to three artifacts are generated: the AST file, the CPU LLVM IR, and + * the GPU LLVM IR (if CUDA is enabled and compilation succeeds). These LLVM IR files can be + * loaded by the Executor. The AST will be processed by Calcite. 
+ */ + std::pair compileUdf(const std::string& udf_file_name) const; + + static std::string getAstFileName(const std::string& udf_file_name); + + private: + /** + * Call the clang binary to generate the abstract syntax tree file for registration in Calcite. + */ + void generateAST(const std::string& file_name) const; + + static std::string genLLVMIRFilename(const std::string& udf_file_name); + static std::string genCUDAIRFilename(const std::string& udf_file_name); + + /** + * Formulate the Clang command line and call the clang binary to generate LLVM IR for + * the C/C++ file. + */ +#ifdef HAVE_CUDA + std::string compileToCudaIR(const std::string& udf_file_name) const; +#endif + std::string compileToLLVMIR(const std::string& udf_file_name) const; + + /** + * Formulate the full compile command and call the compiler. + */ + int compileFromCommandLine(const std::vector& command_line) const; + + std::string clang_path_; + std::vector clang_options_; +#ifdef HAVE_CUDA + CudaMgr_Namespace::NvidiaDeviceArch target_arch_; +#endif +}; + +#endif