diff --git a/.gitignore b/.gitignore
index 237fb5b33..6b785abe9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 # cortex-js
+.DS_Store
 cortex-js/cortex.db
 dist
 *.lock
@@ -18,4 +19,4 @@ cortex-js/package-lock.json
 .vscode
 cortex-js/command
 cortex-js/src/infrastructure/commanders/test/test_data
-**/vcpkg_installed
\ No newline at end of file
+**/vcpkg_installed
diff --git a/engine/commands/engine_init_cmd.cc b/engine/commands/engine_init_cmd.cc
index 5a1165e23..0f22bd57f 100644
--- a/engine/commands/engine_init_cmd.cc
+++ b/engine/commands/engine_init_cmd.cc
@@ -7,6 +7,7 @@
 #include "utils/archive_utils.h"   
 #include "utils/system_info_utils.h"
 // clang-format on
+#include "utils/cuda_toolkit_utils.h"
 #include "utils/engine_matcher_utils.h"
 
 namespace commands {
@@ -103,9 +104,10 @@ bool EngineInitCmd::Exec() const {
                                                  .path = path,
                                              }}};
 
-            DownloadService().AddDownloadTask(downloadTask, [](const std::string&
-                                                                   absolute_path,
-                                                               bool unused) {
+            DownloadService download_service;
+            download_service.AddDownloadTask(downloadTask, [](const std::string&
+                                                                  absolute_path,
+                                                              bool unused) {
               // try to unzip the downloaded file
               std::filesystem::path downloadedEnginePath{absolute_path};
               LOG_INFO << "Downloaded engine path: "
@@ -125,6 +127,58 @@ bool EngineInitCmd::Exec() const {
               }
               LOG_INFO << "Finished!";
             });
+            if (system_info.os == "mac" || engineName_ == "cortex.onnx") {
+              return false;
+            }
+            // download cuda toolkit
+            const std::string jan_host = "https://catalog.jan.ai";
+            const std::string cuda_toolkit_file_name = "cuda.tar.gz";
+            const std::string download_id = "cuda";
+
+            auto gpu_driver_version = system_info_utils::GetDriverVersion();
+
+            auto cuda_runtime_version =
+                cuda_toolkit_utils::GetCompatibleCudaToolkitVersion(
+                    gpu_driver_version, system_info.os, engineName_);
+
+            std::ostringstream cuda_toolkit_path;
+            cuda_toolkit_path << "dist/cuda-dependencies/" << 11.7 << "/"
+                              << system_info.os << "/"
+                              << cuda_toolkit_file_name;
+
+            LOG_DEBUG << "Cuda toolkit download url: " << jan_host
+                      << cuda_toolkit_path.str();
+
+            auto downloadCudaToolkitTask = DownloadTask{
+                .id = download_id,
+                .type = DownloadType::CudaToolkit,
+                .error = std::nullopt,
+                .items = {DownloadItem{
+                    .id = download_id,
+                    .host = jan_host,
+                    .fileName = cuda_toolkit_file_name,
+                    .type = DownloadType::CudaToolkit,
+                    .path = cuda_toolkit_path.str(),
+                }},
+            };
+
+            download_service.AddDownloadTask(
+                downloadCudaToolkitTask,
+                [](const std::string& absolute_path, bool unused) {
+                  LOG_DEBUG << "Downloaded cuda path: " << absolute_path;
+                  // try to unzip the downloaded file
+                  std::filesystem::path downloaded_path{absolute_path};
+
+                  archive_utils::ExtractArchive(
+                      absolute_path,
+                      downloaded_path.parent_path().parent_path().string());
+
+                  try {
+                    std::filesystem::remove(absolute_path);
+                  } catch (std::exception& e) {
+                    LOG_ERROR << "Error removing downloaded file: " << e.what();
+                  }
+                });
 
             return true;
           }
diff --git a/engine/main.cc b/engine/main.cc
index 193f84c60..4b966b3f6 100644
--- a/engine/main.cc
+++ b/engine/main.cc
@@ -22,53 +22,53 @@
 #error "Unsupported platform!"
 #endif
 
-void RunServer(){
+void RunServer() {  // Sets up file logging, then starts the drogon app.
   // Create logs/ folder and setup log to file
-      std::filesystem::create_directory(cortex_utils::logs_folder);
-      trantor::AsyncFileLogger asyncFileLogger;
-      asyncFileLogger.setFileName(cortex_utils::logs_base_name);
-      asyncFileLogger.startLogging();
-      trantor::Logger::setOutputFunction(
-          [&](const char* msg, const uint64_t len) {
-            asyncFileLogger.output(msg, len);
-          },
-          [&]() { asyncFileLogger.flush(); });
-      asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);
-      // Number of cortex.cpp threads
-      // if (argc > 1) {
-      //   thread_num = std::atoi(argv[1]);
-      // }
+  std::filesystem::create_directory(cortex_utils::logs_folder);
+  trantor::AsyncFileLogger asyncFileLogger;  // captured by reference below
+  asyncFileLogger.setFileName(cortex_utils::logs_base_name);
+  asyncFileLogger.startLogging();
+  trantor::Logger::setOutputFunction(
+      [&](const char* msg, const uint64_t len) {
+        asyncFileLogger.output(msg, len);
+      },
+      [&]() { asyncFileLogger.flush(); });
+  asyncFileLogger.setFileSizeLimit(cortex_utils::log_file_size_limit);
+  // Number of cortex.cpp threads
+  // if (argc > 1) {
+  //   thread_num = std::atoi(argv[1]);
+  // }
 
-      // // Check for host argument
-      // if (argc > 2) {
-      //   host = argv[2];
-      // }
+  // // Check for host argument
+  // if (argc > 2) {
+  //   host = argv[2];
+  // }
 
-      // // Check for port argument
-      // if (argc > 3) {
-      //   port = std::atoi(argv[3]);  // Convert string argument to int
-      // }
-      int thread_num = 1;
-      std::string host = "127.0.0.1";
-      int port = 3928;
+  // // Check for port argument
+  // if (argc > 3) {
+  //   port = std::atoi(argv[3]);  // Convert string argument to int
+  // }
+  int thread_num = 1;  // fixed defaults: the argv parsing above is disabled
+  std::string host = "127.0.0.1";
+  int port = 3928;
 
-      int logical_cores = std::thread::hardware_concurrency();
-      int drogon_thread_num = std::max(thread_num, logical_cores);
-      // cortex_utils::nitro_logo();
+  int logical_cores = std::thread::hardware_concurrency();
+  int drogon_thread_num = std::max(thread_num, logical_cores);  // >= thread_num
+  // cortex_utils::nitro_logo();
 #ifdef CORTEX_CPP_VERSION
-      LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
+  LOG_INFO << "cortex.cpp version: " << CORTEX_CPP_VERSION;
 #else
-      LOG_INFO << "cortex.cpp version: undefined";
+  LOG_INFO << "cortex.cpp version: undefined";
 #endif
 
-      LOG_INFO << "Server started, listening at: " << host << ":" << port;
-      LOG_INFO << "Please load your model";
-      drogon::app().addListener(host, port);
-      drogon::app().setThreadNum(drogon_thread_num);
-      LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
+  LOG_INFO << "Server started, listening at: " << host << ":" << port;
+  LOG_INFO << "Please load your model";
+  drogon::app().addListener(host, port);
+  drogon::app().setThreadNum(drogon_thread_num);
+  LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
 
-      drogon::app().run();
-      // return 0;
+  drogon::app().run();
+  // return 0;
 }
 
 void ForkProcess() {
@@ -80,19 +80,21 @@ void ForkProcess() {
   ZeroMemory(&si, sizeof(si));
   si.cb = sizeof(si);
   ZeroMemory(&pi, sizeof(pi));
-  std::string cmds = cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
+  std::string cmds =
+      cortex_utils::GetCurrentPath() + "/cortex-cpp.exe --start-server";
   // Create child process
   if (!CreateProcess(
           NULL,  // No module name (use command line)
-          const_cast<char*>(cmds.c_str()),  // Command line (replace with your actual executable)
-          NULL,                   // Process handle not inheritable
-          NULL,                   // Thread handle not inheritable
-          FALSE,                  // Set handle inheritance to FALSE
-          0,                      // No creation flags
-          NULL,                   // Use parent's environment block
-          NULL,                   // Use parent's starting directory
-          &si,                    // Pointer to STARTUPINFO structure
-          &pi))                   // Pointer to PROCESS_INFORMATION structure
+          const_cast<char*>(
+              cmds.c_str()),  // Command line (replace with your actual executable)
+          NULL,               // Process handle not inheritable
+          NULL,               // Thread handle not inheritable
+          FALSE,              // Set handle inheritance to FALSE
+          0,                  // No creation flags
+          NULL,               // Use parent's environment block
+          NULL,               // Use parent's starting directory
+          &si,                // Pointer to STARTUPINFO structure
+          &pi))               // Pointer to PROCESS_INFORMATION structure
   {
     std::cout << "Could not start server: " << GetLastError() << std::endl;
   } else {
diff --git a/engine/services/download_service.h b/engine/services/download_service.h
index 4efe653bf..a8f7f109b 100644
--- a/engine/services/download_service.h
+++ b/engine/services/download_service.h
@@ -4,7 +4,7 @@
 #include <optional>
 #include <vector>
 
-enum class DownloadType { Model, Engine, Miscellaneous };
+enum class DownloadType { Model, Engine, Miscellaneous, CudaToolkit };
 
 enum class DownloadStatus {
   Pending,
diff --git a/engine/utils/cuda_toolkit_utils.h b/engine/utils/cuda_toolkit_utils.h
new file mode 100644
index 000000000..748af1bd3
--- /dev/null
+++ b/engine/utils/cuda_toolkit_utils.h
@@ -0,0 +1,63 @@
+#pragma once
+
+#include <stdexcept>
+#include <string>
+#include "utils/semantic_version_utils.h"
+
+namespace cuda_toolkit_utils {
+// Selects the CUDA toolkit version to download for the installed GPU driver.
+//
+// The minimum driver versions below are based on:
+// https://docs.nvidia.com/deeplearning/cudnn/latest/reference/support-matrix.html#f1
+//
+// Parameters:
+//   driver_semantic_version  dotted driver version, e.g. "527.41" or
+//                            "525.60.13"
+//   os                       "windows" or "linux"
+//   engine                   engine name; "cortex.tensorrt-llm" is only served
+//                            CUDA 12.4, every other engine may fall back to
+//                            CUDA 11.7
+//
+// Returns "12.4" or "11.7".
+//
+// Throws std::runtime_error when `os` is neither "windows" nor "linux", or
+// when the driver is older than the minimum for the engine. Exceptions raised
+// by semantic_version_utils::CompareSemanticVersion are passed through.
+inline std::string GetCompatibleCudaToolkitVersion(
+    const std::string& driver_semantic_version, const std::string& os,
+    const std::string& engine) {
+  // Minimum driver version required by each toolkit on the current OS.
+  std::string min_driver_for_cuda_12;
+  std::string min_driver_for_cuda_11;
+  if (os == "windows") {
+    min_driver_for_cuda_12 = "527.41";
+    min_driver_for_cuda_11 = "452.39";
+  } else if (os == "linux") {
+    min_driver_for_cuda_12 = "525.60.13";
+    min_driver_for_cuda_11 = "450.80.02";
+  } else {
+    throw std::runtime_error("Unsupported OS");
+  }
+
+  const auto driver_is_at_least = [&](const std::string& minimum) {
+    return semantic_version_utils::CompareSemanticVersion(
+               driver_semantic_version, minimum) >= 0;
+  };
+
+  if (driver_is_at_least(min_driver_for_cuda_12)) {
+    return "12.4";
+  }
+
+  // cortex.tensorrt-llm needs at least CUDA 12.4, so it never falls back to
+  // the 11.7 toolkit.
+  const bool allow_cuda_11 = engine != "cortex.tensorrt-llm";
+  if (allow_cuda_11 && driver_is_at_least(min_driver_for_cuda_11)) {
+    return "11.7";
+  }
+
+  throw std::runtime_error(
+      "GPU driver version not supported. Minimum required driver version "
+      "is " +
+      (allow_cuda_11 ? min_driver_for_cuda_11 : min_driver_for_cuda_12));
+}
+}  // namespace cuda_toolkit_utils
\ No newline at end of file
diff --git a/engine/utils/file_manager_utils.h b/engine/utils/file_manager_utils.h
index 77c6b74a6..334116fe7 100644
--- a/engine/utils/file_manager_utils.h
+++ b/engine/utils/file_manager_utils.h
@@ -4,17 +4,63 @@
 #include <string>
 #include <string_view>
 
+#if defined(__APPLE__) && defined(__MACH__)
+#include <mach-o/dyld.h>
+#elif defined(__linux__)
+#include <unistd.h>
+#elif defined(_WIN32)
+#include <windows.h>
+#endif
+
 namespace file_manager_utils {
 
+// Returns the directory containing the running executable. Falls back to
+// std::filesystem::current_path() when the executable path cannot be resolved.
+inline std::filesystem::path GetExecutableFolderContainerPath() {
+  std::string executable_path;  // stays empty when the lookup fails
+#if defined(__APPLE__) && defined(__MACH__)
+  char buffer[1024];
+  uint32_t size = sizeof(buffer);
+  if (_NSGetExecutablePath(buffer, &size) == 0) {
+    executable_path = buffer;
+  }
+#elif defined(__linux__)
+  // TODO: haven't tested
+  char buffer[1024];
+  const ssize_t len = readlink("/proc/self/exe", buffer, sizeof(buffer) - 1);
+  if (len != -1) {
+    executable_path.assign(buffer, static_cast<size_t>(len));
+  }
+#elif defined(_WIN32)
+  // TODO: haven't tested
+  char buffer[MAX_PATH];
+  const DWORD len = GetModuleFileNameA(NULL, buffer, MAX_PATH);
+  if (len != 0 && len < MAX_PATH) {  // 0: failure, MAX_PATH: truncated
+    executable_path.assign(buffer, len);
+  }
+#else
+  LOG_ERROR << "Unsupported platform!";
+  return std::filesystem::current_path();
+#endif
+  if (executable_path.empty()) {
+    LOG_ERROR << "Failed to get executable path";
+    return std::filesystem::current_path();
+  }
+  LOG_INFO << "Executable path: " << executable_path;
+  return std::filesystem::path{executable_path}.parent_path();
+}
+
 inline std::filesystem::path GetContainerFolderPath(
     const std::string_view type) {
-  const auto current_path{std::filesystem::current_path()};
+  const auto current_path{GetExecutableFolderContainerPath()};
   auto container_folder_path = std::filesystem::path{};
 
   if (type == "Model") {
     container_folder_path = current_path / "models";
   } else if (type == "Engine") {
     container_folder_path = current_path / "engines";
+  } else if (type == "CudaToolkit") {
+    container_folder_path = current_path;
   } else {
     container_folder_path = current_path / "misc";
   }
@@ -35,6 +81,8 @@ inline std::string downloadTypeToString(DownloadType type) {
       return "Engine";
     case DownloadType::Miscellaneous:
       return "Misc";
+    case DownloadType::CudaToolkit:
+      return "CudaToolkit";
     default:
       return "UNKNOWN";
   }
diff --git a/engine/utils/semantic_version_utils.h b/engine/utils/semantic_version_utils.h
new file mode 100644
index 000000000..ea9244dc1
--- /dev/null
+++ b/engine/utils/semantic_version_utils.h
@@ -0,0 +1,34 @@
+#pragma once
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace semantic_version_utils {
+// Splits a dotted version ("525.60.13") into its integer components, padding
+// with zeros up to three entries ("527.41" -> {527, 41, 0}). std::stoi throws
+// std::invalid_argument / std::out_of_range for a component that is no int.
+inline std::vector<int> SplitVersion(const std::string& version) {
+  std::vector<int> parts;
+  std::stringstream ss(version);
+  for (std::string part; std::getline(ss, part, '.');) {
+    parts.push_back(std::stoi(part));
+  }
+  if (parts.size() < 3)
+    parts.resize(3, 0);
+  return parts;
+}
+
+// Compares the first three components of two dotted versions. Returns -1, 0
+// or 1 when version1 is lower than, equal to or higher than version2.
+inline int CompareSemanticVersion(const std::string& version1,
+                                  const std::string& version2) {
+  const std::vector<int> v1 = SplitVersion(version1);
+  const std::vector<int> v2 = SplitVersion(version2);
+  for (size_t i = 0; i < 3; ++i) {
+    if (v1[i] != v2[i])
+      return v1[i] < v2[i] ? -1 : 1;
+  }
+  return 0;
+}
+}  // namespace semantic_version_utils
\ No newline at end of file
diff --git a/engine/utils/system_info_utils.h b/engine/utils/system_info_utils.h
index d13935295..16a9570b7 100644
--- a/engine/utils/system_info_utils.h
+++ b/engine/utils/system_info_utils.h
@@ -12,6 +12,7 @@ namespace system_info_utils {
 
 constexpr static auto kUnsupported{"Unsupported"};
 constexpr static auto kCudaVersionRegex{R"(CUDA Version:\s*([\d\.]+))"};
+constexpr static auto kDriverVersionRegex{R"(Driver Version:\s*(\d+(?:\.\d+)+))"};  // all dotted parts, e.g. 525.60.13
 constexpr static auto kGpuQueryCommand{
     "nvidia-smi --query-gpu=index,memory.total,name,compute_cap "
     "--format=csv,noheader,nounits"};
@@ -177,6 +178,31 @@ inline bool IsNvidiaSmiAvailable() {
 #endif
 }
 
+// Extracts the NVIDIA driver version from `nvidia-smi` output using
+// kDriverVersionRegex; every failure path logs and returns an empty string.
+inline std::string GetDriverVersion() {
+  if (!IsNvidiaSmiAvailable()) {
+    LOG_INFO << "nvidia-smi is not available!";
+    return "";
+  }
+  try {
+    CommandExecutor smi_cmd("nvidia-smi");
+    auto smi_output = smi_cmd.execute();
+    std::smatch found;
+    const std::regex pattern(kDriverVersionRegex);
+    if (!std::regex_search(smi_output, found, pattern)) {
+      LOG_ERROR << "Gpu Driver not found!";
+      return "";
+    }
+    auto version = found[1].str();
+    LOG_INFO << "Gpu Driver Version: " << version;
+    return version;
+  } catch (const std::exception& err) {
+    LOG_ERROR << "Error: " << err.what();
+    return "";
+  }
+}
+
 inline std::string GetCudaVersion() {
   if (!IsNvidiaSmiAvailable()) {
     LOG_INFO << "nvidia-smi is not available!";
@@ -207,6 +233,9 @@ struct GpuInfo {
   std::string vram;
   std::string name;
   std::string arch;
+  // nvidia driver version. Haven't checked for AMD GPU.
+  std::optional<std::string> driver_version;
+  std::optional<std::string> cuda_driver_version;
   std::optional<std::string> compute_cap;
 };
 
@@ -271,6 +300,10 @@ inline std::vector<GpuInfo> GetGpuInfoList() {
   std::vector<GpuInfo> gpuInfoList;
 
   try {
+    // TODO: improve by parsing both in one command execution
+    auto driver_version = GetDriverVersion();
+    auto cuda_version = GetCudaVersion();
+
     CommandExecutor cmd(kGpuQueryCommand);
     auto output = cmd.execute();
 
@@ -285,6 +318,8 @@ inline std::vector<GpuInfo> GetGpuInfoList() {
           match[2].str(),              // vram
           match[3].str(),              // name
           GetGpuArch(match[3].str()),  // arch
+          driver_version,              // driver_version
+          cuda_version,                // cuda_driver_version
           match[4].str()               // compute_cap
       };
       gpuInfoList.push_back(gpuInfo);