From 71f06d15f6d644eaf5f6917bf5b0a7998f336698 Mon Sep 17 00:00:00 2001 From: James Date: Fri, 20 Dec 2024 01:39:23 +0700 Subject: [PATCH] feat: rendering chat_template --- engine/cli/commands/chat_completion_cmd.cc | 5 +- engine/common/model_metadata.h | 29 + engine/common/tokenizer.h | 72 + engine/controllers/engines.cc | 34 + engine/controllers/engines.h | 5 + engine/controllers/files.cc | 17 +- engine/controllers/server.cc | 9 +- engine/main.cc | 1 + engine/services/engine_service.h | 20 +- engine/services/inference_service.cc | 52 +- engine/services/inference_service.h | 9 +- engine/services/model_service.cc | 1 - engine/services/model_service.h | 5 +- engine/test/components/test_gguf_parser.cc | 245 +- engine/utils/chat-template.hpp | 137 + engine/utils/cortex_utils.h | 34 +- engine/utils/gguf_metadata_reader.h | 420 +++ engine/utils/jinja_utils.h | 25 + engine/utils/minja.hpp | 3428 ++++++++++++++++++++ 19 files changed, 4384 insertions(+), 164 deletions(-) create mode 100644 engine/common/model_metadata.h create mode 100644 engine/common/tokenizer.h create mode 100644 engine/utils/chat-template.hpp create mode 100644 engine/utils/gguf_metadata_reader.h create mode 100644 engine/utils/jinja_utils.h create mode 100644 engine/utils/minja.hpp diff --git a/engine/cli/commands/chat_completion_cmd.cc b/engine/cli/commands/chat_completion_cmd.cc index 0067b1c08..6dcaf4b70 100644 --- a/engine/cli/commands/chat_completion_cmd.cc +++ b/engine/cli/commands/chat_completion_cmd.cc @@ -151,9 +151,8 @@ void ChatCompletionCmd::Exec(const std::string& host, int port, json_data["model"] = model_handle; json_data["stream"] = true; - std::string json_payload = json_data.toStyledString(); - - curl_easy_setopt(curl, CURLOPT_POSTFIELDS, json_payload.c_str()); + curl_easy_setopt(curl, CURLOPT_COPYPOSTFIELDS, + json_data.toStyledString().c_str()); /* COPYPOSTFIELDS makes libcurl copy the payload during this call; plain POSTFIELDS only stores the pointer, which would dangle once the temporary string dies at the end of this statement */ std::string ai_chat; StreamingCallback callback; diff --git a/engine/common/model_metadata.h b/engine/common/model_metadata.h new file mode
100644 index 000000000..739a0af3d --- /dev/null +++ b/engine/common/model_metadata.h @@ -0,0 +1,29 @@ +#pragma once + +#include "common/tokenizer.h" +#include + +struct ModelMetadata { + uint32_t version; + uint64_t tensor_count; + uint64_t metadata_kv_count; + std::unique_ptr tokenizer; + + std::string ToString() const { + std::ostringstream ss; + ss << "ModelMetadata {\n" + << "version: " << version << "\n" + << "tensor_count: " << tensor_count << "\n" + << "metadata_kv_count: " << metadata_kv_count << "\n" + << "tokenizer: "; + + if (tokenizer) { + ss << "\n" << tokenizer->ToString(); + } else { + ss << "null"; + } + + ss << "\n}"; + return ss.str(); + } +}; diff --git a/engine/common/tokenizer.h b/engine/common/tokenizer.h new file mode 100644 index 000000000..33367f06b --- /dev/null +++ b/engine/common/tokenizer.h @@ -0,0 +1,72 @@ +#pragma once + +#include +#include + +struct Tokenizer { + std::string eos_token = ""; + bool add_eos_token = true; + + std::string bos_token = ""; + bool add_bos_token = true; + + std::string unknown_token = ""; + std::string padding_token = ""; + + std::string chat_template = ""; + + bool add_generation_prompt = true; + + // Helper function for common fields + std::string BaseToString() const { + std::ostringstream ss; + ss << "eos_token: \"" << eos_token << "\"\n" + << "add_eos_token: " << (add_eos_token ? "true" : "false") << "\n" + << "bos_token: \"" << bos_token << "\"\n" + << "add_bos_token: " << (add_bos_token ? "true" : "false") << "\n" + << "unknown_token: \"" << unknown_token << "\"\n" + << "padding_token: \"" << padding_token << "\"\n" + << "chat_template: \"" << chat_template << "\"\n" + << "add_generation_prompt: " + << (add_generation_prompt ? 
"true" : "false"); /* booleans are printed unquoted, so no closing quote here; callers append the trailing newline */ + return ss.str(); + } + + virtual ~Tokenizer() = default; + + virtual std::string ToString() = 0; +}; + +struct GgufTokenizer : public Tokenizer { + std::string pre = ""; + + ~GgufTokenizer() override = default; + + std::string ToString() override { + std::ostringstream ss; + ss << "GgufTokenizer {\n"; + // Add base class members + ss << BaseToString() << "\n"; + // Add derived class members + ss << "pre: \"" << pre << "\"\n"; + ss << "}"; + return ss.str(); + } +}; + +struct SafeTensorTokenizer : public Tokenizer { + bool add_prefix_space = true; + + ~SafeTensorTokenizer() override = default; + + std::string ToString() override { + std::ostringstream ss; + ss << "SafeTensorTokenizer {\n"; + // Add base class members + ss << BaseToString() << "\n"; + // Add derived class members + ss << "add_prefix_space: " << (add_prefix_space ? "true" : "false") << "\n"; + ss << "}"; + return ss.str(); + } +}; diff --git a/engine/controllers/engines.cc b/engine/controllers/engines.cc index a92d6805f..c5e816d66 100644 --- a/engine/controllers/engines.cc +++ b/engine/controllers/engines.cc @@ -3,6 +3,7 @@ #include "utils/archive_utils.h" #include "utils/cortex_utils.h" #include "utils/engine_constants.h" +#include "utils/jinja_utils.h" #include "utils/logging_utils.h" #include "utils/string_utils.h" @@ -20,6 +21,39 @@ std::string NormalizeEngine(const std::string& engine) { }; } // namespace +void Engines::TestJinja( + const HttpRequestPtr& req, + std::function&& callback) { + auto body = req->getJsonObject(); + if (body == nullptr) { + Json::Value ret; + ret["message"] = "Body can't be empty"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + auto jinja = body->get("jinja", "").asString(); + auto data = body->get("data", {}); + auto bos_token = data.get("bos_token", "").asString(); + auto eos_token = data.get("eos_token", "").asString(); + + auto rendered_data = 
jinja::RenderTemplate(jinja, data, bos_token, eos_token); + + if (rendered_data.has_error()) { + Json::Value ret; + ret["message"] = rendered_data.error(); + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } + + auto resp = cortex_utils::CreateTextPlainResponse(rendered_data.value()); + callback(resp); +} + void Engines::ListEngine( const HttpRequestPtr& req, std::function&& callback) const { diff --git a/engine/controllers/engines.h b/engine/controllers/engines.h index b0a92b6c3..bfb49888c 100644 --- a/engine/controllers/engines.h +++ b/engine/controllers/engines.h @@ -12,6 +12,8 @@ class Engines : public drogon::HttpController { public: METHOD_LIST_BEGIN + ADD_METHOD_TO(Engines::TestJinja, "/v1/jinja", Options, Post); + // install engine METHOD_ADD(Engines::InstallEngine, "/{1}/install", Options, Post); ADD_METHOD_TO(Engines::InstallEngine, "/v1/engines/{1}/install", Options, @@ -110,6 +112,9 @@ class Engines : public drogon::HttpController { std::function&& callback, const std::string& engine) const; + void TestJinja(const HttpRequestPtr& req, + std::function&& callback); + void LoadEngine(const HttpRequestPtr& req, std::function&& callback, const std::string& engine); diff --git a/engine/controllers/files.cc b/engine/controllers/files.cc index e0cd502f4..ed37967b2 100644 --- a/engine/controllers/files.cc +++ b/engine/controllers/files.cc @@ -216,10 +216,8 @@ void Files::RetrieveFileContent( return; } - auto [buffer, size] = std::move(res.value()); - auto resp = HttpResponse::newHttpResponse(); - resp->setBody(std::string(buffer.get(), size)); - resp->setContentTypeCode(CT_APPLICATION_OCTET_STREAM); + auto resp = + cortex_utils::CreateCortexContentResponse(std::move(res.value())); callback(resp); } else { if (!msg_res->rel_path.has_value()) { @@ -243,10 +241,8 @@ void Files::RetrieveFileContent( return; } - auto [buffer, size] = std::move(content_res.value()); - auto resp = 
HttpResponse::newHttpResponse(); - resp->setBody(std::string(buffer.get(), size)); - resp->setContentTypeCode(CT_APPLICATION_OCTET_STREAM); + auto resp = cortex_utils::CreateCortexContentResponse( + std::move(content_res.value())); callback(resp); } } @@ -261,9 +257,6 @@ void Files::RetrieveFileContent( return; } - auto [buffer, size] = std::move(res.value()); - auto resp = HttpResponse::newHttpResponse(); - resp->setBody(std::string(buffer.get(), size)); - resp->setContentTypeCode(CT_APPLICATION_OCTET_STREAM); + auto resp = cortex_utils::CreateCortexContentResponse(std::move(res.value())); callback(resp); } diff --git a/engine/controllers/server.cc b/engine/controllers/server.cc index 4c6bcaf82..19842bcdb 100644 --- a/engine/controllers/server.cc +++ b/engine/controllers/server.cc @@ -3,7 +3,6 @@ #include "trantor/utils/Logger.h" #include "utils/cortex_utils.h" #include "utils/function_calling/common.h" -#include "utils/http_util.h" using namespace inferences; @@ -27,6 +26,14 @@ void server::ChatCompletion( std::function&& callback) { LOG_DEBUG << "Start chat completion"; auto json_body = req->getJsonObject(); + if (json_body == nullptr) { + Json::Value ret; + ret["message"] = "Body can't be empty"; + auto resp = cortex_utils::CreateCortexHttpJsonResponse(ret); + resp->setStatusCode(k400BadRequest); + callback(resp); + return; + } bool is_stream = (*json_body).get("stream", false).asBool(); auto model_id = (*json_body).get("model", "invalid_model").asString(); auto engine_type = [this, &json_body]() -> std::string { diff --git a/engine/main.cc b/engine/main.cc index 5cc6c740e..ddf1eefd8 100644 --- a/engine/main.cc +++ b/engine/main.cc @@ -159,6 +159,7 @@ void RunServer(std::optional host, std::optional port, auto model_src_svc = std::make_shared(); auto model_service = std::make_shared( download_service, inference_svc, engine_service); + inference_svc->SetModelService(model_service); auto file_watcher_srv = std::make_shared( model_dir_path.string(), 
model_service); diff --git a/engine/services/engine_service.h b/engine/services/engine_service.h index 527123cb5..84a1401fc 100644 --- a/engine/services/engine_service.h +++ b/engine/services/engine_service.h @@ -4,7 +4,6 @@ #include #include #include -#include #include #include @@ -17,7 +16,6 @@ #include "utils/cpuid/cpu_info.h" #include "utils/dylib.h" #include "utils/dylib_path_manager.h" -#include "utils/engine_constants.h" #include "utils/github_release_utils.h" #include "utils/result.hpp" #include "utils/system_info_utils.h" @@ -48,10 +46,6 @@ class EngineService : public EngineServiceI { struct EngineInfo { std::unique_ptr dl; EngineV engine; -#if defined(_WIN32) - DLL_DIRECTORY_COOKIE cookie; - DLL_DIRECTORY_COOKIE cuda_cookie; -#endif }; std::mutex engines_mutex_; @@ -105,21 +99,23 @@ class EngineService : public EngineServiceI { cpp::result SetDefaultEngineVariant( const std::string& engine, const std::string& version, - const std::string& variant); + const std::string& variant) override; cpp::result GetDefaultEngineVariant( - const std::string& engine); + const std::string& engine) override; cpp::result, std::string> - GetInstalledEngineVariants(const std::string& engine) const; + GetInstalledEngineVariants(const std::string& engine) const override; cpp::result GetLoadedEngine( const std::string& engine_name); std::vector GetLoadedEngines(); - cpp::result LoadEngine(const std::string& engine_name); - cpp::result UnloadEngine(const std::string& engine_name); + cpp::result LoadEngine( + const std::string& engine_name) override; + cpp::result UnloadEngine( + const std::string& engine_name) override; cpp::result GetLatestEngineVersion(const std::string& engine) const; @@ -137,7 +133,7 @@ class EngineService : public EngineServiceI { cpp::result GetEngineByNameAndVariant( const std::string& engine_name, - const std::optional variant = std::nullopt); + const std::optional variant = std::nullopt) override; cpp::result UpsertEngine( const std::string& 
engine_name, const std::string& type, diff --git a/engine/services/inference_service.cc b/engine/services/inference_service.cc index 91cb277dc..441aafe92 100644 --- a/engine/services/inference_service.cc +++ b/engine/services/inference_service.cc @@ -1,7 +1,10 @@ #include "inference_service.h" #include #include "utils/engine_constants.h" +#include "utils/file_manager_utils.h" #include "utils/function_calling/common.h" +#include "utils/gguf_metadata_reader.h" +#include "utils/jinja_utils.h" namespace services { cpp::result InferenceService::HandleChatCompletion( @@ -24,6 +27,53 @@ cpp::result InferenceService::HandleChatCompletion( return cpp::fail(std::make_pair(stt, res)); } + { + // TODO: we can cache this one so we don't have to read the file every inference + auto model_id = json_body->get("model", "").asString(); + if (!model_id.empty()) { + if (auto model_service = model_service_.lock()) { + auto model_config = model_service->GetDownloadedModel(model_id); + if (model_config.has_value() && !model_config->files.empty()) { + auto file = model_config->files[0]; + + auto model_metadata_res = cortex_utils::ReadGgufMetadata( + file_manager_utils::ToAbsoluteCortexDataPath( + std::filesystem::path(file))); + if (model_metadata_res.has_value()) { + auto metadata = model_metadata_res.value().get(); + if (metadata->tokenizer && !metadata->tokenizer->chat_template.empty()) { /* tokenizer is a nullable pointer (ModelMetadata::ToString handles the null case), so guard before dereferencing; without a tokenizer the request is forwarded without a rendered prompt */ + auto messages = (*json_body)["messages"]; + Json::Value messages_jsoncpp(Json::arrayValue); + for (const auto& message : messages) { /* bind by reference: append() copies the value anyway */ + messages_jsoncpp.append(message); + } + + Json::Value tools(Json::arrayValue); + Json::Value template_data_json; + template_data_json["messages"] = messages_jsoncpp; + // template_data_json["tools"] = tools; + + auto prompt_result = jinja::RenderTemplate( + metadata->tokenizer->chat_template, template_data_json, + metadata->tokenizer->bos_token, + metadata->tokenizer->eos_token, + metadata->tokenizer->add_generation_prompt); + if (prompt_result.has_value()) { + (*json_body)["prompt"] = 
prompt_result.value(); + } else { + CTL_ERR("Failed to render prompt: " + prompt_result.error()); + } + } + } else { + CTL_ERR("Failed to read metadata: " + model_metadata_res.error()); + } + } + } + } + } + + CTL_INF("Prompt is: " + json_body->get("prompt", "").asString()); + auto cb = [q, tool_choice](Json::Value status, Json::Value res) { if (!tool_choice.isNull()) { res["tool_choice"] = tool_choice; @@ -297,4 +347,4 @@ bool InferenceService::HasFieldInReq(std::shared_ptr json_body, } return true; } -} // namespace services \ No newline at end of file +} // namespace services diff --git a/engine/services/inference_service.h b/engine/services/inference_service.h index b417fa14a..54bc9dc29 100644 --- a/engine/services/inference_service.h +++ b/engine/services/inference_service.h @@ -4,9 +4,11 @@ #include #include #include "services/engine_service.h" +#include "services/model_service.h" #include "utils/result.hpp" -#include "extensions/remote-engine/remote_engine.h" + namespace services { + // Status and result using InferResult = std::pair; @@ -58,7 +60,12 @@ class InferenceService { bool HasFieldInReq(std::shared_ptr json_body, const std::string& field); + void SetModelService(std::shared_ptr model_service) { + model_service_ = model_service; + } + private: std::shared_ptr engine_service_; + std::weak_ptr model_service_; }; } // namespace services diff --git a/engine/services/model_service.cc b/engine/services/model_service.cc index ce83152c4..8ee396898 100644 --- a/engine/services/model_service.cc +++ b/engine/services/model_service.cc @@ -10,7 +10,6 @@ #include "database/models.h" #include "hardware_service.h" #include "utils/cli_selection_utils.h" -#include "utils/cortex_utils.h" #include "utils/engine_constants.h" #include "utils/file_manager_utils.h" #include "utils/huggingface_utils.h" diff --git a/engine/services/model_service.h b/engine/services/model_service.h index e2638fd1f..48e5e327e 100644 --- a/engine/services/model_service.h +++ 
b/engine/services/model_service.h @@ -6,9 +6,12 @@ #include "common/engine_servicei.h" #include "config/model_config.h" #include "services/download_service.h" -#include "services/inference_service.h" #include "utils/hardware/gguf/gguf_file_estimate.h" +namespace services { +class InferenceService; +} + struct ModelPullInfo { std::string id; std::string default_branch; diff --git a/engine/test/components/test_gguf_parser.cc b/engine/test/components/test_gguf_parser.cc index 6c5c61486..df860a1d7 100644 --- a/engine/test/components/test_gguf_parser.cc +++ b/engine/test/components/test_gguf_parser.cc @@ -1,12 +1,10 @@ -#include "gtest/gtest.h" -#include "config/gguf_parser.h" -#include "config/yaml_config.h" -#include #include -#include -#include +#include #include -#include +#include +#include "config/gguf_parser.h" +#include "config/yaml_config.h" +#include "gtest/gtest.h" #ifdef _WIN32 #include @@ -15,144 +13,145 @@ #endif class GGUFParserTest : public ::testing::Test { -protected: - void SetUp() override { - gguf_handler = std::make_unique(); - yaml_handler = std::make_unique< config::YamlHandler>(); - } + protected: + void SetUp() override { + gguf_handler = std::make_unique(); + yaml_handler = std::make_unique(); + } - void TearDown() override { - } + void TearDown() override {} - std::unique_ptr gguf_handler; - std::unique_ptr yaml_handler; - - std::string getTempFilePath(const std::string& prefix, const std::string& extension) { - #ifdef _WIN32 - char temp_path[MAX_PATH]; - char file_name[MAX_PATH]; - GetTempPathA(MAX_PATH, temp_path); - GetTempFileNameA(temp_path, prefix.c_str(), 0, file_name); - std::string path(file_name); - DeleteFileA(file_name); // Delete the file created by GetTempFileNameA - return path + extension; - #else - std::string path = "/tmp/" + prefix + "XXXXXX" + extension; - char* temp = strdup(path.c_str()); - int fd = mkstemps(temp, extension.length()); - if (fd == -1) { - free(temp); - throw std::runtime_error("Failed to create temporary 
file"); - } - close(fd); - std::string result(temp); - free(temp); - return result; - #endif + std::unique_ptr gguf_handler; + std::unique_ptr yaml_handler; + + std::string getTempFilePath(const std::string& prefix, + const std::string& extension) { +#ifdef _WIN32 + char temp_path[MAX_PATH]; + char file_name[MAX_PATH]; + GetTempPathA(MAX_PATH, temp_path); + GetTempFileNameA(temp_path, prefix.c_str(), 0, file_name); + std::string path(file_name); + DeleteFileA(file_name); // Delete the file created by GetTempFileNameA + return path + extension; +#else + std::string path = "/tmp/" + prefix + "XXXXXX" + extension; + char* temp = strdup(path.c_str()); + int fd = mkstemps(temp, extension.length()); + if (fd == -1) { + free(temp); + throw std::runtime_error("Failed to create temporary file"); } + close(fd); + std::string result(temp); + free(temp); + return result; +#endif + } - std::string createMockGGUFFile() { - std::string gguf_path = getTempFilePath("mock_tinyllama-model", ".gguf"); - std::ofstream file(gguf_path, std::ios::binary); + std::string createMockGGUFFile() { + std::string gguf_path = getTempFilePath("mock_tinyllama-model", ".gguf"); + std::ofstream file(gguf_path, std::ios::binary); - if (!file.is_open()) { - throw std::runtime_error("Failed to create mock GGUF file"); - } + if (!file.is_open()) { + throw std::runtime_error("Failed to create mock GGUF file"); + } - try { - // GGUF magic number - uint32_t magic = 0x46554747; - file.write(reinterpret_cast(&magic), sizeof(magic)); - - // Version - uint32_t version = 2; - file.write(reinterpret_cast(&version), sizeof(version)); - - // Tensor count (not important for this test) - uint64_t tensor_count = 0; - file.write(reinterpret_cast(&tensor_count), sizeof(tensor_count)); - - // Metadata key-value count - uint64_t kv_count = 2; - file.write(reinterpret_cast(&kv_count), sizeof(kv_count)); - - // Helper function to write a string - auto writeString = [&file](const std::string& str) { - uint64_t length = 
str.length(); - file.write(reinterpret_cast(&length), sizeof(length)); - file.write(str.c_str(), length); - }; - - // Helper function to write a key-value pair - auto writeKV = [&](const std::string& key, uint32_t type, const auto& value) { - writeString(key); - file.write(reinterpret_cast(&type), sizeof(type)); - if constexpr (std::is_same_v) { - writeString(value); - } else { - file.write(reinterpret_cast(&value), sizeof(value)); - } - }; - - // Write metadata - writeKV("general.name", 8, std::string("tinyllama 1B")); - writeKV("llama.context_length", 4, uint32_t(4096)); - - file.close(); - - } catch (const std::exception& e) { - file.close(); - std::remove(gguf_path.c_str()); - throw std::runtime_error(std::string("Failed to write mock GGUF file: ") + e.what()); + try { + // GGUF magic number + uint32_t magic = 0x46554747; + file.write(reinterpret_cast(&magic), sizeof(magic)); + + // Version + uint32_t version = 2; + file.write(reinterpret_cast(&version), sizeof(version)); + + // Tensor count (not important for this test) + uint64_t tensor_count = 0; + file.write(reinterpret_cast(&tensor_count), sizeof(tensor_count)); + + // Metadata key-value count + uint64_t kv_count = 2; + file.write(reinterpret_cast(&kv_count), sizeof(kv_count)); + + // Helper function to write a string + auto writeString = [&file](const std::string& str) { + uint64_t length = str.length(); + file.write(reinterpret_cast(&length), sizeof(length)); + file.write(str.c_str(), length); + }; + + // Helper function to write a key-value pair + auto writeKV = [&](const std::string& key, uint32_t type, + const auto& value) { + writeString(key); + file.write(reinterpret_cast(&type), sizeof(type)); + if constexpr (std::is_same_v) { + writeString(value); + } else { + file.write(reinterpret_cast(&value), sizeof(value)); } + }; - return gguf_path; + // Write metadata + writeKV("general.name", 8, std::string("tinyllama 1B")); + writeKV("llama.context_length", 4, uint32_t(4096)); + + file.close(); + + } 
catch (const std::exception& e) { + file.close(); + std::remove(gguf_path.c_str()); + throw std::runtime_error(std::string("Failed to write mock GGUF file: ") + + e.what()); } + + return gguf_path; + } }; TEST_F(GGUFParserTest, ParseMockTinyLlamaModel) { - std::string gguf_path; - std::string yaml_path; - try { - // Create a mock GGUF file - gguf_path = createMockGGUFFile(); + std::string gguf_path; + std::string yaml_path; + try { + // Create a mock GGUF file + gguf_path = createMockGGUFFile(); - // Parse the GGUF file - gguf_handler->Parse(gguf_path); + // Parse the GGUF file + gguf_handler->Parse(gguf_path); - const config::ModelConfig& gguf_config = gguf_handler->GetModelConfig(); + const config::ModelConfig& gguf_config = gguf_handler->GetModelConfig(); - // Load the expected configuration from YAML - std::string yaml_content = R"( + // Load the expected configuration from YAML + std::string yaml_content = R"( name: tinyllama-1B ctx_len: 4096 )"; - yaml_path = getTempFilePath("expected_config", ".yaml"); - std::ofstream yaml_file(yaml_path); - yaml_file << yaml_content; - yaml_file.close(); + yaml_path = getTempFilePath("expected_config", ".yaml"); + std::ofstream yaml_file(yaml_path); + yaml_file << yaml_content; + yaml_file.close(); - yaml_handler->ModelConfigFromFile(yaml_path); + yaml_handler->ModelConfigFromFile(yaml_path); - const config::ModelConfig& yaml_config = yaml_handler->GetModelConfig(); + const config::ModelConfig& yaml_config = yaml_handler->GetModelConfig(); - // Compare GGUF parsed config with YAML config - EXPECT_EQ(gguf_config.name, yaml_config.name); - EXPECT_EQ(gguf_config.ctx_len, yaml_config.ctx_len); + // Compare GGUF parsed config with YAML config + EXPECT_EQ(gguf_config.name, yaml_config.name); + EXPECT_EQ(gguf_config.ctx_len, yaml_config.ctx_len); - // Clean up - std::remove(gguf_path.c_str()); - std::remove(yaml_path.c_str()); + // Clean up + std::remove(gguf_path.c_str()); + std::remove(yaml_path.c_str()); + } catch (const 
std::exception& e) { + // If an exception was thrown, make sure to clean up the files + if (!gguf_path.empty()) { + std::remove(gguf_path.c_str()); } - catch (const std::exception& e) { - // If an exception was thrown, make sure to clean up the files - if (!gguf_path.empty()) { - std::remove(gguf_path.c_str()); - } - if (!yaml_path.empty()) { - std::remove(yaml_path.c_str()); - } - FAIL() << "Exception thrown: " << e.what(); + if (!yaml_path.empty()) { + std::remove(yaml_path.c_str()); } -} \ No newline at end of file + FAIL() << "Exception thrown: " << e.what(); + } +} diff --git a/engine/utils/chat-template.hpp b/engine/utils/chat-template.hpp new file mode 100644 index 000000000..309dd8e97 --- /dev/null +++ b/engine/utils/chat-template.hpp @@ -0,0 +1,137 @@ +/* + Copyright 2024 Google LLC + + Use of this source code is governed by an MIT-style + license that can be found in the LICENSE file or at + https://opensource.org/licenses/MIT. +*/ +// SPDX-License-Identifier: MIT +#pragma once + +#include +#include +#include +#include "utils/minja.hpp" + +using json = nlohmann::ordered_json; + +namespace minja { + +class chat_template { + public: + private: + bool _supports_tools = true; + // Meta-Llama-3.1-8B-Instruct's template expects arguments to be an object. + // Most other templates (and OpenAI's API) expect the arguments object to be stringified. 
+ bool _requires_object_arguments = false; + bool _supports_system_role = true; + std::string _source; + std::string _bos_token; + std::string _eos_token; + std::shared_ptr _template_root; + + public: + chat_template(const std::string& source, const std::string& bos_token, + const std::string& eos_token) + : _source(source), _bos_token(bos_token), _eos_token(eos_token) { + _supports_tools = source.find("tools") != std::string::npos; + _requires_object_arguments = + source.find("tool_call.arguments | items") != std::string::npos || + source.find("tool_call.arguments | tojson") != std::string::npos; + _supports_system_role = + source.find("System role not supported") == std::string::npos; + + _template_root = + minja::Parser::parse(_source, { + /* .trim_blocks = */ true, + /* .lstrip_blocks = */ true, + /* .keep_trailing_newline = */ false, + }); + } + + const std::string& source() const { return _source; } + bool supports_tools() const { return _supports_tools; } + + std::string apply(const nlohmann::ordered_json& messages, + const nlohmann::ordered_json& tools, + bool add_generation_prompt, + const nlohmann::ordered_json& extra_context = + nlohmann::ordered_json()) const { + auto actual_messages = messages; + + // First, "fix" messages so they have a chance to be rendered correctly by the template + + if (_requires_object_arguments || !_supports_system_role) { + std::string pending_system; + auto flush_sys = [&]() { + if (!pending_system.empty()) { + actual_messages.push_back({ + {"role", "user"}, + {"content", pending_system}, + }); + pending_system.clear(); + } + }; + for (auto& message : actual_messages) { + if (!message.contains("role") || !message.contains("content")) { + throw std::runtime_error( + "message must have 'role' and 'content' fields: " + + message.dump()); + } + std::string role = message.at("role"); + + if (!message["content"].is_null() && !_supports_system_role) { + std::string content = message.at("content"); + if (role == "system") { + if 
(!pending_system.empty()) + pending_system += "\n"; + pending_system += content; + continue; + } else { + if (role == "user") { + if (!pending_system.empty()) { + message["content"] = + pending_system + (content.empty() ? "" : "\n" + content); + pending_system.clear(); + } + } else { + flush_sys(); + } + } + } + if (_requires_object_arguments && message.contains("tool_calls")) { + for (auto& tool_call : message.at("tool_calls")) { + if (tool_call["type"] == "function") { + auto& function = tool_call.at("function"); + std::string arguments = function.at("arguments"); + function["arguments"] = json::parse(arguments); + } + } + } + } + flush_sys(); + } + + auto context = minja::Context::make(json({ + {"messages", actual_messages}, + {"add_generation_prompt", add_generation_prompt}, + {"bos_token", _bos_token}, + {"eos_token", _eos_token}, + })); + + if (!tools.is_null()) { + auto tools_val = minja::Value(tools); + context->set("tools", tools_val); + } + if (!extra_context.is_null()) { + for (auto& kv : extra_context.items()) { + minja::Value val(kv.value()); + context->set(kv.key(), val); + } + } + + return _template_root->render(context); + } +}; + +} // namespace minja diff --git a/engine/utils/cortex_utils.h b/engine/utils/cortex_utils.h index 4d0a956a9..f58fcfe8f 100644 --- a/engine/utils/cortex_utils.h +++ b/engine/utils/cortex_utils.h @@ -2,16 +2,10 @@ #include #include #include -#include #include -#include -#include #include -#include -#include -#include -#include #include +#include #if defined(__linux__) #include #include @@ -69,6 +63,30 @@ inline drogon::HttpResponsePtr CreateCortexHttpJsonResponse( return res; }; +inline drogon::HttpResponsePtr CreateCortexContentResponse( + std::pair, size_t> content) { + auto [buffer, size] = std::move(content); + auto resp = drogon::HttpResponse::newHttpResponse(); + resp->setBody(std::string(buffer.get(), size)); + resp->setContentTypeCode(drogon::CT_APPLICATION_OCTET_STREAM); + +#if defined(_WIN32) + 
resp->addHeader("date", GetDateRFC1123()); +#endif + return resp; +} + +inline drogon::HttpResponsePtr CreateTextPlainResponse( + const std::string& text) { + auto resp = drogon::HttpResponse::newHttpResponse(); + resp->setBody(text); + resp->setContentTypeCode(drogon::CT_TEXT_PLAIN); +#if defined(_WIN32) + resp->addHeader("date", GetDateRFC1123()); +#endif + return resp; +} + inline drogon::HttpResponsePtr CreateCortexStreamResponse( const std::function& callback, const std::string& attachmentFileName = "") { @@ -80,8 +98,6 @@ inline drogon::HttpResponsePtr CreateCortexStreamResponse( return res; } - - #if defined(_WIN32) inline std::string GetCurrentPath() { char path[MAX_PATH]; diff --git a/engine/utils/gguf_metadata_reader.h b/engine/utils/gguf_metadata_reader.h new file mode 100644 index 000000000..0dba0c1bb --- /dev/null +++ b/engine/utils/gguf_metadata_reader.h @@ -0,0 +1,420 @@ +#pragma once + +#include +#include +#include +#include +#include "common/model_metadata.h" +#include "utils/logging_utils.h" +#include "utils/result.hpp" + +/** + * Parsing the GGUF metadata. 
+ * + * Reference: https://github.com/ggerganov/ggml/blob/master/docs/gguf.md + */ +namespace cortex_utils { +namespace { +// present in the first 4 bytes of a GGUF file +constexpr uint32_t GGUF_MAGIC_NUMBER = 1179993927; + +constexpr static auto GGUF_VERSION_LENGTH = 4; +constexpr static auto TENSOR_COUNT_LENGTH = 8; +constexpr static auto METADATA_KV_COUNT = 8; + +constexpr static auto TOKEN_LIST_KEY = "tokenizer.ggml.tokens"; +constexpr static auto BOS_ID_KEY = "tokenizer.ggml.bos_token_id"; +constexpr static auto EOS_ID_KEY = "tokenizer.ggml.eos_token_id"; +constexpr static auto UNK_ID_KEY = "tokenizer.ggml.unknown_token_id"; +constexpr static auto PADDING_ID_KEY = "tokenizer.ggml.padding_token_id"; + +constexpr static auto CHAT_TEMPLATE_ID_KEY = "tokenizer.chat_template"; +constexpr static auto ADD_BOS_TOKEN_KEY = "tokenizer.ggml.add_bos_token"; +constexpr static auto ADD_EOS_TOKEN_KEY = "tokenizer.ggml.add_eos_token"; +const std::vector kSpecialTokenIds{BOS_ID_KEY, EOS_ID_KEY, + UNK_ID_KEY, PADDING_ID_KEY}; + +struct MetadataArrayElement; + +// clang-format off +using MetadataValue = std::variant< + uint8_t, int8_t, + uint16_t, int16_t, + uint32_t, int32_t, + uint64_t, int64_t, + float, double, + bool, std::string, + std::vector +>; + +// clang-format on + +struct MetadataArrayElement { + MetadataValue value; + + // Add constructors for different types + MetadataArrayElement(uint8_t v) : value(v) {} + MetadataArrayElement(int8_t v) : value(v) {} + MetadataArrayElement(uint16_t v) : value(v) {} + MetadataArrayElement(int16_t v) : value(v) {} + MetadataArrayElement(uint32_t v) : value(v) {} + MetadataArrayElement(int32_t v) : value(v) {} + MetadataArrayElement(uint64_t v) : value(v) {} + MetadataArrayElement(int64_t v) : value(v) {} + MetadataArrayElement(float v) : value(v) {} + MetadataArrayElement(double v) : value(v) {} + MetadataArrayElement(bool v) : value(v) {} + MetadataArrayElement(const std::string& v) : value(v) {} + 
MetadataArrayElement(std::string&& v) : value(std::move(v)) {} + + MetadataArrayElement(MetadataValue&& v) : value(std::move(v)) {} +}; + +struct MetadataValueResult { + size_t bytes_read; + MetadataValue value; + + template + MetadataValueResult(size_t br, T&& val) + : bytes_read(br), value(std::forward(val)) {} +}; + +std::pair ReadString(std::ifstream& file) { + uint64_t length; + file.read(reinterpret_cast(&length), sizeof(uint64_t)); + + if (!file) { + throw std::runtime_error("Failed to read string length"); + } + + if (length > 1024 * 1024 * 1024) { + throw std::runtime_error("String length too large: " + + std::to_string(length)); + } + + std::string value(length, '\0'); + file.read(value.data(), length); + + if (!file) { + throw std::runtime_error("Failed to read string content of length " + + std::to_string(length)); + } + + return {8 + length, value}; +} + +inline MetadataValueResult ReadMetadataValue(uint32_t value_type, + std::ifstream& file, + const std::string& key) { + switch (value_type) { + case 0: { // uint8 + uint8_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(uint8_t), value}; + } + case 1: { // int8 + int8_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(int8_t), value}; + } + case 2: { // uint16 + uint16_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(uint16_t), value}; + } + case 3: { // int16 + int16_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(int16_t), value}; + } + case 4: { // uint32 + uint32_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(uint32_t), value}; + } + case 5: { // int32 + int32_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(int32_t), value}; + } + case 6: { // float32 + float value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(float), value}; + } + case 7: { // bool + bool value; + 
file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(bool), value}; + } + case 8: { // string + auto [length, value] = ReadString(file); + return {length, value}; + } + case 9: { // array + uint32_t array_type; + file.read(reinterpret_cast(&array_type), sizeof(uint32_t)); + + uint64_t array_length; + file.read(reinterpret_cast(&array_length), sizeof(uint64_t)); + + size_t bytes_read = 12; // 4 for type + 8 for length + + std::vector array_values_string; + std::vector array_values_float; + + for (uint64_t i = 0; i < array_length; ++i) { + auto result = ReadMetadataValue(array_type, file, + key + "[" + std::to_string(i) + "]"); + bytes_read += result.bytes_read; + + if (array_type == 8) { + array_values_string.push_back(std::get(result.value)); + } else { + float float_value; + switch (result.value.index()) { + case 0: + float_value = static_cast(std::get(result.value)); + break; + case 1: + float_value = static_cast(std::get(result.value)); + break; + case 2: + float_value = + static_cast(std::get(result.value)); + break; + case 3: + float_value = static_cast(std::get(result.value)); + break; + case 4: + float_value = + static_cast(std::get(result.value)); + break; + case 5: + float_value = static_cast(std::get(result.value)); + break; + case 6: + float_value = + static_cast(std::get(result.value)); + break; + case 7: + float_value = static_cast(std::get(result.value)); + break; + case 8: + float_value = std::get(result.value); + break; + case 9: + float_value = static_cast(std::get(result.value)); + break; + case 10: + float_value = static_cast(std::get(result.value)); + break; + default: + throw std::runtime_error( + "Unexpected type in array element conversion"); + } + array_values_float.push_back(float_value); + } + } + + if (!array_values_string.empty()) { + std::vector result; + result.reserve(array_values_string.size()); + for (const auto& str : array_values_string) { + result.emplace_back(str); + } + return {bytes_read, std::move(result)}; + 
} else { + std::vector result; + result.reserve(array_values_float.size()); + for (float val : array_values_float) { + result.emplace_back(val); + } + return {bytes_read, std::move(result)}; + } + } + + case 10: { // uint64 + uint64_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(uint64_t), value}; + } + case 11: { // int64 + int64_t value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(int64_t), value}; + } + case 12: { // float64/double + double value; + file.read(reinterpret_cast(&value), sizeof(value)); + return {sizeof(double), value}; + } + default: + throw std::runtime_error("Unknown value type: " + + std::to_string(value_type) + " for key: " + key); + } +} + +void PrintMetadataValue(const std::string& key, const MetadataValue& value) { + std::ostringstream oss; + oss << "Key: " << key << " = "; + + switch (value.index()) { + case 0: // uint8_t + oss << "uint8: " << static_cast(std::get(value)); + break; + case 1: // int8_t + oss << "int8: " << static_cast(std::get(value)); + break; + case 2: // uint16_t + oss << "uint16: " << std::get(value); + break; + case 3: // int16_t + oss << "int16: " << std::get(value); + break; + case 4: // uint32_t + oss << "uint32: " << std::get(value); + break; + case 5: // int32_t + oss << "int32: " << std::get(value); + break; + case 6: // uint64_t + oss << "uint64: " << std::get(value); + break; + case 7: // int64_t + oss << "int64: " << std::get(value); + break; + case 8: // float + oss << "float: " << std::get(value); + break; + case 9: // double + oss << "double: " << std::get(value); + break; + case 10: // bool + oss << "bool: " << (std::get(value) ? 
"true" : "false"); + break; + case 11: // string + oss << "string: " << std::get(value); + break; + case 12: { // vector + const auto& arr = std::get>(value); + oss << "array[" << arr.size() << "]: {"; + for (size_t i = 0; i < arr.size(); ++i) { + if (i > 0) + oss << ", "; + std::ostringstream key_oss; + key_oss << key << "[" << i << "]"; + PrintMetadataValue(key_oss.str(), arr[i].value); + } + oss << "}"; + break; + } + } + + CTL_INF(oss.str()); +} +} // namespace + +inline cpp::result, std::string> +ReadGgufMetadata(const std::filesystem::path& path) { + if (!std::filesystem::exists(path)) { + return cpp::fail("Gguf file does not exist at " + path.string()); + } + + std::ifstream file(path, std::ios::binary); + if (!file) { + return cpp::fail("Failed to open file: " + path.string()); + } + + uint32_t magic_number; + file.read(reinterpret_cast(&magic_number), sizeof(magic_number)); + if (magic_number != GGUF_MAGIC_NUMBER) { + return cpp::fail("Invalid GGUF file: incorrect magic number"); + } + + auto metadata_ptr = std::make_unique(); + + uint32_t version; + file.read(reinterpret_cast(&version), GGUF_VERSION_LENGTH); + metadata_ptr->version = version; + + uint64_t tensor_count; + file.read(reinterpret_cast(&tensor_count), TENSOR_COUNT_LENGTH); + metadata_ptr->tensor_count = tensor_count; + + uint64_t metadata_kv_count; + file.read(reinterpret_cast(&metadata_kv_count), METADATA_KV_COUNT); + metadata_ptr->metadata_kv_count = metadata_kv_count; + + std::unordered_map kv; + for (uint64_t i = 0; i < metadata_kv_count; ++i) { + auto [key_byte_length, key] = ReadString(file); + + char value_type_bytes[4]; + file.read(value_type_bytes, 4); + uint32_t value_type = + static_cast(static_cast(value_type_bytes[0])) | + (static_cast(static_cast(value_type_bytes[1])) + << 8) | + (static_cast(static_cast(value_type_bytes[2])) + << 16) | + (static_cast(static_cast(value_type_bytes[3])) + << 24); + + try { + auto result = ReadMetadataValue(value_type, file, key); + kv.emplace(key, 
result); + } catch (const std::exception& e) { + CTL_ERR("Error reading metadata value for key '" + key + + "': " + e.what()); + return cpp::fail("Error reading metadata value for key '" + key + "'"); + } + } + + { + metadata_ptr->tokenizer = std::make_unique(); + // initialize tokenizer + if (auto it = kv.find(CHAT_TEMPLATE_ID_KEY); it != kv.end()) { + metadata_ptr->tokenizer->chat_template = + std::get(it->second.value); + } + + for (const auto& key : kSpecialTokenIds) { + if (auto it = kv.find(key); it != kv.end()) { + auto id = std::get(it->second.value); + if (auto token_it = kv.find(TOKEN_LIST_KEY); token_it != kv.end()) { + auto& tokens = std::get>( + token_it->second.value); + + if (key == BOS_ID_KEY) { + metadata_ptr->tokenizer->bos_token = + std::get(tokens[id].value); + } else if (key == EOS_ID_KEY) { + metadata_ptr->tokenizer->eos_token = + std::get(tokens[id].value); + } else if (key == UNK_ID_KEY) { + metadata_ptr->tokenizer->unknown_token = + std::get(tokens[id].value); + } else if (key == PADDING_ID_KEY) { + metadata_ptr->tokenizer->padding_token = + std::get(tokens[id].value); + } else { + CTL_ERR("Unknown special token key: " + key); + } + } + } + } + + if (auto it = kv.find(ADD_BOS_TOKEN_KEY); it != kv.end()) { + metadata_ptr->tokenizer->add_bos_token = std::get(it->second.value); + } + + if (auto it = kv.find(ADD_EOS_TOKEN_KEY); it != kv.end()) { + metadata_ptr->tokenizer->add_eos_token = std::get(it->second.value); + } + } + + CTL_INF("Parsed GGUF metadata successfully: " + metadata_ptr->ToString()); + return metadata_ptr; +} +} // namespace cortex_utils diff --git a/engine/utils/jinja_utils.h b/engine/utils/jinja_utils.h new file mode 100644 index 000000000..64ef85164 --- /dev/null +++ b/engine/utils/jinja_utils.h @@ -0,0 +1,25 @@ +#pragma once + +#include +#include + +#include "extensions/remote-engine/template_renderer.h" +#include "utils/chat-template.hpp" +#include "utils/result.hpp" + +namespace jinja { +inline cpp::result RenderTemplate( 
+ std::string& tmpl, const Json::Value& data, const std::string& bos_token, + const std::string& eos_token, bool add_generation_prompt = true) { + try { + auto converted_json = + remote_engine::TemplateRenderer().ConvertJsonValue(data); + + minja::chat_template chat_tmpl(tmpl, bos_token, eos_token); + return chat_tmpl.apply(converted_json["messages"], {}, + add_generation_prompt); + } catch (const std::exception& e) { + return cpp::fail("Failed to render template: " + std::string(e.what())); + } +} +} // namespace jinja diff --git a/engine/utils/minja.hpp b/engine/utils/minja.hpp new file mode 100644 index 000000000..76f2110f2 --- /dev/null +++ b/engine/utils/minja.hpp @@ -0,0 +1,3428 @@ +/* + Copyright 2024 Google LLC + + Use of this source code is governed by an MIT-style + license that can be found in the LICENSE file or at + https://opensource.org/licenses/MIT. +*/ +// SPDX-License-Identifier: MIT +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using json = nlohmann::ordered_json; + +namespace minja { + +class Context; + +struct Options { + bool trim_blocks; // removes the first newline after a block + bool lstrip_blocks; // removes leading whitespace on the line of the block + bool keep_trailing_newline; // don't remove last newline +}; + +struct ArgumentsValue; + +/* Values that behave roughly like in Python. 
*/ +class Value : public std::enable_shared_from_this { + public: + using CallableType = + std::function&, ArgumentsValue&)>; + using FilterType = + std::function&, ArgumentsValue&)>; + + private: + using ObjectType = + nlohmann::ordered_map; // Only contains primitive keys + using ArrayType = std::vector; + + std::shared_ptr array_; + std::shared_ptr object_; + std::shared_ptr callable_; + json primitive_; + + Value(const std::shared_ptr& array) : array_(array) {} + Value(const std::shared_ptr& object) : object_(object) {} + Value(const std::shared_ptr& callable) + : object_(std::make_shared()), callable_(callable) {} + + /* Python-style string repr */ + static void dump_string(const json& primitive, std::ostringstream& out, + char string_quote = '\'') { + if (!primitive.is_string()) + throw std::runtime_error("Value is not a string: " + primitive.dump()); + auto s = primitive.dump(); + if (string_quote == '"' || s.find('\'') != std::string::npos) { + out << s; + return; + } + // Reuse json dump, just changing string quotes + out << string_quote; + for (size_t i = 1, n = s.size() - 1; i < n; ++i) { + if (s[i] == '\\' && s[i + 1] == '"') { + out << '"'; + i++; + } else if (s[i] == string_quote) { + out << '\\' << string_quote; + } else { + out << s[i]; + } + } + out << string_quote; + } + void dump(std::ostringstream& out, int indent = -1, int level = 0, + bool to_json = false) const { + auto print_indent = [&](int level) { + if (indent > 0) { + out << "\n"; + for (int i = 0, n = level * indent; i < n; ++i) + out << ' '; + } + }; + auto print_sub_sep = [&]() { + out << ','; + if (indent < 0) + out << ' '; + else + print_indent(level + 1); + }; + + auto string_quote = to_json ? 
'"' : '\''; + + if (is_null()) + out << "null"; + else if (array_) { + out << "["; + print_indent(level + 1); + for (size_t i = 0; i < array_->size(); ++i) { + if (i) + print_sub_sep(); + (*array_)[i].dump(out, indent, level + 1, to_json); + } + print_indent(level); + out << "]"; + } else if (object_) { + out << "{"; + print_indent(level + 1); + for (auto begin = object_->begin(), it = begin; it != object_->end(); + ++it) { + if (it != begin) + print_sub_sep(); + if (it->first.is_string()) { + dump_string(it->first, out, string_quote); + } else { + out << string_quote << it->first.dump() << string_quote; + } + out << ": "; + it->second.dump(out, indent, level + 1, to_json); + } + print_indent(level); + out << "}"; + } else if (callable_) { + throw std::runtime_error("Cannot dump callable to JSON"); + } else if (is_boolean() && !to_json) { + out << (this->to_bool() ? "True" : "False"); + } else if (is_string() && !to_json) { + dump_string(primitive_, out, string_quote); + } else { + out << primitive_.dump(); + } + } + + public: + Value() {} + Value(const bool& v) : primitive_(v) {} + Value(const int64_t& v) : primitive_(v) {} + Value(const double& v) : primitive_(v) {} + Value(const std::nullptr_t&) {} + Value(const std::string& v) : primitive_(v) {} + Value(const char* v) : primitive_(std::string(v)) {} + + Value(const json& v) { + if (v.is_object()) { + auto object = std::make_shared(); + for (auto it = v.begin(); it != v.end(); ++it) { + (*object)[it.key()] = it.value(); + } + object_ = std::move(object); + } else if (v.is_array()) { + auto array = std::make_shared(); + for (const auto& item : v) { + array->push_back(Value(item)); + } + array_ = array; + } else { + primitive_ = v; + } + } + + std::vector keys() { + if (!object_) + throw std::runtime_error("Value is not an object: " + dump()); + std::vector res; + for (const auto& item : *object_) { + res.push_back(item.first); + } + return res; + } + + size_t size() const { + if (is_object()) + return 
object_->size(); + if (is_array()) + return array_->size(); + if (is_string()) + return primitive_.get().length(); + throw std::runtime_error("Value is not an array or object: " + dump()); + } + + static Value array(const std::vector values = {}) { + auto array = std::make_shared(); + for (const auto& item : values) { + array->push_back(item); + } + return Value(array); + } + static Value object(const std::shared_ptr object = + std::make_shared()) { + return Value(object); + } + static Value callable(const CallableType& callable) { + return Value(std::make_shared(callable)); + } + + void insert(size_t index, const Value& v) { + if (!array_) + throw std::runtime_error("Value is not an array: " + dump()); + array_->insert(array_->begin() + index, v); + } + void push_back(const Value& v) { + if (!array_) + throw std::runtime_error("Value is not an array: " + dump()); + array_->push_back(v); + } + Value get(const Value& key) { + if (array_) { + if (!key.is_number_integer()) { + return Value(); + } + auto index = key.get(); + return array_->at(index < 0 ? 
array_->size() + index : index); + } else if (object_) { + if (!key.is_hashable()) + throw std::runtime_error("Unashable type: " + dump()); + auto it = object_->find(key.primitive_); + if (it == object_->end()) + return Value(); + return it->second; + } + return Value(); + } + void set(const Value& key, const Value& value) { + if (!object_) + throw std::runtime_error("Value is not an object: " + dump()); + if (!key.is_hashable()) + throw std::runtime_error("Unashable type: " + dump()); + (*object_)[key.primitive_] = value; + } + Value call(const std::shared_ptr& context, + ArgumentsValue& args) const { + if (!callable_) + throw std::runtime_error("Value is not callable: " + dump()); + return (*callable_)(context, args); + } + + bool is_object() const { return !!object_; } + bool is_array() const { return !!array_; } + bool is_callable() const { return !!callable_; } + bool is_null() const { + return !object_ && !array_ && primitive_.is_null() && !callable_; + } + bool is_boolean() const { return primitive_.is_boolean(); } + bool is_number_integer() const { return primitive_.is_number_integer(); } + bool is_number_float() const { return primitive_.is_number_float(); } + bool is_number() const { return primitive_.is_number(); } + bool is_string() const { return primitive_.is_string(); } + bool is_iterable() const { return is_array() || is_object() || is_string(); } + + bool is_primitive() const { return !array_ && !object_ && !callable_; } + bool is_hashable() const { return is_primitive(); } + + bool empty() const { + if (is_null()) + throw std::runtime_error("Undefined value or reference"); + if (is_string()) + return primitive_.empty(); + if (is_array()) + return array_->empty(); + if (is_object()) + return object_->empty(); + return false; + } + + void for_each(const std::function& callback) const { + if (is_null()) + throw std::runtime_error("Undefined value or reference"); + if (array_) { + for (auto& item : *array_) { + callback(item); + } + } else if 
(object_) { + for (auto& item : *object_) { + Value key(item.first); + callback(key); + } + } else if (is_string()) { + for (char c : primitive_.get()) { + auto val = Value(std::string(1, c)); + callback(val); + } + } else { + throw std::runtime_error("Value is not iterable: " + dump()); + } + } + + bool to_bool() const { + if (is_null()) + return false; + if (is_boolean()) + return get(); + if (is_number()) + return get() != 0; + if (is_string()) + return !get().empty(); + if (is_array()) + return !empty(); + return true; + } + + int64_t to_int() const { + if (is_null()) + return 0; + if (is_boolean()) + return get() ? 1 : 0; + if (is_number()) + return static_cast(get()); + if (is_string()) { + try { + return std::stol(get()); + } catch (const std::exception&) { + return 0; + } + } + return 0; + } + + bool operator<(const Value& other) const { + if (is_null()) + throw std::runtime_error("Undefined value or reference"); + if (is_number() && other.is_number()) + return get() < other.get(); + if (is_string() && other.is_string()) + return get() < other.get(); + throw std::runtime_error("Cannot compare values: " + dump() + " < " + + other.dump()); + } + bool operator>=(const Value& other) const { return !(*this < other); } + + bool operator>(const Value& other) const { + if (is_null()) + throw std::runtime_error("Undefined value or reference"); + if (is_number() && other.is_number()) + return get() > other.get(); + if (is_string() && other.is_string()) + return get() > other.get(); + throw std::runtime_error("Cannot compare values: " + dump() + " > " + + other.dump()); + } + bool operator<=(const Value& other) const { return !(*this > other); } + + bool operator==(const Value& other) const { + if (callable_ || other.callable_) { + if (callable_.get() != other.callable_.get()) + return false; + } + if (array_) { + if (!other.array_) + return false; + if (array_->size() != other.array_->size()) + return false; + for (size_t i = 0; i < array_->size(); ++i) { + if 
(!(*array_)[i].to_bool() || !(*other.array_)[i].to_bool() || + (*array_)[i] != (*other.array_)[i]) + return false; + } + return true; + } else if (object_) { + if (!other.object_) + return false; + if (object_->size() != other.object_->size()) + return false; + for (const auto& item : *object_) { + if (!item.second.to_bool() || !other.object_->count(item.first) || + item.second != other.object_->at(item.first)) + return false; + } + return true; + } else { + return primitive_ == other.primitive_; + } + } + bool operator!=(const Value& other) const { return !(*this == other); } + + bool contains(const char* key) const { return contains(std::string(key)); } + bool contains(const std::string& key) const { + if (array_) { + return false; + } else if (object_) { + return object_->find(key) != object_->end(); + } else { + throw std::runtime_error( + "contains can only be called on arrays and objects: " + dump()); + } + } + bool contains(const Value& value) const { + if (is_null()) + throw std::runtime_error("Undefined value or reference"); + if (array_) { + for (const auto& item : *array_) { + if (item.to_bool() && item == value) + return true; + } + return false; + } else if (object_) { + if (!value.is_hashable()) + throw std::runtime_error("Unashable type: " + value.dump()); + return object_->find(value.primitive_) != object_->end(); + } else { + throw std::runtime_error( + "contains can only be called on arrays and objects: " + dump()); + } + } + void erase(size_t index) { + if (array_) + throw std::runtime_error("Value is not an array: " + dump()); + array_->erase(array_->begin() + index); + } + void erase(const std::string& key) { + if (object_) + throw std::runtime_error("Value is not an object: " + dump()); + object_->erase(key); + } + const Value& at(const Value& index) const { + return const_cast(this)->at(index); + } + Value& at(const Value& index) { + if (!index.is_hashable()) + throw std::runtime_error("Unashable type: " + dump()); + if (is_array()) + return 
array_->at(index.get()); + if (is_object()) + return object_->at(index.primitive_); + throw std::runtime_error("Value is not an array or object: " + dump()); + } + const Value& at(size_t index) const { + return const_cast(this)->at(index); + } + Value& at(size_t index) { + if (is_null()) + throw std::runtime_error("Undefined value or reference"); + if (is_array()) + return array_->at(index); + if (is_object()) + return object_->at(index); + throw std::runtime_error("Value is not an array or object: " + dump()); + } + + template + T get(const std::string& key, T default_value) const { + if (!contains(key)) + return default_value; + return at(key).get(); + } + + template + T get() const { + if (is_primitive()) + return primitive_.get(); + throw std::runtime_error("get not defined for this value type: " + + dump()); + } + + std::string dump(int indent = -1, bool to_json = false) const { + std::ostringstream out; + dump(out, indent, 0, to_json); + return out.str(); + } + + Value operator-() const { + if (is_number_integer()) + return -get(); + else + return -get(); + } + std::string to_str() const { + if (is_string()) + return get(); + if (is_number_integer()) + return std::to_string(get()); + if (is_number_float()) + return std::to_string(get()); + if (is_boolean()) + return get() ? 
"True" : "False"; + if (is_null()) + return "None"; + return dump(); + } + Value operator+(const Value& rhs) const { + if (is_string() || rhs.is_string()) { + return to_str() + rhs.to_str(); + } else if (is_number_integer() && rhs.is_number_integer()) { + return get() + rhs.get(); + } else if (is_array() && rhs.is_array()) { + auto res = Value::array(); + for (const auto& item : *array_) + res.push_back(item); + for (const auto& item : *rhs.array_) + res.push_back(item); + return res; + } else { + return get() + rhs.get(); + } + } + Value operator-(const Value& rhs) const { + if (is_number_integer() && rhs.is_number_integer()) + return get() - rhs.get(); + else + return get() - rhs.get(); + } + Value operator*(const Value& rhs) const { + if (is_string() && rhs.is_number_integer()) { + std::ostringstream out; + for (int64_t i = 0, n = rhs.get(); i < n; ++i) { + out << to_str(); + } + return out.str(); + } else if (is_number_integer() && rhs.is_number_integer()) + return get() * rhs.get(); + else + return get() * rhs.get(); + } + Value operator/(const Value& rhs) const { + if (is_number_integer() && rhs.is_number_integer()) + return get() / rhs.get(); + else + return get() / rhs.get(); + } + Value operator%(const Value& rhs) const { + return get() % rhs.get(); + } +}; + +struct ArgumentsValue { + std::vector args; + std::vector> kwargs; + + bool has_named(const std::string& name) { + for (const auto& p : kwargs) { + if (p.first == name) + return true; + } + return false; + } + + Value get_named(const std::string& name) { + for (const auto& [key, value] : kwargs) { + if (key == name) + return value; + } + return Value(); + } + + bool empty() { return args.empty() && kwargs.empty(); } + + void expectArgs(const std::string& method_name, + const std::pair& pos_count, + const std::pair& kw_count) { + if (args.size() < pos_count.first || args.size() > pos_count.second || + kwargs.size() < kw_count.first || kwargs.size() > kw_count.second) { + std::ostringstream out; + out 
<< method_name << " must have between " << pos_count.first << " and " + << pos_count.second << " positional arguments and between " + << kw_count.first << " and " << kw_count.second + << " keyword arguments"; + throw std::runtime_error(out.str()); + } + } +}; + +template <> +inline json Value::get() const { + if (is_primitive()) + return primitive_; + if (is_null()) + return json(); + if (array_) { + std::vector res; + for (const auto& item : *array_) { + res.push_back(item.get()); + } + return res; + } + if (object_) { + json res = json::object(); + for (const auto& [key, value] : *object_) { + if (key.is_string()) { + res[key.get()] = value.get(); + } else if (key.is_primitive()) { + res[key.dump()] = value.get(); + } else { + throw std::runtime_error("Invalid key type for conversion to JSON: " + + key.dump()); + } + } + if (is_callable()) { + res["__callable__"] = true; + } + return res; + } + throw std::runtime_error("get not defined for this value type: " + + dump()); +} + +} // namespace minja + +namespace std { +template <> +struct hash { + size_t operator()(const minja::Value& v) const { + if (!v.is_hashable()) + throw std::runtime_error("Unsupported type for hashing: " + v.dump()); + return std::hash()(v.get()); + } +}; +} // namespace std + +namespace minja { + +static std::string error_location_suffix(const std::string& source, + size_t pos) { + auto get_line = [&](size_t line) { + auto start = source.begin(); + for (size_t i = 1; i < line; ++i) { + start = std::find(start, source.end(), '\n') + 1; + } + auto end = std::find(start, source.end(), '\n'); + return std::string(start, end); + }; + auto start = source.begin(); + auto end = source.end(); + auto it = start + pos; + auto line = std::count(start, it, '\n') + 1; + auto max_line = std::count(start, end, '\n') + 1; + auto col = pos - std::string(start, it).rfind('\n'); + std::ostringstream out; + out << " at row " << line << ", column " << col << ":\n"; + if (line > 1) + out << get_line(line - 1) << 
"\n"; + out << get_line(line) << "\n"; + out << std::string(col - 1, ' ') << "^" << "\n"; + if (line < max_line) + out << get_line(line + 1) << "\n"; + + return out.str(); +} + +class Context : public std::enable_shared_from_this { + protected: + Value values_; + std::shared_ptr parent_; + + public: + Context(Value&& values, const std::shared_ptr& parent = nullptr) + : values_(std::move(values)), parent_(parent) { + if (!values_.is_object()) + throw std::runtime_error("Context values must be an object: " + + values_.dump()); + } + virtual ~Context() {} + + static std::shared_ptr builtins(); + static std::shared_ptr make( + Value&& values, const std::shared_ptr& parent = builtins()); + + std::vector keys() { return values_.keys(); } + virtual Value get(const Value& key) { + if (values_.contains(key)) + return values_.at(key); + if (parent_) + return parent_->get(key); + return Value(); + } + virtual Value& at(const Value& key) { + if (values_.contains(key)) + return values_.at(key); + if (parent_) + return parent_->at(key); + throw std::runtime_error("Undefined variable: " + key.dump()); + } + virtual bool contains(const Value& key) { + if (values_.contains(key)) + return true; + if (parent_) + return parent_->contains(key); + return false; + } + virtual void set(const Value& key, Value& value) { values_.set(key, value); } +}; + +struct Location { + std::shared_ptr source; + size_t pos; +}; + +class Expression { + protected: + virtual Value do_evaluate(const std::shared_ptr& context) const = 0; + + public: + using Parameters = + std::vector>>; + + Location location; + + Expression(const Location& location) : location(location) {} + virtual ~Expression() = default; + + Value evaluate(const std::shared_ptr& context) const { + try { + return do_evaluate(context); + } catch (const std::exception& e) { + std::ostringstream out; + out << e.what(); + if (location.source) + out << error_location_suffix(*location.source, location.pos); + throw std::runtime_error(out.str()); 
+ } + } +}; + +class VariableExpr : public Expression { + std::string name; + + public: + VariableExpr(const Location& location, const std::string& n) + : Expression(location), name(n) {} + std::string get_name() const { return name; } + Value do_evaluate(const std::shared_ptr& context) const override { + if (!context->contains(name)) { + return Value(); + } + return context->at(name); + } +}; + +static void destructuring_assign(const std::vector& var_names, + const std::shared_ptr& context, + Value& item) { + if (var_names.size() == 1) { + Value name(var_names[0]); + context->set(name, item); + } else { + if (!item.is_array() || item.size() != var_names.size()) { + throw std::runtime_error( + "Mismatched number of variables and items in destructuring " + "assignment"); + } + for (size_t i = 0; i < var_names.size(); ++i) { + context->set(var_names[i], item.at(i)); + } + } +} + +enum SpaceHandling { Keep, Strip, StripSpaces, StripNewline }; + +class TemplateToken { + public: + enum class Type { + Text, + Expression, + If, + Else, + Elif, + EndIf, + For, + EndFor, + Set, + EndSet, + Comment, + Macro, + EndMacro, + Filter, + EndFilter + }; + + static std::string typeToString(Type t) { + switch (t) { + case Type::Text: + return "text"; + case Type::Expression: + return "expression"; + case Type::If: + return "if"; + case Type::Else: + return "else"; + case Type::Elif: + return "elif"; + case Type::EndIf: + return "endif"; + case Type::For: + return "for"; + case Type::EndFor: + return "endfor"; + case Type::Set: + return "set"; + case Type::EndSet: + return "endset"; + case Type::Comment: + return "comment"; + case Type::Macro: + return "macro"; + case Type::EndMacro: + return "endmacro"; + case Type::Filter: + return "filter"; + case Type::EndFilter: + return "endfilter"; + } + return "Unknown"; + } + + TemplateToken(Type type, const Location& location, SpaceHandling pre, + SpaceHandling post) + : type(type), location(location), pre_space(pre), post_space(post) {} + 
virtual ~TemplateToken() = default; + + Type type; + Location location; + SpaceHandling pre_space = SpaceHandling::Keep; + SpaceHandling post_space = SpaceHandling::Keep; +}; + +struct TextTemplateToken : public TemplateToken { + std::string text; + TextTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, const std::string& t) + : TemplateToken(Type::Text, location, pre, post), text(t) {} +}; + +struct ExpressionTemplateToken : public TemplateToken { + std::shared_ptr expr; + ExpressionTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, std::shared_ptr&& e) + : TemplateToken(Type::Expression, location, pre, post), + expr(std::move(e)) {} +}; + +struct IfTemplateToken : public TemplateToken { + std::shared_ptr condition; + IfTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, std::shared_ptr&& c) + : TemplateToken(Type::If, location, pre, post), condition(std::move(c)) {} +}; + +struct ElifTemplateToken : public TemplateToken { + std::shared_ptr condition; + ElifTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, std::shared_ptr&& c) + : TemplateToken(Type::Elif, location, pre, post), + condition(std::move(c)) {} +}; + +struct ElseTemplateToken : public TemplateToken { + ElseTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post) + : TemplateToken(Type::Else, location, pre, post) {} +}; + +struct EndIfTemplateToken : public TemplateToken { + EndIfTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post) + : TemplateToken(Type::EndIf, location, pre, post) {} +}; + +struct MacroTemplateToken : public TemplateToken { + std::shared_ptr name; + Expression::Parameters params; + MacroTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, std::shared_ptr&& n, + Expression::Parameters&& p) + : TemplateToken(Type::Macro, location, pre, post), + name(std::move(n)), + params(std::move(p)) {} 
+}; + +struct EndMacroTemplateToken : public TemplateToken { + EndMacroTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post) + : TemplateToken(Type::EndMacro, location, pre, post) {} +}; + +struct FilterTemplateToken : public TemplateToken { + std::shared_ptr filter; + FilterTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, std::shared_ptr&& filter) + : TemplateToken(Type::Filter, location, pre, post), + filter(std::move(filter)) {} +}; + +struct EndFilterTemplateToken : public TemplateToken { + EndFilterTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post) + : TemplateToken(Type::EndFilter, location, pre, post) {} +}; + +struct ForTemplateToken : public TemplateToken { + std::vector var_names; + std::shared_ptr iterable; + std::shared_ptr condition; + bool recursive; + ForTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, const std::vector& vns, + std::shared_ptr&& iter, + std::shared_ptr&& c, bool r) + : TemplateToken(Type::For, location, pre, post), + var_names(vns), + iterable(std::move(iter)), + condition(std::move(c)), + recursive(r) {} +}; + +struct EndForTemplateToken : public TemplateToken { + EndForTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post) + : TemplateToken(Type::EndFor, location, pre, post) {} +}; + +struct SetTemplateToken : public TemplateToken { + std::string ns; + std::vector var_names; + std::shared_ptr value; + SetTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, const std::string& ns, + const std::vector& vns, + std::shared_ptr&& v) + : TemplateToken(Type::Set, location, pre, post), + ns(ns), + var_names(vns), + value(std::move(v)) {} +}; + +struct EndSetTemplateToken : public TemplateToken { + EndSetTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post) + : TemplateToken(Type::EndSet, location, pre, post) {} +}; + +struct 
CommentTemplateToken : public TemplateToken { + std::string text; + CommentTemplateToken(const Location& location, SpaceHandling pre, + SpaceHandling post, const std::string& t) + : TemplateToken(Type::Comment, location, pre, post), text(t) {} +}; + +class TemplateNode { + Location location_; + + protected: + virtual void do_render(std::ostringstream& out, + const std::shared_ptr& context) const = 0; + + public: + TemplateNode(const Location& location) : location_(location) {} + void render(std::ostringstream& out, + const std::shared_ptr& context) const { + try { + do_render(out, context); + } catch (const std::exception& e) { + std::ostringstream err; + err << e.what(); + if (location_.source) + err << error_location_suffix(*location_.source, location_.pos); + throw std::runtime_error(err.str()); + } + } + const Location& location() const { return location_; } + virtual ~TemplateNode() = default; + std::string render(const std::shared_ptr& context) const { + std::ostringstream out; + render(out, context); + return out.str(); + } +}; + +class SequenceNode : public TemplateNode { + std::vector> children; + + public: + SequenceNode(const Location& location, + std::vector>&& c) + : TemplateNode(location), children(std::move(c)) {} + void do_render(std::ostringstream& out, + const std::shared_ptr& context) const override { + for (const auto& child : children) + child->render(out, context); + } +}; + +class TextNode : public TemplateNode { + std::string text; + + public: + TextNode(const Location& location, const std::string& t) + : TemplateNode(location), text(t) {} + void do_render(std::ostringstream& out, + const std::shared_ptr&) const override { + out << text; + } +}; + +class ExpressionNode : public TemplateNode { + std::shared_ptr expr; + + public: + ExpressionNode(const Location& location, std::shared_ptr&& e) + : TemplateNode(location), expr(std::move(e)) {} + void do_render(std::ostringstream& out, + const std::shared_ptr& context) const override { + if 
(!expr) + throw std::runtime_error("ExpressionNode.expr is null"); + auto result = expr->evaluate(context); + if (result.is_string()) { + out << result.get(); + } else if (result.is_boolean()) { + out << (result.get() ? "True" : "False"); + } else if (!result.is_null()) { + out << result.dump(); + } + } +}; + +class IfNode : public TemplateNode { + std::vector< + std::pair, std::shared_ptr>> + cascade; + + public: + IfNode(const Location& location, + std::vector, + std::shared_ptr>>&& c) + : TemplateNode(location), cascade(std::move(c)) {} + void do_render(std::ostringstream& out, + const std::shared_ptr& context) const override { + for (const auto& branch : cascade) { + auto enter_branch = true; + if (branch.first) { + enter_branch = branch.first->evaluate(context).to_bool(); + } + if (enter_branch) { + if (!branch.second) + throw std::runtime_error("IfNode.cascade.second is null"); + branch.second->render(out, context); + return; + } + } + } +}; + +class ForNode : public TemplateNode { + std::vector var_names; + std::shared_ptr iterable; + std::shared_ptr condition; + std::shared_ptr body; + bool recursive; + std::shared_ptr else_body; + + public: + ForNode(const Location& location, std::vector&& var_names, + std::shared_ptr&& iterable, + std::shared_ptr&& condition, + std::shared_ptr&& body, bool recursive, + std::shared_ptr&& else_body) + : TemplateNode(location), + var_names(var_names), + iterable(std::move(iterable)), + condition(std::move(condition)), + body(std::move(body)), + recursive(recursive), + else_body(std::move(else_body)) {} + + void do_render(std::ostringstream& out, + const std::shared_ptr& context) const override { + // https://jinja.palletsprojects.com/en/3.0.x/templates/#for + if (!iterable) + throw std::runtime_error("ForNode.iterable is null"); + if (!body) + throw std::runtime_error("ForNode.body is null"); + + auto iterable_value = iterable->evaluate(context); + Value::CallableType loop_function; + + std::function visit = [&](Value& iter) 
{ + auto filtered_items = Value::array(); + if (!iter.is_null()) { + if (!iterable_value.is_iterable()) { + throw std::runtime_error("For loop iterable must be iterable: " + + iterable_value.dump()); + } + iterable_value.for_each([&](Value& item) { + destructuring_assign(var_names, context, item); + if (!condition || condition->evaluate(context).to_bool()) { + filtered_items.push_back(item); + } + }); + } + if (filtered_items.empty()) { + if (else_body) { + else_body->render(out, context); + } + } else { + auto loop = + recursive ? Value::callable(loop_function) : Value::object(); + loop.set("length", (int64_t)filtered_items.size()); + + size_t cycle_index = 0; + loop.set("cycle", Value::callable([&](const std::shared_ptr&, + ArgumentsValue& args) { + if (args.args.empty() || !args.kwargs.empty()) { + throw std::runtime_error( + "cycle() expects at least 1 positional argument and " + "no named arg"); + } + auto item = args.args[cycle_index]; + cycle_index = (cycle_index + 1) % args.args.size(); + return item; + })); + auto loop_context = Context::make(Value::object(), context); + loop_context->set("loop", loop); + for (size_t i = 0, n = filtered_items.size(); i < n; ++i) { + auto& item = filtered_items.at(i); + destructuring_assign(var_names, loop_context, item); + loop.set("index", (int64_t)i + 1); + loop.set("index0", (int64_t)i); + loop.set("revindex", (int64_t)(n - i)); + loop.set("revindex0", (int64_t)(n - i - 1)); + loop.set("length", (int64_t)n); + loop.set("first", i == 0); + loop.set("last", i == (n - 1)); + loop.set("previtem", i > 0 ? filtered_items.at(i - 1) : Value()); + loop.set("nextitem", i < n - 1 ? 
filtered_items.at(i + 1) : Value()); + body->render(out, loop_context); + } + } + }; + + if (recursive) { + loop_function = [&](const std::shared_ptr&, + ArgumentsValue& args) { + if (args.args.size() != 1 || !args.kwargs.empty() || + !args.args[0].is_array()) { + throw std::runtime_error( + "loop() expects exactly 1 positional iterable argument"); + } + auto& items = args.args[0]; + visit(items); + return Value(); + }; + } + + visit(iterable_value); + } +}; + +class MacroNode : public TemplateNode { + std::shared_ptr name; + Expression::Parameters params; + std::shared_ptr body; + std::unordered_map named_param_positions; + + public: + MacroNode(const Location& location, std::shared_ptr&& n, + Expression::Parameters&& p, std::shared_ptr&& b) + : TemplateNode(location), + name(std::move(n)), + params(std::move(p)), + body(std::move(b)) { + for (size_t i = 0; i < params.size(); ++i) { + const auto& name = params[i].first; + if (!name.empty()) { + named_param_positions[name] = i; + } + } + } + void do_render(std::ostringstream&, + const std::shared_ptr& macro_context) const override { + if (!name) + throw std::runtime_error("MacroNode.name is null"); + if (!body) + throw std::runtime_error("MacroNode.body is null"); + auto callable = Value::callable([&](const std::shared_ptr& context, + ArgumentsValue& args) { + auto call_context = macro_context; + std::vector param_set(params.size(), false); + for (size_t i = 0, n = args.args.size(); i < n; i++) { + auto& arg = args.args[i]; + if (i >= params.size()) + throw std::runtime_error("Too many positional arguments for macro " + + name->get_name()); + param_set[i] = true; + auto& param_name = params[i].first; + call_context->set(param_name, arg); + } + for (auto& [arg_name, value] : args.kwargs) { + auto it = named_param_positions.find(arg_name); + if (it == named_param_positions.end()) + throw std::runtime_error("Unknown parameter name for macro " + + name->get_name() + ": " + arg_name); + + call_context->set(arg_name, 
value); + param_set[it->second] = true; + } + // Set default values for parameters that were not passed + for (size_t i = 0, n = params.size(); i < n; i++) { + if (!param_set[i] && params[i].second != nullptr) { + auto val = params[i].second->evaluate(context); + call_context->set(params[i].first, val); + } + } + return body->render(call_context); + }); + macro_context->set(name->get_name(), callable); + } +}; + +class FilterNode : public TemplateNode { + std::shared_ptr filter; + std::shared_ptr body; + + public: + FilterNode(const Location& location, std::shared_ptr&& f, + std::shared_ptr&& b) + : TemplateNode(location), filter(std::move(f)), body(std::move(b)) {} + + void do_render(std::ostringstream& out, + const std::shared_ptr& context) const override { + if (!filter) + throw std::runtime_error("FilterNode.filter is null"); + if (!body) + throw std::runtime_error("FilterNode.body is null"); + auto filter_value = filter->evaluate(context); + if (!filter_value.is_callable()) { + throw std::runtime_error("Filter must be a callable: " + + filter_value.dump()); + } + std::string rendered_body = body->render(context); + + ArgumentsValue filter_args = {{Value(rendered_body)}, {}}; + auto result = filter_value.call(context, filter_args); + out << result.to_str(); + } +}; + +class SetNode : public TemplateNode { + std::string ns; + std::vector var_names; + std::shared_ptr value; + + public: + SetNode(const Location& location, const std::string& ns, + const std::vector& vns, std::shared_ptr&& v) + : TemplateNode(location), ns(ns), var_names(vns), value(std::move(v)) {} + void do_render(std::ostringstream&, + const std::shared_ptr& context) const override { + if (!value) + throw std::runtime_error("SetNode.value is null"); + if (!ns.empty()) { + if (var_names.size() != 1) { + throw std::runtime_error( + "Namespaced set only supports a single variable name"); + } + auto& name = var_names[0]; + auto ns_value = context->get(ns); + if (!ns_value.is_object()) + throw 
std::runtime_error("Namespace '" + ns + "' is not an object"); + ns_value.set(name, this->value->evaluate(context)); + } else { + auto val = value->evaluate(context); + destructuring_assign(var_names, context, val); + } + } +}; + +class SetTemplateNode : public TemplateNode { + std::string name; + std::shared_ptr template_value; + + public: + SetTemplateNode(const Location& location, const std::string& name, + std::shared_ptr&& tv) + : TemplateNode(location), name(name), template_value(std::move(tv)) {} + void do_render(std::ostringstream&, + const std::shared_ptr& context) const override { + if (!template_value) + throw std::runtime_error("SetTemplateNode.template_value is null"); + Value value{template_value->render(context)}; + context->set(name, value); + } +}; + +class IfExpr : public Expression { + std::shared_ptr condition; + std::shared_ptr then_expr; + std::shared_ptr else_expr; + + public: + IfExpr(const Location& location, std::shared_ptr&& c, + std::shared_ptr&& t, std::shared_ptr&& e) + : Expression(location), + condition(std::move(c)), + then_expr(std::move(t)), + else_expr(std::move(e)) {} + Value do_evaluate(const std::shared_ptr& context) const override { + if (!condition) + throw std::runtime_error("IfExpr.condition is null"); + if (!then_expr) + throw std::runtime_error("IfExpr.then_expr is null"); + if (condition->evaluate(context).to_bool()) { + return then_expr->evaluate(context); + } + if (else_expr) { + return else_expr->evaluate(context); + } + return nullptr; + } +}; + +class LiteralExpr : public Expression { + Value value; + + public: + LiteralExpr(const Location& location, const Value& v) + : Expression(location), value(v) {} + Value do_evaluate(const std::shared_ptr&) const override { + return value; + } +}; + +class ArrayExpr : public Expression { + std::vector> elements; + + public: + ArrayExpr(const Location& location, + std::vector>&& e) + : Expression(location), elements(std::move(e)) {} + Value do_evaluate(const std::shared_ptr& 
context) const override { + auto result = Value::array(); + for (const auto& e : elements) { + if (!e) + throw std::runtime_error("Array element is null"); + result.push_back(e->evaluate(context)); + } + return result; + } +}; + +class DictExpr : public Expression { + std::vector< + std::pair, std::shared_ptr>> + elements; + + public: + DictExpr(const Location& location, + std::vector, + std::shared_ptr>>&& e) + : Expression(location), elements(std::move(e)) {} + Value do_evaluate(const std::shared_ptr& context) const override { + auto result = Value::object(); + for (const auto& [key, value] : elements) { + if (!key) + throw std::runtime_error("Dict key is null"); + if (!value) + throw std::runtime_error("Dict value is null"); + result.set(key->evaluate(context), value->evaluate(context)); + } + return result; + } +}; + +class SliceExpr : public Expression { + public: + std::shared_ptr start, end; + SliceExpr(const Location& location, std::shared_ptr&& s, + std::shared_ptr&& e) + : Expression(location), start(std::move(s)), end(std::move(e)) {} + Value do_evaluate(const std::shared_ptr&) const override { + throw std::runtime_error("SliceExpr not implemented"); + } +}; + +class SubscriptExpr : public Expression { + std::shared_ptr base; + std::shared_ptr index; + + public: + SubscriptExpr(const Location& location, std::shared_ptr&& b, + std::shared_ptr&& i) + : Expression(location), base(std::move(b)), index(std::move(i)) {} + Value do_evaluate(const std::shared_ptr& context) const override { + if (!base) + throw std::runtime_error("SubscriptExpr.base is null"); + if (!index) + throw std::runtime_error("SubscriptExpr.index is null"); + auto target_value = base->evaluate(context); + if (auto slice = dynamic_cast(index.get())) { + auto start = + slice->start ? slice->start->evaluate(context).get() : 0; + auto end = slice->end ? 
slice->end->evaluate(context).get() + : (int64_t)target_value.size(); + if (target_value.is_string()) { + std::string s = target_value.get(); + if (start < 0) + start = s.size() + start; + if (end < 0) + end = s.size() + end; + return s.substr(start, end - start); + } else if (target_value.is_array()) { + if (start < 0) + start = target_value.size() + start; + if (end < 0) + end = target_value.size() + end; + auto result = Value::array(); + for (auto i = start; i < end; ++i) { + result.push_back(target_value.at(i)); + } + return result; + } else { + throw std::runtime_error( + target_value.is_null() + ? "Cannot subscript null" + : "Subscripting only supported on arrays and strings"); + } + } else { + auto index_value = index->evaluate(context); + if (target_value.is_null()) { + if (auto t = dynamic_cast(base.get())) { + throw std::runtime_error( + "'" + t->get_name() + "' is " + + (context->contains(t->get_name()) ? "null" : "not defined")); + } + throw std::runtime_error("Trying to access property '" + + index_value.dump() + "' on null!"); + } + return target_value.get(index_value); + } + } +}; + +class UnaryOpExpr : public Expression { + public: + enum class Op { Plus, Minus, LogicalNot, Expansion, ExpansionDict }; + std::shared_ptr expr; + Op op; + UnaryOpExpr(const Location& location, std::shared_ptr&& e, Op o) + : Expression(location), expr(std::move(e)), op(o) {} + Value do_evaluate(const std::shared_ptr& context) const override { + if (!expr) + throw std::runtime_error("UnaryOpExpr.expr is null"); + auto e = expr->evaluate(context); + switch (op) { + case Op::Plus: + return e; + case Op::Minus: + return -e; + case Op::LogicalNot: + return !e.to_bool(); + case Op::Expansion: + case Op::ExpansionDict: + throw std::runtime_error( + "Expansion operator is only supported in function calls and " + "collections"); + } + throw std::runtime_error("Unknown unary operator"); + } +}; + +class BinaryOpExpr : public Expression { + public: + enum class Op { + StrConcat, + 
Add, + Sub, + Mul, + MulMul, + Div, + DivDiv, + Mod, + Eq, + Ne, + Lt, + Gt, + Le, + Ge, + And, + Or, + In, + NotIn, + Is, + IsNot + }; + + private: + std::shared_ptr left; + std::shared_ptr right; + Op op; + + public: + BinaryOpExpr(const Location& location, std::shared_ptr&& l, + std::shared_ptr&& r, Op o) + : Expression(location), left(std::move(l)), right(std::move(r)), op(o) {} + Value do_evaluate(const std::shared_ptr& context) const override { + if (!left) + throw std::runtime_error("BinaryOpExpr.left is null"); + if (!right) + throw std::runtime_error("BinaryOpExpr.right is null"); + auto l = left->evaluate(context); + + auto do_eval = [&](const Value& l) -> Value { + if (op == Op::Is || op == Op::IsNot) { + auto t = dynamic_cast(right.get()); + if (!t) + throw std::runtime_error( + "Right side of 'is' operator must be a variable"); + + auto eval = [&]() { + const auto& name = t->get_name(); + if (name == "none") + return l.is_null(); + if (name == "boolean") + return l.is_boolean(); + if (name == "integer") + return l.is_number_integer(); + if (name == "float") + return l.is_number_float(); + if (name == "number") + return l.is_number(); + if (name == "string") + return l.is_string(); + if (name == "mapping") + return l.is_object(); + if (name == "iterable") + return l.is_iterable(); + if (name == "sequence") + return l.is_array(); + if (name == "defined") + return !l.is_null(); + throw std::runtime_error("Unknown type for 'is' operator: " + name); + }; + auto value = eval(); + return Value(op == Op::Is ? 
value : !value); + } + + if (op == Op::And) { + if (!l.to_bool()) + return Value(false); + return right->evaluate(context).to_bool(); + } else if (op == Op::Or) { + if (l.to_bool()) + return Value(true); + return right->evaluate(context).to_bool(); + } + + auto r = right->evaluate(context); + switch (op) { + case Op::StrConcat: + return l.to_str() + r.to_str(); + case Op::Add: + return l + r; + case Op::Sub: + return l - r; + case Op::Mul: + return l * r; + case Op::Div: + return l / r; + case Op::MulMul: + return std::pow(l.get(), r.get()); + case Op::DivDiv: + return l.get() / r.get(); + case Op::Mod: + return l.get() % r.get(); + case Op::Eq: + return l == r; + case Op::Ne: + return l != r; + case Op::Lt: + return l < r; + case Op::Gt: + return l > r; + case Op::Le: + return l <= r; + case Op::Ge: + return l >= r; + case Op::In: + return (r.is_array() || r.is_object()) && r.contains(l); + case Op::NotIn: + return !(r.is_array() && r.contains(l)); + default: + break; + } + throw std::runtime_error("Unknown binary operator"); + }; + + if (l.is_callable()) { + return Value::callable( + [l, do_eval](const std::shared_ptr& context, + ArgumentsValue& args) { + auto ll = l.call(context, args); + return do_eval(ll); //args[0].second); + }); + } else { + return do_eval(l); + } + } +}; + +struct ArgumentsExpression { + std::vector> args; + std::vector>> kwargs; + + ArgumentsValue evaluate(const std::shared_ptr& context) const { + ArgumentsValue vargs; + for (const auto& arg : this->args) { + if (auto un_expr = std::dynamic_pointer_cast(arg)) { + if (un_expr->op == UnaryOpExpr::Op::Expansion) { + auto array = un_expr->expr->evaluate(context); + if (!array.is_array()) { + throw std::runtime_error( + "Expansion operator only supported on arrays"); + } + array.for_each([&](Value& value) { vargs.args.push_back(value); }); + continue; + } else if (un_expr->op == UnaryOpExpr::Op::ExpansionDict) { + auto dict = un_expr->expr->evaluate(context); + if (!dict.is_object()) { + throw 
std::runtime_error( + "ExpansionDict operator only supported on objects"); + } + dict.for_each([&](const Value& key) { + vargs.kwargs.push_back({key.get(), dict.at(key)}); + }); + continue; + } + } + vargs.args.push_back(arg->evaluate(context)); + } + for (const auto& [name, value] : this->kwargs) { + vargs.kwargs.push_back({name, value->evaluate(context)}); + } + return vargs; + } +}; + +static std::string strip(const std::string& s) { + static std::regex trailing_spaces_regex("^\\s+|\\s+$"); + return std::regex_replace(s, trailing_spaces_regex, ""); +} + +static std::string html_escape(const std::string& s) { + std::string result; + result.reserve(s.size()); + for (const auto& c : s) { + switch (c) { + case '&': + result += "&"; + break; + case '<': + result += "<"; + break; + case '>': + result += ">"; + break; + case '"': + result += """; + break; + case '\'': + result += "'"; + break; + default: + result += c; + break; + } + } + return result; +} + +class MethodCallExpr : public Expression { + std::shared_ptr object; + std::shared_ptr method; + ArgumentsExpression args; + + public: + MethodCallExpr(const Location& location, std::shared_ptr&& obj, + std::shared_ptr&& m, ArgumentsExpression&& a) + : Expression(location), + object(std::move(obj)), + method(std::move(m)), + args(std::move(a)) {} + Value do_evaluate(const std::shared_ptr& context) const override { + if (!object) + throw std::runtime_error("MethodCallExpr.object is null"); + if (!method) + throw std::runtime_error("MethodCallExpr.method is null"); + auto obj = object->evaluate(context); + auto vargs = args.evaluate(context); + if (obj.is_null()) { + throw std::runtime_error("Trying to call method '" + method->get_name() + + "' on null"); + } + if (obj.is_array()) { + if (method->get_name() == "append") { + vargs.expectArgs("append method", {1, 1}, {0, 0}); + obj.push_back(vargs.args[0]); + return Value(); + } else if (method->get_name() == "insert") { + vargs.expectArgs("insert method", {2, 2}, {0, 
0}); + auto index = vargs.args[0].get(); + if (index < 0 || index > (int64_t)obj.size()) + throw std::runtime_error("Index out of range for insert method"); + obj.insert(index, vargs.args[1]); + return Value(); + } + } else if (obj.is_object()) { + if (method->get_name() == "items") { + vargs.expectArgs("items method", {0, 0}, {0, 0}); + auto result = Value::array(); + for (const auto& key : obj.keys()) { + result.push_back(Value::array({key, obj.at(key)})); + } + return result; + } else if (method->get_name() == "get") { + vargs.expectArgs("get method", {1, 2}, {0, 0}); + auto key = vargs.args[0]; + if (vargs.args.size() == 1) { + return obj.contains(key) ? obj.at(key) : Value(); + } else { + return obj.contains(key) ? obj.at(key) : vargs.args[1]; + } + } else if (obj.contains(method->get_name())) { + auto callable = obj.at(method->get_name()); + if (!callable.is_callable()) { + throw std::runtime_error("Property '" + method->get_name() + + "' is not callable"); + } + return callable.call(context, vargs); + } + } else if (obj.is_string()) { + auto str = obj.get(); + if (method->get_name() == "strip") { + vargs.expectArgs("strip method", {0, 0}, {0, 0}); + return Value(strip(str)); + } else if (method->get_name() == "endswith") { + vargs.expectArgs("endswith method", {1, 1}, {0, 0}); + auto suffix = vargs.args[0].get(); + return suffix.length() <= str.length() && + std::equal(suffix.rbegin(), suffix.rend(), str.rbegin()); + } else if (method->get_name() == "title") { + vargs.expectArgs("title method", {0, 0}, {0, 0}); + auto res = str; + for (size_t i = 0, n = res.size(); i < n; ++i) { + if (i == 0 || std::isspace(res[i - 1])) + res[i] = std::toupper(res[i]); + else + res[i] = std::tolower(res[i]); + } + return res; + } + } + throw std::runtime_error("Unknown method: " + method->get_name()); + } +}; + +class CallExpr : public Expression { + public: + std::shared_ptr object; + ArgumentsExpression args; + CallExpr(const Location& location, std::shared_ptr&& obj, + 
ArgumentsExpression&& a) + : Expression(location), object(std::move(obj)), args(std::move(a)) {} + Value do_evaluate(const std::shared_ptr& context) const override { + if (!object) + throw std::runtime_error("CallExpr.object is null"); + auto obj = object->evaluate(context); + if (!obj.is_callable()) { + throw std::runtime_error("Object is not callable: " + obj.dump(2)); + } + auto vargs = args.evaluate(context); + return obj.call(context, vargs); + } +}; + +class FilterExpr : public Expression { + std::vector> parts; + + public: + FilterExpr(const Location& location, + std::vector>&& p) + : Expression(location), parts(std::move(p)) {} + Value do_evaluate(const std::shared_ptr& context) const override { + Value result; + bool first = true; + for (const auto& part : parts) { + if (!part) + throw std::runtime_error("FilterExpr.part is null"); + if (first) { + first = false; + result = part->evaluate(context); + } else { + if (auto ce = dynamic_cast(part.get())) { + auto target = ce->object->evaluate(context); + ArgumentsValue args = ce->args.evaluate(context); + args.args.insert(args.args.begin(), result); + result = target.call(context, args); + } else { + auto callable = part->evaluate(context); + ArgumentsValue args; + args.args.insert(args.args.begin(), result); + result = callable.call(context, args); + } + } + } + return result; + } + + void prepend(std::shared_ptr&& e) { + parts.insert(parts.begin(), std::move(e)); + } +}; + +class Parser { + private: + using CharIterator = std::string::const_iterator; + + std::shared_ptr template_str; + CharIterator start, end, it; + Options options; + + Parser(const std::shared_ptr& template_str, + const Options& options) + : template_str(template_str), options(options) { + if (!template_str) + throw std::runtime_error("Template string is null"); + start = it = this->template_str->begin(); + end = this->template_str->end(); + } + + bool consumeSpaces(SpaceHandling space_handling = SpaceHandling::Strip) { + if (space_handling 
== SpaceHandling::Strip) { + while (it != end && std::isspace(*it)) + ++it; + } + return true; + } + + std::unique_ptr parseString() { + auto doParse = [&](char quote) -> std::unique_ptr { + if (it == end || *it != quote) + return nullptr; + std::string result; + bool escape = false; + for (++it; it != end; ++it) { + if (escape) { + escape = false; + switch (*it) { + case 'n': + result += '\n'; + break; + case 'r': + result += '\r'; + break; + case 't': + result += '\t'; + break; + case 'b': + result += '\b'; + break; + case 'f': + result += '\f'; + break; + case '\\': + result += '\\'; + break; + default: + if (*it == quote) { + result += quote; + } else { + result += *it; + } + break; + } + } else if (*it == '\\') { + escape = true; + } else if (*it == quote) { + ++it; + return std::make_unique(std::move(result)); + } else { + result += *it; + } + } + return nullptr; + }; + + consumeSpaces(); + if (it == end) + return nullptr; + if (*it == '"') + return doParse('"'); + if (*it == '\'') + return doParse('\''); + return nullptr; + } + + json parseNumber(CharIterator& it, const CharIterator& end) { + auto before = it; + consumeSpaces(); + auto start = it; + bool hasDecimal = false; + bool hasExponent = false; + + if (it != end && (*it == '-' || *it == '+')) + ++it; + + while (it != end) { + if (std::isdigit(*it)) { + ++it; + } else if (*it == '.') { + if (hasDecimal) + throw std::runtime_error("Multiple decimal points"); + hasDecimal = true; + ++it; + } else if (it != start && (*it == 'e' || *it == 'E')) { + if (hasExponent) + throw std::runtime_error("Multiple exponents"); + hasExponent = true; + ++it; + } else { + break; + } + } + if (start == it) { + it = before; + return json(); // No valid characters found + } + + std::string str(start, it); + try { + return json::parse(str); + } catch (json::parse_error& e) { + throw std::runtime_error("Failed to parse number: '" + str + "' (" + + std::string(e.what()) + ")"); + return json(); + } + } + + /** integer, float, 
bool, string */ + std::shared_ptr parseConstant() { + auto start = it; + consumeSpaces(); + if (it == end) + return nullptr; + if (*it == '"' || *it == '\'') { + auto str = parseString(); + if (str) + return std::make_shared(*str); + } + static std::regex prim_tok(R"(true\b|True\b|false\b|False\b|None\b)"); + auto token = consumeToken(prim_tok); + if (!token.empty()) { + if (token == "true" || token == "True") + return std::make_shared(true); + if (token == "false" || token == "False") + return std::make_shared(false); + if (token == "None") + return std::make_shared(nullptr); + throw std::runtime_error("Unknown constant token: " + token); + } + + auto number = parseNumber(it, end); + if (!number.is_null()) + return std::make_shared(number); + + it = start; + return nullptr; + } + + class expression_parsing_error : public std::runtime_error { + const CharIterator it; + + public: + expression_parsing_error(const std::string& message, const CharIterator& it) + : std::runtime_error(message), it(it) {} + size_t get_pos(const CharIterator& begin) const { + return std::distance(begin, it); + } + }; + + bool peekSymbols(const std::vector& symbols) const { + for (const auto& symbol : symbols) { + if (std::distance(it, end) >= (int64_t)symbol.size() && + std::string(it, it + symbol.size()) == symbol) { + return true; + } + } + return false; + } + + std::vector consumeTokenGroups( + const std::regex& regex, + SpaceHandling space_handling = SpaceHandling::Strip) { + auto start = it; + consumeSpaces(space_handling); + std::smatch match; + if (std::regex_search(it, end, match, regex) && match.position() == 0) { + it += match[0].length(); + std::vector ret; + for (size_t i = 0, n = match.size(); i < n; ++i) { + ret.push_back(match[i].str()); + } + return ret; + } + it = start; + return {}; + } + std::string consumeToken( + const std::regex& regex, + SpaceHandling space_handling = SpaceHandling::Strip) { + auto start = it; + consumeSpaces(space_handling); + std::smatch match; + 
if (std::regex_search(it, end, match, regex) && match.position() == 0) { + it += match[0].length(); + return match[0].str(); + } + it = start; + return ""; + } + + std::string consumeToken( + const std::string& token, + SpaceHandling space_handling = SpaceHandling::Strip) { + auto start = it; + consumeSpaces(space_handling); + if (std::distance(it, end) >= (int64_t)token.size() && + std::string(it, it + token.size()) == token) { + it += token.size(); + return token; + } + it = start; + return ""; + } + + std::shared_ptr parseExpression(bool allow_if_expr = true) { + auto left = parseLogicalOr(); + if (it == end) + return left; + + if (!allow_if_expr) + return left; + + static std::regex if_tok(R"(if\b)"); + if (consumeToken(if_tok).empty()) { + return left; + } + + auto location = get_location(); + auto [condition, else_expr] = parseIfExpression(); + return std::make_shared(location, std::move(condition), + std::move(left), std::move(else_expr)); + } + + Location get_location() const { + return {template_str, (size_t)std::distance(start, it)}; + } + + std::pair, std::shared_ptr> + parseIfExpression() { + auto condition = parseLogicalOr(); + if (!condition) + throw std::runtime_error("Expected condition expression"); + + static std::regex else_tok(R"(else\b)"); + std::shared_ptr else_expr; + if (!consumeToken(else_tok).empty()) { + else_expr = parseExpression(); + if (!else_expr) + throw std::runtime_error("Expected 'else' expression"); + } + return std::pair(std::move(condition), std::move(else_expr)); + } + + std::shared_ptr parseLogicalOr() { + auto left = parseLogicalAnd(); + if (!left) + throw std::runtime_error("Expected left side of 'logical or' expression"); + + static std::regex or_tok(R"(or\b)"); + auto location = get_location(); + while (!consumeToken(or_tok).empty()) { + auto right = parseLogicalAnd(); + if (!right) + throw std::runtime_error("Expected right side of 'or' expression"); + left = std::make_shared( + location, std::move(left), 
std::move(right), BinaryOpExpr::Op::Or); + } + return left; + } + + std::shared_ptr parseLogicalNot() { + static std::regex not_tok(R"(not\b)"); + auto location = get_location(); + + if (!consumeToken(not_tok).empty()) { + auto sub = parseLogicalNot(); + if (!sub) + throw std::runtime_error("Expected expression after 'not' keyword"); + return std::make_shared(location, std::move(sub), + UnaryOpExpr::Op::LogicalNot); + } + return parseLogicalCompare(); + } + + std::shared_ptr parseLogicalAnd() { + auto left = parseLogicalNot(); + if (!left) + throw std::runtime_error( + "Expected left side of 'logical and' expression"); + + static std::regex and_tok(R"(and\b)"); + auto location = get_location(); + while (!consumeToken(and_tok).empty()) { + auto right = parseLogicalNot(); + if (!right) + throw std::runtime_error("Expected right side of 'and' expression"); + left = std::make_shared( + location, std::move(left), std::move(right), BinaryOpExpr::Op::And); + } + return left; + } + + std::shared_ptr parseLogicalCompare() { + auto left = parseStringConcat(); + if (!left) + throw std::runtime_error( + "Expected left side of 'logical compare' expression"); + + static std::regex compare_tok( + R"(==|!=|<=?|>=?|in\b|is\b|not[\r\n\s]+in\b)"); + static std::regex not_tok(R"(not\b)"); + std::string op_str; + while (!(op_str = consumeToken(compare_tok)).empty()) { + auto location = get_location(); + if (op_str == "is") { + auto negated = !consumeToken(not_tok).empty(); + + auto identifier = parseIdentifier(); + if (!identifier) + throw std::runtime_error("Expected identifier after 'is' keyword"); + + return std::make_shared( + left->location, std::move(left), std::move(identifier), + negated ? 
BinaryOpExpr::Op::IsNot : BinaryOpExpr::Op::Is); + } + auto right = parseStringConcat(); + if (!right) + throw std::runtime_error( + "Expected right side of 'logical compare' expression"); + BinaryOpExpr::Op op; + if (op_str == "==") + op = BinaryOpExpr::Op::Eq; + else if (op_str == "!=") + op = BinaryOpExpr::Op::Ne; + else if (op_str == "<") + op = BinaryOpExpr::Op::Lt; + else if (op_str == ">") + op = BinaryOpExpr::Op::Gt; + else if (op_str == "<=") + op = BinaryOpExpr::Op::Le; + else if (op_str == ">=") + op = BinaryOpExpr::Op::Ge; + else if (op_str == "in") + op = BinaryOpExpr::Op::In; + else if (op_str.substr(0, 3) == "not") + op = BinaryOpExpr::Op::NotIn; + else + throw std::runtime_error("Unknown comparison operator: " + op_str); + left = std::make_shared(get_location(), std::move(left), + std::move(right), op); + } + return left; + } + + Expression::Parameters parseParameters() { + consumeSpaces(); + if (consumeToken("(").empty()) + throw std::runtime_error("Expected opening parenthesis in param list"); + + Expression::Parameters result; + + while (it != end) { + if (!consumeToken(")").empty()) { + return result; + } + auto expr = parseExpression(); + if (!expr) + throw std::runtime_error("Expected expression in call args"); + + if (auto ident = dynamic_cast(expr.get())) { + if (!consumeToken("=").empty()) { + auto value = parseExpression(); + if (!value) + throw std::runtime_error("Expected expression in for named arg"); + result.emplace_back(ident->get_name(), std::move(value)); + } else { + result.emplace_back(ident->get_name(), nullptr); + } + } else { + result.emplace_back(std::string(), std::move(expr)); + } + if (consumeToken(",").empty()) { + if (consumeToken(")").empty()) { + throw std::runtime_error("Expected closing parenthesis in call args"); + } + return result; + } + } + throw std::runtime_error("Expected closing parenthesis in call args"); + } + + ArgumentsExpression parseCallArgs() { + consumeSpaces(); + if (consumeToken("(").empty()) + 
throw std::runtime_error("Expected opening parenthesis in call args"); + + ArgumentsExpression result; + + while (it != end) { + if (!consumeToken(")").empty()) { + return result; + } + auto expr = parseExpression(); + if (!expr) + throw std::runtime_error("Expected expression in call args"); + + if (auto ident = dynamic_cast(expr.get())) { + if (!consumeToken("=").empty()) { + auto value = parseExpression(); + if (!value) + throw std::runtime_error("Expected expression in for named arg"); + result.kwargs.emplace_back(ident->get_name(), std::move(value)); + } else { + result.args.emplace_back(std::move(expr)); + } + } else { + result.args.emplace_back(std::move(expr)); + } + if (consumeToken(",").empty()) { + if (consumeToken(")").empty()) { + throw std::runtime_error("Expected closing parenthesis in call args"); + } + return result; + } + } + throw std::runtime_error("Expected closing parenthesis in call args"); + } + + std::shared_ptr parseIdentifier() { + static std::regex ident_regex(R"((?!(?:not|is|and|or|del)\b)[a-zA-Z_]\w*)"); + auto location = get_location(); + auto ident = consumeToken(ident_regex); + if (ident.empty()) + return nullptr; + return std::make_shared(location, ident); + } + + std::shared_ptr parseStringConcat() { + auto left = parseMathPow(); + if (!left) + throw std::runtime_error( + "Expected left side of 'string concat' expression"); + + static std::regex concat_tok(R"(~(?!\}))"); + if (!consumeToken(concat_tok).empty()) { + auto right = parseLogicalAnd(); + if (!right) + throw std::runtime_error( + "Expected right side of 'string concat' expression"); + left = std::make_shared(get_location(), std::move(left), + std::move(right), + BinaryOpExpr::Op::StrConcat); + } + return left; + } + + std::shared_ptr parseMathPow() { + auto left = parseMathPlusMinus(); + if (!left) + throw std::runtime_error("Expected left side of 'math pow' expression"); + + while (!consumeToken("**").empty()) { + auto right = parseMathPlusMinus(); + if (!right) + throw 
std::runtime_error( + "Expected right side of 'math pow' expression"); + left = std::make_shared(get_location(), std::move(left), + std::move(right), + BinaryOpExpr::Op::MulMul); + } + return left; + } + + std::shared_ptr parseMathPlusMinus() { + static std::regex plus_minus_tok(R"(\+|-(?![}%#]\}))"); + + auto left = parseMathMulDiv(); + if (!left) + throw std::runtime_error( + "Expected left side of 'math plus/minus' expression"); + std::string op_str; + while (!(op_str = consumeToken(plus_minus_tok)).empty()) { + auto right = parseMathMulDiv(); + if (!right) + throw std::runtime_error( + "Expected right side of 'math plus/minus' expression"); + auto op = op_str == "+" ? BinaryOpExpr::Op::Add : BinaryOpExpr::Op::Sub; + left = std::make_shared(get_location(), std::move(left), + std::move(right), op); + } + return left; + } + + std::shared_ptr parseMathMulDiv() { + auto left = parseMathUnaryPlusMinus(); + if (!left) + throw std::runtime_error( + "Expected left side of 'math mul/div' expression"); + + static std::regex mul_div_tok(R"(\*\*?|//?|%(?!\}))"); + std::string op_str; + while (!(op_str = consumeToken(mul_div_tok)).empty()) { + auto right = parseMathUnaryPlusMinus(); + if (!right) + throw std::runtime_error( + "Expected right side of 'math mul/div' expression"); + auto op = op_str == "*" ? BinaryOpExpr::Op::Mul + : op_str == "**" ? BinaryOpExpr::Op::MulMul + : op_str == "/" ? BinaryOpExpr::Op::Div + : op_str == "//" ? 
BinaryOpExpr::Op::DivDiv + : BinaryOpExpr::Op::Mod; + left = std::make_shared(get_location(), std::move(left), + std::move(right), op); + } + + if (!consumeToken("|").empty()) { + auto expr = parseMathMulDiv(); + if (auto filter = dynamic_cast(expr.get())) { + filter->prepend(std::move(left)); + return expr; + } else { + std::vector> parts; + parts.emplace_back(std::move(left)); + parts.emplace_back(std::move(expr)); + return std::make_shared(get_location(), std::move(parts)); + } + } + return left; + } + + std::shared_ptr call_func(const std::string& name, + ArgumentsExpression&& args) const { + return std::make_shared( + get_location(), std::make_shared(get_location(), name), + std::move(args)); + } + + std::shared_ptr parseMathUnaryPlusMinus() { + static std::regex unary_plus_minus_tok(R"(\+|-(?![}%#]\}))"); + auto op_str = consumeToken(unary_plus_minus_tok); + auto expr = parseExpansion(); + if (!expr) + throw std::runtime_error( + "Expected expr of 'unary plus/minus/expansion' expression"); + + if (!op_str.empty()) { + auto op = op_str == "+" ? UnaryOpExpr::Op::Plus : UnaryOpExpr::Op::Minus; + return std::make_shared(get_location(), std::move(expr), op); + } + return expr; + } + + std::shared_ptr parseExpansion() { + static std::regex expansion_tok(R"(\*\*?)"); + auto op_str = consumeToken(expansion_tok); + auto expr = parseValueExpression(); + if (op_str.empty()) + return expr; + if (!expr) + throw std::runtime_error("Expected expr of 'expansion' expression"); + return std::make_shared(get_location(), std::move(expr), + op_str == "*" + ? 
UnaryOpExpr::Op::Expansion + : UnaryOpExpr::Op::ExpansionDict); + } + + std::shared_ptr parseValueExpression() { + auto parseValue = [&]() -> std::shared_ptr { + auto location = get_location(); + auto constant = parseConstant(); + if (constant) + return std::make_shared(location, *constant); + + static std::regex null_regex(R"(null\b)"); + if (!consumeToken(null_regex).empty()) + return std::make_shared(location, Value()); + + auto identifier = parseIdentifier(); + if (identifier) + return identifier; + + auto braced = parseBracedExpressionOrArray(); + if (braced) + return braced; + + auto array = parseArray(); + if (array) + return array; + + auto dictionary = parseDictionary(); + if (dictionary) + return dictionary; + + throw std::runtime_error("Expected value expression"); + }; + + auto value = parseValue(); + + while (it != end && consumeSpaces() && peekSymbols({"[", "."})) { + if (!consumeToken("[").empty()) { + std::shared_ptr index; + if (!consumeToken(":").empty()) { + auto slice_end = parseExpression(); + index = std::make_shared(slice_end->location, nullptr, + std::move(slice_end)); + } else { + auto slice_start = parseExpression(); + if (!consumeToken(":").empty()) { + consumeSpaces(); + if (peekSymbols({"]"})) { + index = std::make_shared( + slice_start->location, std::move(slice_start), nullptr); + } else { + auto slice_end = parseExpression(); + index = std::make_shared(slice_start->location, + std::move(slice_start), + std::move(slice_end)); + } + } else { + index = std::move(slice_start); + } + } + if (!index) + throw std::runtime_error("Empty index in subscript"); + if (consumeToken("]").empty()) + throw std::runtime_error("Expected closing bracket in subscript"); + + value = std::make_shared( + value->location, std::move(value), std::move(index)); + } else if (!consumeToken(".").empty()) { + auto identifier = parseIdentifier(); + if (!identifier) + throw std::runtime_error("Expected identifier in subscript"); + + consumeSpaces(); + if 
(peekSymbols({"("})) { + auto callParams = parseCallArgs(); + value = std::make_shared( + identifier->location, std::move(value), std::move(identifier), + std::move(callParams)); + } else { + auto key = std::make_shared( + identifier->location, Value(identifier->get_name())); + value = std::make_shared( + identifier->location, std::move(value), std::move(key)); + } + } + consumeSpaces(); + } + + if (peekSymbols({"("})) { + auto location = get_location(); + auto callParams = parseCallArgs(); + value = std::make_shared(location, std::move(value), + std::move(callParams)); + } + return value; + } + + std::shared_ptr parseBracedExpressionOrArray() { + if (consumeToken("(").empty()) + return nullptr; + + auto expr = parseExpression(); + if (!expr) + throw std::runtime_error("Expected expression in braced expression"); + + if (!consumeToken(")").empty()) { + return expr; // Drop the parentheses + } + + std::vector> tuple; + tuple.emplace_back(std::move(expr)); + + while (it != end) { + if (consumeToken(",").empty()) + throw std::runtime_error("Expected comma in tuple"); + auto next = parseExpression(); + if (!next) + throw std::runtime_error("Expected expression in tuple"); + tuple.push_back(std::move(next)); + + if (!consumeToken(")").empty()) { + return std::make_shared(get_location(), std::move(tuple)); + } + } + throw std::runtime_error("Expected closing parenthesis"); + } + + std::shared_ptr parseArray() { + if (consumeToken("[").empty()) + return nullptr; + + std::vector> elements; + if (!consumeToken("]").empty()) { + return std::make_shared(get_location(), std::move(elements)); + } + auto first_expr = parseExpression(); + if (!first_expr) + throw std::runtime_error("Expected first expression in array"); + elements.push_back(std::move(first_expr)); + + while (it != end) { + if (!consumeToken(",").empty()) { + auto expr = parseExpression(); + if (!expr) + throw std::runtime_error("Expected expression in array"); + elements.push_back(std::move(expr)); + } else if 
(!consumeToken("]").empty()) { + return std::make_shared(get_location(), std::move(elements)); + } else { + throw std::runtime_error("Expected comma or closing bracket in array"); + } + } + throw std::runtime_error("Expected closing bracket"); + } + + std::shared_ptr parseDictionary() { + if (consumeToken("{").empty()) + return nullptr; + + std::vector< + std::pair, std::shared_ptr>> + elements; + if (!consumeToken("}").empty()) { + return std::make_shared(get_location(), std::move(elements)); + } + + auto parseKeyValuePair = [&]() { + auto key = parseExpression(); + if (!key) + throw std::runtime_error("Expected key in dictionary"); + if (consumeToken(":").empty()) + throw std::runtime_error( + "Expected colon betweek key & value in dictionary"); + auto value = parseExpression(); + if (!value) + throw std::runtime_error("Expected value in dictionary"); + elements.emplace_back(std::pair(std::move(key), std::move(value))); + }; + + parseKeyValuePair(); + + while (it != end) { + if (!consumeToken(",").empty()) { + parseKeyValuePair(); + } else if (!consumeToken("}").empty()) { + return std::make_shared(get_location(), std::move(elements)); + } else { + throw std::runtime_error( + "Expected comma or closing brace in dictionary"); + } + } + throw std::runtime_error("Expected closing brace"); + } + + SpaceHandling parsePreSpace(const std::string& s) const { + if (s == "-") + return SpaceHandling::Strip; + return SpaceHandling::Keep; + } + + SpaceHandling parsePostSpace(const std::string& s) const { + if (s == "-") + return SpaceHandling::Strip; + return SpaceHandling::Keep; + } + + using TemplateTokenVector = std::vector>; + using TemplateTokenIterator = TemplateTokenVector::const_iterator; + + std::vector parseVarNames() { + static std::regex varnames_regex( + R"(((?:\w+)(?:[\r\n\s]*,[\r\n\s]*(?:\w+))*)[\r\n\s]*)"); + + std::vector group; + if ((group = consumeTokenGroups(varnames_regex)).empty()) + throw std::runtime_error("Expected variable names"); + std::vector 
varnames; + std::istringstream iss(group[1]); + std::string varname; + while (std::getline(iss, varname, ',')) { + varnames.push_back(strip(varname)); + } + return varnames; + } + + std::runtime_error unexpected(const TemplateToken& token) const { + return std::runtime_error( + "Unexpected " + TemplateToken::typeToString(token.type) + + error_location_suffix(*template_str, token.location.pos)); + } + std::runtime_error unterminated(const TemplateToken& token) const { + return std::runtime_error( + "Unterminated " + TemplateToken::typeToString(token.type) + + error_location_suffix(*template_str, token.location.pos)); + } + + TemplateTokenVector tokenize() { + static std::regex comment_tok(R"(\{#([-~]?)(.*?)([-~]?)#\})"); + static std::regex expr_open_regex(R"(\{\{([-~])?)"); + static std::regex block_open_regex(R"(^\{%([-~])?[\s\n\r]*)"); + static std::regex block_keyword_tok( + R"((if|else|elif|endif|for|endfor|set|endset|block|endblock|macro|endmacro|filter|endfilter)\b)"); + static std::regex text_regex(R"([\s\S\n\r]*?($|(?=\{\{|\{%|\{#)))"); + static std::regex expr_close_regex(R"([\s\n\r]*([-~])?\}\})"); + static std::regex block_close_regex(R"([\s\n\r]*([-~])?%\})"); + + TemplateTokenVector tokens; + std::vector group; + std::string text; + + try { + while (it != end) { + auto location = get_location(); + + if (!(group = consumeTokenGroups(comment_tok, SpaceHandling::Keep)) + .empty()) { + auto pre_space = parsePreSpace(group[1]); + auto content = group[2]; + auto post_space = parsePostSpace(group[3]); + tokens.push_back(std::make_unique( + location, pre_space, post_space, content)); + } else if (!(group = consumeTokenGroups(expr_open_regex, + SpaceHandling::Keep)) + .empty()) { + auto pre_space = parsePreSpace(group[1]); + auto expr = parseExpression(); + + if ((group = consumeTokenGroups(expr_close_regex)).empty()) { + throw std::runtime_error("Expected closing expression tag"); + } + + auto post_space = parsePostSpace(group[1]); + 
tokens.push_back(std::make_unique( + location, pre_space, post_space, std::move(expr))); + } else if (!(group = consumeTokenGroups(block_open_regex, + SpaceHandling::Keep)) + .empty()) { + auto pre_space = parsePreSpace(group[1]); + + std::string keyword; + + auto parseBlockClose = [&]() -> SpaceHandling { + if ((group = consumeTokenGroups(block_close_regex)).empty()) + throw std::runtime_error("Expected closing block tag"); + return parsePostSpace(group[1]); + }; + + if ((keyword = consumeToken(block_keyword_tok)).empty()) + throw std::runtime_error("Expected block keyword"); + + if (keyword == "if") { + auto condition = parseExpression(); + if (!condition) + throw std::runtime_error("Expected condition in if block"); + + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space, std::move(condition))); + } else if (keyword == "elif") { + auto condition = parseExpression(); + if (!condition) + throw std::runtime_error("Expected condition in elif block"); + + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space, std::move(condition))); + } else if (keyword == "else") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space)); + } else if (keyword == "endif") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space)); + } else if (keyword == "for") { + static std::regex recursive_tok(R"(recursive\b)"); + static std::regex if_tok(R"(if\b)"); + + auto varnames = parseVarNames(); + static std::regex in_tok(R"(in\b)"); + if (consumeToken(in_tok).empty()) + throw std::runtime_error("Expected 'in' keyword in for block"); + auto iterable = parseExpression(/* allow_if_expr = */ false); + if (!iterable) + throw std::runtime_error("Expected iterable in for block"); + + std::shared_ptr condition; + if (!consumeToken(if_tok).empty()) { + condition = 
parseExpression(); + } + auto recursive = !consumeToken(recursive_tok).empty(); + + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space, std::move(varnames), + std::move(iterable), std::move(condition), recursive)); + } else if (keyword == "endfor") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space)); + } else if (keyword == "set") { + static std::regex namespaced_var_regex( + R"((\w+)[\s\n\r]*\.[\s\n\r]*(\w+))"); + + std::string ns; + std::vector var_names; + std::shared_ptr value; + if (!(group = consumeTokenGroups(namespaced_var_regex)).empty()) { + ns = group[1]; + var_names.push_back(group[2]); + + if (consumeToken("=").empty()) + throw std::runtime_error("Expected equals sign in set block"); + + value = parseExpression(); + if (!value) + throw std::runtime_error("Expected value in set block"); + } else { + var_names = parseVarNames(); + + if (!consumeToken("=").empty()) { + value = parseExpression(); + if (!value) + throw std::runtime_error("Expected value in set block"); + } + } + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space, ns, var_names, + std::move(value))); + } else if (keyword == "endset") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space)); + } else if (keyword == "macro") { + auto macroname = parseIdentifier(); + if (!macroname) + throw std::runtime_error("Expected macro name in macro block"); + auto params = parseParameters(); + + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space, std::move(macroname), + std::move(params))); + } else if (keyword == "endmacro") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space)); + } else if (keyword == "filter") { + auto filter = parseExpression(); + if 
(!filter) + throw std::runtime_error("Expected expression in filter block"); + + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space, std::move(filter))); + } else if (keyword == "endfilter") { + auto post_space = parseBlockClose(); + tokens.push_back(std::make_unique( + location, pre_space, post_space)); + } else { + throw std::runtime_error("Unexpected block: " + keyword); + } + } else if (!(text = consumeToken(text_regex, SpaceHandling::Keep)) + .empty()) { + tokens.push_back(std::make_unique( + location, SpaceHandling::Keep, SpaceHandling::Keep, text)); + } else { + if (it != end) + throw std::runtime_error("Unexpected character"); + } + } + return tokens; + } catch (const std::exception& e) { + throw std::runtime_error( + e.what() + + error_location_suffix(*template_str, std::distance(start, it))); + } + } + + std::shared_ptr parseTemplate( + const TemplateTokenIterator& begin, TemplateTokenIterator& it, + const TemplateTokenIterator& end, bool fully = false) const { + std::vector> children; + while (it != end) { + const auto start = it; + const auto& token = *(it++); + if (auto if_token = dynamic_cast(token.get())) { + std::vector, + std::shared_ptr>> + cascade; + cascade.emplace_back(std::move(if_token->condition), + parseTemplate(begin, it, end)); + + while (it != end && (*it)->type == TemplateToken::Type::Elif) { + auto elif_token = dynamic_cast((*(it++)).get()); + cascade.emplace_back(std::move(elif_token->condition), + parseTemplate(begin, it, end)); + } + + if (it != end && (*it)->type == TemplateToken::Type::Else) { + cascade.emplace_back(nullptr, parseTemplate(begin, ++it, end)); + } + if (it == end || (*(it++))->type != TemplateToken::Type::EndIf) { + throw unterminated(**start); + } + children.emplace_back( + std::make_shared(token->location, std::move(cascade))); + } else if (auto for_token = + dynamic_cast(token.get())) { + auto body = parseTemplate(begin, it, end); + auto else_body = 
std::shared_ptr(); + if (it != end && (*it)->type == TemplateToken::Type::Else) { + else_body = parseTemplate(begin, ++it, end); + } + if (it == end || (*(it++))->type != TemplateToken::Type::EndFor) { + throw unterminated(**start); + } + children.emplace_back(std::make_shared( + token->location, std::move(for_token->var_names), + std::move(for_token->iterable), std::move(for_token->condition), + std::move(body), for_token->recursive, std::move(else_body))); + } else if (auto text_token = + dynamic_cast(token.get())) { + SpaceHandling pre_space = + (it - 1) != begin ? (*(it - 2))->post_space : SpaceHandling::Keep; + SpaceHandling post_space = + it != end ? (*it)->pre_space : SpaceHandling::Keep; + + auto text = text_token->text; + if (pre_space == SpaceHandling::Strip) { + static std::regex leading_space_regex(R"(^(\s|\r|\n)+)"); + text = std::regex_replace(text, leading_space_regex, ""); + } else if (options.trim_blocks && (it - 1) != begin && + !dynamic_cast((*(it - 2)).get())) { + static std::regex leading_line(R"(^[ \t]*\r?\n)"); + text = std::regex_replace(text, leading_line, ""); + } + if (post_space == SpaceHandling::Strip) { + static std::regex trailing_space_regex(R"((\s|\r|\n)+$)"); + text = std::regex_replace(text, trailing_space_regex, ""); + } else if (options.lstrip_blocks && it != end) { + static std::regex trailing_last_line_space_regex(R"((\r?\n)[ \t]*$)"); + text = std::regex_replace(text, trailing_last_line_space_regex, "$1"); + } + + if (it == end && !options.keep_trailing_newline) { + static std::regex r(R"(\r?\n$)"); + text = std::regex_replace(text, r, ""); // Strip one trailing newline + } + children.emplace_back( + std::make_shared(token->location, text)); + } else if (auto expr_token = + dynamic_cast(token.get())) { + children.emplace_back(std::make_shared( + token->location, std::move(expr_token->expr))); + } else if (auto set_token = + dynamic_cast(token.get())) { + if (set_token->value) { + children.emplace_back(std::make_shared( + 
token->location, set_token->ns, set_token->var_names, + std::move(set_token->value))); + } else { + auto value_template = parseTemplate(begin, it, end); + if (it == end || (*(it++))->type != TemplateToken::Type::EndSet) { + throw unterminated(**start); + } + if (!set_token->ns.empty()) + throw std::runtime_error( + "Namespaced set not supported in set with template value"); + if (set_token->var_names.size() != 1) + throw std::runtime_error( + "Structural assignment not supported in set with template " + "value"); + auto& name = set_token->var_names[0]; + children.emplace_back(std::make_shared( + token->location, name, std::move(value_template))); + } + } else if (auto macro_token = + dynamic_cast(token.get())) { + auto body = parseTemplate(begin, it, end); + if (it == end || (*(it++))->type != TemplateToken::Type::EndMacro) { + throw unterminated(**start); + } + children.emplace_back(std::make_shared( + token->location, std::move(macro_token->name), + std::move(macro_token->params), std::move(body))); + } else if (auto filter_token = + dynamic_cast(token.get())) { + auto body = parseTemplate(begin, it, end); + if (it == end || (*(it++))->type != TemplateToken::Type::EndFilter) { + throw unterminated(**start); + } + children.emplace_back(std::make_shared( + token->location, std::move(filter_token->filter), std::move(body))); + } else if (dynamic_cast(token.get())) { + // Ignore comments + } else if (dynamic_cast(token.get()) || + dynamic_cast(token.get()) || + dynamic_cast(token.get()) || + dynamic_cast(token.get()) || + dynamic_cast(token.get()) || + dynamic_cast(token.get()) || + dynamic_cast(token.get())) { + it--; // unconsume the token + break; // exit the loop + } else { + throw unexpected(**(it - 1)); + } + } + if (fully && it != end) { + throw unexpected(**it); + } + if (children.empty()) { + return std::make_shared(Location{template_str, 0}, + std::string()); + } else if (children.size() == 1) { + return std::move(children[0]); + } else { + return 
std::make_shared(children[0]->location(), + std::move(children)); + } + } + + public: + static std::shared_ptr parse(const std::string& template_str, + const Options& options) { + Parser parser(std::make_shared(template_str), options); + auto tokens = parser.tokenize(); + TemplateTokenIterator begin = tokens.begin(); + auto it = begin; + TemplateTokenIterator end = tokens.end(); + return parser.parseTemplate(begin, it, end, /* full= */ true); + } +}; + +static Value simple_function( + const std::string& fn_name, const std::vector& params, + const std::function&, Value& args)>& + fn) { + std::map named_positions; + for (size_t i = 0, n = params.size(); i < n; i++) + named_positions[params[i]] = i; + + return Value::callable([=](const std::shared_ptr& context, + ArgumentsValue& args) -> Value { + auto args_obj = Value::object(); + std::vector provided_args(params.size()); + for (size_t i = 0, n = args.args.size(); i < n; i++) { + auto& arg = args.args[i]; + if (i < params.size()) { + args_obj.set(params[i], arg); + provided_args[i] = true; + } else { + throw std::runtime_error("Too many positional params for " + fn_name); + } + } + for (auto& [name, value] : args.kwargs) { + auto named_pos_it = named_positions.find(name); + if (named_pos_it == named_positions.end()) { + throw std::runtime_error("Unknown argument " + name + " for function " + + fn_name); + } + provided_args[named_pos_it->second] = true; + args_obj.set(name, value); + } + return fn(context, args_obj); + }); +} + +inline std::shared_ptr Context::builtins() { + auto globals = Value::object(); + + globals.set( + "raise_exception", + simple_function( + "raise_exception", {"message"}, + [](const std::shared_ptr&, Value& args) -> Value { + throw std::runtime_error(args.at("message").get()); + })); + globals.set("tojson", + simple_function("tojson", {"value", "indent"}, + [](const std::shared_ptr&, Value& args) { + return Value(args.at("value").dump( + args.get("indent", -1), + /* tojson= */ true)); + })); + 
globals.set("items", + simple_function( + "items", {"object"}, + [](const std::shared_ptr&, Value& args) { + auto items = Value::array(); + if (args.contains("object")) { + auto& obj = args.at("object"); + if (obj.is_string()) { + auto json_obj = json::parse(obj.get()); + for (const auto& kv : json_obj.items()) { + items.push_back(Value::array({kv.key(), kv.value()})); + } + } else if (!obj.is_null()) { + for (auto& key : obj.keys()) { + items.push_back(Value::array({key, obj.at(key)})); + } + } + } + return items; + })); + globals.set("last", simple_function( + "last", {"items"}, + [](const std::shared_ptr&, Value& args) { + auto items = args.at("items"); + if (!items.is_array()) + throw std::runtime_error("object is not a list"); + if (items.size() == 0) + return Value(); + return items.at(items.size() - 1); + })); + globals.set("trim", simple_function( + "trim", {"text"}, + [](const std::shared_ptr&, Value& args) { + auto& text = args.at("text"); + return text.is_null() + ? text + : Value(strip(text.get())); + })); + globals.set("lower", simple_function( + "lower", {"text"}, + [](const std::shared_ptr&, Value& args) { + auto text = args.at("text"); + if (text.is_null()) + return text; + std::string res; + auto str = text.get(); + std::transform(str.begin(), str.end(), + std::back_inserter(res), ::tolower); + return Value(res); + })); + globals.set("default", Value::callable([=](const std::shared_ptr&, + ArgumentsValue& args) { + args.expectArgs("default", {2, 3}, {0, 1}); + auto& value = args.args[0]; + auto& default_value = args.args[1]; + bool boolean = false; + if (args.args.size() == 3) { + boolean = args.args[2].get(); + } else { + Value bv = args.get_named("boolean"); + if (!bv.is_null()) { + boolean = bv.get(); + } + } + return boolean ? (value.to_bool() ? value : default_value) + : value.is_null() ? 
default_value + : value; + })); + auto escape = simple_function( + "escape", {"text"}, [](const std::shared_ptr&, Value& args) { + return Value(html_escape(args.at("text").get())); + }); + globals.set("e", escape); + globals.set("escape", escape); + globals.set( + "joiner", + simple_function( + "joiner", {"sep"}, [](const std::shared_ptr&, Value& args) { + auto sep = args.get("sep", ""); + auto first = std::make_shared(true); + return simple_function("", {}, + [sep, first](const std::shared_ptr&, + const Value&) -> Value { + if (*first) { + *first = false; + return ""; + } + return sep; + }); + return Value(html_escape(args.at("text").get())); + })); + globals.set("count", + simple_function("count", {"items"}, + [](const std::shared_ptr&, Value& args) { + return Value((int64_t)args.at("items").size()); + })); + globals.set( + "dictsort", + simple_function("dictsort", {"value"}, + [](const std::shared_ptr&, Value& args) { + if (args.size() != 1) + throw std::runtime_error( + "dictsort expects exactly 1 argument (TODO: fix " + "implementation)"); + auto& value = args.at("value"); + auto keys = value.keys(); + std::sort(keys.begin(), keys.end()); + auto res = Value::array(); + for (auto& key : keys) { + res.push_back(Value::array({key, value.at(key)})); + } + return res; + })); + globals.set( + "join", + simple_function( + "join", {"items", "d"}, + [](const std::shared_ptr&, Value& args) { + auto do_join = [](Value& items, const std::string& sep) { + std::ostringstream oss; + auto first = true; + for (size_t i = 0, n = items.size(); i < n; ++i) { + if (first) + first = false; + else + oss << sep; + oss << items.at(i).to_str(); + } + return Value(oss.str()); + }; + auto sep = args.get("d", ""); + if (args.contains("items")) { + auto& items = args.at("items"); + return do_join(items, sep); + } else { + return simple_function( + "", {"items"}, + [sep, do_join](const std::shared_ptr&, Value& args) { + auto& items = args.at("items"); + if (!items.to_bool() || 
!items.is_array()) + throw std::runtime_error( + "join expects an array for items, got: " + + items.dump()); + return do_join(items, sep); + }); + } + })); + globals.set("namespace", Value::callable([=](const std::shared_ptr&, + ArgumentsValue& args) { + auto ns = Value::object(); + args.expectArgs("namespace", {0, 0}, + {0, std::numeric_limits::max()}); + for (auto& [name, value] : args.kwargs) { + ns.set(name, value); + } + return ns; + })); + auto equalto = simple_function( + "equalto", {"expected", "actual"}, + [](const std::shared_ptr&, Value& args) -> Value { + return args.at("actual") == args.at("expected"); + }); + globals.set("equalto", equalto); + globals.set("==", equalto); + globals.set("length", simple_function("length", {"items"}, + [](const std::shared_ptr&, + Value& args) -> Value { + auto& items = args.at("items"); + return (int64_t)items.size(); + })); + globals.set("safe", simple_function("safe", {"value"}, + [](const std::shared_ptr&, + Value& args) -> Value { + return args.at("value"); + })); + globals.set("string", simple_function("string", {"value"}, + [](const std::shared_ptr&, + Value& args) -> Value { + return args.at("value").to_str(); + })); + globals.set("int", simple_function("int", {"value"}, + [](const std::shared_ptr&, + Value& args) -> Value { + return args.at("value").to_int(); + })); + globals.set("list", + simple_function( + "list", {"items"}, + [](const std::shared_ptr&, Value& args) -> Value { + auto& items = args.at("items"); + if (!items.is_array()) + throw std::runtime_error("object is not iterable"); + return items; + })); + globals.set("unique", + simple_function( + "unique", {"items"}, + [](const std::shared_ptr&, Value& args) -> Value { + auto& items = args.at("items"); + if (!items.is_array()) + throw std::runtime_error("object is not iterable"); + std::unordered_set seen; + auto result = Value::array(); + for (size_t i = 0, n = items.size(); i < n; i++) { + auto pair = seen.insert(items.at(i)); + if (pair.second) { + 
result.push_back(items.at(i)); + } + } + return result; + })); + auto make_filter = [](const Value& filter, Value& extra_args) -> Value { + return simple_function( + "", {"value"}, + [=](const std::shared_ptr& context, Value& args) { + auto& value = args.at("value"); + ArgumentsValue actual_args; + actual_args.args.emplace_back(value); + for (size_t i = 0, n = extra_args.size(); i < n; i++) { + actual_args.args.emplace_back(extra_args.at(i)); + } + return filter.call(context, actual_args); + }); + }; + // https://jinja.palletsprojects.com/en/3.0.x/templates/#jinja-filters.reject + globals.set( + "reject", Value::callable([=](const std::shared_ptr& context, + ArgumentsValue& args) { + args.expectArgs("reject", {2, std::numeric_limits::max()}, + {0, 0}); + auto& items = args.args[0]; + auto filter_fn = context->get(args.args[1]); + if (filter_fn.is_null()) + throw std::runtime_error("Undefined filter: " + args.args[1].dump()); + + auto filter_args = Value::array(); + for (size_t i = 2, n = args.args.size(); i < n; i++) { + filter_args.push_back(args.args[i]); + } + auto filter = make_filter(filter_fn, filter_args); + + auto res = Value::array(); + for (size_t i = 0, n = items.size(); i < n; i++) { + auto& item = items.at(i); + ArgumentsValue filter_args; + filter_args.args.emplace_back(item); + auto pred_res = filter.call(context, filter_args); + if (!pred_res.to_bool()) { + res.push_back(item); + } + } + return res; + })); + globals.set( + "map", Value::callable([=](const std::shared_ptr& context, + ArgumentsValue& args) { + auto res = Value::array(); + if (args.args.size() == 1 && + ((args.has_named("attribute") && args.kwargs.size() == 1) || + (args.has_named("default") && args.kwargs.size() == 2))) { + auto& items = args.args[0]; + auto attr_name = args.get_named("attribute"); + auto default_value = args.get_named("default"); + for (size_t i = 0, n = items.size(); i < n; i++) { + auto& item = items.at(i); + auto attr = item.get(attr_name); + 
res.push_back(attr.is_null() ? default_value : attr); + } + } else if (args.kwargs.empty() && args.args.size() >= 2) { + auto fn = context->get(args.args[1]); + if (fn.is_null()) + throw std::runtime_error("Undefined filter: " + + args.args[1].dump()); + ArgumentsValue filter_args{{Value()}, {}}; + for (size_t i = 2, n = args.args.size(); i < n; i++) { + filter_args.args.emplace_back(args.args[i]); + } + for (size_t i = 0, n = args.args[0].size(); i < n; i++) { + auto& item = args.args[0].at(i); + filter_args.args[0] = item; + res.push_back(fn.call(context, filter_args)); + } + } else { + throw std::runtime_error("Invalid or unsupported arguments for map"); + } + return res; + })); + globals.set("indent", + simple_function("indent", {"text", "indent", "first"}, + [](const std::shared_ptr&, Value& args) { + auto text = args.at("text").get(); + auto first = args.get("first", false); + std::string out; + std::string indent( + args.get("indent", 0), ' '); + std::istringstream iss(text); + std::string line; + auto is_first = true; + while (std::getline(iss, line, '\n')) { + auto needs_indent = !is_first || first; + if (is_first) + is_first = false; + else + out += "\n"; + if (needs_indent) + out += indent; + out += line; + } + if (!text.empty() && text.back() == '\n') + out += "\n"; + return out; + })); + globals.set( + "selectattr", Value::callable([=](const std::shared_ptr& context, + ArgumentsValue& args) { + args.expectArgs("selectattr", {2, std::numeric_limits::max()}, + {0, 0}); + auto& items = args.args[0]; + if (items.is_null()) + return Value::array(); + auto attr_name = args.args[1].get(); + + bool has_test = false; + Value test_fn; + ArgumentsValue test_args{{Value()}, {}}; + if (args.args.size() >= 3) { + has_test = true; + test_fn = context->get(args.args[2]); + if (test_fn.is_null()) + throw std::runtime_error("Undefined test: " + args.args[2].dump()); + for (size_t i = 3, n = args.args.size(); i < n; i++) { + test_args.args.emplace_back(args.args[i]); + } 
+ test_args.kwargs = args.kwargs; + } + + auto res = Value::array(); + for (size_t i = 0, n = items.size(); i < n; i++) { + auto& item = items.at(i); + auto attr = item.get(attr_name); + if (has_test) { + test_args.args[0] = attr; + if (test_fn.call(context, test_args).to_bool()) { + res.push_back(item); + } + } else { + res.push_back(attr); + } + } + return res; + })); + globals.set("range", Value::callable([=](const std::shared_ptr&, + ArgumentsValue& args) { + std::vector startEndStep(3); + std::vector param_set(3); + if (args.args.size() == 1) { + startEndStep[1] = args.args[0].get(); + param_set[1] = true; + } else { + for (size_t i = 0; i < args.args.size(); i++) { + auto& arg = args.args[i]; + auto v = arg.get(); + startEndStep[i] = v; + param_set[i] = true; + } + } + for (auto& [name, value] : args.kwargs) { + size_t i; + if (name == "start") + i = 0; + else if (name == "end") + i = 1; + else if (name == "step") + i = 2; + else + throw std::runtime_error("Unknown argument " + name + + " for function range"); + + if (param_set[i]) { + throw std::runtime_error("Duplicate argument " + name + + " for function range"); + } + startEndStep[i] = value.get(); + param_set[i] = true; + } + if (!param_set[1]) { + throw std::runtime_error( + "Missing required argument 'end' for function range"); + } + int64_t start = param_set[0] ? startEndStep[0] : 0; + int64_t end = startEndStep[1]; + int64_t step = param_set[2] ? startEndStep[2] : 1; + + auto res = Value::array(); + if (step > 0) { + for (int64_t i = start; i < end; i += step) { + res.push_back(Value(i)); + } + } else { + for (int64_t i = start; i > end; i += step) { + res.push_back(Value(i)); + } + } + return res; + })); + + return std::make_shared(std::move(globals)); +} + +inline std::shared_ptr Context::make( + Value&& values, const std::shared_ptr& parent) { + return std::make_shared( + values.is_null() ? Value::object() : std::move(values), parent); +} + +} // namespace minja