From a973045345bd40e38ba00953b914e5f967ea1684 Mon Sep 17 00:00:00 2001 From: Luca Gretscher Date: Thu, 11 Apr 2024 15:38:33 +0200 Subject: [PATCH] [Interpreter] Fix strings with length exceeding `uint8_t`. Prior to this commit, the string length was encoded into the opcode of the `StackMachine` instructions `St_Tup_s`, `Ld_s`, and `St_s`. However, this encoding was done using the opcode type which is stored in a `uint8_t` internally. Thus, strings exceeding the length of 255 characters let the encoded length overflow. To circumvent this issue, we now store the string length in the stack machine's context which is able to hold values up to `uint64_t`. Accessing the length is then done via the stack itself. --- src/backend/StackMachine.cpp | 18 +++++++++++------ src/tables/Opcodes.tbl | 28 +++++++++++++-------------- unittest/backend/StackMachineTest.cpp | 9 ++++++--- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/src/backend/StackMachine.cpp b/src/backend/StackMachine.cpp index 2ef72b5a..c6939db7 100644 --- a/src/backend/StackMachine.cpp +++ b/src/backend/StackMachine.cpp @@ -644,7 +644,8 @@ void StackMachine::emit_Ld(const Type *ty) } } } else if (auto cs = cast(ty)) { - emit_Ld_s(cs->length); + add_and_emit_load(cs->length); + emit_Ld_s(); } else if (auto d = cast(ty)) { emit_Ld_i32(); } else if (auto dt = cast(ty)) { @@ -681,7 +682,8 @@ void StackMachine::emit_St(const Type *ty) } } } else if (auto cs = cast(ty)) { - emit_St_s(cs->length + cs->is_varying); + add_and_emit_load(cs->length + cs->is_varying); + emit_St_s(); } else if (auto d = cast(ty)) { emit_St_i32(); } else if (auto dt = cast(ty)) { @@ -696,7 +698,8 @@ void StackMachine::emit_St_Tup(std::size_t tuple_id, std::size_t index, const Ty if (ty->is_none()) { emit_St_Tup_Null(tuple_id, index); } else if (auto cs = cast(ty)) { - emit_St_Tup_s(tuple_id, index, cs->length); + add_and_emit_load(cs->length); + emit_St_Tup_s(tuple_id, index); } else { std::ostringstream oss; oss << "St_Tup" << tystr(as(ty)); @@ -986,7 +989,8 @@ NEXT; St_Tup_s: { std::size_t tuple_id = std::size_t(*op_++); std::size_t index = std::size_t(*op_++); - std::size_t length = std::size_t(*op_++); + std::size_t length = TOP.as_i(); + POP(); auto &t = *tuples[tuple_id]; if (TOP_IS_NULL) t.null(index); @@ -1132,7 +1136,8 @@ Ld_d: LOAD(double, double); Ld_s: { M_insist(top_ >= 1); - uint64_t length = uint64_t(*op_++); + uint64_t length = TOP.as_i(); + POP(); void *ptr = TOP.as_p(); strncpy(reinterpret_cast(p_mem), reinterpret_cast(ptr), length); p_mem[length] = 0; // always add terminating NUL byte, no matter whether this is a CHAR or VARCHAR @@ -1173,7 +1178,8 @@ St_d: STORE(double, double); St_s: { M_insist(top_ >= 2); - uint64_t length = uint64_t(*op_++); + uint64_t length = TOP.as_i(); + POP(); if (TOP_IS_NULL) { POP(); POP(); NEXT; } char *str = reinterpret_cast(TOP.as_p()); diff --git a/src/tables/Opcodes.tbl b/src/tables/Opcodes.tbl index 7d417234..27db9a94 100644 --- a/src/tables/Opcodes.tbl +++ b/src/tables/Opcodes.tbl @@ -54,15 +54,15 @@ M_OPCODE(Ld_Tup, +1, tuple_id, index) /** Store NULL into a tuple. */ M_OPCODE(St_Tup_Null, 0, tuple_id, index) /** Store the current top-of-stack into a tuple. */ -M_OPCODE(St_Tup_i, 0, tuple_id, index) +M_OPCODE(St_Tup_i, 0, tuple_id, index) /** Store the current top-of-stack into a tuple. */ -M_OPCODE(St_Tup_f, 0, tuple_id, index) +M_OPCODE(St_Tup_f, 0, tuple_id, index) /** Store the current top-of-stack into a tuple. */ -M_OPCODE(St_Tup_d, 0, tuple_id, index) +M_OPCODE(St_Tup_d, 0, tuple_id, index) /** Store the current top-of-stack into a tuple. */ -M_OPCODE(St_Tup_s, 0, tuple_id, index, length) +M_OPCODE(St_Tup_s, -1, tuple_id, index) /** Store the current top-of-stack into a tuple. */ -M_OPCODE(St_Tup_b, 0, tuple_id, index) +M_OPCODE(St_Tup_b, 0, tuple_id, index) /*====================================================================================================================== @@ -88,14 +88,14 @@ M_OPCODE(Print_datetime, 0, index) /*----- Load from memory ---------------------------------------------------------------------------------------------*/ -M_OPCODE(Ld_i8, 0) -M_OPCODE(Ld_i16, 0) -M_OPCODE(Ld_i32, 0) -M_OPCODE(Ld_i64, 0) -M_OPCODE(Ld_f, 0) -M_OPCODE(Ld_d, 0) -M_OPCODE(Ld_s, 0, length) -M_OPCODE(Ld_b, 0, mask) +M_OPCODE(Ld_i8, 0) +M_OPCODE(Ld_i16, 0) +M_OPCODE(Ld_i32, 0) +M_OPCODE(Ld_i64, 0) +M_OPCODE(Ld_f, 0) +M_OPCODE(Ld_d, 0) +M_OPCODE(Ld_s, -1) +M_OPCODE(Ld_b, 0, mask) /*----- Store to memory ----------------------------------------------------------------------------------------------*/ @@ -105,7 +105,7 @@ M_OPCODE(St_i32, -2) M_OPCODE(St_i64, -2) M_OPCODE(St_f, -2) M_OPCODE(St_d, -2) -M_OPCODE(St_s, -2, length) +M_OPCODE(St_s, -3) M_OPCODE(St_b, -2, bit_offset) diff --git a/unittest/backend/StackMachineTest.cpp b/unittest/backend/StackMachineTest.cpp index 1de6453f..12938693 100644 --- a/unittest/backend/StackMachineTest.cpp +++ b/unittest/backend/StackMachineTest.cpp @@ -950,7 +950,8 @@ TEST_CASE("StackMachine/TupleAccess/St_Tup_s", "[core][backend]") Tuple *args[] = { &res }; SM.emit_Push_Null(); - SM.emit_St_Tup_s(0, 0, 4); + SM.add_and_emit_load(4); + SM.emit_St_Tup_s(0, 0); SM(args); REQUIRE(res.is_null(0)); } @@ -1323,7 +1324,8 @@ TEST_CASE("StackMachine/StorageAccess/Ld_s", "[core][backend]") std::string str = "sql"; SM.add_and_emit_load(str.c_str()); - SM.emit_Ld_s(3); + SM.add_and_emit_load(3); + SM.emit_Ld_s(); SM.emit_Print_s(idx); SM(nullptr); CHECK(oss.str() == "\"sql\""); @@ -1422,7 +1424,8 @@ TEST_CASE("StackMachine/StorageAccess/St_s", "[core][backend]") std::string str = "abcd"; SM.add_and_emit_load(str.c_str()); SM.add_and_emit_load("test"); - SM.emit_St_s(4); + SM.add_and_emit_load(4); + SM.emit_St_s(); SM(nullptr); CHECK(str == "test"); }