Skip to content

Commit

Permalink
Fixes findings from review
Browse files Browse the repository at this point in the history
  • Loading branch information
tomuben committed Oct 7, 2024
1 parent a9addbb commit 953b925
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 74 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,19 +29,19 @@ using options_type = std::vector<Option>;

namespace ParserInternals {

auto empty_options()
const auto empty_options()
{
options_type ob;
return ob;
}

auto to_options(Option&& e)
const auto to_options(Option&& e)
{
options_type ob{e};
return ob;
}

auto to_option(std::string_view key, std::string value, source_point sp_begin, source_point sp_end)
const auto to_option(std::string_view key, std::string value, source_point sp_begin, source_point sp_end)
{
return Option{std::string(key), value, sp_begin, sp_end};
}
Expand All @@ -56,7 +56,7 @@ auto&& add_option(Option&& e, options_type&& ob)

constexpr char alpha_numeric_pattern[] = R"_([0-9a-zA-Z_]+)_";
constexpr char option_char_pattern[] = R"_([^;])_";
constexpr char whitespaces_pattern[] = R"_([ \x09 \x0c \x0b]+)_";
constexpr char whitespaces_pattern[] = R"_([ \x09\x0c\x0b]+)_";


constexpr char_term start_option_tag('%');
Expand Down Expand Up @@ -156,18 +156,21 @@ void parse(const std::string& code, options_type& result, std::function<void(con
} //namespace ParserInternals

void parseOptions(const std::string& code, options_map_t & result, std::function<void(const char*)> throwException) {
std::stringstream ss(code);
std::string line;
size_t current_index(0);
while(std::getline(ss, line, '\n')) {
size_t current_pos = 0;

do {

const size_t new_pos = code.find_first_of("\r\n", current_pos);
line = code.substr(current_pos, new_pos);
if (!line.empty() && !std::all_of(line.begin(),line.end(), [](const char c) {return std::isspace(c);})) {
options_type parser_result;
ParserInternals::parse(line, parser_result, throwException);
for (const auto & option: parser_result)
{
ScriptOption entry = {
.value = option.value,
.idx_in_source = current_index + option.start.column - 1,
.idx_in_source = current_pos + option.start.column - 1,
.size = option.end.column - option.start.column + 1
};
auto it_in_result = result.find(option.key);
Expand All @@ -183,8 +186,11 @@ void parseOptions(const std::string& code, options_map_t & result, std::function
}
}
}
current_index += line.size() + 1;
}
if (new_pos == std::string::npos) {
break;
}
current_pos = new_pos + 1;
} while(true);
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ using options_t = std::vector<ScriptOption>;
using options_map_t = std::map<std::string, options_t>;

/*!
* \brief extractOptionLine Extracts syntactically valid option lines of form %<option> <values> [<values>] from UDF scripts.
* \brief parseOptions Extracts syntactically valid option lines of form %<option> <values> [<values>] from UDF scripts.
*
* \param code Reference to string where the script code is stored.
* \param result Result of all found options.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
#include "base/script_options_parser/ctpg/script_option_lines_ctpg.h"
#include <gtest/gtest.h>
#include <gmock/gmock.h>
#include <string>
#include <exception>

const std::string lineEnd = ";";


class TestException : public std::runtime_error {
using std::runtime_error::runtime_error;
Expand All @@ -13,29 +14,40 @@ void throwException(const char* ex) {
throw TestException(std::string(ex));
}


using namespace ExecutionGraph::OptionsLineParser::CTPG;

using ::testing::MatchesRegex;


inline ScriptOption buildOption(const char* value, size_t idx, size_t len) {
ScriptOption option = { .value = value, .idx_in_source = idx, .size = len};
return option;
}

class ScriptOptionLinesWhitespaceTest : public ::testing::TestWithParam<std::tuple<std::string, std::string, std::string, std::string, std::string, std::string>> {};
class ScriptOptionLinesWhitespaceTest : public ::testing::TestWithParam<std::tuple<std::string, std::string, std::string, std::string, std::string, std::string, std::string>> {};

TEST_P(ScriptOptionLinesWhitespaceTest, WhitespaceExtractOptionLineTest) {
const std::string prefix = std::get<0>(GetParam());
const std::string suffix = std::get<1>(GetParam());
const std::string option = std::get<2>(GetParam());
const std::string delimeter = std::get<3>(GetParam());
const std::string value = std::get<4>(GetParam());
const std::string payload = std::get<5>(GetParam());
std::string code = prefix + "%" + option + delimeter + value + lineEnd + suffix + "\n" + payload;
const std::string new_line = std::get<2>(GetParam());
const std::string option = std::get<3>(GetParam());
const std::string delimeter = std::get<4>(GetParam());
const std::string value = std::get<5>(GetParam());
const std::string payload = std::get<6>(GetParam());
const std::string code = prefix + '%' + option + delimeter + value + ';' + suffix + new_line + payload;
options_map_t result;
parseOptions(code, result, throwException);
ASSERT_EQ(result.size(), 1);
const auto option_result = result.find(option);
ASSERT_TRUE(option_result != result.end());
ASSERT_NE(option_result, result.end());
ASSERT_EQ(option_result->second.size(), 1);
EXPECT_EQ(option_result->second[0].value, value);
}

std::vector<std::string> white_space_strings = {"", " ", "\t", "\f", "\v", "\n", " \t", "\t ", "\t\f", "\f\t", "\f ", " \f", "\t\v", "\v\t", "\v ", " \v", "\f\v", "\v\f", " \t", " \t "};
std::vector<std::string> prefixes = {"", " ", "\t", "\f", "\v", "\n", "\r\n", " \t", "\t ", "\t\f", "\f\t", "\f ", " \f", "\t\v", "\v\t", "\v ", " \v", "\f\v", "\v\f", " \t", " \t "}; //"" for case if there is prefix
std::vector<std::string> suffixes = {"", " ", "\t", "\f", "\v"}; //"" for case if there is suffix
std::vector<std::string> new_lines = {"", "\n", "\r", "\r\n"}; //"" for case if there is no newline
std::vector<std::string> delimeters = {" ", "\t", "\f", "\v", " \t", "\t ", "\t\f", "\f\t", "\f ", " \f", "\t\v", "\v\t", "\v ", " \v", "\f\v", "\v\f", " \t", " \t "};
std::vector<std::string> keywords = {"import", "jvmoption", "scriptclass", "jar", "env"};
std::vector<std::string> values = {"something", "com.mycompany.MyScriptClass", "LD_LIBRARY_PATH=/nvdriver", "-Xms128m -Xmx1024m -Xss512k", "/buckets/bfsdefault/default/my_code.jar"};
Expand All @@ -44,8 +56,9 @@ std::vector<std::string> payloads = {"anything", "\n\ndef my_func:\n\tpass", "cl
INSTANTIATE_TEST_SUITE_P(
ScriptOptionLines,
ScriptOptionLinesWhitespaceTest,
::testing::Combine(::testing::ValuesIn(white_space_strings),
::testing::ValuesIn(white_space_strings),
::testing::Combine(::testing::ValuesIn(prefixes),
::testing::ValuesIn(suffixes),
::testing::ValuesIn(new_lines),
::testing::ValuesIn(keywords),
::testing::ValuesIn(delimeters),
::testing::ValuesIn(values),
Expand All @@ -54,80 +67,89 @@ INSTANTIATE_TEST_SUITE_P(
);

TEST(ScriptOptionLinesTest, ignore_anything_other_than_whitepsace) {
std::string code =
const std::string code =
"abc %option myoption;\n"
"\nmycode";
options_map_t result;
parseOptions(code, result, throwException);
EXPECT_TRUE(result.empty());
}

TEST(ScriptOptionLinesTest, need_line_end_character) {
std::string code =
TEST(ScriptOptionLinesTest, need_option_termination_character) {
const std::string code =
"%option myoption\n"
"\nmycode";
options_map_t result;
EXPECT_THROW({
parseOptions(code, result, throwException);
try
{
parseOptions(code, result, throwException);
}
catch( const TestException& e )
{
// and this tests that it has the correct message
EXPECT_STREQ( e.what(), "Error parsing script options: [1:17] PARSE: Syntax error: Unexpected '<eof>'\n");
throw;
}
}, TestException );
}

TEST(ScriptOptionLinesTest, finds_the_two_options_same_key) {
std::string code =
"%option myoption; %option mysecondoption;\n"
const std::string code =
"%some_option myoption; %some_option mysecondoption;\n"
"\nmycode";
options_map_t result;
parseOptions(code, result, throwException);
ASSERT_EQ(result.size(), 1);
const auto option_result = result.find("option");
const auto option_result = result.find("some_option");

ASSERT_TRUE(option_result != result.end());
ASSERT_NE(option_result, result.end());
ASSERT_EQ(option_result->second.size(), 2);
ScriptOption expected_option_one = { .value = "myoption", .idx_in_source = 0, .size = 17};
ScriptOption expected_option_two = { .value = "mysecondoption", .idx_in_source = 18, .size = 23};
ASSERT_EQ(option_result->second[0], expected_option_one);
ASSERT_EQ(option_result->second[1], expected_option_two);
EXPECT_EQ(code.substr(expected_option_one.idx_in_source, expected_option_one.size), "%option myoption;");
EXPECT_EQ(code.substr(expected_option_two.idx_in_source, expected_option_two.size), "%option mysecondoption;");
ASSERT_EQ(option_result->second[0], buildOption("myoption", 0, 22));
ASSERT_EQ(option_result->second[1], buildOption("mysecondoption", 23, 28));
}

TEST(ScriptOptionLinesTest, finds_the_two_options_different_keys) {
std::string code =
"%option myoption; %otheroption mysecondoption;\n"
const std::string code =
"%some_option myoption; %otheroption mysecondoption;\n"
"\nmycode";
options_map_t result;
parseOptions(code, result, throwException);
ASSERT_EQ(result.size(), 2);
const auto option_result = result.find("option");
const auto option_result = result.find("some_option");

ASSERT_TRUE(option_result != result.end());
ASSERT_NE(option_result, result.end());
ASSERT_EQ(option_result->second.size(), 1);
ScriptOption expected_option_one = { .value = "myoption", .idx_in_source = 0, .size = 17};
ASSERT_EQ(option_result->second[0], expected_option_one);
EXPECT_EQ(code.substr(expected_option_one.idx_in_source, expected_option_one.size), "%option myoption;");
ASSERT_EQ(option_result->second[0], buildOption("myoption", 0, 22));

const auto otheroption_result = result.find("otheroption");

ASSERT_TRUE(otheroption_result != result.end());
ASSERT_NE(otheroption_result, result.end());
ASSERT_EQ(otheroption_result->second.size(), 1);
ScriptOption expected_option_two = { .value = "mysecondoption", .idx_in_source = 18, .size = 28};
ASSERT_EQ(otheroption_result->second[0], expected_option_two);
EXPECT_EQ(code.substr(expected_option_two.idx_in_source, expected_option_two.size), "%otheroption mysecondoption;");
ASSERT_EQ(otheroption_result->second[0], buildOption("mysecondoption", 23, 28));
}

class ScriptOptionLinesInvalidOptionTest : public ::testing::TestWithParam<std::string> {};


TEST_P(ScriptOptionLinesInvalidOptionTest, value_is_mandatory) {
const std::string invalid_option = GetParam();
std::string code = invalid_option + "\nsomething";
const std::string code = invalid_option + "\nsomething";
options_map_t result;
EXPECT_THROW({
parseOptions(code, result, throwException);
try
{
parseOptions(code, result, throwException);
}
catch( const TestException& e )
{
EXPECT_THAT( e.what(), MatchesRegex("^Error parsing script options.*PARSE: Syntax error: Unexpected.*$"));
throw;
}
}, TestException );
}

std::vector<std::string> invalid_options = {"%option ;", "%option \n", "\n%option\n;", "%option\nvalue;"};
const std::vector<std::string> invalid_options = {"%some_option ;", "%some_option \n", "\n%some_option\n;", "%some_option\nvalue;"};

INSTANTIATE_TEST_SUITE_P(
ScriptOptionLines,
Expand All @@ -136,8 +158,7 @@ INSTANTIATE_TEST_SUITE_P(
);



TEST(ScriptOptionLinesTest, test_all_in_one_line_does_second_option_does_not_work) {
TEST(ScriptOptionLinesTest, test_when_two_options_plus_code_in_same_line_then_options_parsed_successfully) {
/**
Verify the correct behavior of new parser for situation as described in https://github.com/exasol/script-languages-release/issues/652.
*/
Expand All @@ -147,24 +168,20 @@ TEST(ScriptOptionLinesTest, test_all_in_one_line_does_second_option_does_not_wor
ASSERT_EQ(result.size(), 2);

const auto jar_option_result = result.find("jar");
ASSERT_TRUE(jar_option_result != result.end());
ASSERT_NE(jar_option_result, result.end());
ASSERT_EQ(jar_option_result->second.size(), 1);
ScriptOption expected_jar_option = { .value = "/buckets/bucketfs1/jars/exajdbc.jar", .idx_in_source = 0, .size = 41};
ASSERT_EQ(jar_option_result->second[0], expected_jar_option);
EXPECT_EQ(code.substr(expected_jar_option.idx_in_source, expected_jar_option.size), "%jar /buckets/bucketfs1/jars/exajdbc.jar;");
ASSERT_EQ(jar_option_result->second[0], buildOption("/buckets/bucketfs1/jars/exajdbc.jar", 0, 41));

const auto jvm_option_result = result.find("jvmoption");
ASSERT_TRUE(jvm_option_result != result.end());
ASSERT_NE(jvm_option_result, result.end());
ASSERT_EQ(jvm_option_result->second.size(), 1);
ScriptOption expected_jvm_option = { .value = "-Xms4m", .idx_in_source = 42, .size = 18};
ASSERT_EQ(jvm_option_result->second[0], expected_jvm_option);
EXPECT_EQ(code.substr(expected_jvm_option.idx_in_source, expected_jvm_option.size), "%jvmoption -Xms4m;");
ASSERT_EQ(jvm_option_result->second[0], buildOption("-Xms4m", 42, 18));
}


TEST(ScriptOptionLinesTest, test_values_must_not_contain_spaces) {
TEST(ScriptOptionLinesTest, test_values_can_contain_spaces) {
/**
Verify the wrong behavior and assumptions as described in https://github.com/exasol/script-languages-release/issues/878
Verify assumptions as described in https://github.com/exasol/script-languages-release/issues/878
The parser is actually correct, but the client code incorrectly parses the result (see javacontainer_test.cc - quoted_jvm_option)
*/
const std::string code =
Expand All @@ -179,11 +196,9 @@ TEST(ScriptOptionLinesTest, test_values_must_not_contain_spaces) {
ASSERT_EQ(result.size(), 1);

const auto jvm_option_result = result.find("jvmoption");
ASSERT_TRUE(jvm_option_result != result.end());
ASSERT_NE(jvm_option_result, result.end());
ASSERT_EQ(jvm_option_result->second.size(), 1);
ScriptOption expected_jvm_option = { .value = "-Dhttp.agent=\"ABC DEF\"", .idx_in_source = 0, .size = 34};
ASSERT_EQ(jvm_option_result->second[0], expected_jvm_option);
EXPECT_EQ(code.substr(expected_jvm_option.idx_in_source, expected_jvm_option.size), "%jvmoption -Dhttp.agent=\"ABC DEF\";");
ASSERT_EQ(jvm_option_result->second[0], buildOption("-Dhttp.agent=\"ABC DEF\"", 0, 34));
}

TEST(ScriptOptionLinesTest, test_multiple_lines_with_code) {
Expand All @@ -199,16 +214,12 @@ TEST(ScriptOptionLinesTest, test_multiple_lines_with_code) {
ASSERT_EQ(result.size(), 2);

const auto jvm_option_result = result.find("jvmoption");
ASSERT_TRUE(jvm_option_result != result.end());
ASSERT_NE(jvm_option_result, result.end());
ASSERT_EQ(jvm_option_result->second.size(), 1);
ScriptOption expected_jvm_option = { .value = "-Dhttp.agent=\"ABC DEF\"", .idx_in_source = 0, .size = 34};
ASSERT_EQ(jvm_option_result->second[0], expected_jvm_option);
EXPECT_EQ(code.substr(expected_jvm_option.idx_in_source, expected_jvm_option.size), "%jvmoption -Dhttp.agent=\"ABC DEF\";");
ASSERT_EQ(jvm_option_result->second[0], buildOption("-Dhttp.agent=\"ABC DEF\"", 0, 34));

const auto jar_option_result = result.find("jar");
ASSERT_TRUE(jar_option_result != result.end());
ASSERT_NE(jar_option_result, result.end());
ASSERT_EQ(jar_option_result->second.size(), 1);
ScriptOption expected_jar_option = { .value = "/buckets/bucketfs1/jars/exajdbc.jar", .idx_in_source = 49, .size = 41};
ASSERT_EQ(jar_option_result->second[0], expected_jar_option);
EXPECT_EQ(code.substr(expected_jar_option.idx_in_source, expected_jar_option.size), "%jar /buckets/bucketfs1/jars/exajdbc.jar;");
ASSERT_EQ(jar_option_result->second[0], buildOption("/buckets/bucketfs1/jars/exajdbc.jar", 49, 41));
}

0 comments on commit 953b925

Please sign in to comment.