Skip to content

Commit

Permalink
add json key inverted index in stats to speed up json expr
Browse files Browse the repository at this point in the history
Signed-off-by: luzhang <[email protected]>
  • Loading branch information
luzhang committed Oct 23, 2024
1 parent 0516624 commit 5685814
Show file tree
Hide file tree
Showing 48 changed files with 2,160 additions and 198 deletions.
1 change: 1 addition & 0 deletions internal/core/src/common/Consts.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const char KMEANS_CLUSTER[] = "KMEANS";
// String constants shared across the core library.
// NOTE(review): the intent described below is inferred from the identifiers
// alone — confirm against the code that consumes each constant.
const char VEC_OPT_FIELDS[] = "opt_fields";
const char PAGE_RETAIN_ORDER[] = "page_retain_order";
const char TEXT_LOG_ROOT_PATH[] = "text_log";
// Presumably the root path component under which JSON key index data is
// stored, mirroring TEXT_LOG_ROOT_PATH — verify against callers.
const char JSON_KEY_INDEX_LOG_ROOT_PATH[] = "json_key_index_log";

const char DEFAULT_PLANNODE_ID[] = "0";
// NOTE(review): "DEAFULT" looks like a misspelling of "DEFAULT". The name is
// left unchanged here because renaming it would break existing users.
const char DEAFULT_QUERY_ID[] = "0";
Expand Down
74 changes: 74 additions & 0 deletions internal/core/src/common/Json.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,35 @@ ExtractSubJson(const std::string& json, const std::vector<std::string>& keys) {
return buffer.GetString();
}

// Splits a JSON pointer (RFC 6901) into its first reference token and the
// remainder of the pointer.
//
// @param json_pointer  Pointer such as "/a/b/c". Must be empty or begin with
//                      '/'; anything else trips the Assert below.
// @param escaped       When true, RFC 6901 escape sequences in the returned
//                      top-level key are decoded ("~1" -> "/", "~0" -> "~").
// @return {top_key, remaining_path}. remaining_path keeps its leading '/' and
//         is returned verbatim (it is never unescaped). An empty pointer
//         yields {"", ""}; a pointer with a single token yields an empty
//         remaining_path.
inline std::pair<std::string, std::string>
ParseTopLevelKey(const std::string& json_pointer, bool escaped = false) {
    if (json_pointer.empty()) {
        return {"", ""};
    }

    Assert(json_pointer[0] == '/');
    const size_t start = 1;
    const size_t end = json_pointer.find('/', start);

    std::string top_key = (end == std::string::npos)
                              ? json_pointer.substr(start)
                              : json_pointer.substr(start, end - start);

    if (escaped) {
        // Decode in one left-to-right pass so that:
        //  * every escape sequence is handled, not just the first one, and
        //  * "~01" decodes to "~1" rather than "/" — replacing "~0" first and
        //    "~1" second would decode the same characters twice, which
        //    RFC 6901 section 4 explicitly warns against.
        std::string decoded;
        decoded.reserve(top_key.size());
        for (size_t i = 0; i < top_key.size(); ++i) {
            const char ch = top_key[i];
            if (ch == '~' && i + 1 < top_key.size()) {
                const char next = top_key[i + 1];
                if (next == '0') {
                    decoded.push_back('~');
                    ++i;
                    continue;
                }
                if (next == '1') {
                    decoded.push_back('/');
                    ++i;
                    continue;
                }
            }
            // Ordinary character, or a '~' that does not start a valid escape:
            // copy it through unchanged.
            decoded.push_back(ch);
        }
        top_key.swap(decoded);
    }

    std::string remaining_path =
        (end == std::string::npos) ? "" : json_pointer.substr(end);

    return {top_key, remaining_path};
}

// Shorthand for simdjson's on-demand document type.
using document = simdjson::ondemand::document;
// Shorthand for simdjson's result wrapper (simdjson_result<T>) around a T.
template <typename T>
using value_result = simdjson::simdjson_result<T>;
Expand Down Expand Up @@ -146,6 +175,25 @@ class Json {
return doc;
}

// Runs the on-demand parser over a slice of the stored JSON text.
//
// @param offset  Byte offset into data_ where the slice begins.
// @param length  Number of bytes in the slice.
// @return The on-demand document for that slice. A parse error trips the
//         AssertInfo below instead of being returned to the caller.
value_result<document>
doc(uint16_t offset, uint16_t length) const {
    thread_local simdjson::ondemand::parser parser;

    const char* slice_begin = data_.data() + offset;
    // The capacity handed to simdjson is the slice length plus
    // SIMDJSON_PADDING. Per the original author's note this is always safe
    // because the underlying memory was allocated with that padding.
    const size_t slice_capacity = length + simdjson::SIMDJSON_PADDING;

    auto parsed = parser.iterate(slice_begin, length, slice_capacity);
    AssertInfo(parsed.error() == simdjson::SUCCESS,
               "failed to parse the json {} offset {}, length {}: {}, "
               "total_json:{}",
               std::string(slice_begin, length),
               offset,
               length,
               simdjson::error_message(parsed.error()),
               data_);
    return parsed;
}

value_result<simdjson::dom::element>
dom_doc() const {
thread_local simdjson::dom::parser parser;
Expand All @@ -160,6 +208,21 @@ class Json {
return doc;
}

// Parses a slice of the stored JSON text with simdjson's DOM parser.
//
// @param offset  Byte offset into data_ where the slice begins.
// @param length  Number of bytes of JSON text in the slice.
// @return The root DOM element of the parsed slice. A parse error trips the
//         AssertInfo below instead of being returned to the caller.
//
// NOTE(review): per the simdjson documentation, the element is backed by the
// parser's internal document, so it should be consumed before the next
// parse on this thread's parser — confirm callers respect that.
value_result<simdjson::dom::element>
dom_doc(uint16_t offset, uint16_t length) const {
    thread_local simdjson::dom::parser parser;

    // dom::parser::parse(buf, len) takes the length of the JSON text itself,
    // not the capacity of the buffer. Passing length + SIMDJSON_PADDING here
    // would make the parser treat up to SIMDJSON_PADDING trailing bytes
    // (sibling values or padding) as part of the document. With the default
    // realloc_if_needed = true, simdjson copies the slice into its own padded
    // buffer, so no assumption about the padding behind data_ is needed.
    auto doc = parser.parse(data_.data() + offset, length);
    AssertInfo(doc.error() == simdjson::SUCCESS,
               "failed to parse the json {}: {}",
               std::string(data_.data() + offset, length),
               simdjson::error_message(doc.error()));
    return doc;
}

bool
exist(std::string_view pointer) const {
return doc().at_pointer(pointer).error() == simdjson::SUCCESS;
Expand Down Expand Up @@ -199,6 +262,17 @@ class Json {
return doc().at_pointer(pointer).get<T>();
}

// Parses the `length` bytes of the stored JSON that start at byte `offset`
// (via doc(offset, length)) and converts the resulting document to T.
// The conversion outcome, including any simdjson error, is reported through
// the returned value_result<T>.
template <typename T>
value_result<T>
at(uint16_t offset, uint16_t length) const {
// get<T>() is called directly on the temporary returned by doc(); keep it
// chained so the same overload is selected.
return doc(offset, length).get<T>();
}

// Parses the `length` bytes of the stored JSON that start at byte `offset`
// with the DOM parser (via dom_doc(offset, length)) and returns the root
// value as a DOM array. If the root is not an array, the error is carried in
// the returned value_result rather than asserted here.
value_result<simdjson::dom::array>
array_at(uint16_t offset, uint16_t length) const {
return dom_doc(offset, length).get_array();
}

// get dom array by JSON pointer,
// call `size()` to get array size,
// call `at()` to get array element by index,
Expand Down
Loading

0 comments on commit 5685814

Please sign in to comment.