From ebecc72fc4830bd0f7f55cc429560bedd10ddb57 Mon Sep 17 00:00:00 2001 From: Zhang Lei Date: Fri, 15 Dec 2023 17:33:44 +0800 Subject: [PATCH] refactor(interactive): Remove dummy config `incoming/outgoing_edge_strategy` (#3434) Remove the dummy `x_csr_params: incoming/outgoing_edge_strategy`, replace with option `edge_storage_strategy`. Fixes #3411 --- docs/flex/interactive/data_model.md | 8 +++ .../examples/modern_graph/modern_graph.yaml | 8 +-- flex/storages/rt_mutable_graph/README.md | 17 ++--- flex/storages/rt_mutable_graph/schema.cc | 69 ++++++++++++++----- flex/storages/rt_mutable_graph/types.h | 23 +++++++ .../modern_graph_string_edge.yaml | 8 +-- .../test/resources/config/modern/graph.yaml | 8 +-- 7 files changed, 93 insertions(+), 48 deletions(-) diff --git a/docs/flex/interactive/data_model.md b/docs/flex/interactive/data_model.md index 18a5eca0b5be..dfdb25be60b1 100644 --- a/docs/flex/interactive/data_model.md +++ b/docs/flex/interactive/data_model.md @@ -44,6 +44,14 @@ Note: - A single edge type can have multiple `vertex_type_pair_relations`. For instance, a "knows" edge might connect one person to another, symbolizing their friendship. Alternatively, it could associate a person with a skill, indicating their proficiency in that skill. - The permissible relations include: `ONE_TO_ONE`, `ONE_TO_MANY`, `MANY_TO_ONE`, and `MANY_TO_MANY`. These relations can be utilized by the optimizer to generate more efficient execution plans. - Currently we only support at most one property for each edge triplet. +- All implementation-related configurations are put under `x_csr_params`. + - `max_vertex_num` limits the number of vertices of this type: + - The limit number is used to `mmap` memory, so it only takes virtual memory before vertices are actually inserted. + - If `max_vertex_num` is not set, a default large number (e.g.: 2^48) will be used. 
+ - `edge_storage_strategy` specifies the storing strategy of the incoming or outgoing edges of this type. There are 3 kinds of strategies: + - `ONLY_IN`: Only incoming edges are stored. + - `ONLY_OUT`: Only outgoing edges are stored. + - `BOTH_OUT_IN`(default): Both directions of edges are stored. ## Entity Data diff --git a/flex/interactive/examples/modern_graph/modern_graph.yaml b/flex/interactive/examples/modern_graph/modern_graph.yaml index d6b65c0fc544..9b7389f7488e 100644 --- a/flex/interactive/examples/modern_graph/modern_graph.yaml +++ b/flex/interactive/examples/modern_graph/modern_graph.yaml @@ -47,9 +47,6 @@ schema: - source_vertex: person destination_vertex: person relation: MANY_TO_MANY - x_csr_params: - incoming_edge_strategy: Multiple - outgoing_edge_strategy: Multiple properties: - property_id: 0 property_name: weight @@ -60,10 +57,7 @@ schema: vertex_type_pair_relations: - source_vertex: person destination_vertex: software - relation: ONE_TO_MANY - x_csr_params: - incoming_edge_strategy: Multiple - outgoing_edge_strategy: Single + relation: MANY_TO_MANY properties: - property_id: 0 property_name: weight diff --git a/flex/storages/rt_mutable_graph/README.md b/flex/storages/rt_mutable_graph/README.md index 695129dff5a7..f80187ea3e2b 100644 --- a/flex/storages/rt_mutable_graph/README.md +++ b/flex/storages/rt_mutable_graph/README.md @@ -59,7 +59,6 @@ schema: property_name: id property_type: primitive_type: DT_SIGNED_INT64 - x_csr_params: - property_id: 1 property_name: name property_type: @@ -77,8 +76,7 @@ schema: destination_vertex: person relation: MANY_TO_MANY x_csr_params: - incoming_edge_strategy: Multiple - outgoing_edge_strategy: Multiple + edge_storage_strategy: BOTH_OUT_IN properties: - property_id: 0 property_name: weight @@ -88,10 +86,7 @@ schema: vertex_type_pair_relations: source_vertex: person destination_vertex: software - relation: ONE_TO_MANY - x_csr_params: - incoming_edge_strategy: Multiple - outgoing_edge_strategy: Single + 
relation: ONE_TO_MANY properties: - property_id: 0 property_name: weight @@ -106,10 +101,10 @@ Notes: - `max_vertex_num` limit the number of vertices of this type: - The limit number is used to `mmap` memory, so it only takes virtual memory before vertices are actually inserted. - If `max_vertex_num` is not set, a default large number (e.g.: 2^48) will be used. - - `incoming/outgoing_edge_strategy` specifies the storing strategy of the incoming or outgoing edges of this type, there are 3 kinds of strategies - - None: no edge will be stored - - Single: only one edge will be stored - - Multiple(default): multiple edges will be stored + - `edge_storage_strategy` specifies the storing strategy of the incoming or outgoing edges of this type, there are 3 kinds of strategies + - `ONLY_IN`: Only incoming edges are stored. + - `ONLY_OUT`: Only outgoing edges are stored. + - `BOTH_OUT_IN`(default): Both direction of edges are stored. ## 3. Vertex Management diff --git a/flex/storages/rt_mutable_graph/schema.cc b/flex/storages/rt_mutable_graph/schema.cc index 9143b46d1472..5f570f9c7537 100644 --- a/flex/storages/rt_mutable_graph/schema.cc +++ b/flex/storages/rt_mutable_graph/schema.cc @@ -413,16 +413,26 @@ static PropertyType StringToPropertyType(const std::string& str) { return PropertyType::kEmpty; } } - -EdgeStrategy StringToEdgeStrategy(const std::string& str) { - if (str == "None") { - return EdgeStrategy::kNone; - } else if (str == "Single") { - return EdgeStrategy::kSingle; - } else if (str == "Multiple") { - return EdgeStrategy::kMultiple; +void RelationToEdgeStrategy(const std::string& rel_str, + EdgeStrategy& ie_strategy, + EdgeStrategy& oe_strategy) { + if (rel_str == "ONE_TO_MANY") { + ie_strategy = EdgeStrategy::kSingle; + oe_strategy = EdgeStrategy::kMultiple; + } else if (rel_str == "ONE_TO_ONE") { + ie_strategy = EdgeStrategy::kSingle; + oe_strategy = EdgeStrategy::kSingle; + } else if (rel_str == "MANY_TO_ONE") { + ie_strategy = EdgeStrategy::kMultiple; + 
oe_strategy = EdgeStrategy::kSingle; + } else if (rel_str == "MANY_TO_MANY") { + ie_strategy = EdgeStrategy::kMultiple; + oe_strategy = EdgeStrategy::kMultiple; } else { - return EdgeStrategy::kMultiple; + LOG(WARNING) << "relation " << rel_str + << " is not valid, using default value: kMultiple"; + ie_strategy = EdgeStrategy::kMultiple; + oe_strategy = EdgeStrategy::kMultiple; } } @@ -700,17 +710,44 @@ static bool parse_edge_schema(YAML::Node node, Schema& schema) { << "] to [" << dst_label_name << "] already exists"; return false; } - // if x_csr_params presents, overwrite the default strategy + + std::string relation_str; + if (get_scalar(cur_node, "relation", relation_str)) { + RelationToEdgeStrategy(relation_str, cur_ie, cur_oe); + } else { + LOG(WARNING) << "relation not defined, using default ie strategy: " + << cur_ie << ", oe strategy: " << cur_oe; + } + // check if x_csr_params presents if (cur_node["x_csr_params"]) { auto csr_node = cur_node["x_csr_params"]; - std::string ie_str, oe_str; - if (get_scalar(csr_node, "outgoing_edge_strategy", oe_str)) { - cur_oe = StringToEdgeStrategy(oe_str); - } - if (get_scalar(csr_node, "incoming_edge_strategy", ie_str)) { - cur_ie = StringToEdgeStrategy(ie_str); + if (csr_node["edge_storage_strategy"]) { + std::string edge_storage_strategy_str; + if (get_scalar(csr_node, "edge_storage_strategy", + edge_storage_strategy_str)) { + if (edge_storage_strategy_str == "ONLY_IN") { + cur_oe = EdgeStrategy::kNone; + VLOG(10) << "Store only in edges for edge: " << src_label_name + << "-[" << edge_label_name << "]->" << dst_label_name; + } else if (edge_storage_strategy_str == "ONLY_OUT") { + cur_ie = EdgeStrategy::kNone; + VLOG(10) << "Store only out edges for edge: " << src_label_name + << "-[" << edge_label_name << "]->" << dst_label_name; + } else if (edge_storage_strategy_str == "BOTH_OUT_IN" || + edge_storage_strategy_str == "BOTH_IN_OUT") { + VLOG(10) << "Store both in and out edges for edge: " + << src_label_name << "-[" 
<< edge_label_name << "]->" + << dst_label_name; + } else { + LOG(ERROR) << "edge_storage_strategy is not set properly for edge: " + << src_label_name << "-[" << edge_label_name << "]->" + << dst_label_name; + return false; + } + } } } + VLOG(10) << "edge " << edge_label_name << " from " << src_label_name << " to " << dst_label_name << " with " << property_types.size() << " properties"; diff --git a/flex/storages/rt_mutable_graph/types.h b/flex/storages/rt_mutable_graph/types.h index 6066fe69253d..2f9603776a55 100644 --- a/flex/storages/rt_mutable_graph/types.h +++ b/flex/storages/rt_mutable_graph/types.h @@ -17,6 +17,7 @@ limitations under the License. #define STORAGES_RT_MUTABLE_GRAPH_TYPES_H_ #include +#include namespace gs { @@ -44,4 +45,26 @@ static constexpr const char* DT_DATE = "DT_DATE32"; } // namespace gs +namespace std { + +// operator << for EdgeStrategy +inline ostream& operator<<(ostream& os, const gs::EdgeStrategy& strategy) { + switch (strategy) { + case gs::EdgeStrategy::kNone: + os << "None"; + break; + case gs::EdgeStrategy::kSingle: + os << "Single"; + break; + case gs::EdgeStrategy::kMultiple: + os << "Multiple"; + break; + default: + os << "Unknown"; + break; + } + return os; +} +} // namespace std + #endif // STORAGES_RT_MUTABLE_GRAPH_TYPES_H_ diff --git a/flex/tests/rt_mutable_graph/modern_graph_string_edge.yaml b/flex/tests/rt_mutable_graph/modern_graph_string_edge.yaml index 2e46a4ae64ed..3b18fc4dbcd2 100644 --- a/flex/tests/rt_mutable_graph/modern_graph_string_edge.yaml +++ b/flex/tests/rt_mutable_graph/modern_graph_string_edge.yaml @@ -50,9 +50,6 @@ schema: - source_vertex: person destination_vertex: person relation: MANY_TO_MANY - x_csr_params: - incoming_edge_strategy: Multiple - outgoing_edge_strategy: Multiple properties: - property_id: 0 property_name: weight @@ -63,10 +60,7 @@ schema: vertex_type_pair_relations: - source_vertex: person destination_vertex: software - relation: ONE_TO_MANY - x_csr_params: - incoming_edge_strategy: 
Multiple - outgoing_edge_strategy: Single + relation: MANY_TO_ONE properties: - property_id: 0 property_name: weight diff --git a/interactive_engine/compiler/src/test/resources/config/modern/graph.yaml b/interactive_engine/compiler/src/test/resources/config/modern/graph.yaml index 7da7efa31252..d044416b3bbf 100644 --- a/interactive_engine/compiler/src/test/resources/config/modern/graph.yaml +++ b/interactive_engine/compiler/src/test/resources/config/modern/graph.yaml @@ -52,9 +52,6 @@ schema: edge_types: - type_name: knows type_id: 2 - x_csr_params: - incoming_edge_strategy: Multiple - outgoing_edge_strategy: Multiple vertex_type_pair_relations: - source_vertex: person destination_vertex: person @@ -66,13 +63,10 @@ schema: primitive_type: DT_DOUBLE - type_name: created type_id: 3 - x_csr_params: - incoming_edge_strategy: Multiple - outgoing_edge_strategy: Single vertex_type_pair_relations: - source_vertex: person destination_vertex: software - relation: ONE_TO_MANY + relation: MANY_TO_MANY properties: - property_id: 0 property_name: weight