Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(analytical): make clustering support undirected graph #3715

Merged
merged 7 commits into from
Apr 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 40 additions & 26 deletions analytical_engine/apps/clustering/clustering.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ limitations under the License.

namespace gs {
/**
* @brief An implementation of Clustering Coefficient for vertices, which only
* works on directed graphs. For undirected graphs, see grape::LCC.
* @brief An implementation of Clustering Coefficient for vertices. Supports both
* directed and undirected graphs.
*
* This app inherits ParallelAppBase. Messages can be sent in
* parallel to the evaluation. This strategy improves performance by overlapping
Expand Down Expand Up @@ -56,8 +56,12 @@ class Clustering
messages.InitChannels(thread_num());
ctx.stage = 0;
ForEach(inner_vertices, [&messages, &frag, &ctx](int tid, vertex_t v) {
ctx.global_degree[v] =
frag.GetLocalOutDegree(v) + frag.GetLocalInDegree(v);
if (frag.directed()) {
ctx.global_degree[v] =
frag.GetLocalOutDegree(v) + frag.GetLocalInDegree(v);
} else {
ctx.global_degree[v] = frag.GetLocalOutDegree(v);
}
if (ctx.global_degree[v] > 1) {
messages.SendMsgThroughEdges<fragment_t, int>(
frag, v, ctx.global_degree[v], tid);
Expand Down Expand Up @@ -97,12 +101,15 @@ class Clustering
auto u = e.get_neighbor();
is_rec[u.GetValue()]++;
}
es = frag.GetIncomingAdjList(v);
for (auto& e : es) {
auto u = e.get_neighbor();
is_rec[u.GetValue()]++;
if (is_rec[u.GetValue()] == 2) {
ctx.rec_degree[v]++;

if (frag.directed()) {
es = frag.GetIncomingAdjList(v);
for (auto& e : es) {
auto u = e.get_neighbor();
is_rec[u.GetValue()]++;
if (is_rec[u.GetValue()] == 2) {
ctx.rec_degree[v]++;
}
}
}

Expand Down Expand Up @@ -136,28 +143,30 @@ class Clustering
}
}

es = frag.GetIncomingAdjList(v);
for (auto& e : es) {
auto u = e.get_neighbor();
if (ctx.global_degree[u] < ctx.global_degree[v]) {
std::pair<vid_t, uint32_t> msg;
msg.first = frag.Vertex2Gid(u);
if (is_rec[u.GetValue()] == 1) {
msg.second = 1;
msg_vec.push_back(msg);
nbr_vec.push_back(std::make_pair(u, 1));
}
} else if (ctx.global_degree[u] == ctx.global_degree[v]) {
u_gid = frag.Vertex2Gid(u);
v_gid = frag.GetInnerVertexGid(v);
if (v_gid > u_gid) {
if (frag.directed()) {
es = frag.GetIncomingAdjList(v);
for (auto& e : es) {
auto u = e.get_neighbor();
if (ctx.global_degree[u] < ctx.global_degree[v]) {
std::pair<vid_t, uint32_t> msg;
msg.first = frag.Vertex2Gid(u);
if (is_rec[u.GetValue()] == 1) {
msg.second = 1;
msg_vec.push_back(msg);
nbr_vec.push_back(std::make_pair(u, 1));
}
} else if (ctx.global_degree[u] == ctx.global_degree[v]) {
u_gid = frag.Vertex2Gid(u);
v_gid = frag.GetInnerVertexGid(v);
if (v_gid > u_gid) {
std::pair<vid_t, uint32_t> msg;
msg.first = frag.Vertex2Gid(u);
if (is_rec[u.GetValue()] == 1) {
msg.second = 1;
msg_vec.push_back(msg);
nbr_vec.push_back(std::make_pair(u, 1));
}
}
}
}
}
Expand Down Expand Up @@ -251,7 +260,12 @@ class Clustering
} else {
int degree =
global_degree[v] * (global_degree[v] - 1) - 2 * rec_degree[v];
ctx_data[v] = degree == 0 ? 0.0 : 1.0 * tricnt[v] / degree;
// Refer to https://en.wikipedia.org/wiki/Clustering_coefficient
if (frag.directed()) {
ctx_data[v] = degree == 0 ? 0.0 : 1.0 * tricnt[v] / degree;
} else {
ctx_data[v] = degree == 0 ? 0.0 : 2.0 * tricnt[v] / degree;
}
}
}
}
Expand Down
75 changes: 75 additions & 0 deletions python/graphscope/nx/algorithms/tests/builtin/test_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,81 @@ def test_triangle_and_edge(self):
assert nx.builtin.clustering(G)[0] == 1.0 / 6.0


@pytest.mark.usefixtures("graphscope_session")
class TestClustering:
    """Tests for ``nx.builtin.clustering`` and ``nx.builtin.average_clustering``."""

    @classmethod
    def setup_class(cls):
        # Skip every test in this class when numpy cannot be imported.
        pytest.importorskip("numpy")

    def test_clustering(self):
        # A graph without nodes produces an empty result.
        G = nx.Graph()
        assert list(nx.builtin.clustering(G).values()) == []
        assert nx.builtin.clustering(G) == {}

    def test_path(self):
        # Every node of a 10-node path is expected to score zero.
        G = nx.path_graph(10)
        expected_values = [0] * 10
        expected_mapping = dict.fromkeys(range(10), 0)
        assert list(nx.builtin.clustering(G).values()) == expected_values
        assert nx.builtin.clustering(G) == expected_mapping

    def test_cubical(self):
        # Checks the whole-graph, single-node and node-list call forms.
        G = nx.cubical_graph()
        assert list(nx.builtin.clustering(G).values()) == [0] * 8
        assert nx.builtin.clustering(G, 1) == 0
        assert list(nx.builtin.clustering(G, [1, 2]).values()) == [0, 0]
        assert nx.builtin.clustering(G, 1) == 0
        assert nx.builtin.clustering(G, [1, 2]) == {1: 0, 2: 0}

    def test_k5(self):
        G = nx.complete_graph(5)
        assert list(nx.builtin.clustering(G).values()) == [1] * 5
        assert nx.builtin.average_clustering(G) == 1
        # Dropping edge (1, 2) lowers the score of nodes 0, 3 and 4 to 5/6.
        G.remove_edge(1, 2)
        five_sixths = 5 / 6
        expected_values = [five_sixths, 1, 1, five_sixths, five_sixths]
        assert list(nx.builtin.clustering(G).values()) == expected_values
        assert nx.builtin.clustering(G, [1, 4]) == {1: 1, 4: 0.83333333333333337}

    def test_k5_signed(self):
        G = nx.complete_graph(5)
        assert list(nx.builtin.clustering(G).values()) == [1] * 5
        assert nx.builtin.average_clustering(G) == 1
        G.remove_edge(1, 2)
        # Give edge (0, 1) a negative weight before the weighted query.
        G.add_edge(0, 1, weight=-1)
        expected_values = [1 / 6, -1 / 3, 1, 3 / 6, 3 / 6]
        weighted = nx.builtin.clustering(G, weight="weight")
        assert list(weighted.values()) == expected_values


@pytest.mark.usefixtures("graphscope_session")
class TestDirectedWeightedClustering:
def test_clustering(self):
Expand Down
7 changes: 7 additions & 0 deletions python/graphscope/nx/tests/test_ctx_builtin.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,13 @@ def test_clustering(self):
ans = nx.builtin.clustering(self.p2p)
self.assert_result_almost_equal(ans, self.p2p_clus_ans)

def test_clustering_undirected(self):
# test undirected graph
G = nx.Graph()
G.add_nodes_from([1, 2, 3])
G.add_edges_from([(1, 2), (2, 3), (3, 1)])
nx.builtin.clustering(G) == {1: 1.0, 2: 1.0, 3: 1.0}

def test_triangles(self):
ans = nx.builtin.triangles(self.p2p_undirected)
self.assert_result_almost_equal(ans, self.p2p_triangles_ans)
Expand Down
Loading