-
Notifications
You must be signed in to change notification settings - Fork 3
/
visualize_network.py
301 lines (273 loc) · 12.8 KB
/
visualize_network.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
"""
Visualize the network by religion to investigate for homophily in terms of religion
"""
no_muslim_village_count = 0
all_hindu_villages = []
import csv
import networkx as nx
import pandas as pd
import matplotlib.pyplot as plt
import math
from scipy import stats
import random
from random import choice
def construct_graph_with_attribute(vilno):
f = open("datav4.0/datav4.0/Data/1. Network Data\Adjacency Matrices/adj_allVillageRelationships_vilno_" + str(vilno) + ".csv", 'r')
fk = open("datav4.0/datav4.0/Data/1. Network Data/Adjacency Matrix Keys/key_vilno_" + str(vilno) + ".csv", 'r')
fi = open("indiv.csv", 'r')
csv_reader = csv.reader(f)
# adjacency matrix of village 1
list_adj_matrix = list(csv_reader)
# list of individual keys
csv_reader_keys = csv.reader(fk)
list_keys = list(csv_reader_keys)
# list of individual characteristics
csv_indiv = csv.reader(fi)
list_indiv = list(csv_indiv)
num_nodes = len(list_adj_matrix)
# create dictionary mapping each individual to a key
keys = [(x,y[0]) for x,y in zip( range(num_nodes), list_keys )] # y[0] because the keys are stored as one-element lists
nodes_to_keys = dict(keys)
# create dictionary mapping each key to a religion
keys_to_religion = {}
for row in list_indiv:
key = row[3]
religion = row[9]
keys_to_religion[key] = religion
i = 0
edge_count = 0
edge_list = []
node_list = []
g = nx.Graph()
# for each node, get the key and find the religion if the key is present in the indiv file
for row in list_adj_matrix:
node = i
key = nodes_to_keys[node]
# print("Key: ",key)
# if the key is present in the sample of 200 in the indiv.csv file
if key in keys_to_religion:
religion = keys_to_religion[key]
# print("Religion: ", religion)
# add node and corresponding religion
g.add_node(node, religion=religion)
for j in range(i+1, len(row)):
# if the node is part of the sample in the indiv file, add edge
if nodes_to_keys[j] in keys_to_religion:
if(row[j]=='1'):
edge_count += 1
edge_list.append((i,j))
i += 1
# print("Number of edges: ", edge_count)
g.add_edges_from(edge_list)
hindus = [node for node in g.nodes() if g.node[node]['religion']=='HINDUISM']
muslims = [node for node in g.nodes() if g.node[node]['religion']=='ISLAM']
christians = [node for node in g.nodes() if g.node[node]['religion']=='CHRISTIANITY']
nx.draw(g, nodelist=hindus, node_color='r', node_size=300)
nx.draw(g, nodelist=muslims, node_color='b', node_size=300)
nx.draw(g, nodelist=christians, node_color='g', node_size=300)
plt.show()
return g, hindus, muslims, christians
def calculate_densities(g, hindus, muslims, vilno, rows, within_hindus_count=0, within_muslims_count=0, cross_edges_count=0):
num_hindus = len(hindus)
print("Number of hindus: ", num_hindus)
max_edges_hindus = num_hindus * (num_hindus-1) / 2
num_muslims = len(muslims)
print("Number of muslims: ", num_muslims)
max_edges_muslims = num_muslims * (num_muslims-1) / 2
max_cross_edges = num_hindus * num_muslims
num_nodes = g.number_of_nodes()
num_edges = g.number_of_edges()
avg_degree = g.number_of_edges()/g.number_of_nodes()
max_edges_all = num_nodes*(num_nodes-1)/2
global no_muslim_village_count
global all_hindu_villages
if num_muslims == 0:
all_hindu_villages.append(vilno)
no_muslim_village_count += 1
# print("Number of edges: ", g.number_of_edges())
for edge in g.edges():
node1 = edge[0]
node2 = edge[1]
if (g.node[node1]['religion'], g.node[node2]['religion']) == ('HINDUISM', 'HINDUISM'):
within_hindus_count += 1
elif (g.node[node1]['religion'], g.node[node2]['religion']) == ('ISLAM', 'ISLAM'):
within_muslims_count += 1
elif (g.node[node1]['religion'], g.node[node2]['religion']) == ('HINDUISM', 'ISLAM') or (g.node[node1]['religion'], g.node[node2]['religion']) == ('ISLAM', 'HINDUISM'):
cross_edges_count += 1
p_within_hindus = within_hindus_count / max_edges_hindus
p_within_muslims = within_muslims_count / max_edges_muslims if num_muslims>1 else 0
p_cross_edges = cross_edges_count / max_cross_edges if num_muslims>0 else 0
p_all = num_edges/ max_edges_all
std_within_hindus = math.sqrt( p_within_hindus * (1-p_within_hindus) / max_edges_hindus )
std_within_muslims = math.sqrt( p_within_muslims * (1-p_within_muslims) / max_edges_muslims ) if num_muslims>1 else 0
std_cross_edges = math.sqrt( p_cross_edges * (1-p_cross_edges) / max_cross_edges ) if num_muslims>0 else 0
std_all = math.sqrt( p_all * (1-p_all) / max_edges_all )
row = [vilno, num_nodes, p_all, num_hindus, p_within_hindus, num_muslims, p_within_muslims, p_cross_edges, avg_degree]
rows.append(row)
conf_int_density_hindus = (p_within_hindus - 1.96*std_within_hindus, p_within_hindus + 1.96*std_within_hindus)
print("Density within the Hindu community: ", conf_int_density_hindus)
conf_int_density_muslims = (p_within_muslims - 1.96*std_within_muslims, p_within_muslims + 1.96*std_within_muslims)
print("Density within the Muslim community", conf_int_density_muslims)
conf_int_density_cross= (p_cross_edges - 1.96*std_cross_edges, p_cross_edges + 1.96*std_cross_edges)
print("Density of cross edges: ", conf_int_density_cross)
conf_int_density_all= (p_all - 1.96*std_all, p_all + 1.96*std_all)
return {'hindu': (p_within_hindus, conf_int_density_hindus), 'muslim': (p_within_muslims, conf_int_density_muslims),
'cross': (p_cross_edges, conf_int_density_cross), 'all': (p_all, conf_int_density_all)}
def calculate_homophily_unnormalized(g, hindus, muslims):
homophily_edge_count = 0
homophily_hindu_edges = 0
homophily_muslim_edges = 0
N = g.number_of_nodes()
E = g.number_of_edges()
total_hindu_edges = 0
total_muslim_edges = 0
# print("Number of nodes in village: ", g.number_of_nodes())
# print("Number of edges in village: ", g.number_of_edges())
for edge in g.edges():
if g.node[edge[0]]['religion'] == "HINDUISM" or g.node[edge[1]]['religion'] == "HINDUISM":
total_hindu_edges += 1
if g.node[edge[0]]['religion'] == "ISLAM" or g.node[edge[1]]['religion'] == "ISLAM":
total_muslim_edges += 1
if g.node[edge[0]]['religion'] == g.node[edge[1]]['religion']:
homophily_edge_count += 1
if g.node[edge[0]]['religion'] == 'HINDUISM':
homophily_hindu_edges += 1
if g.node[edge[0]]['religion'] == 'ISLAM':
homophily_muslim_edges += 1
# print("Number of same religion edges: ", homo_edge_count)
homophily_unnormalized = homophily_edge_count/E
homophily_hindus = homophily_hindu_edges/total_hindu_edges
homophily_muslims = homophily_muslim_edges/total_muslim_edges
return homophily_unnormalized, homophily_hindus, homophily_muslims
def calculate_homophily_normalized(g, hindus, muslims):
N = g.number_of_nodes()
E = g.number_of_edges()
homophily_all, homophily_hindus, homophily_muslims = calculate_homophily_unnormalized(g, hindus, muslims)
# print("Homophily of g: ", homophily_unnormalized)
# create a random graph with the same number of nodes and edges as the graph g
iter_count = 0
h_all_sum = 0
h_hindus_sum = 0
h_muslims_sum = 0
p = E / (N*(N-1)/2)
for x in range(100):
iter_count += 1
print("Iteration number: ", iter_count)
rg = nx.Graph()
# copy the nodes from the village graph
rg.add_nodes_from(g.nodes(data=True))
# generate E random edges
for x in range(E):
existing_edges = rg.edges()
# making sure that there are no self-edges
while True:
n1 = choice(g.nodes())
n2 = choice(g.nodes())
if n1!=n2 and (n1,n2) not in existing_edges and (n2,n1) not in existing_edges:
break
rg.add_edge(n1,n2)
# print("Number of nodes in random graph: ", rg.number_of_nodes())
# print("Number of edges in random graph: ", rg.number_of_edges())
homophily_all_random, homophily_hindus_random, homophily_muslims_random = calculate_homophily_unnormalized(rg, hindus, muslims)
h_all_sum += homophily_all_random
h_hindus_sum += homophily_hindus_random
h_muslims_sum += homophily_muslims_random
# print("Homophily of random graph: ", homophily_random)
avg_homophily_all_random = h_all_sum/100
avg_homophily_hindus_random = h_hindus_sum/100
avg_homophily_muslims_random = h_muslims_sum/100
# calculate the normalized homophily
homophily_all_norm = homophily_all - avg_homophily_all_random
print("Homophily overall: ", homophily_all_norm)
homophily_hindus_norm = homophily_hindus - avg_homophily_hindus_random
print("Homophily hindus: ", homophily_hindus_norm)
homophily_muslims_norm = homophily_muslims - avg_homophily_muslims_random
print("Homophily muslims: ", homophily_muslims_norm)
return homophily_all_norm, homophily_hindus_norm, homophily_muslims_norm
if __name__=='__main__':
hindu_densities = []
muslim_densities = []
cross_densities = []
all_densities = []
headings = ['VillageNumber', 'NumNodes', 'DensityAll', 'NumHindus', 'DensityHindus', 'NumMuslims', 'DensityMuslims', 'DensityCrossEdges', 'AvgDegree']
rows = [headings]
village_count = 0
homophily_villages = []
x = []
y1 = []
y2 = []
y3 = []
for vilno in [1]:
if vilno in [13,22]:
continue
print("Village number: ", vilno)
g, hindus, muslims, christians = construct_graph_with_attribute(vilno)
return_value = calculate_densities(g, hindus, muslims, vilno, rows)
if return_value!=None:
village_count += 1
edge_densities = return_value
hindu_densities.append(edge_densities['hindu'][0])
muslim_densities.append(edge_densities['muslim'][0])
cross_densities.append(edge_densities['cross'][0])
all_densities.append(edge_densities['all'][0])
# # if it is an all-Hindu village, do not calculate homophily
# if len(muslims)<3:
# continue
# homophily_all, homophily_hindus, homophily_muslims = calculate_homophily_normalized(g, hindus, muslims)
# homophily_villages.append( (vilno, homophily_all, homophily_hindus, homophily_muslims) )
#
# print("Homophily normalized: ", homophily_villages)
# f = open("homophily2.csv", 'w')
# csv_writer = csv.writer(f, lineterminator='\n')
# csv_writer.writerow(["Vilno", "HomophilyAll", "HomophilyHindus", "HomophilyMuslims"])
# for item in homophily_villages:
# csv_writer.writerow([ item[0], item[1], item[2], item[3] ])
# x.append(item[0])
# y1.append(item[1])
# y2.append(item[2])
# y3.append(item[3])
# # plot distribution of overall homophily for villages
# plt.hist(y1)
# plt.xlim(-0.1,0.5)
# plt.xlabel("Homophily")
# plt.ylabel("Number of villages")
# plt.title("Distribution of overall homophily across villages")
# plt.show()
# # plot distribution of hindus homophily for villages
# plt.hist(y2)
# plt.xlim(-0.1,1)
# plt.xlabel("Homophily")
# plt.ylabel("Number of villages")
# plt.title("Distribution of hindu homophily across villages")
# plt.show()
# # plot distribution of muslims homophily for villages
# plt.hist(y3)
# plt.xlim(-0.1,1)
# plt.xlabel("Homophily")
# plt.ylabel("Number of villages")
# plt.title("Distribution of muslim homophily across villages")
# plt.show()
#
# print("Average overall homophily: ", sum(y1)/len(y1))
# print("Average hindu homophily: ", sum(y2)/len(y2))
# print("Average muslim homophily: ", sum(y3)/len(y3))
# print("Hindu densities: ", hindu_densities)
# plt.plot(hindu_densities, label="Hinduism")
# print("Muslim densities: ", muslim_densities)
# plt.plot(muslim_densities, label="Islam")
# print("Cross densities: ", cross_densities)
# plt.plot(cross_densities, label="Cross edges")
# xticks_set = set(range(1,78)) - set([13,22])
# plt.xticks(list(xticks_set))
# plt.xlabel("Village numbers")
# plt.ylabel("Edge density")
# plt.legend()
# plt.show()
# print("Village count: ", village_count)
# print("Num villages with no muslims: ", no_muslim_village_count)
# print("All hindu villages: ", all_hindu_villages)
# print(rows)
# print(len(rows))
# with open("density_by_religion.csv", 'w') as f:
# csv_writer = csv.writer(f, lineterminator='\n')
# csv_writer.writerows(rows)