Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Graph analytics #10

Open
wants to merge 42 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
9a5f20e
Create stat_plots_in.py
SahithiKasim Jan 28, 2023
1ecd98c
Create stat_plots_out.py
SahithiKasim Jan 28, 2023
494e718
Update stat_plots_in.py
SahithiKasim Jan 28, 2023
6902094
Update and rename stat_plots_in.py to stats_plots_in.py
SahithiKasim Jan 28, 2023
0582f78
Rename stat_plots_out.py to stats_plots_out.py
SahithiKasim Jan 28, 2023
bf676a8
Update stats_plots_in.py
SahithiKasim Jan 28, 2023
af1bccf
Create stats_plots_amd64_in.py
SahithiKasim Jan 28, 2023
8500e9d
Rename stats_plots_amd64_in.py to stats_plots_amd64_out.py
SahithiKasim Jan 28, 2023
1aff1c5
Create stats_plots_amd64_in.py
SahithiKasim Jan 28, 2023
494b9a0
Create new
SahithiKasim Jan 28, 2023
536fa3e
Delete new
SahithiKasim Jan 28, 2023
3d388bf
Create new
SahithiKasim Jan 28, 2023
81a8da4
Add files via upload
SahithiKasim Jan 28, 2023
f4b39ac
Delete new
SahithiKasim Jan 28, 2023
304509d
Create top_10_packages.py
SahithiKasim Jan 28, 2023
7c035b9
Add files via upload
SahithiKasim Jan 28, 2023
1db0699
Create stackplot_in.py
SahithiKasim Jan 29, 2023
f992794
Create stackplot_out.py
SahithiKasim Jan 29, 2023
e709473
Add files via upload
SahithiKasim Jan 29, 2023
6053b7b
Create description.md
SahithiKasim Feb 2, 2023
d50c4b9
Create stat_in_plot.py
SahithiKasim Feb 20, 2023
a99fe0c
Rename stat_in_plot.py to stack_in_plot.py
SahithiKasim Feb 20, 2023
e818f9f
Create stack_out_plot.py
SahithiKasim Feb 20, 2023
2e801f9
Create stats_in_plots.py
SahithiKasim Feb 20, 2023
87e6483
Create stats_out_plots.py
SahithiKasim Feb 20, 2023
b146aaf
Create core_minres.py
SahithiKasim Feb 20, 2023
d50aeb7
Create top_packages.py
SahithiKasim Feb 20, 2023
e59b775
Create snb.txt
SahithiKasim Feb 20, 2023
89c5076
Add files via upload
SahithiKasim Feb 20, 2023
b9275d0
Delete snb.txt
SahithiKasim Feb 20, 2023
041c28d
Add files via upload
SahithiKasim Feb 20, 2023
6c13640
Delete scripts/ingestion/plots directory
SahithiKasim Feb 21, 2023
d37d2d1
Create description.md
SahithiKasim Feb 21, 2023
9ab6891
New Data
SahithiKasim Feb 21, 2023
19f2cb4
New Plots
SahithiKasim Feb 21, 2023
16d64aa
analytics
SahithiKasim Apr 11, 2023
cafcfad
Add files via upload
SahithiKasim Apr 11, 2023
a403d5b
Update page_rank.py
SahithiKasim Apr 11, 2023
33bebd1
Add files via upload
SahithiKasim Apr 21, 2023
b312242
Rename path.md to path_length.md
SahithiKasim Apr 21, 2023
468812d
Rearranged in Subfolders
SahithiKasim Sep 11, 2023
0bb6037
Updated Path
SahithiKasim Sep 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions analytics/Network_Analysis/betweenness_centrality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import sqlite3
import networkx as nx
from prettytable import PrettyTable
import time
from constants import DB_LOC, CONSTRUCT_GRAPH

# Define function to retrieve edge data from SQLite database
def get_edge_data():
    """Run the graph-construction query and return all of its rows.

    Connects to the SQLite database at ``DB_LOC``, executes
    ``CONSTRUCT_GRAPH`` and returns the fetched rows as a list of tuples.
    The caller feeds these rows to ``add_edges_from``, so each row is
    presumably a (source, target) pair -- confirm against the query text.

    Raises:
        sqlite3.Error: if the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB_LOC)
    try:
        # try/finally guarantees the connection is released even when the
        # query raises, which the straight-line version did not.
        return conn.execute(CONSTRUCT_GRAPH).fetchall()
    finally:
        conn.close()

# Define function to insert top node data into a table format in a md file
def insert_into_md(top_nodes):
    """Render the ranked packages as a text table and save it to disk.

    ``top_nodes`` is an iterable of ``(node, value)`` pairs.  Each pair is
    added as a row numbered from 1.  The rendered table overwrites
    ``plots/between_centrality.md``.
    """
    table = PrettyTable(["Sno","Top 5000 valued packages", "Betweenness Centrality"])
    for serial, (node, value) in enumerate(top_nodes, start=1):
        table.add_row([serial, node, value])
        # NOTE(review): the dash row is treated as a per-entry separator;
        # confirm it was not meant to be a single trailing row.
        table.add_row(['-', '-', '-'])

    rendered = table.get_string()
    with open('plots/between_centrality.md', 'w') as md_file:
        md_file.write(rendered)

# Define function to create directed graph and calculate Betweenness centrality metric for each node
def get_top_nodes(items, k=500, top_n=5000):
    """Rank nodes of the directed graph by betweenness centrality.

    Args:
        items: iterable of edges accepted by ``DiGraph.add_edges_from``.
        k: number of sample nodes used to estimate betweenness.  It is
            clamped to the number of nodes in the graph, because asking for
            more samples than nodes makes the computation fail.
        top_n: maximum number of (node, value) pairs to return.

    Returns:
        A list of ``(node, betweenness)`` pairs, highest value first,
        at most ``top_n`` long.  Empty when the graph has no nodes.
    """
    G = nx.DiGraph()
    G.add_edges_from(items)

    node_count = G.number_of_nodes()
    if node_count == 0:
        return []

    # Values are estimates whenever fewer than all nodes are sampled.
    bet = nx.betweenness_centrality(G, k=min(k, node_count))

    return sorted(bet.items(), key=lambda x: x[1], reverse=True)[:top_n]

# Main function to call other functions
def main():
    """Load the edges, rank the nodes, write the table and print the run time."""
    started = time.time()
    print("program running")

    # Each stage consumes the previous stage's result.
    insert_into_md(get_top_nodes(get_edge_data()))

    elapsed = time.time() - started
    print('Program run time in seconds:', elapsed, '(s)')


if __name__ == '__main__':
    main()
69 changes: 69 additions & 0 deletions analytics/Network_Analysis/centrality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import sqlite3
import networkx as nx
from prettytable import PrettyTable
import time
from constants import DB_LOC, CONSTRUCT_GRAPH

# Define function to retrieve edge data from SQLite database
def get_edge_data():
    """Run the graph-construction query and return all of its rows.

    Connects to the SQLite database at ``DB_LOC``, executes
    ``CONSTRUCT_GRAPH`` and returns the fetched rows as a list of tuples.
    The caller feeds these rows to ``add_edges_from``, so each row is
    presumably a (source, target) pair -- confirm against the query text.

    Raises:
        sqlite3.Error: if the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB_LOC)
    try:
        # try/finally guarantees the connection is released even when the
        # query raises, which the straight-line version did not.
        return conn.execute(CONSTRUCT_GRAPH).fetchall()
    finally:
        conn.close()

# Define function to insert top node data into a table format in a md file
def insert_into_md(values):
    """Write the per-package metrics as a Markdown table.

    ``values`` is an iterable of indexable rows whose first seven entries
    are, in order: package, coreness, degree centrality, eigenvector
    centrality, betweenness centrality, closeness centrality and PageRank.
    The table overwrites ``plots/centrality.md``.
    """
    header = '| Package | Coreness | Degree Centrality | Eigenvector Centrality | Betweenness Centrality | Closeness Centrality | Page Rank |\n'
    divider = '| --- | --- | --- | --- | --- | --- | --- |\n'
    with open('plots/centrality.md', 'w') as md_file:
        md_file.write(header)
        md_file.write(divider)
        # One Markdown row per package, in the order the rows were supplied.
        md_file.writelines(
            f'| {row[0]} | {row[1]} | {row[2]} | {row[3]} | {row[4]} | {row[5]} | {row[6]} |\n'
            for row in values
        )


# Define function to create directed graph and calculate every centrality metric for each node
def get_top_nodes(items, k=500):
    """Compute all metrics for every node and sort the nodes by coreness.

    Args:
        items: iterable of edges accepted by ``DiGraph.add_edges_from``.
        k: number of sample nodes used to estimate betweenness.  It is
            clamped to the number of nodes in the graph, because asking for
            more samples than nodes makes the computation fail.

    Returns:
        A list of tuples ``(node, coreness, degree centrality, eigenvector
        centrality, betweenness centrality, closeness centrality,
        PageRank)``, highest coreness first.  Empty when the graph has no
        nodes.
    """
    G = nx.DiGraph()
    G.add_edges_from(items)

    node_count = G.number_of_nodes()
    if node_count == 0:
        return []

    # calculate degree centrality
    degree_centrality = nx.degree_centrality(G)

    # calculate eigenvector centrality
    eigenvector_centrality = nx.eigenvector_centrality(G)

    # calculate PageRank
    pagerank = nx.pagerank(G)

    # calculate betweenness centrality (estimated from at most k sample nodes)
    betweenness_centrality = nx.betweenness_centrality(G, k=min(k, node_count))

    # calculate closeness centrality
    closeness_centrality = nx.closeness_centrality(G)

    # Self loops are removed only now, so the metrics above are computed on
    # the graph exactly as loaded; core_number does not accept self loops.
    G.remove_edges_from(nx.selfloop_edges(G))
    # calculate coreness
    coreness = nx.core_number(G)

    # one tuple per node, columns in the order the Markdown writer expects
    values = [
        (
            node,
            coreness[node],
            degree_centrality[node],
            eigenvector_centrality[node],
            betweenness_centrality[node],
            closeness_centrality[node],
            pagerank[node],
        )
        for node in G.nodes()
    ]

    # sort the list by coreness
    values.sort(key=lambda x: x[1], reverse=True)
    return values

# Main function to call other functions
def main():
    """Load the edges, compute the metrics, write the table and print the run time."""
    started = time.time()
    print("program running")

    # Each stage consumes the previous stage's result.
    insert_into_md(get_top_nodes(get_edge_data()))

    elapsed = time.time() - started
    print('Program run time in seconds:', elapsed, '(s)')


if __name__ == '__main__':
    main()
54 changes: 54 additions & 0 deletions analytics/Network_Analysis/closeness_centrality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import sqlite3
import networkx as nx
from prettytable import PrettyTable
import time
from constants import DB_LOC, CONSTRUCT_GRAPH

# Define function to retrieve edge data from SQLite database
def get_edge_data():
    """Run the graph-construction query and return all of its rows.

    Connects to the SQLite database at ``DB_LOC``, executes
    ``CONSTRUCT_GRAPH`` and returns the fetched rows as a list of tuples.
    The caller feeds these rows to ``add_edges_from``, so each row is
    presumably a (source, target) pair -- confirm against the query text.

    Raises:
        sqlite3.Error: if the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB_LOC)
    try:
        # try/finally guarantees the connection is released even when the
        # query raises, which the straight-line version did not.
        return conn.execute(CONSTRUCT_GRAPH).fetchall()
    finally:
        conn.close()

# Define function to insert top node data into a table format in a md file
def insert_into_md(top_nodes):
    """Render the ranked packages as a text table and save it to disk.

    ``top_nodes`` is an iterable of ``(node, value)`` pairs.  Each pair is
    added as a row numbered from 1.  The rendered table overwrites
    ``plots/closeness_centrality.md``.
    """
    table = PrettyTable(["Sno","Top 5000 valued packages", "Closeness Centrality"])
    for serial, (node, value) in enumerate(top_nodes, start=1):
        table.add_row([serial, node, value])
        # NOTE(review): the dash row is treated as a per-entry separator;
        # confirm it was not meant to be a single trailing row.
        table.add_row(['-', '-', '-'])

    rendered = table.get_string()
    with open('plots/closeness_centrality.md', 'w') as md_file:
        md_file.write(rendered)

# Define function to create directed graph and calculate Closeness centrality metric for each node
def get_top_nodes(items):
    """Rank nodes of the directed graph by closeness centrality.

    Args:
        items: iterable of edges accepted by ``DiGraph.add_edges_from``.

    Returns:
        A list of ``(node, closeness)`` pairs, highest value first,
        truncated to the first 5000 entries.
    """
    G = nx.DiGraph()
    G.add_edges_from(items)

    # closeness_centrality has no sampling parameter (unlike
    # betweenness_centrality); passing k=500 raised TypeError, so the
    # metric is computed over every node.
    close = nx.closeness_centrality(G)

    top_nodes = sorted(close.items(), key=lambda x: x[1], reverse=True)[:5000]
    return top_nodes

# Main function to call other functions
def main():
    """Load the edges, rank the nodes, write the table and print the run time."""
    started = time.time()
    print("program running")

    # Each stage consumes the previous stage's result.
    insert_into_md(get_top_nodes(get_edge_data()))

    elapsed = time.time() - started
    print('Program run time in seconds:', elapsed, '(s)')


if __name__ == '__main__':
    main()
56 changes: 56 additions & 0 deletions analytics/Network_Analysis/core_minres.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import sqlite3
import networkx as nx
from prettytable import PrettyTable
import cpnet
import time
from constants import DB_LOC, CONSTRUCT_GRAPH
# Define function to retrieve edge data from SQLite database
def get_edge_data():
    """Run the graph-construction query and return all of its rows.

    Connects to the SQLite database at ``DB_LOC``, executes
    ``CONSTRUCT_GRAPH`` and returns the fetched rows as a list of tuples.
    The caller feeds these rows to ``add_edges_from``, so each row is
    presumably a (source, target) pair -- confirm against the query text.

    Raises:
        sqlite3.Error: if the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB_LOC)
    try:
        # try/finally guarantees the connection is released even when the
        # query raises, which the straight-line version did not.
        return conn.execute(CONSTRUCT_GRAPH).fetchall()
    finally:
        conn.close()

# Define function to create directed graph and calculate MINRES metric for each node
def get_top_nodes(items):
    """Rank nodes of the directed graph by their cpnet MINRES coreness.

    Args:
        items: iterable of edges accepted by ``DiGraph.add_edges_from``.

    Returns:
        A list of ``(node, coreness)`` pairs, highest value first,
        truncated to the first 500 entries.
    """
    G = nx.DiGraph()
    G.add_edges_from(items)

    alg = cpnet.MINRES()
    alg.detect(G)
    # Only the coreness values are reported; the pair ids that the detector
    # also exposes were fetched but never used, so that call is dropped.
    coreness = alg.get_coreness()

    top_nodes = sorted(coreness.items(), key=lambda x: x[1], reverse=True)[:500]
    return top_nodes

# Define function to insert top node data into a table format in a md file
def insert_into_md(top_nodes):
    """Render the ranked packages as a text table and save it to disk.

    ``top_nodes`` is an iterable of ``(node, value)`` pairs.  Each pair is
    added as a numbered row.  Numbering starts at 1 so the "Sno" column
    matches the tables written by the sibling analysis scripts (it
    previously started at 0 here).  The rendered table overwrites
    ``plots/minres.md``.
    """
    myTable = PrettyTable(["Sno","Top 500 coreness value packages", "CP-MINRES"])
    for i, (node, value) in enumerate(top_nodes, start=1):
        myTable.add_row([i, node, value])
        # NOTE(review): the dash row is treated as a per-entry separator;
        # confirm it was not meant to be a single trailing row.
        myTable.add_row(['-', '-', '-'])

    data = myTable.get_string()
    with open('plots/minres.md', 'w') as f:
        f.write(data)

# Main function to call other functions
def main():
    """Load the edges, rank the nodes, write the table and print the run time."""
    started = time.time()
    print("program running")

    # Each stage consumes the previous stage's result.
    insert_into_md(get_top_nodes(get_edge_data()))

    elapsed = time.time() - started
    print('Program run time in seconds:', elapsed, '(s)')


if __name__ == '__main__':
    main()
55 changes: 55 additions & 0 deletions analytics/Network_Analysis/coreness.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import sqlite3
import networkx as nx
from prettytable import PrettyTable
import time
from constants import DB_LOC, CONSTRUCT_GRAPH

# Define function to retrieve edge data from SQLite database
def get_edge_data():
    """Run the graph-construction query and return all of its rows.

    Connects to the SQLite database at ``DB_LOC``, executes
    ``CONSTRUCT_GRAPH`` and returns the fetched rows as a list of tuples.
    The caller feeds these rows to ``add_edges_from``, so each row is
    presumably a (source, target) pair -- confirm against the query text.

    Raises:
        sqlite3.Error: if the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB_LOC)
    try:
        # try/finally guarantees the connection is released even when the
        # query raises, which the straight-line version did not.
        return conn.execute(CONSTRUCT_GRAPH).fetchall()
    finally:
        conn.close()

# Define function to insert top node data into a table format in a md file
def insert_into_md(top_nodes):
    """Render the ranked packages as a text table and save it to disk.

    ``top_nodes`` is an iterable of ``(node, value)`` pairs.  Each pair is
    added as a row numbered from 1.  The rendered table overwrites
    ``plots/coreness.md``.
    """
    table = PrettyTable(["Sno","Top 5000 valued packages", "Coreness"])
    for serial, (node, value) in enumerate(top_nodes, start=1):
        table.add_row([serial, node, value])
        # NOTE(review): the dash row is treated as a per-entry separator;
        # confirm it was not meant to be a single trailing row.
        table.add_row(['-', '-', '-'])

    rendered = table.get_string()
    with open('plots/coreness.md', 'w') as md_file:
        md_file.write(rendered)

# Define function to create directed graph and calculate Coreness metric for each node
def get_top_nodes(items, top_n=5000):
    """Rank nodes of the directed graph by core number.

    Args:
        items: iterable of edges accepted by ``DiGraph.add_edges_from``.
        top_n: maximum number of (node, value) pairs to return.  Defaults
            to 5000 to match the "Top 5000" table heading and the sibling
            scripts; the slice previously read 50000.

    Returns:
        A list of ``(node, core number)`` pairs, highest value first,
        at most ``top_n`` long.
    """
    G = nx.DiGraph()
    G.add_edges_from(items)

    # Self loops are stripped before the core number is computed.
    G.remove_edges_from(nx.selfloop_edges(G))
    core = nx.core_number(G)

    return sorted(core.items(), key=lambda x: x[1], reverse=True)[:top_n]

# Main function to call other functions
def main():
    """Load the edges, rank the nodes, write the table and print the run time."""
    started = time.time()
    print("program running")

    # Each stage consumes the previous stage's result.
    insert_into_md(get_top_nodes(get_edge_data()))

    elapsed = time.time() - started
    print('Program run time in seconds:', elapsed, '(s)')


if __name__ == '__main__':
    main()
53 changes: 53 additions & 0 deletions analytics/Network_Analysis/degree_centrality.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import sqlite3
import networkx as nx
from prettytable import PrettyTable
import time
from constants import DB_LOC, CONSTRUCT_GRAPH

# Define function to retrieve edge data from SQLite database
def get_edge_data():
    """Run the graph-construction query and return all of its rows.

    Connects to the SQLite database at ``DB_LOC``, executes
    ``CONSTRUCT_GRAPH`` and returns the fetched rows as a list of tuples.
    The caller feeds these rows to ``add_edges_from``, so each row is
    presumably a (source, target) pair -- confirm against the query text.

    Raises:
        sqlite3.Error: if the database cannot be opened or the query fails.
    """
    conn = sqlite3.connect(DB_LOC)
    try:
        # try/finally guarantees the connection is released even when the
        # query raises, which the straight-line version did not.
        return conn.execute(CONSTRUCT_GRAPH).fetchall()
    finally:
        conn.close()

# Define function to insert top node data into a table format in a md file
def insert_into_md(top_nodes):
    """Render the ranked packages as a text table and save it to disk.

    ``top_nodes`` is an iterable of ``(node, value)`` pairs.  Each pair is
    added as a row numbered from 1.  The rendered table overwrites
    ``plots/degree_centrality.md``.
    """
    table = PrettyTable(["Sno","Top 5000 valued packages", "Degree Centrality"])
    for serial, (node, value) in enumerate(top_nodes, start=1):
        table.add_row([serial, node, value])
        # NOTE(review): the dash row is treated as a per-entry separator;
        # confirm it was not meant to be a single trailing row.
        table.add_row(['-', '-', '-'])

    rendered = table.get_string()
    with open('plots/degree_centrality.md', 'w') as md_file:
        md_file.write(rendered)

# Define function to create directed graph and calculate Degree centrality metric for each node
def get_top_nodes(items):
    """Rank nodes of the directed graph by degree centrality.

    Builds a ``DiGraph`` from ``items`` and returns a list of
    ``(node, degree centrality)`` pairs, highest value first, truncated
    to the first 5000 entries.
    """
    graph = nx.DiGraph()
    graph.add_edges_from(items)

    ranking = list(nx.degree_centrality(graph).items())
    # Highest score first; the sort is stable, so ties keep their original order.
    ranking.sort(key=lambda pair: pair[1], reverse=True)
    return ranking[:5000]

# Main function to call other functions
def main():
    """Load the edges, rank the nodes, write the table and print the run time."""
    started = time.time()
    print("program running")

    # Each stage consumes the previous stage's result.
    insert_into_md(get_top_nodes(get_edge_data()))

    elapsed = time.time() - started
    print('Program run time in seconds:', elapsed, '(s)')


if __name__ == '__main__':
    main()
Loading