-
Notifications
You must be signed in to change notification settings - Fork 5
/
twitter_conversations.py
45 lines (38 loc) · 1.68 KB
/
twitter_conversations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import sys
import json
from operator import itemgetter
import networkx as nx
def usage():
    """Print a short command-line usage message to standard output.

    The script name shown in the message is taken from ``sys.argv[0]``.
    """
    print("Usage")
    print("python {} <filename>".format(sys.argv[0]))
def _build_reply_graph(lines):
    """Build a directed reply graph from JSON-encoded tweets, one per line.

    Every record that has an 'id' key becomes a node carrying its text, its
    author's screen name and its creation timestamp. An edge runs from a
    tweet to a reply to it, but only when the replied-to tweet is already in
    the graph (i.e. it appeared earlier in the input) and was written by a
    different author, so self-replies are left unlinked.

    Args:
        lines: Iterable of strings, each holding one JSON object.

    Returns:
        The populated ``nx.DiGraph``.
    """
    graph = nx.DiGraph()
    for line in lines:
        if not line.strip():
            # json.loads rejects empty input; tolerate blank lines.
            continue
        tweet = json.loads(line)
        if 'id' not in tweet:
            # Records without an 'id' key are ignored.
            continue
        author = tweet['user']['screen_name']
        graph.add_node(tweet['id'],
                       tweet=tweet['text'],
                       author=author,
                       created_at=tweet['created_at'])
        reply_to = tweet.get('in_reply_to_status_id')
        if (reply_to
                and reply_to in graph
                and author != graph.nodes[reply_to]['author']):
            # Edge direction is replied-to tweet -> reply.
            graph.add_edge(reply_to, tweet['id'])
    return graph


def _main(argv):
    """Load the tweets file named in ``argv`` and print conversation stats.

    Prints a summary of the graph, the tweet with the most replies and the
    tweets along the longest reply chain.

    Args:
        argv: Command-line arguments; ``argv[1]`` is the path to a JSONL
            file of tweets.

    Returns:
        Process exit status: 1 on wrong usage, 0 otherwise.
    """
    if len(argv) != 2:
        usage()
        return 1

    # Takes in a jsonl file of tweets as input.
    with open(argv[1], encoding="utf-8") as f:
        graph = _build_reply_graph(f)

    # Print some basic stats. nx.info is not available in every NetworkX
    # release, so fall back to the graph's own string form.
    describe = getattr(nx, "info", str)
    print(describe(graph))

    if graph.number_of_nodes() == 0:
        print("No tweets found in input.")
        return 0

    # Find the most replied tweet. Edges point from a tweet to its replies,
    # so the number of replies is the out-degree (total degree would also
    # count the edge coming in from the tweet this one replies to).
    most_replied_id, replies = max(graph.out_degree(), key=itemgetter(1))
    print("Most replied tweet ({} replies):".format(replies))
    print(graph.nodes[most_replied_id])

    # Find the longest conversation (longest path).
    print("Longest discussion:")
    for tweet_id in nx.dag_longest_path(graph):
        node = graph.nodes[tweet_id]
        print("{} (by {} at {})".format(node['tweet'], node['author'], node['created_at']))
    return 0


if __name__ == "__main__":
    sys.exit(_main(sys.argv))