forked from ptwobrussell/Recipes-for-Mining-Twitter
-
Notifications
You must be signed in to change notification settings - Fork 2
/
recipe__extract_tweet_entities.py
55 lines (36 loc) · 1.42 KB
/
recipe__extract_tweet_entities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# -*- coding: utf-8 -*-
import json
import twitter_text
def get_entities(tweet):
    """Extract a production-style 'entities' dict from a tweet.

    Parameters:
        tweet -- a dict with (at least) a 'text' key holding the raw
                 tweet body. Other fields are ignored.

    Returns:
        A dict with 'user_mentions', 'hashtags', and 'urls' keys, each a
        list of entity dicts with 'indices' positions, shaped to match
        the production Twitter API's entities hash.

    Note: the production Twitter API contains a few additional fields in
    the entities hash that would require additional API calls to resolve.
    See API resources that offer the include_entities parameter for details.
    """

    extractor = twitter_text.Extractor(tweet['text'])

    entities = {}

    entities['user_mentions'] = \
        list(extractor.extract_mentioned_screen_names_with_indices())

    entities['hashtags'] = []
    for ht in extractor.extract_hashtags_with_indices():
        # Massage field name to match production twitter api:
        # the extractor yields 'hashtag', the API uses 'text'.
        ht['text'] = ht.pop('hashtag')
        entities['hashtags'].append(ht)

    entities['urls'] = list(extractor.extract_urls_with_indices())

    return entities
if __name__ == '__main__':
# A mocked up array of tweets for purposes of illustration.
# Assume tweets have been fetched from the /search resource or elsewhere.
tweets = \
[
{
'text' : 'Get @SocialWebMining example code at http://bit.ly/biais2 #w00t'
# ... more tweet fields ...
},
# ... more tweets ...
]
for tweet in tweets:
tweet['entities'] = get_entities(tweet)
print json.dumps(tweets, indent=1)