-
Notifications
You must be signed in to change notification settings - Fork 0
/
get_tweets.py
82 lines (54 loc) · 2.32 KB
/
get_tweets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/python
import os
import fileinput
import re
from twitter import *
from TwitterSearch import *
class GetTweets():
    """Search Twitter for a term and append the cleaned tweet texts to a file.

    All of the work happens in the constructor: creating an instance runs the
    search and writes the results to the output file.
    """

    # Compiled once at class-definition time because cleanup_tweet runs once
    # per tweet.
    # Matches "mailto:..." and news://, http(s)://, ftp(s):// links up to the
    # next whitespace character.
    _LINK_RE = re.compile(r'((mailto\:|(news|(ht|f)tp(s?))\://){1}\S+)')
    # "@" / "#" followed by at least one non-whitespace character.
    _MENTION_RE = re.compile(r'(@){1}\S+')
    _HASHTAG_RE = re.compile(r'(#){1}\S+')
    # Two or more consecutive spaces.
    _SPACE_RUN_RE = re.compile(r' {2,}')

    def __init__(self, term, outfile):
        """Run the search for *term* and append the cleaned tweets to *outfile*.

        Args:
            term: Search keyword; it is also removed from every tweet text.
            outfile: Path of the file the cleaned tweets are appended to.

        Raises:
            KeyError: If CONSUMER_KEY or CONSUMER_SECRET is missing from the
                environment.

        A TwitterSearchException raised while searching is caught and printed
        instead of being propagated.
        """
        # from venv vars
        consumer_key = os.environ['CONSUMER_KEY']
        consumer_secret = os.environ['CONSUMER_SECRET']

        # Binary append mode: the tweet text is encoded to UTF-8 bytes before
        # it is written. The context manager closes the handle even when the
        # search fails part-way through.
        with open(outfile, 'ab') as out:
            try:
                tso = TwitterSearchOrder()
                tso.setKeywords([term])
                tso.setLanguage('en')
                tso.setCount(99)
                tso.setIncludeEntities(False)  # skip the entity information

                # expanduser only expands a leading "~", so this path stays
                # relative to the current working directory.
                creds_path = os.path.expanduser('.app_credentials')
                if not os.path.exists(creds_path):
                    # No stored token yet. Presumably oauth_dance writes the
                    # token file that read_token_file loads next -- confirm.
                    oauth_dance("emotiscrape", consumer_key, consumer_secret,
                                creds_path)
                oauth_token, oauth_secret = read_token_file(creds_path)

                searcher = TwitterSearch(
                    consumer_key=consumer_key,
                    consumer_secret=consumer_secret,
                    access_token=oauth_token,
                    access_token_secret=oauth_secret
                )

                last_tweet = None
                for tweet in searcher.searchTweetsIterable(tso):
                    cleaned = self.cleanup_tweet(tweet['text'], term)
                    # NOTE(review): tweets are written back-to-back with no
                    # separator between them -- confirm that is intended.
                    out.write(cleaned.encode('utf-8'))
                    last_tweet = tweet

                # Summary refers to the last tweet seen; skipped when the
                # search yielded nothing, since there is no tweet to report.
                if last_tweet is not None:
                    print("pulled 99 tweets for %s, until %s [%s]"
                          % (term, last_tweet['created_at'], last_tweet['id']))
            except TwitterSearchException as e:
                # Report search/API errors without crashing the script.
                print(e)

    def cleanup_tweet(self, s, term):
        """Return the tweet text *s* with noise removed.

        Removes links, @mentions, #hashtags and every occurrence of *term*,
        collapses the runs of spaces those removals leave behind into a single
        space, and strips leading/trailing whitespace.

        Args:
            s: Raw tweet text.
            term: Search term to remove from the text.

        Returns:
            The cleaned text.
        """
        # drop links
        s = self._LINK_RE.sub('', s)
        # drop @replies
        s = self._MENTION_RE.sub('', s)
        # drop hashtags
        s = self._HASHTAG_RE.sub('', s)
        # drop the search term
        s = s.replace(term, '')
        # Collapse double spaces. Strings are immutable, so the result has to
        # be assigned back for the substitution to take effect.
        s = self._SPACE_RUN_RE.sub(' ', s)
        return s.strip()
if __name__ == "__main__":
    # Script entry point: collect tweets containing the happy emoticon into
    # the "good" corpus file.
    o = GetTweets(term=':)', outfile="output/good.txt")
    # The matching sad-emoticon run is currently disabled:
    # p = GetTweets(':(', "output/bad.txt")