redditbackup.py
#!/usr/bin/env python
"""
Reddit backup!
Version: 10.01.xx
"""
import codecs
import getpass
#import re
import time
import urllib
import urllib2
from xml.dom import minidom
try:
    import json
except ImportError:
    # json ships with Python 2.6+; fall back to simplejson on older versions.
    try:
        import simplejson as json
    except ImportError:
        print "Unable to find a json library. Please install simplejson or Python 2.6.* if you want to backup as json."
        exit()
# One shared opener keeps the reddit session cookie set by the login request.
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor)


def urlopen(url, data=None, headers={}):
    # Encode dict/iterable POST data; pre-encoded strings pass through untouched.
    if hasattr(data, "__iter__"):
        data = urllib.urlencode(data)
    return opener.open(urllib2.Request(url, data, headers))
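# Illustrative calls to the helper above (the names are examples, not part of
# the original script): a dict is sent as an urlencoded POST body, while
# omitting data performs a plain GET through the same cookie-aware opener.
#   urlopen("https://www.reddit.com/api/login/", {"user": name, "passwd": pw})
#   urlopen("http://www.reddit.com/user/%s/comments/.json" % name)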

if __name__ == "__main__":
    HASBACON = False
    format = None
    # UTC timestamp shared by every output filename for this run.
    datetime = time.strftime("%Y%m%d%H%M%S", time.gmtime())
    # Prompt for credentials until the login response stops reporting WRONG_PASSWORD.
    while not HASBACON:
        username = raw_input("Username: ")
        password = getpass.getpass("Password: ")
        if "WRONG_PASSWORD" in urlopen("https://www.reddit.com/api/login/", {"passwd": password, "user": username}).read():
            print "Wrong username or password, please try again."
        else:
            HASBACON = True
    while not format:
        f = raw_input("Save backup as (x)ml or (j)son? ")
        if f.lower().startswith("x"):
            format = "xml"
        elif f.lower().startswith("j"):
            format = "json"
        else:
            print "No such option, please try again."
    # Each section is fetched page by page (100 items at a time) until the
    # listing's "after" cursor comes back as None.
    for section in ["comments", "submitted", "liked", "disliked", "hidden"]:
        print "Downloading %s..." % section
        after = ""
        data = []
        xml = None
        count = 0
        while after is not None:
            # The JSON listing is always fetched: it carries the "after" cursor
            # and the item count, even when the backup itself is saved as XML.
            c = json.loads(urlopen("http://www.reddit.com/user/%s/%s/.json?limit=100&after=%s" % (urllib.quote(username), section, after)).read())
            if format == "xml":
                # Fetch the same page as XML and splice its <item> elements into
                # the first page's document so one feed holds the whole section.
                x = minidom.parseString(urlopen("http://www.reddit.com/user/%s/%s/.xml?limit=100&after=%s" % (urllib.quote(username), section, after)).read())
                if not xml:
                    xml = x
                else:
                    for item in x.getElementsByTagName("item"):
                        xml.childNodes[0].childNodes[0].appendChild(item)
            else:
                data.extend(c["data"]["children"])
            after = c["data"]["after"]
            count += len(c["data"]["children"])
        f = codecs.open("reddit.%s.%s.%s" % (section, datetime, format), "w", "utf-8")
        if format == "json":
            json.dump(data, f, sort_keys=True, indent=4, encoding="utf-8")
        else:
            xml.writexml(f, encoding="utf-8")
        f.close()
        print "Saved %d %s." % (count, section)
    print "Mission Accomplished."