-
Notifications
You must be signed in to change notification settings - Fork 3
/
wikicolorize.py
149 lines (133 loc) · 4.83 KB
/
wikicolorize.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import base64
from lxml import html
import requests
import Algorithmia
from PIL import Image,ImageChops
from io import BytesIO
import pytesseract
import tweepy
import configparser
#Gets a random wikipedia image and context
def get_wikipedia_random(random_wiki_url):
    """Fetch a random Wikimedia Commons file page and try to tweet a colorized copy.

    The page at ``random_wiki_url`` is downloaded and scraped for the media
    URL, the English description and the category names. Only a JPEG that
    ``is_bw_image`` accepts and ``is_document`` rejects is colorized and
    tweeted.

    Returns True when ``tweet_image`` reports success, False in every other
    case (not a JPEG, not black and white, looks like a document,
    colorization failed).
    """
    page_response = requests.get(random_wiki_url)
    page_tree = html.fromstring(page_response.content)

    # Media file URL(s) taken from the full-size image link on the page.
    image_url = page_tree.xpath('//div[@class="fullImageLink"]/a/img/@src')
    # Text nodes of the English description block.
    image_description = page_tree.xpath('//div[contains(@class,\'description\') and contains(@lang,\'en\')]/span/../text()')
    # URL the request ended up at after the randomizing redirect.
    media_url = page_response.url
    print(media_url)
    # Category names (usually cleaner than the description, but less descriptive).
    image_categories = page_tree.xpath('//div[@class="mw-normal-catlinks"]/ul/li/a/text()')
    print(image_url)

    # Guard: only continue when the first media URL is a JPG/JPEG.
    if not (image_url and image_url[0] is not None and image_url[0].lower().endswith(('.jpg', '.jpeg'))):
        return False

    download = requests.get(image_url[0])
    byte_image = Image.open(BytesIO(download.content))
    bw_image_path = 'bw.jpg'
    local_path = '/a/web/path/'
    bw_full_path = local_path+bw_image_path
    # The file must land in a web-reachable directory because colorize_image
    # hands the colorization service a URL rather than the image bytes.
    byte_image.save(bw_full_path,format="JPEG")

    if not is_bw_image(byte_image):
        return False
    if is_document(byte_image):
        return False

    colored_image_path = colorize_image(bw_image_path)
    if not colored_image_path:
        return False

    if tweet_image(colored_image_path,bw_full_path,media_url,image_description,image_categories):
        return True
    return False
#Colorize Image
def colorize_image(image_path):
    """Colorize a web-hosted image with the Algorithmia colorization demo.

    ``image_path`` is appended to the web host prefix to build the URL that is
    piped to the algorithm; the image therefore has to be saved somewhere
    reachable over HTTP beforehand (get_wikipedia_random does that).

    Returns the filename produced by ``save_image`` on success, or False when
    the service call or the save fails.
    """
    client = Algorithmia.client('YOUR-ALGORITHMIA-API-KEY')
    algo = client.algo('algorithmiahq/ColorizationDemo/1.1.23') #If you pay for it you may use the non-demo Colorization API
    # The algorithm is given a URL as input, not the image bytes.
    input_url = 'http://your-web-host'+image_path
    try:
        # Element 1 of the result is passed to save_image, which base64-decodes it.
        colored_image = algo.pipe(input_url).result[1]
        filename_colored = save_image(colored_image)
    except Exception:
        # Best effort: any service/decoding failure means "no colored image".
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit
        # propagate so the retry loop in the script can still be stopped.
        return False
    return filename_colored if filename_colored else False
#Tries to OCR a scanned page with Tesseract OCR, if it fails, it probably isn't a document and is more likely to be a photo
def is_document(image):
    """Report whether Tesseract OCR can process ``image`` without raising.

    The OCR data returned by ``pytesseract.image_to_data`` is printed.
    Returns True when that call (and the print) completes, False when any
    Exception is raised. The caller treats True as "scanned document, skip".
    NOTE(review): this only detects OCR failures, not whether text was found.
    """
    try:
        ocr_data = pytesseract.image_to_data(image)
        print(ocr_data)
    except Exception:
        return False
    return True
#Checks if the image is black and white (Code by @Karl K @ https://stackoverflow.com/a/34175631/1284186)
def is_bw_image(im):
    """Check whether an image is monochrome.

    An image counts as monochrome when its mode is "L" (single channel) or
    when it is "RGB" and the three channels are pixel-for-pixel identical.
    Any other mode is reported as not monochrome (False); nothing is raised.
    """
    if im.mode not in ("L", "RGB"):
        return False
    if im.mode == "L":
        return True
    red, green, blue = im.split()
    # A channel difference whose maximum (getextrema()[1]) is 0 means the two
    # channels are identical; all() stops at the first differing channel.
    return all(
        ImageChops.difference(red, other).getextrema()[1] == 0
        for other in (green, blue)
    )
#Save the colored image in a *.png file
def save_image(imgstring):
    """Decode a base64 image string and write it to ``colored.png``.

    Returns the filename on success, or False when the file cannot be
    written. A malformed base64 string is not handled here: the decoding
    error propagates to the caller.
    """
    imgdata = base64.b64decode(imgstring)
    filename = 'colored.png' # I assume you have a way of picking unique filenames
    try:
        # The context manager closes the file; no explicit close() is needed.
        with open(filename, 'wb') as f:
            f.write(imgdata)
    except OSError:
        # Only file-system failures are turned into the False result.
        return False
    return filename
#Read configuration file
def read_config():
    """Load ``wikicolorize.config.ini`` and return its ``[twitter]`` section.

    Raises KeyError when the file is missing or has no ``[twitter]`` section.
    """
    parser = configparser.ConfigParser()
    parser.read('wikicolorize.config.ini')
    twitter_section = parser['twitter']
    return twitter_section
#Twitter API bureaucracy
def twitter_api():
    """Build an authenticated tweepy API client from the configuration file.

    ``read_config`` already returns the ``[twitter]`` section, so the four
    credentials are read from it directly. Indexing it with ``'twitter'``
    again would look for an option of that name and raise KeyError.

    Returns a ``tweepy.API`` instance.
    Raises KeyError when a credential is missing from the section.
    """
    credentials = read_config()
    access_token = credentials['access_token']
    access_token_secret = credentials['access_token_secret']
    consumer_key = credentials['consumer_key']
    consumer_secret = credentials['consumer_secret']
    auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
    auth.set_access_token(access_token, access_token_secret)
    return tweepy.API(auth)
#Tweets the image with the description and/or category
def tweet_image(colored_image, bw_image, url, description, categories):
    """Tweet the original and colorized images with a short caption and the URL.

    The caption is the first category when there is one. Otherwise it is the
    first non-blank description text (truncated), and if neither exists the
    status is just the URL. The status is built before anything is uploaded,
    so a page without categories no longer fails after the media upload.

    Parameters:
        colored_image: path of the colorized image file.
        bw_image: path of the original black and white image file.
        url: page URL appended to the status text.
        description: description text(s) scraped from the page; may be empty.
        categories: category names scraped from the page; may be empty.

    Returns True once the status update call has completed.
    """
    max_label_chars = 200  # keep a description-based caption well within a tweet
    if categories:
        status_text = categories[0]+' '+url
    else:
        if isinstance(description, str):
            description = [description]
        label = next(
            (text.strip() for text in (description or []) if text and text.strip()),
            '',
        )
        status_text = label[:max_label_chars]+' '+url if label else url

    api = twitter_api()
    # Upload both images (original first) and collect their media ids.
    media_ids = [
        api.media_upload(filename).media_id
        for filename in (bw_image, colored_image)
    ]
    # Single tweet carrying both images.
    api.update_status(status=status_text, media_ids=media_ids)
    return True
if __name__ == "__main__":
    # Keep drawing random Commons files until one is colorized and tweeted.
    # Every attempt's result is checked, so a successful attempt ends the run
    # instead of being followed by another tweet.
    random_wiki_url = 'http://commons.wikimedia.org/wiki/Special:Random/File'
    while True:
        converted = get_wikipedia_random(random_wiki_url)
        if converted:
            break