
Commit

I think it all works!! Fast Download is very nice too.
cad0p committed Jul 6, 2019
1 parent 8ec0804 commit 2828f4b
Showing 9 changed files with 240 additions and 145 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
@@ -0,0 +1,2 @@
*.h5 filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
8 changes: 7 additions & 1 deletion maskrcnn_modanet/arrange_annotations.py
@@ -5,6 +5,8 @@
savedvars = json.load(f)
path = savedvars['datapath']

fast_download = savedvars['fast_download'] == 'True'

import copy

import random
@@ -18,6 +20,7 @@

if not os.path.isfile(ann_path + 'instances_all.json'):
# copy the modanet instances to the annotations folder
print('Copying annotations from the original path')
with open(ann_orig_path + 'modanet2018_instances_' + sets_names[0] + '.json') as f:
instances = json.load(f)
with open(ann_path + 'instances_all.json', 'w') as outfile:
@@ -154,4 +157,7 @@

print('\nNow you can train using: maskrcnn-modanet train')

print('\nOr you can fix the dataset using: maskrcnn-modanet datasets fix')

if fast_download:
print('Your dataset is already fixed anyway, since you fast-downloaded it.')
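
For context, a minimal sketch of how the new fast_download flag is read back by the arrange scripts, based on the keys visible in this diff (the file location mirrors the 'savedvarspath' entry written in cli/main.py below; the flag is stored with str(), so it is compared as the text 'True' rather than as a boolean):

import json
import os

# Location written by 'maskrcnn-modanet datasets download' (see cli/main.py below).
savedvars_path = os.path.expanduser('~') + '/.maskrcnn-modanet/savedvars.json'

with open(savedvars_path) as f:
    savedvars = json.load(f)

# The flag is serialized as the string 'True'/'False', so it is compared as text.
fast_download = savedvars['fast_download'] == 'True'
print('fast download enabled:', fast_download)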
188 changes: 97 additions & 91 deletions maskrcnn_modanet/arrange_images.py
@@ -6,97 +6,103 @@
savedvars = json.load(f)
path = savedvars['datapath']

import io
import lmdb
import sqlite3
import pandas as pd
from PIL import Image
import sqlalchemy

# name of the set we are getting the annotations from. in the case of modanet, the set containing all info is the train one.
set_name = 'train'


img_orig_path = path + 'datasets/paperdoll/data/chictopia/'
ann_orig_path = path + 'datasets/modanet/annotations/'
img_path = path + "datasets/coco/images/"
ann_path = path + "datasets/coco/annotations/"

print("Img coming from : " + img_orig_path)
print("Ann coming from : " + ann_orig_path)
print("Img are now here: " + img_path)
print("Ann are now here: " + ann_path)


print(img_orig_path + 'chictopia.sqlite3')
db = sqlite3.connect(img_orig_path + 'chictopia.sqlite3')

with open(ann_orig_path + 'modanet2018_instances_' + set_name + '.json') as f:
instances = json.load(f)

#instances['images'][i]['id']
photosIDs = []
photosFILE_NAMEs = [None] * 1115985 #1097474
for instance in instances['images']:
photosIDs.append(instance['id'])
photosFILE_NAMEs[instance['id']] = instance['file_name']
#import ipdb; ipdb.set_trace()
#photosIDs = [100014, 100040]
photosIDsString = ''
for photoID in photosIDs:
photosIDsString += str(photoID) + ', '
photosIDsString = photosIDsString[:-2]
#print(photosIDsString)

sql = str(sqlalchemy.text("""
SELECT
*,
'http://images2.chictopia.com/' || path AS url
FROM photos
WHERE photos.post_id IS NOT NULL AND file_file_size IS NOT NULL
AND photos.id IN ( %s )
""" % photosIDsString))

photos = pd.read_sql(sql, con=db)
print('photos = %d' % (len(photos)))
photos.head()

class PhotoData(object):
def __init__(self, path):
self.env = lmdb.open(
path, map_size=2**36, readonly=True, lock=False
)

def __iter__(self):
with self.env.begin() as t:
with t.cursor() as c:
for key, value in c:
yield key, value

def __getitem__(self, index):
key = str(index).encode('ascii')
with self.env.begin() as t:
data = t.get(key)
if not data:
return None
with io.BytesIO(data) as f:
image = Image.open(f)
image.load()
return image

def __len__(self):
return self.env.stat()['entries']

photo_data = PhotoData(img_orig_path + 'photos.lmdb')
print("Total # of photos (also the ones without annotations) is " + str(len(photo_data)))
print()
print('Copying photos to the new folder (just for the first run)')
from progressbar import ProgressBar
pbar = ProgressBar()
for i in pbar(range(len(photosIDs))):
photo = photos.iloc[i]
if not os.path.isfile(img_path + photosFILE_NAMEs[photo.id]):
photo_data[photo.id].save(img_path + photosFILE_NAMEs[photo.id])
fast_download = savedvars['fast_download'] == 'True'

if fast_download:
print('Images already arranged!')
else:

import io
import lmdb
import sqlite3
import pandas as pd
from PIL import Image
import sqlalchemy

# name of the set we are getting the annotations from. in the case of modanet, the set containing all info is the train one.
set_name = 'train'


img_orig_path = path + 'datasets/paperdoll/data/chictopia/'
ann_orig_path = path + 'datasets/modanet/annotations/'
img_path = path + "datasets/coco/images/"
ann_path = path + "datasets/coco/annotations/"

print("Img coming from : " + img_orig_path)
print("Ann coming from : " + ann_orig_path)
print("Img are now here: " + img_path)
print("Ann are now here: " + ann_path)


print(img_orig_path + 'chictopia.sqlite3')
db = sqlite3.connect(img_orig_path + 'chictopia.sqlite3')

with open(ann_orig_path + 'modanet2018_instances_' + set_name + '.json') as f:
instances = json.load(f)

#instances['images'][i]['id']
photosIDs = []
photosFILE_NAMEs = [None] * 1115985 #1097474
for instance in instances['images']:
photosIDs.append(instance['id'])
photosFILE_NAMEs[instance['id']] = instance['file_name']
#import ipdb; ipdb.set_trace()
#photosIDs = [100014, 100040]
photosIDsString = ''
for photoID in photosIDs:
photosIDsString += str(photoID) + ', '
photosIDsString = photosIDsString[:-2]
#print(photosIDsString)

sql = str(sqlalchemy.text("""
SELECT
*,
'http://images2.chictopia.com/' || path AS url
FROM photos
WHERE photos.post_id IS NOT NULL AND file_file_size IS NOT NULL
AND photos.id IN ( %s )
""" % photosIDsString))

photos = pd.read_sql(sql, con=db)
print('photos = %d' % (len(photos)))
photos.head()

class PhotoData(object):
def __init__(self, path):
self.env = lmdb.open(
path, map_size=2**36, readonly=True, lock=False
)

def __iter__(self):
with self.env.begin() as t:
with t.cursor() as c:
for key, value in c:
yield key, value

def __getitem__(self, index):
key = str(index).encode('ascii')
with self.env.begin() as t:
data = t.get(key)
if not data:
return None
with io.BytesIO(data) as f:
image = Image.open(f)
image.load()
return image

def __len__(self):
return self.env.stat()['entries']

photo_data = PhotoData(img_orig_path + 'photos.lmdb')
print("Total # of photos (also the ones without annotations) is " + str(len(photo_data)))
print()
print('Copying photos to the new folder (just for the first run)')
from progressbar import ProgressBar
pbar = ProgressBar()
for i in pbar(range(len(photosIDs))):
photo = photos.iloc[i]
if not os.path.isfile(img_path + photosFILE_NAMEs[photo.id]):
photo_data[photo.id].save(img_path + photosFILE_NAMEs[photo.id])

print()
print()
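
As a standalone illustration of the LMDB access pattern above, here is a hedged sketch that pulls a single photo out of photos.lmdb by id and saves it as a JPEG. The lmdb path, photo id, and output path are hypothetical placeholders; the key encoding follows PhotoData.__getitem__:

import io
import lmdb
from PIL import Image

lmdb_path = '/path/to/datasets/paperdoll/data/chictopia/photos.lmdb'  # hypothetical location
photo_id = 100014  # hypothetical id, same style as the ids commented out above

env = lmdb.open(lmdb_path, map_size=2**36, readonly=True, lock=False)
with env.begin() as txn:
    data = txn.get(str(photo_id).encode('ascii'))  # keys are ascii-encoded photo ids

if data is not None:
    with io.BytesIO(data) as buf:
        image = Image.open(buf)
        image.load()  # force the decode while the buffer is still open
    image.save('/tmp/%d.jpg' % photo_id)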
41 changes: 32 additions & 9 deletions maskrcnn_modanet/cli/main.py
@@ -144,17 +144,32 @@ def download(path):
dir_pkg_path = "/".join(dir_cli_path.split("/")[:-1]) + "/"
print(dir_pkg_path)

slow_download = input('Do you want to download the whole 1 million images (what I had to do) or to just download the 50k annotated with ModaNet?\nY for 1 million (40 GB), N for 50k: ')

if slow_download in ['y', 'Y']:
slow_download = True
else:
slow_download = False

fast_download = not slow_download

print('''downloading paperdoll dataset
taken from here:
https://github.com/kyamagu/paperdoll/tree/master/data/chictopia
''')

os.system("sh " + dir_pkg_path + "download.sh '" + path + "'")
failure = os.system("sh " + dir_pkg_path + "download.sh '" + path + "' " + (str(1) if fast_download else str(0)))

if failure:
print('Bash script failed. Run this command again after downloading the necessary packages')
exit()


print("If you don't have tree installed, just install it for bash terminal and run this command again: \nmaskrcnn-modanet datasets download")
print("\nThis command also stores your saved variables with the default values. run 'maskrcnn-modanet savedvars show' to see them")
savedvars = {
'savedvarspath': os.path.expanduser('~')+ '/.maskrcnn-modanet/' + 'savedvars.json',
'fast_download': str(fast_download),
'datapath': path,
'pkgpath': dir_pkg_path,
'seed' : None,
@@ -230,30 +245,34 @@ def save():
@click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable)
@click.option('-s', '--segments', is_flag=True, default=False, help='For every annotation found in the image')
@click.option('-a', '--all-set', is_flag=True, default=False, help='Results for each image in the validation set')
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.pass_context
def image(ctx, proc_img_path, proc_img_url, segments, all_set, model_path, threshold_score):
def image(ctx, proc_img_path, proc_img_url, segments, all_set, model_path, threshold_score, model_type):
''' Show processed image '''
from maskrcnn_modanet import processimages

if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
processimages.main(proc_img_path, proc_img_url, all_set, None, model_path, segments, False, threshold_score)
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
processimages.main(proc_img_path, proc_img_url, all_set, None, model_path, segments, False, threshold_score, model=model, labels_to_names=labels_to_names)
else:
print_help(ctx, None, value=True)

@view.command()
@click.option('-p', '--proc-img-path', callback=validators.check_if_file_exists)
@click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable)
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.pass_context
def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score):
def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score, model_type):
''' Show processed image annotations '''
from maskrcnn_modanet import processimages
segments = True; all_set = False
if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
print(processimages.main(proc_img_path, proc_img_url, False, None, model_path, segments, True, threshold_score)) #function returns the annotations
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
print(processimages.main(proc_img_path, proc_img_url, False, None, model_path, segments, True, threshold_score, model=model, labels_to_names=labels_to_names)) #function returns the annotations
else:
print_help(ctx, None, value=True)

@@ -264,32 +283,36 @@ def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score):
@click.option('-s', '--segments', is_flag=True, default=False, help='For every annotation found in the image')
@click.option('-a', '--all-set', is_flag=True, default=False, help='Results for each image in the validation set')
@click.option('-l', '--limit', default=None, type=int, help='Works with option -a. Only saves the first l number of results')
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.option('--save-path', default='default', callback=validators.check_if_file_folder_exists, help='Set your save path (including extension .jpg). Defaults inside the processimages folder')
@click.pass_context
def image(ctx, proc_img_path, proc_img_url, save_path, segments, all_set, model_path, threshold_score, limit):
def image(ctx, proc_img_path, proc_img_url, save_path, segments, all_set, model_path, threshold_score, limit, model_type):
''' Save processed image '''
from maskrcnn_modanet import processimages

if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
processimages.main(proc_img_path, proc_img_url, all_set, save_path, model_path, segments, False, threshold_score, limit)
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
processimages.main(proc_img_path, proc_img_url, all_set, save_path, model_path, segments, False, threshold_score, limit, model=model, labels_to_names=labels_to_names)
else:
print_help(ctx, None, value=True)

@save.command()
@click.option('-p', '--proc-img-path', callback=validators.check_if_file_exists)
@click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable)
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.option('--save-path', default='default', callback=validators.check_if_file_folder_exists, help='Set your save path (including extension .jpg). Defaults inside the processimages folder')
@click.pass_context
def annotations(ctx, proc_img_path, proc_img_url, save_path, model_path, threshold_score):
def annotations(ctx, proc_img_path, proc_img_url, save_path, model_path, threshold_score, model_type):
''' Save processed image annotations '''
from maskrcnn_modanet import processimages

segments = True; all_set = False
if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
processimages.main(proc_img_path, proc_img_url, False, save_path, model_path, segments, True, threshold_score)
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
processimages.main(proc_img_path, proc_img_url, False, save_path, model_path, segments, True, threshold_score, model=model, labels_to_names=labels_to_names)
else:
print_help(ctx, None, value=True)
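
The common thread in these CLI changes is that the network is now built once via processimages.loadModel and passed into processimages.main. A hedged sketch of that call pattern, following the 'view image' call in the diff (image paths are hypothetical, and the positional arguments simply mirror the order used above):

from maskrcnn_modanet import processimages

# Build the model once; model_type is 'default', 'coco', or 'trained'.
model, labels_to_names = processimages.loadModel(model_type='default', model_path=None)

# Reuse the same model for several images instead of rebuilding it per call.
for img_path in ['/path/to/photo1.jpg', '/path/to/photo2.jpg']:
    processimages.main(img_path, None, False, None, None, False, False, 0.5,
                       model=model, labels_to_names=labels_to_names)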
10 changes: 8 additions & 2 deletions maskrcnn_modanet/cli/validators.py
@@ -81,5 +81,11 @@ def check_if_score_is_valid(ctx, param, value):
def validate_offset(ctx, param, value):
''' Check if the offset is positive and lower it by 1 '''
if not (0 <= value):
raise BadParameter("The threshold score must be between 0 and 1.", ctx, param)
return value - 1 if value > 0 else value
raise BadParameter("The offset must be positive.", ctx, param)
return value - 1 if value > 0 else value

def check_if_model_type_valid(ctx, param, value):
''' Check if the value is either default, coco, or trained '''
if not value in ['default', 'coco', 'trained']:
raise BadParameter("The model_type must be either default, coco, or trained.", ctx, param)
return value
