
Commit

I think it all works!! Fast Download is very nice too.
cad0p committed Jul 6, 2019
1 parent 8ec0804 commit 2828f4b
Showing 9 changed files with 240 additions and 145 deletions.
2 changes: 2 additions & 0 deletions .gitattributes
@@ -0,0 +1,2 @@
*.h5 filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
8 changes: 7 additions & 1 deletion maskrcnn_modanet/arrange_annotations.py
@@ -5,6 +5,8 @@
savedvars = json.load(f)
path = savedvars['datapath']

fast_download = savedvars['fast_download'] == 'True'

import copy

import random
@@ -18,6 +20,7 @@

if not os.path.isfile(ann_path + 'instances_all.json'):
# copy the modanet instances to the annotations folder
print('Copying annotations from the original path')
with open(ann_orig_path + 'modanet2018_instances_' + sets_names[0] + '.json') as f:
instances = json.load(f)
with open(ann_path + 'instances_all.json', 'w') as outfile:
@@ -154,4 +157,7 @@

print('\nNow you can train using: maskrcnn-modanet train')

print('\nOr you can fix the dataset using: maskrcnn-modanet datasets fix')

if fast_download:
print('Your dataset is already fixed anyway, since you fast-downloaded it.')
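
For context, a minimal sketch of how the new fast_download flag is read back by the arrange scripts, based on the keys visible in this diff (the file location mirrors the 'savedvarspath' entry written in cli/main.py below; the flag is stored with str(), so it is compared as the text 'True' rather than as a boolean):

import json
import os

# Location written by 'maskrcnn-modanet datasets download' (see cli/main.py below).
savedvars_path = os.path.expanduser('~') + '/.maskrcnn-modanet/savedvars.json'

with open(savedvars_path) as f:
    savedvars = json.load(f)

# The flag is serialized as the string 'True'/'False', so it is compared as text.
fast_download = savedvars['fast_download'] == 'True'
print('fast download enabled:', fast_download)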
188 changes: 97 additions & 91 deletions maskrcnn_modanet/arrange_images.py
@@ -6,97 +6,103 @@
savedvars = json.load(f)
path = savedvars['datapath']

import io
import lmdb
import sqlite3
import pandas as pd
from PIL import Image
import sqlalchemy

# name of the set we are getting the annotations from. in the case of modanet, the set containing all info is the train one.
set_name = 'train'


img_orig_path = path + 'datasets/paperdoll/data/chictopia/'
ann_orig_path = path + 'datasets/modanet/annotations/'
img_path = path + "datasets/coco/images/"
ann_path = path + "datasets/coco/annotations/"

print("Img coming from : " + img_orig_path)
print("Ann coming from : " + ann_orig_path)
print("Img are now here: " + img_path)
print("Ann are now here: " + ann_path)


print(img_orig_path + 'chictopia.sqlite3')
db = sqlite3.connect(img_orig_path + 'chictopia.sqlite3')

with open(ann_orig_path + 'modanet2018_instances_' + set_name + '.json') as f:
instances = json.load(f)

#instances['images'][i]['id']
photosIDs = []
photosFILE_NAMEs = [None] * 1115985 #1097474
for instance in instances['images']:
photosIDs.append(instance['id'])
photosFILE_NAMEs[instance['id']] = instance['file_name']
#import ipdb; ipdb.set_trace()
#photosIDs = [100014, 100040]
photosIDsString = ''
for photoID in photosIDs:
photosIDsString += str(photoID) + ', '
photosIDsString = photosIDsString[:-2]
#print(photosIDsString)

sql = str(sqlalchemy.text("""
SELECT
*,
'http://images2.chictopia.com/' || path AS url
FROM photos
WHERE photos.post_id IS NOT NULL AND file_file_size IS NOT NULL
AND photos.id IN ( %s )
""" % photosIDsString))

photos = pd.read_sql(sql, con=db)
print('photos = %d' % (len(photos)))
photos.head()

class PhotoData(object):
def __init__(self, path):
self.env = lmdb.open(
path, map_size=2**36, readonly=True, lock=False
)

def __iter__(self):
with self.env.begin() as t:
with t.cursor() as c:
for key, value in c:
yield key, value

def __getitem__(self, index):
key = str(index).encode('ascii')
with self.env.begin() as t:
data = t.get(key)
if not data:
return None
with io.BytesIO(data) as f:
image = Image.open(f)
image.load()
return image

def __len__(self):
return self.env.stat()['entries']

photo_data = PhotoData(img_orig_path + 'photos.lmdb')
print("Total # of photos (also the ones without annotations) is " + str(len(photo_data)))
print()
print('Copying photos to the new folder (just for the first run)')
from progressbar import ProgressBar
pbar = ProgressBar()
for i in pbar(range(len(photosIDs))):
photo = photos.iloc[i]
if not os.path.isfile(img_path + photosFILE_NAMEs[photo.id]):
photo_data[photo.id].save(img_path + photosFILE_NAMEs[photo.id])
fast_download = savedvars['fast_download'] == 'True'

if fast_download:
print('Images already arranged!')
else:

import io
import lmdb
import sqlite3
import pandas as pd
from PIL import Image
import sqlalchemy

# name of the set we are getting the annotations from. in the case of modanet, the set containing all info is the train one.
set_name = 'train'


img_orig_path = path + 'datasets/paperdoll/data/chictopia/'
ann_orig_path = path + 'datasets/modanet/annotations/'
img_path = path + "datasets/coco/images/"
ann_path = path + "datasets/coco/annotations/"

print("Img coming from : " + img_orig_path)
print("Ann coming from : " + ann_orig_path)
print("Img are now here: " + img_path)
print("Ann are now here: " + ann_path)


print(img_orig_path + 'chictopia.sqlite3')
db = sqlite3.connect(img_orig_path + 'chictopia.sqlite3')

with open(ann_orig_path + 'modanet2018_instances_' + set_name + '.json') as f:
instances = json.load(f)

#instances['images'][i]['id']
photosIDs = []
photosFILE_NAMEs = [None] * 1115985 #1097474
for instance in instances['images']:
photosIDs.append(instance['id'])
photosFILE_NAMEs[instance['id']] = instance['file_name']
#import ipdb; ipdb.set_trace()
#photosIDs = [100014, 100040]
photosIDsString = ''
for photoID in photosIDs:
photosIDsString += str(photoID) + ', '
photosIDsString = photosIDsString[:-2]
#print(photosIDsString)

sql = str(sqlalchemy.text("""
SELECT
*,
'http://images2.chictopia.com/' || path AS url
FROM photos
WHERE photos.post_id IS NOT NULL AND file_file_size IS NOT NULL
AND photos.id IN ( %s )
""" % photosIDsString))

photos = pd.read_sql(sql, con=db)
print('photos = %d' % (len(photos)))
photos.head()

class PhotoData(object):
def __init__(self, path):
self.env = lmdb.open(
path, map_size=2**36, readonly=True, lock=False
)

def __iter__(self):
with self.env.begin() as t:
with t.cursor() as c:
for key, value in c:
yield key, value

def __getitem__(self, index):
key = str(index).encode('ascii')
with self.env.begin() as t:
data = t.get(key)
if not data:
return None
with io.BytesIO(data) as f:
image = Image.open(f)
image.load()
return image

def __len__(self):
return self.env.stat()['entries']

photo_data = PhotoData(img_orig_path + 'photos.lmdb')
print("Total # of photos (also the ones without annotations) is " + str(len(photo_data)))
print()
print('Copying photos to the new folder (just for the first run)')
from progressbar import ProgressBar
pbar = ProgressBar()
for i in pbar(range(len(photosIDs))):
photo = photos.iloc[i]
if not os.path.isfile(img_path + photosFILE_NAMEs[photo.id]):
photo_data[photo.id].save(img_path + photosFILE_NAMEs[photo.id])

print()
print()
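
As a standalone illustration of the LMDB access pattern above, here is a hedged sketch that pulls a single photo out of photos.lmdb by id and saves it as a JPEG. The lmdb path, photo id, and output path are hypothetical placeholders; the key encoding follows PhotoData.__getitem__:

import io
import lmdb
from PIL import Image

lmdb_path = '/path/to/datasets/paperdoll/data/chictopia/photos.lmdb'  # hypothetical location
photo_id = 100014  # hypothetical id, same style as the ids commented out above

env = lmdb.open(lmdb_path, map_size=2**36, readonly=True, lock=False)
with env.begin() as txn:
    data = txn.get(str(photo_id).encode('ascii'))  # keys are ascii-encoded photo ids

if data is not None:
    with io.BytesIO(data) as buf:
        image = Image.open(buf)
        image.load()  # force the decode while the buffer is still open
    image.save('/tmp/%d.jpg' % photo_id)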
41 changes: 32 additions & 9 deletions maskrcnn_modanet/cli/main.py
@@ -144,17 +144,32 @@ def download(path):
dir_pkg_path = "/".join(dir_cli_path.split("/")[:-1]) + "/"
print(dir_pkg_path)

slow_download = input('Do you want to download the whole 1 million images (what I had to do) or to just download the 50k annotated with ModaNet?\nY for 1 million (40 GB), N for 50k: ')

if slow_download in ['y', 'Y']:
slow_download = True
else:
slow_download = False

fast_download = not slow_download

print('''downloading paperdoll dataset
taken from here:
https://github.com/kyamagu/paperdoll/tree/master/data/chictopia
''')

os.system("sh " + dir_pkg_path + "download.sh '" + path + "'")
failure = os.system("sh " + dir_pkg_path + "download.sh '" + path + "' " + (str(1) if fast_download else str(0)))

if failure:
print('Bash script failed. Run this command again after downloading the necessary packages')
exit()


print("If you don't have tree installed, just install it for bash terminal and run this command again: \nmaskrcnn-modanet datasets download")
print("\nThis command also stores your saved variables with the default values. run 'maskrcnn-modanet savedvars show' to see them")
savedvars = {
'savedvarspath': os.path.expanduser('~')+ '/.maskrcnn-modanet/' + 'savedvars.json',
'fast_download': str(fast_download),
'datapath': path,
'pkgpath': dir_pkg_path,
'seed' : None,
@@ -230,30 +245,34 @@ def save():
@click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable)
@click.option('-s', '--segments', is_flag=True, default=False, help='For every annotation found in the image')
@click.option('-a', '--all-set', is_flag=True, default=False, help='Results for each image in the validation set')
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.pass_context
def image(ctx, proc_img_path, proc_img_url, segments, all_set, model_path, threshold_score):
def image(ctx, proc_img_path, proc_img_url, segments, all_set, model_path, threshold_score, model_type):
''' Show processed image '''
from maskrcnn_modanet import processimages

if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
processimages.main(proc_img_path, proc_img_url, all_set, None, model_path, segments, False, threshold_score)
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
processimages.main(proc_img_path, proc_img_url, all_set, None, model_path, segments, False, threshold_score, model=model, labels_to_names=labels_to_names)
else:
print_help(ctx, None, value=True)

@view.command()
@click.option('-p', '--proc-img-path', callback=validators.check_if_file_exists)
@click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable)
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.pass_context
def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score):
def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score, model_type):
''' Show processed image annotations '''
from maskrcnn_modanet import processimages
segments = True; all_set = False
if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
print(processimages.main(proc_img_path, proc_img_url, False, None, model_path, segments, True, threshold_score)) #function returns the annotations
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
print(processimages.main(proc_img_path, proc_img_url, False, None, model_path, segments, True, threshold_score, model=model, labels_to_names=labels_to_names)) #function returns the annotations
else:
print_help(ctx, None, value=True)

@@ -264,32 +283,36 @@ def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score):
@click.option('-s', '--segments', is_flag=True, default=False, help='For every annotation found in the image')
@click.option('-a', '--all-set', is_flag=True, default=False, help='Results for each image in the validation set')
@click.option('-l', '--limit', default=None, type=int, help='Works with option -a. Only saves the first l number of results')
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.option('--save-path', default='default', callback=validators.check_if_file_folder_exists, help='Set your save path (including extension .jpg). Defaults inside the processimages folder')
@click.pass_context
def image(ctx, proc_img_path, proc_img_url, save_path, segments, all_set, model_path, threshold_score, limit):
def image(ctx, proc_img_path, proc_img_url, save_path, segments, all_set, model_path, threshold_score, limit, model_type):
''' Save processed image '''
from maskrcnn_modanet import processimages

if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
processimages.main(proc_img_path, proc_img_url, all_set, save_path, model_path, segments, False, threshold_score, limit)
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
processimages.main(proc_img_path, proc_img_url, all_set, save_path, model_path, segments, False, threshold_score, limit, model=model, labels_to_names=labels_to_names)
else:
print_help(ctx, None, value=True)

@save.command()
@click.option('-p', '--proc-img-path', callback=validators.check_if_file_exists)
@click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable)
@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.')
@click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results')
@click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image')
@click.option('--save-path', default='default', callback=validators.check_if_file_folder_exists, help='Set your save path (including extension .jpg). Defaults inside the processimages folder')
@click.pass_context
def annotations(ctx, proc_img_path, proc_img_url, save_path, model_path, threshold_score):
def annotations(ctx, proc_img_path, proc_img_url, save_path, model_path, threshold_score, model_type):
''' Save processed image annotations '''
from maskrcnn_modanet import processimages

segments = True; all_set = False
if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1:
processimages.main(proc_img_path, proc_img_url, False, save_path, model_path, segments, True, threshold_score)
model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path)
processimages.main(proc_img_path, proc_img_url, False, save_path, model_path, segments, True, threshold_score, model=model, labels_to_names=labels_to_names)
else:
print_help(ctx, None, value=True)
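
The common thread in these CLI changes is that the network is now built once via processimages.loadModel and passed into processimages.main. A hedged sketch of that call pattern, following the 'view image' call in the diff (image paths are hypothetical, and the positional arguments simply mirror the order used above):

from maskrcnn_modanet import processimages

# Build the model once; model_type is 'default', 'coco', or 'trained'.
model, labels_to_names = processimages.loadModel(model_type='default', model_path=None)

# Reuse the same model for several images instead of rebuilding it per call.
for img_path in ['/path/to/photo1.jpg', '/path/to/photo2.jpg']:
    processimages.main(img_path, None, False, None, None, False, False, 0.5,
                       model=model, labels_to_names=labels_to_names)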
10 changes: 8 additions & 2 deletions maskrcnn_modanet/cli/validators.py
@@ -81,5 +81,11 @@ def check_if_score_is_valid(ctx, param, value):
def validate_offset(ctx, param, value):
''' Check if the offset is positive and lower it by 1 '''
if not (0 <= value):
raise BadParameter("The threshold score must be between 0 and 1.", ctx, param)
return value - 1 if value > 0 else value
raise BadParameter("The offset must be positive.", ctx, param)
return value - 1 if value > 0 else value

def check_if_model_type_valid(ctx, param, value):
''' Check if the value is either default, coco, or trained '''
if not value in ['default', 'coco', 'trained']:
raise BadParameter("The model_type must be either default, coco, or trained.", ctx, param)
return value
