diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..8f3effb --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +*.h5 filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text diff --git a/maskrcnn_modanet/arrange_annotations.py b/maskrcnn_modanet/arrange_annotations.py index 31c8604..4131e2d 100644 --- a/maskrcnn_modanet/arrange_annotations.py +++ b/maskrcnn_modanet/arrange_annotations.py @@ -5,6 +5,8 @@ savedvars = json.load(f) path = savedvars['datapath'] +fast_download = savedvars['fast_download'] == 'True' + import copy import random @@ -18,6 +20,7 @@ if not os.path.isfile(ann_path + 'instances_all.json'): # copy the modanet instances to the annotations folder + print('Copying annotations from the original path') with open(ann_orig_path + 'modanet2018_instances_' + sets_names[0] + '.json') as f: instances = json.load(f) with open(ann_path + 'instances_all.json', 'w') as outfile: @@ -154,4 +157,7 @@ print('\nNow you can train using: maskrcnn-modanet train') -print('\nOr you can fix the dataset using: maskrcnn-modanet datasets fix') \ No newline at end of file +print('\nOr you can fix the dataset using: maskrcnn-modanet datasets fix') + +if fast_download: + print('Your dataset is already fixed anyway, since you fast-downloaded it.') \ No newline at end of file diff --git a/maskrcnn_modanet/arrange_images.py b/maskrcnn_modanet/arrange_images.py index 0a786ee..bb01c77 100644 --- a/maskrcnn_modanet/arrange_images.py +++ b/maskrcnn_modanet/arrange_images.py @@ -6,97 +6,103 @@ savedvars = json.load(f) path = savedvars['datapath'] -import io -import lmdb -import sqlite3 -import pandas as pd -from PIL import Image -import sqlalchemy - -# name of the set we are getting the annotations from. in the case of modanet, the set containing all info is the train one. 
-set_name = 'train' - - -img_orig_path = path + 'datasets/paperdoll/data/chictopia/' -ann_orig_path = path + 'datasets/modanet/annotations/' -img_path = path + "datasets/coco/images/" -ann_path = path + "datasets/coco/annotations/" - -print("Img coming from : " + img_orig_path) -print("Ann coming from : " + ann_orig_path) -print("Img are now here: " + img_path) -print("Ann are now here: " + ann_path) - - -print(img_orig_path + 'chictopia.sqlite3') -db = sqlite3.connect(img_orig_path + 'chictopia.sqlite3') - -with open(ann_orig_path + 'modanet2018_instances_' + set_name + '.json') as f: - instances = json.load(f) - -#instances['images'][i]['id'] -photosIDs = [] -photosFILE_NAMEs = [None] * 1115985 #1097474 -for instance in instances['images']: - photosIDs.append(instance['id']) - photosFILE_NAMEs[instance['id']] = instance['file_name'] -#import ipdb; ipdb.set_trace() -#photosIDs = [100014, 100040] -photosIDsString = '' -for photoID in photosIDs: - photosIDsString += str(photoID) + ', ' -photosIDsString = photosIDsString[:-2] -#print(photosIDsString) - -sql = str(sqlalchemy.text(""" - SELECT - *, - 'http://images2.chictopia.com/' || path AS url - FROM photos - WHERE photos.post_id IS NOT NULL AND file_file_size IS NOT NULL - AND photos.id IN ( %s ) -""" % photosIDsString)) - -photos = pd.read_sql(sql, con=db) -print('photos = %d' % (len(photos))) -photos.head() - -class PhotoData(object): - def __init__(self, path): - self.env = lmdb.open( - path, map_size=2**36, readonly=True, lock=False - ) - - def __iter__(self): - with self.env.begin() as t: - with t.cursor() as c: - for key, value in c: - yield key, value - - def __getitem__(self, index): - key = str(index).encode('ascii') - with self.env.begin() as t: - data = t.get(key) - if not data: - return None - with io.BytesIO(data) as f: - image = Image.open(f) - image.load() - return image - - def __len__(self): - return self.env.stat()['entries'] - -photo_data = PhotoData(img_orig_path + 'photos.lmdb') -print("Total # 
of photos (also the ones without annotations) is " + str(len(photo_data))) -print() -print('Copying photos to the new folder (just for the first run)') -from progressbar import ProgressBar -pbar = ProgressBar() -for i in pbar(range(len(photosIDs))): - photo = photos.iloc[i] - if not os.path.isfile(img_path + photosFILE_NAMEs[photo.id]): - photo_data[photo.id].save(img_path + photosFILE_NAMEs[photo.id]) +fast_download = savedvars['fast_download'] == 'True' + +if fast_download: + print('Images already arranged!') +else: + + import io + import lmdb + import sqlite3 + import pandas as pd + from PIL import Image + import sqlalchemy + + # name of the set we are getting the annotations from. in the case of modanet, the set containing all info is the train one. + set_name = 'train' + + + img_orig_path = path + 'datasets/paperdoll/data/chictopia/' + ann_orig_path = path + 'datasets/modanet/annotations/' + img_path = path + "datasets/coco/images/" + ann_path = path + "datasets/coco/annotations/" + + print("Img coming from : " + img_orig_path) + print("Ann coming from : " + ann_orig_path) + print("Img are now here: " + img_path) + print("Ann are now here: " + ann_path) + + + print(img_orig_path + 'chictopia.sqlite3') + db = sqlite3.connect(img_orig_path + 'chictopia.sqlite3') + + with open(ann_orig_path + 'modanet2018_instances_' + set_name + '.json') as f: + instances = json.load(f) + + #instances['images'][i]['id'] + photosIDs = [] + photosFILE_NAMEs = [None] * 1115985 #1097474 + for instance in instances['images']: + photosIDs.append(instance['id']) + photosFILE_NAMEs[instance['id']] = instance['file_name'] + #import ipdb; ipdb.set_trace() + #photosIDs = [100014, 100040] + photosIDsString = '' + for photoID in photosIDs: + photosIDsString += str(photoID) + ', ' + photosIDsString = photosIDsString[:-2] + #print(photosIDsString) + + sql = str(sqlalchemy.text(""" + SELECT + *, + 'http://images2.chictopia.com/' || path AS url + FROM photos + WHERE photos.post_id IS NOT NULL 
AND file_file_size IS NOT NULL + AND photos.id IN ( %s ) + """ % photosIDsString)) + + photos = pd.read_sql(sql, con=db) + print('photos = %d' % (len(photos))) + photos.head() + + class PhotoData(object): + def __init__(self, path): + self.env = lmdb.open( + path, map_size=2**36, readonly=True, lock=False + ) + + def __iter__(self): + with self.env.begin() as t: + with t.cursor() as c: + for key, value in c: + yield key, value + + def __getitem__(self, index): + key = str(index).encode('ascii') + with self.env.begin() as t: + data = t.get(key) + if not data: + return None + with io.BytesIO(data) as f: + image = Image.open(f) + image.load() + return image + + def __len__(self): + return self.env.stat()['entries'] + + photo_data = PhotoData(img_orig_path + 'photos.lmdb') + print("Total # of photos (also the ones without annotations) is " + str(len(photo_data))) + print() + print('Copying photos to the new folder (just for the first run)') + from progressbar import ProgressBar + pbar = ProgressBar() + for i in pbar(range(len(photosIDs))): + photo = photos.iloc[i] + if not os.path.isfile(img_path + photosFILE_NAMEs[photo.id]): + photo_data[photo.id].save(img_path + photosFILE_NAMEs[photo.id]) print() print() \ No newline at end of file diff --git a/maskrcnn_modanet/cli/main.py b/maskrcnn_modanet/cli/main.py index 01aa0ab..6f0c3bf 100644 --- a/maskrcnn_modanet/cli/main.py +++ b/maskrcnn_modanet/cli/main.py @@ -144,17 +144,32 @@ def download(path): dir_pkg_path = "/".join(dir_cli_path.split("/")[:-1]) + "/" print(dir_pkg_path) + slow_download = input('Do you want to download the whole 1 million images (what I had to do) or to just download the 50k annotated with ModaNet?\nY for 1 million (40 GB), N for 50k: ') + + if slow_download in ['y', 'Y']: + slow_download = True + else: + slow_download = False + + fast_download = not slow_download + print('''downloading paperdoll dataset taken from here: https://github.com/kyamagu/paperdoll/tree/master/data/chictopia ''') - 
os.system("sh " + dir_pkg_path + "download.sh '" + path + "'") + failure = os.system("sh " + dir_pkg_path + "download.sh '" + path + "' " + (str(1) if fast_download else str(0))) + + if failure: + print('Bash script failed. Run again this command after having downloaded the necessary packages') + exit() + print("If you don't have tree installed, just install it for bash terminal and run this command again: \nmaskrcnn-modanet datasets download") print("\nThis command also stores your saved variables with the default values. run 'maskrcnn-modanet savedvars show' to see them") savedvars = { 'savedvarspath': os.path.expanduser('~')+ '/.maskrcnn-modanet/' + 'savedvars.json', + 'fast_download': str(fast_download), 'datapath': path, 'pkgpath': dir_pkg_path, 'seed' : None, @@ -230,30 +245,34 @@ def save(): @click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable) @click.option('-s', '--segments', is_flag=True, default=False, help='For every annotation found in the image') @click.option('-a', '--all-set', is_flag=True, default=False, help='Results for each image in the validation set') +@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. 
\'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.') @click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results') @click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image') @click.pass_context -def image(ctx, proc_img_path, proc_img_url, segments, all_set, model_path, threshold_score): +def image(ctx, proc_img_path, proc_img_url, segments, all_set, model_path, threshold_score, model_type): ''' Show processed image ''' from maskrcnn_modanet import processimages if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1: - processimages.main(proc_img_path, proc_img_url, all_set, None, model_path, segments, False, threshold_score) + model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path) + processimages.main(proc_img_path, proc_img_url, all_set, None, model_path, segments, False, threshold_score, model=model, labels_to_names=labels_to_names) else: print_help(ctx, None, value=True) @view.command() @click.option('-p', '--proc-img-path', callback=validators.check_if_file_exists) @click.option('-u', '--proc-img-url', callback=validators.check_if_url_downloadable) +@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. 
\'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.') @click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results') @click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image') @click.pass_context -def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score): +def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score, model_type): ''' Show processed image annotations ''' from maskrcnn_modanet import processimages segments = True; all_set = False if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1: - print(processimages.main(proc_img_path, proc_img_url, False, None, model_path, segments, True, threshold_score)) #function returns the annotations + model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path) + print(processimages.main(proc_img_path, proc_img_url, False, None, model_path, segments, True, threshold_score, model=model, labels_to_names=labels_to_names)) #function returns the annotations else: print_help(ctx, None, value=True) @@ -264,32 +283,36 @@ def annotations(ctx, proc_img_path, proc_img_url, model_path, threshold_score): @click.option('-s', '--segments', is_flag=True, default=False, help='For every annotation found in the image') @click.option('-a', '--all-set', is_flag=True, default=False, help='Results for each image in the validation set') @click.option('-l', '--limit', default=None, type=int, help='Works with option -a. 
Only saves the first l number of results') +@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.') @click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results') @click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image') @click.option('--save-path', default='default', callback=validators.check_if_file_folder_exists, help='Set your save path (including extension .jpg). Defaults inside the processimages folder') @click.pass_context -def image(ctx, proc_img_path, proc_img_url, save_path, segments, all_set, model_path, threshold_score, limit): +def image(ctx, proc_img_path, proc_img_url, save_path, segments, all_set, model_path, threshold_score, limit, model_type): ''' Save processed image ''' from maskrcnn_modanet import processimages if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1: - processimages.main(proc_img_path, proc_img_url, all_set, save_path, model_path, segments, False, threshold_score, limit) + model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path) + processimages.main(proc_img_path, proc_img_url, all_set, save_path, model_path, segments, False, threshold_score, limit, model=model, labels_to_names=labels_to_names) else: print_help(ctx, None, value=True) @save.command() @click.option('-p', '--proc-img-path', callback=validators.check_if_file_exists) @click.option('-u', '--proc-img-url', 
callback=validators.check_if_url_downloadable) +@click.option('-mt', '--model-type', default='default', callback=validators.check_if_model_type_valid, help='Set \'trained\' for your last trained snapshot on the snapshots folder, \'coco\' for the image recognition of the COCO dataset. \'default\' is the default value and is the pretrained modanet snapshot you downloaded in the results folder.') @click.option('-m', '--model-path', default=None, callback=validators.check_if_file_exists, help='If you want to use a custom model other than the best one found in results') @click.option('-t', '--threshold-score', default=0.5, callback=validators.check_if_score_is_valid, help='Set the lowest level of confidence to show annotations for the image') @click.option('--save-path', default='default', callback=validators.check_if_file_folder_exists, help='Set your save path (including extension .jpg). Defaults inside the processimages folder') @click.pass_context -def annotations(ctx, proc_img_path, proc_img_url, save_path, model_path, threshold_score): +def annotations(ctx, proc_img_path, proc_img_url, save_path, model_path, threshold_score, model_type): ''' Save processed image annotations ''' from maskrcnn_modanet import processimages segments = True; all_set = False if (not segments or (segments and not all_set) ) and ((1 if proc_img_path else 0)+(1 if proc_img_url else 0)+(1 if all_set else 0)) == 1: - processimages.main(proc_img_path, proc_img_url, False, save_path, model_path, segments, True, threshold_score) + model, labels_to_names = processimages.loadModel(model_type=model_type, model_path=model_path) + processimages.main(proc_img_path, proc_img_url, False, save_path, model_path, segments, True, threshold_score, model=model, labels_to_names=labels_to_names) else: print_help(ctx, None, value=True) diff --git a/maskrcnn_modanet/cli/validators.py b/maskrcnn_modanet/cli/validators.py index d34cea0..576372e 100644 --- a/maskrcnn_modanet/cli/validators.py +++ 
b/maskrcnn_modanet/cli/validators.py @@ -81,5 +81,11 @@ def check_if_score_is_valid(ctx, param, value): def validate_offset(ctx, param, value): ''' Check if the offset is positive and lower it by 1 ''' if not (0 <= value): - raise BadParameter("The threshold score must be between 0 and 1.", ctx, param) - return value - 1 if value > 0 else value \ No newline at end of file + raise BadParameter("The offset must be positive.", ctx, param) + return value - 1 if value > 0 else value + +def check_if_model_type_valid(ctx, param, value): + ''' Check if the value is either default, coco, or trained ''' + if not value in ['default', 'coco', 'trained']: + raise BadParameter("The model_type must be either default, coco, or trained.", ctx, param) + return value \ No newline at end of file diff --git a/maskrcnn_modanet/download.sh b/maskrcnn_modanet/download.sh index 61b7d15..3485a7a 100644 --- a/maskrcnn_modanet/download.sh +++ b/maskrcnn_modanet/download.sh @@ -11,50 +11,62 @@ echo "saving your path location" PATH1=$1 echo $PATH1 +FAST=$2 +echo "fast download:" +echo $FAST + cd $PATH1 mkdir datasets cd datasets - -# download images dataset -git clone https://github.com/kyamagu/paperdoll -pwd -cd paperdoll/data/chictopia -pwd - -if [ ! -d "./photos.lmdb" ] +if [ "$FAST" != "1" ] then - echo "If you already have the 40GB file lying around, you can stop the download by closing this program now," - echo "putting the photos.lmdb file into ./datasets/paperdoll/data/chictopia" - echo "and then restarting this program again so that it thinks it's already downloaded (did you?)" - echo "or you could just wait a few hours of your precious time here.." - wget -c http://vision.is.tohoku.ac.jp/chictopia2/photos.lmdb.tar - tar xf photos.lmdb.tar - if [ -d "./photos.lmdb" ] + + # download images dataset + git clone https://github.com/kyamagu/paperdoll + pwd + cd paperdoll/data/chictopia + pwd + + if [ ! 
-d "./photos.lmdb" ] then - rm photos.lmdb.tar + echo "If you already have the 40GB file lying around, you can stop the download by closing this program now," + echo "putting the photos.lmdb file into ./datasets/paperdoll/data/chictopia" + echo "and then restarting this program again so that it thinks it's already downloaded (did you?)" + echo "or you could just wait a few hours of your precious time here.." + wget -c http://vision.is.tohoku.ac.jp/chictopia2/photos.lmdb.tar + tar xf photos.lmdb.tar + if [ -d "./photos.lmdb" ] + then + rm photos.lmdb.tar + else + echo "install tar and run again this command!" + exit 1 + fi + else echo "photos database already downloaded!" + fi + + echo "unzipping database.." + gunzip -c chictopia.sql.gz | sqlite3 chictopia.sqlite3 + if [ -f "./chictopia.sqlite3" ] + then + rm chictopia.sql.gz else - echo "install tar and run again this command!" + echo "install gunzip and sqlite3 and run again this command!" exit 1 fi -else echo "photos database already downloaded!" -fi -echo "unzipping database.." -gunzip -c chictopia.sql.gz | sqlite3 chictopia.sqlite3 -if [ -f "./chictopia.sqlite3" ] -then - rm chictopia.sql.gz + cd .. + cd .. + cd .. + else - echo "install gunzip and sqlite3 and run again this command!" - exit 1 + echo "Skipping downloading PaperDoll" fi -cd .. -cd .. -cd .. pwd + text="\n now downloading modanet annotations\n \t\t\ttaken from here:\n @@ -68,9 +80,47 @@ git clone https://github.com/eBay/modanet.git mkdir coco #this will be our dataset final folder cd coco -mkdir images + mkdir annotations +if [ "$FAST" = "1" ] +then + pwd + echo "downloading the images folder.. (2 GB)" + if [ ! -d "./images" ] + then + wget -c https://github.com/cad0p/maskrcnn-modanet/releases/download/v0.9/images.zip + echo "unzipping.." + unzip -q images.zip + if [ -d "./images" ] + then + rm images.zip + else + echo "could not unzip file. run command again" + exit 1 + fi + else + echo "images already downloaded!" 
+ fi + + cd annotations + pwd + + echo "now downloading fixed ModaNet annotations (this can also be done with datasets fix command)" + if [ ! -f "./instances_all.json" ] + then + wget -c https://github.com/cad0p/maskrcnn-modanet/releases/download/v0.9/instances_all.json + else + echo "fixed ModaNet annotations already downloaded" + fi + cd .. + +else + echo "images will be downloaded afterwards by running datasets arrange command" + mkdir images +fi + + cd .. cd .. #now in main folder mkdir results @@ -84,7 +134,13 @@ then else echo "default coco snapshot already downloaded" fi -# resnet50_modanet.h5 + +if [ ! -f "./resnet50_modanet.h5" ] +then + echo "downloading the last available trained modanet snapshot" + wget -c https://github.com/cad0p/maskrcnn-modanet/releases/download/v0.9/resnet50_modanet.h5 +else echo "default modanet snapshot already downloaded" +fi mkdir snapshots mkdir processedimages diff --git a/maskrcnn_modanet/fix_annotations.py b/maskrcnn_modanet/fix_annotations.py index 166b2a7..9f507e7 100644 --- a/maskrcnn_modanet/fix_annotations.py +++ b/maskrcnn_modanet/fix_annotations.py @@ -416,7 +416,7 @@ def bboxContainsShape(bbox, shape, error=0.05): elif (shapeBbox(shape1)[2] * shapeBbox(shape1)[3] > 1000 and len(instances['annotations'][ann_index1]['segmentation']) > 1): - + # THIS PART CAN CAUSE FAULTS i.e. MOVE SHAPES INCORRECTLY # this is to avoid moving very small shapes wrongbbox2 = True # the bbox2 is more fit. let's move the shape1 to bbox2! 
(and then fit the box) diff --git a/maskrcnn_modanet/instagram_impl.py b/maskrcnn_modanet/instagram_impl.py index f8c2211..9868191 100644 --- a/maskrcnn_modanet/instagram_impl.py +++ b/maskrcnn_modanet/instagram_impl.py @@ -352,12 +352,6 @@ def instagramImpl(profile, limit=None, offset=0, process_images=True, profile_st BadCredentialsException) - with open(os.path.expanduser('~')+ '/.maskrcnn-modanet/' + 'savedvars.json') as f: - savedvars = json.load(f) - path = savedvars['datapath'] - - snp_path = path + "results/snapshots" - if not restore_result: instaloader = InstaloaderURL(dirname_pattern=path+'/results/instagram/{target}',download_pictures=True, download_videos=False, download_video_thumbnails=False, @@ -504,6 +498,8 @@ def instagramImpl(profile, limit=None, offset=0, process_images=True, profile_st elif restore_result: + log_file = open(log_path, 'w+') + print('Restoring results..') with open(profile_path + 'results.json') as f: results = json.load(f) diff --git a/maskrcnn_modanet/processimages.py b/maskrcnn_modanet/processimages.py index 1ab815c..7b2a88b 100644 --- a/maskrcnn_modanet/processimages.py +++ b/maskrcnn_modanet/processimages.py @@ -154,8 +154,8 @@ def main(proc_img_path=None, proc_img_url=None, all_set=True, save_path=None, mo # set the modified tf session as backend in keras keras.backend.tensorflow_backend.set_session(get_session()) - # adjust this to point to your trained model - if not model_path and not model: + # adjust this to point to your trained model + # get all models names in the results folder modelnames = [f for f in os.listdir(snp_path) if os.path.isfile(os.path.join(snp_path, f))] import re @@ -167,8 +167,8 @@ def extract_number(f): print(max(modelnames,key=extract_number)) model_path = os.path.join(snp_path, max(modelnames,key=extract_number)) - # load retinanet model - if not model: + # load retinanet model + model = models.load_model(model_path, backbone_name='resnet50') if not labels_to_names: # load label to names mapping 
for visualization purposes