From eb4c0faba1ccaa92b1a6739c9898f961a622bd13 Mon Sep 17 00:00:00 2001 From: Ruge Li <91452427+rugeli@users.noreply.github.com> Date: Mon, 18 Dec 2023 12:01:48 -0800 Subject: [PATCH] staging: run recipes from firebase (#179) * [wip] prep recipe data for packing * get creds from local file * save firebase creds to a .creds file * remove cred arg * check for already handled values in remote recipes * can pack one sphere * * adding username to .creds * formatting * move `write_username_to_creds` * download recipe testing * edit comment * code refactor * lint * format tests * add prep_db_doc * changed class name in DBRecipeHandler * fix lint and test errors * initialize firebase handler only once * refactor message * add remote db options in `pack` * remove a print statement * rename and reorg DB handler * fix tests * move database_ids enum to interface_objects * remove db_handler in pack and recipe_loader * send db_handler in to autopack * rename functions * integrate DATABASE_NAMES into interface_objects * lint * Feature/run inherited objects (#198) * turn off resolving inheritance while uploading * able to upload recipes having "inherit" key * get download and pack to work, refactors needed * refactors * formatting * testing and refactor * Feature/save metadata to firebase (#206) * refactor AWS and firebase handler * databases initiation handling * refactor * Update .gitignore Co-authored-by: Saurabh Mogre * add file existence check * refactor is_nested_list method * revert write_json_file * formatting --------- Co-authored-by: meganrm Co-authored-by: Saurabh Mogre --- cellpack/autopack/AWSHandler.py | 19 +- cellpack/autopack/DBRecipeHandler.py | 285 ++++++++++++++---- cellpack/autopack/FirebaseHandler.py | 137 ++++++--- cellpack/autopack/__init__.py | 44 ++- .../interface_objects/database_ids.py | 28 ++ cellpack/autopack/loaders/recipe_loader.py | 38 ++- cellpack/autopack/loaders/utils.py | 14 + .../upy/simularium/simularium_helper.py | 30 +- cellpack/bin/upload.py | 17 +- cellpack/tests/test_data_doc.py | 38 +++ cellpack/tests/test_db_recipe_loader.py | 192 ++++++++++++ ..._recipe_handler.py => test_db_uploader.py} | 30 +- 12 files changed, 715 insertions(+), 157 deletions(-) create mode 100644 cellpack/autopack/interface_objects/database_ids.py create mode 100644 cellpack/tests/test_data_doc.py create mode 100644 cellpack/tests/test_db_recipe_loader.py rename cellpack/tests/{test_db_recipe_handler.py => test_db_uploader.py} (87%) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index 878638bd0..c93a6098f 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -10,6 +10,10 @@ class AWSHandler(object): Handles all the AWS S3 operations """ + # class attributes + _session_created = False + _s3_client = None + def __init__( self, bucket_name, @@ -18,12 +22,21 @@ def __init__( ): self.bucket_name = bucket_name self.folder_name = sub_folder_name - session = boto3.Session() - self.s3_client = session.client( + # Create a session if one does not exist + if not AWSHandler._session_created: + self._create_session(region_name) + AWSHandler._session_created = True + else: + # use the existing session + self.s3_client = AWSHandler._s3_client + + def _create_session(self, region_name): + AWSHandler._s3_client = boto3.client( "s3", endpoint_url=f"https://s3.{region_name}.amazonaws.com", region_name=region_name, ) + self.s3_client = AWSHandler._s3_client def get_aws_object_key(self, object_name): if self.folder_name is not None: @@ -82,4 +95,4 
@@ def save_file(self, file_path): """ file_name = self.upload_file(file_path) if file_name: - return self.create_presigned_url(file_name) + return file_name, self.create_presigned_url(file_name) diff --git a/cellpack/autopack/DBRecipeHandler.py b/cellpack/autopack/DBRecipeHandler.py index 32ba99979..af4e97e54 100644 --- a/cellpack/autopack/DBRecipeHandler.py +++ b/cellpack/autopack/DBRecipeHandler.py @@ -21,6 +21,29 @@ def should_write(): def is_key(string_or_dict): return not isinstance(string_or_dict, dict) + @staticmethod + def is_nested_list(item): + if not isinstance(item, list): + return False + for element in item: + if isinstance(element, (list, tuple)): + return True + return False + + @staticmethod + def is_db_dict(item): + if isinstance(item, dict) and len(item) > 0: + for key, value in item.items(): + if key.isdigit() and isinstance(value, list): + return True + return False + + @staticmethod + def is_obj(comp_or_obj): + # in resolved DB data, if the top level of a downloaded comp doesn't have the key `name`, it's an obj + # TODO: true for all cases? better approaches? + return not comp_or_obj.get("name") and "object" in comp_or_obj + class CompositionDoc(DataDoc): """ @@ -30,6 +53,7 @@ class CompositionDoc(DataDoc): SHALLOW_MATCH = ["object", "count", "molarity"] DEFAULT_VALUES = {"object": None, "count": None, "regions": {}, "molarity": None} + KEY_TO_DICT_MAPPING = {"gradient": "gradients", "inherit": "objects"} def __init__( self, @@ -57,12 +81,10 @@ def as_dict(self): return data @staticmethod - def get_gradient_reference(downloaded_data, db): - if "gradient" in downloaded_data and db.is_reference( - downloaded_data["gradient"] - ): - gradient_key = downloaded_data["gradient"] - downloaded_data["gradient"], _ = db.get_doc_by_ref(gradient_key) + def get_reference_in_obj(downloaded_data, db): + for key in CompositionDoc.KEY_TO_DICT_MAPPING: + if key in downloaded_data and db.is_reference(downloaded_data[key]): + downloaded_data[key], _ = db.get_doc_by_ref(downloaded_data[key]) @staticmethod def get_reference_data(key_or_dict, db): @@ -74,14 +96,14 @@ def get_reference_data(key_or_dict, db): if DataDoc.is_key(key_or_dict) and db.is_reference(key_or_dict): key = key_or_dict downloaded_data, _ = db.get_doc_by_ref(key) - CompositionDoc.get_gradient_reference(downloaded_data, db) + CompositionDoc.get_reference_in_obj(downloaded_data, db) return downloaded_data, None elif key_or_dict and isinstance(key_or_dict, dict): object_dict = key_or_dict if "object" in object_dict and db.is_reference(object_dict["object"]): key = object_dict["object"] downloaded_data, _ = db.get_doc_by_ref(key) - CompositionDoc.get_gradient_reference(downloaded_data, db) + CompositionDoc.get_reference_in_obj(downloaded_data, db) return downloaded_data, key return {}, None @@ -119,12 +141,21 @@ def gradient_list_to_dict(prep_recipe_data): gradient_dict[gradient["name"]] = gradient prep_recipe_data["gradients"] = gradient_dict + def resolve_object_data(self, object_data, prep_recipe_data): + """ + Resolve the object data from the local data. + """ + for key in CompositionDoc.KEY_TO_DICT_MAPPING: + if key in object_data and isinstance(object_data[key], str): + target_dict = CompositionDoc.KEY_TO_DICT_MAPPING[key] + object_data[key] = prep_recipe_data[target_dict][object_data[key]] + def resolve_local_regions(self, local_data, recipe_data, db): """ Recursively resolves the regions of a composition from local data. Restructure the local data to match the db data. 
""" - unpack_recipe_data = DBRecipeHandler.prep_data_for_db(recipe_data) + unpack_recipe_data = DBUploader.prep_data_for_db(recipe_data) prep_recipe_data = ObjectDoc.convert_representation(unpack_recipe_data, db) # `gradients` is a list, convert it to dict for easy access and replace CompositionDoc.gradient_list_to_dict(prep_recipe_data) @@ -134,12 +165,7 @@ def resolve_local_regions(self, local_data, recipe_data, db): else: key_name = local_data["object"]["name"] local_data["object"] = prep_recipe_data["objects"][key_name] - if "gradient" in local_data["object"] and isinstance( - local_data["object"]["gradient"], str - ): - local_data["object"]["gradient"] = prep_recipe_data["gradients"][ - local_data["object"]["gradient"] - ] + self.resolve_object_data(local_data["object"], prep_recipe_data) for region_name in local_data["regions"]: for index, key_or_dict in enumerate(local_data["regions"][region_name]): if not DataDoc.is_key(key_or_dict): @@ -152,12 +178,9 @@ def resolve_local_regions(self, local_data, recipe_data, db): local_data["regions"][region_name][index][ "object" ] = prep_recipe_data["objects"][obj_item["name"]] - # replace gradient reference with gradient data + # replace reference in obj with actual data obj_data = local_data["regions"][region_name][index]["object"] - if "gradient" in obj_data and isinstance(obj_data["gradient"], str): - local_data["regions"][region_name][index]["object"][ - "gradient" - ] = prep_recipe_data["gradients"][obj_data["gradient"]] + self.resolve_object_data(obj_data, prep_recipe_data) else: comp_name = local_data["regions"][region_name][index] prep_comp_data = prep_recipe_data["composition"][comp_name] @@ -253,6 +276,7 @@ def should_write(self, db, recipe_data): # found a match, so shouldn't write return False, db.doc_id(doc) else: + # deeply compare resolved regions data self.resolve_db_regions(db_data, db) self.resolve_local_regions(local_data, recipe_data, db) difference = DeepDiff( @@ -314,6 +338,12 @@ def convert_representation(doc, db): ] = ObjectDoc.convert_positions_in_representation(position_value) return convert_doc + @staticmethod + def _object_contains_grad_or_inherit(obj_data): + return ( + "gradient" in obj_data and isinstance(obj_data["gradient"], dict) + ) or "inherit" in obj_data + def should_write(self, db): docs = db.get_doc_by_name("objects", self.name) if docs and len(docs) >= 1: @@ -321,7 +351,7 @@ def should_write(self, db): # if there is repr in the obj doc from db full_doc_data = ObjectDoc.convert_representation(doc, db) # unpack objects to dicts in local data for comparison - local_data = DBRecipeHandler.prep_data_for_db(self.as_dict()) + local_data = DBUploader.prep_data_for_db(self.as_dict()) difference = DeepDiff(full_doc_data, local_data, ignore_order=True) if not difference: return doc, db.doc_id(doc) @@ -337,7 +367,7 @@ def should_write(self, db, grad_name): docs = db.get_doc_by_name("gradients", grad_name) if docs and len(docs) >= 1: for doc in docs: - local_data = DBRecipeHandler.prep_data_for_db(db.doc_to_dict(doc)) + local_data = DBUploader.prep_data_for_db(db.doc_to_dict(doc)) db_data = db.doc_to_dict(doc) difference = DeepDiff(db_data, local_data, ignore_order=True) if not difference: @@ -345,20 +375,17 @@ def should_write(self, db, grad_name): return None, None -class DBRecipeHandler(object): +class DBUploader(object): + """ + Handles the uploading of data to the database. 
+ """ + def __init__(self, db_handler): self.db = db_handler self.objects_to_path_map = {} self.comp_to_path_map = {} self.grad_to_path_map = {} - - @staticmethod - def is_nested_list(item): - return ( - isinstance(item, list) - and len(item) > 0 - and isinstance(item[0], (list, tuple)) - ) + self.objects_with_inherit_key = [] @staticmethod def prep_data_for_db(data): @@ -368,20 +395,18 @@ def prep_data_for_db(data): modified_data = {} for key, value in data.items(): # convert 2d array to dict - if DBRecipeHandler.is_nested_list(value): + if DataDoc.is_nested_list(value): flatten_dict = dict(zip([str(i) for i in range(len(value))], value)) - modified_data[key] = DBRecipeHandler.prep_data_for_db(flatten_dict) + modified_data[key] = DBUploader.prep_data_for_db(flatten_dict) # If the value is an object, we want to convert it to dict elif isinstance(value, object) and "__dict__" in dir(value): unpacked_value = vars(value) modified_data[key] = unpacked_value if isinstance(unpacked_value, dict): - modified_data[key] = DBRecipeHandler.prep_data_for_db( - unpacked_value - ) + modified_data[key] = DBUploader.prep_data_for_db(unpacked_value) # If the value is a dictionary, recursively convert its nested lists to dictionaries elif isinstance(value, dict): - modified_data[key] = DBRecipeHandler.prep_data_for_db(value) + modified_data[key] = DBUploader.prep_data_for_db(value) else: modified_data[key] = value return modified_data @@ -391,7 +416,7 @@ def upload_data(self, collection, data, id=None): If should_write is true, upload the data to the database """ # check if we need to convert part of the data(2d arrays and objs to dict) - modified_data = DBRecipeHandler.prep_data_for_db(data) + modified_data = DBUploader.prep_data_for_db(data) if id is None: name = modified_data["name"] doc = self.db.upload_doc(collection, modified_data) @@ -419,24 +444,38 @@ def upload_gradients(self, gradients): _, grad_path = self.upload_data("gradients", gradient_doc.settings) self.grad_to_path_map[gradient_name] = grad_path + def upload_single_object(self, obj_name, obj_data): + # replace gradient name with path to check if gradient exists in db + if "gradient" in obj_data[obj_name]: + grad_name = obj_data[obj_name]["gradient"] + obj_data[obj_name]["gradient"] = self.grad_to_path_map[grad_name] + object_doc = ObjectDoc(name=obj_name, settings=obj_data[obj_name]) + _, doc_id = object_doc.should_write(self.db) + if doc_id: + print(f"objects/{object_doc.name} is already in firestore") + obj_path = self.db.create_path("objects", doc_id) + self.objects_to_path_map[obj_name] = obj_path + else: + _, obj_path = self.upload_data("objects", object_doc.as_dict()) + self.objects_to_path_map[obj_name] = obj_path + def upload_objects(self, objects): + # modify a copy of objects to avoid key error when resolving local regions + modify_objects = copy.deepcopy(objects) for obj_name in objects: objects[obj_name]["name"] = obj_name - # modify a copy of objects to avoid key error when resolving local regions - modify_objects = copy.deepcopy(objects) - # replace gradient name with path to check if gradient exists in db - if "gradient" in modify_objects[obj_name]: - grad_name = modify_objects[obj_name]["gradient"] - modify_objects[obj_name]["gradient"] = self.grad_to_path_map[grad_name] - object_doc = ObjectDoc(name=obj_name, settings=modify_objects[obj_name]) - _, doc_id = object_doc.should_write(self.db) - if doc_id: - print(f"objects/{object_doc.name} is already in firestore") - obj_path = self.db.create_path("objects", doc_id) - 
self.objects_to_path_map[obj_name] = obj_path + if "inherit" not in objects[obj_name]: + self.upload_single_object(obj_name, modify_objects) else: - _, obj_path = self.upload_data("objects", object_doc.as_dict()) - self.objects_to_path_map[obj_name] = obj_path + self.objects_with_inherit_key.append(obj_name) + + # upload objs having `inherit` key only after all their base objs are uploaded + for obj_name in self.objects_with_inherit_key: + inherited_from = objects[obj_name]["inherit"] + modify_objects[obj_name]["inherit"] = self.objects_to_path_map[ + inherited_from + ] + self.upload_single_object(obj_name, modify_objects) def upload_compositions(self, compositions, recipe_to_save, recipe_data): references_to_update = {} @@ -482,13 +521,13 @@ def upload_compositions(self, compositions, recipe_to_save, recipe_data): references_to_update[comp_name].update({"comp_id": doc_id}) return references_to_update - def get_recipe_id(self, recipe_data): + def _get_recipe_id(self, recipe_data): """ We use customized recipe id to declare recipe's name and version """ recipe_name = recipe_data["name"] recipe_version = recipe_data["version"] - key = f"{recipe_name}_v-{recipe_version}" + key = f"{recipe_name}_v_{recipe_version}" return key def upload_collections(self, recipe_meta_data, recipe_data): @@ -526,12 +565,144 @@ def upload_recipe(self, recipe_meta_data, recipe_data): """ After all other collections are checked or uploaded, upload the recipe with references into db """ - recipe_id = self.get_recipe_id(recipe_data) + recipe_id = self._get_recipe_id(recipe_data) # if the recipe is already exists in db, just return recipe, _ = self.db.get_doc_by_id("recipes", recipe_id) if recipe: print(f"{recipe_id} is already in firestore") return recipe_to_save = self.upload_collections(recipe_meta_data, recipe_data) - key = self.get_recipe_id(recipe_to_save) - self.upload_data("recipes", recipe_to_save, key) + self.upload_data("recipes", recipe_to_save, recipe_id) + + def upload_result_metadata(self, file_name, url): + """ + Upload the metadata of the result file to the database. + """ + if self.db: + username = self.db.get_username() + timestamp = self.db.create_timestamp() + self.db.update_or_create( + "results", + file_name, + {"user": username, "timestamp": timestamp, "url": url.split("?")[0]}, + ) + + +class DBRecipeLoader(object): + """ + Handles the logic for downloading and parsing the recipe data from the database. + """ + + def __init__(self, db_handler): + self.db = db_handler + + def prep_db_doc_for_download(self, db_doc): + """ + convert data from db and resolve references. 
+ """ + prep_data = {} + if isinstance(db_doc, dict): + for key, value in db_doc.items(): + if DataDoc.is_db_dict(value): + unpack_dict = [value[str(i)] for i in range(len(value))] + prep_data[key] = unpack_dict + elif key == "composition": + compositions = db_doc["composition"] + for comp_name, reference in compositions.items(): + ref_link = reference["inherit"] + comp_doc = CompositionDoc( + comp_name, + object_key=None, + count=None, + regions={}, + molarity=None, + ) + composition_data, _ = comp_doc.get_reference_data( + ref_link, self.db + ) + comp_doc.resolve_db_regions(composition_data, self.db) + compositions[comp_name] = composition_data + prep_data[key] = compositions + else: + prep_data[key] = value + return prep_data + + def collect_docs_by_id(self, collection, id): + return self.db.get_doc_by_id(collection, id) + + @staticmethod + def _get_grad_and_obj(obj_data, obj_dict, grad_dict): + """ + Collect gradient and inherited object data from the downloaded object data + return object data dict and gradient data dict with name as key + """ + obj_name = obj_data["name"] + for key, target_dict in CompositionDoc.KEY_TO_DICT_MAPPING.items(): + if key in obj_data: + item_name = obj_data[key]["name"] + target_dict = grad_dict if key == "gradient" else obj_dict + target_dict[item_name] = obj_data[key] + obj_dict[obj_name][key] = item_name + return obj_dict, grad_dict + + @staticmethod + def collect_and_sort_data(comp_data): + """ + Collect all object and gradient info from the downloaded composition data + Return autopack object data dict and gradient data dict with name as key + Return restructured composition dict with "composition" as key + """ + objects = {} + gradients = {} + composition = {} + for comp_name, comp_value in comp_data.items(): + composition[comp_name] = {} + if "count" in comp_value and comp_value["count"] is not None: + composition[comp_name]["count"] = comp_value["count"] + if "object" in comp_value and comp_value["object"] is not None: + composition[comp_name]["object"] = comp_value["object"]["name"] + object_copy = copy.deepcopy(comp_value["object"]) + objects[object_copy["name"]] = object_copy + if ObjectDoc._object_contains_grad_or_inherit(object_copy): + objects, gradients = DBRecipeLoader._get_grad_and_obj( + object_copy, objects, gradients + ) + if "regions" in comp_value and comp_value["regions"] is not None: + for region_name in comp_value["regions"]: + composition[comp_name].setdefault("regions", {})[region_name] = [] + for region_item in comp_value["regions"][region_name]: + if DataDoc.is_obj(region_item): + composition[comp_name]["regions"][region_name].append( + { + "object": region_item["object"].get("name"), + "count": region_item.get("count"), + } + ) + object_copy = copy.deepcopy(region_item["object"]) + objects[object_copy["name"]] = object_copy + if ObjectDoc._object_contains_grad_or_inherit(object_copy): + objects, gradients = DBRecipeLoader._get_grad_and_obj( + object_copy, objects, gradients + ) + else: + composition[comp_name]["regions"][region_name].append( + region_item["name"] + ) + return objects, gradients, composition + + @staticmethod + def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict): + """ + Compile recipe data from db recipe data into a ready-to-pack structure + """ + recipe_data = { + **{ + k: db_recipe_data[k] + for k in ["format_version", "version", "name", "bounding_box"] + }, + "objects": obj_dict, + "composition": comp_dict, + } + if grad_dict: + recipe_data["gradients"] = [{**v} for v in 
grad_dict.values()] + return recipe_data diff --git a/cellpack/autopack/FirebaseHandler.py b/cellpack/autopack/FirebaseHandler.py index 805b4eb60..8f1942388 100644 --- a/cellpack/autopack/FirebaseHandler.py +++ b/cellpack/autopack/FirebaseHandler.py @@ -1,5 +1,8 @@ +import ast import firebase_admin from firebase_admin import credentials, firestore +from google.cloud.exceptions import NotFound +from cellpack.autopack.loaders.utils import read_json_file, write_json_file class FirebaseHandler(object): @@ -7,19 +10,27 @@ class FirebaseHandler(object): Retrieve data and perform common tasks when working with firebase. """ - def __init__(self, cred_path): - login = credentials.Certificate(cred_path) - firebase_admin.initialize_app(login) - self.db = firestore.client() + # use class attributes to maintain a consistent state across all instances + _initialized = False + _db = None + + def __init__(self): + # check if firebase is already initialized + if not FirebaseHandler._initialized: + cred_path = FirebaseHandler.get_creds() + login = credentials.Certificate(cred_path) + firebase_admin.initialize_app(login) + FirebaseHandler._initialized = True + FirebaseHandler._db = firestore.client() + + self.db = FirebaseHandler._db self.name = "firebase" + # common utility methods @staticmethod def doc_to_dict(doc): return doc.to_dict() - def db_name(self): - return self.name - @staticmethod def doc_id(doc): return doc.id @@ -28,6 +39,10 @@ def doc_id(doc): def create_path(collection, doc_id): return f"firebase:{collection}/{doc_id}" + @staticmethod + def create_timestamp(): + return firestore.SERVER_TIMESTAMP + @staticmethod def get_path_from_ref(doc): return doc.path @@ -40,24 +55,41 @@ def get_collection_id_from_path(path): id = components[1] return collection, id - @staticmethod - def update_reference_on_doc(doc_ref, index, new_item_ref): - doc_ref.update({index: new_item_ref}) + # Create methods + def set_doc(self, collection, id, data): + doc, doc_ref = self.get_doc_by_id(collection, id) + if not doc: + doc_ref = self.db.collection(collection).document(id) + doc_ref.set(data) + print(f"successfully uploaded to path: {doc_ref.path}") + return doc_ref + else: + print( + f"ERROR: {doc_ref.path} already exists. If uploading new data, provide a unique recipe name." 
+ ) + return + + def upload_doc(self, collection, data): + return self.db.collection(collection).add(data) + # Read methods @staticmethod - def update_elements_in_array(doc_ref, index, new_item_ref, remove_item): - doc_ref.update({index: firestore.ArrayRemove([remove_item])}) - doc_ref.update({index: firestore.ArrayUnion([new_item_ref])}) + def get_creds(): + creds = read_json_file("./.creds") + if creds is None or "firebase" not in creds: + creds = FirebaseHandler.write_creds_path() + return creds["firebase"] @staticmethod - def is_reference(path): - if not isinstance(path, str): - return False - if path is None: - return False - if path.startswith("firebase:"): - return True - return False + def get_username(): + creds = read_json_file("./.creds") + try: + return creds["username"] + except KeyError: + raise ValueError("No username found in .creds file") + + def db_name(self): + return self.name def get_doc_by_name(self, collection, name): db = self.db @@ -65,9 +97,9 @@ def get_doc_by_name(self, collection, name): docs = data_ref.where("name", "==", name).get() # docs is an array return docs - # `doc` is a DocumentSnapshot object - # `doc_ref` is a DocumentReference object to perform operations on the doc def get_doc_by_id(self, collection, id): + # `doc` is a DocumentSnapshot object + # `doc_ref` is a DocumentReference object to perform operations on the doc doc_ref = self.db.collection(collection).document(id) doc = doc_ref.get() if doc.exists: @@ -79,19 +111,56 @@ def get_doc_by_ref(self, path): collection, id = FirebaseHandler.get_collection_id_from_path(path) return self.get_doc_by_id(collection, id) - def set_doc(self, collection, id, data): - doc, doc_ref = self.get_doc_by_id(collection, id) - if not doc: - doc_ref = self.db.collection(collection).document(id) - doc_ref.set(data) - print(f"successfully uploaded to path: {doc_ref.path}") - return doc_ref + # Update methods + def update_doc(self, collection, id, data): + doc_ref = self.db.collection(collection).document(id) + doc_ref.update(data) + print(f"successfully updated to path: {doc_ref.path}") + return doc_ref + + @staticmethod + def update_reference_on_doc(doc_ref, index, new_item_ref): + doc_ref.update({index: new_item_ref}) + + @staticmethod + def update_elements_in_array(doc_ref, index, new_item_ref, remove_item): + doc_ref.update({index: firestore.ArrayRemove([remove_item])}) + doc_ref.update({index: firestore.ArrayUnion([new_item_ref])}) + + def update_or_create(self, collection, id, data): + """ + If the input id exists, update the doc. If not, create a new doc.
+ """ + try: + self.update_doc(collection, id, data) + except NotFound: + self.set_doc(collection, id, data) + + # other utils + @staticmethod + def write_creds_path(): + path = ast.literal_eval(input("provide path to firebase credentials: ")) + data = read_json_file(path) + if data is None: + raise ValueError("The path to your credentials doesn't exist") + firebase_cred = {"firebase": data} + creds = read_json_file("./.creds") + if creds is None: + write_json_file("./.creds", firebase_cred) else: - print(f"ERROR, already data at this path:{collection}/{id}") - return + creds["firebase"] = data + write_json_file("./.creds", creds) + return firebase_cred - def upload_doc(self, collection, data): - return self.db.collection(collection).add(data) + @staticmethod + def is_reference(path): + if not isinstance(path, str): + return False + if path is None: + return False + if path.startswith("firebase:"): + return True + return False @staticmethod def is_firebase_obj(obj): diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 95a938a3d..18c7667b8 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -41,13 +41,16 @@ import re import shutil from os import path, environ +import getpass from pathlib import Path import urllib.request as urllib from collections import OrderedDict import ssl import json +from cellpack.autopack.DBRecipeHandler import DBRecipeLoader +from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS -from cellpack.autopack.interface_objects.meta_enum import MetaEnum +from cellpack.autopack.loaders.utils import read_json_file, write_json_file packageContainsVFCommands = 1 @@ -192,19 +195,15 @@ def checkPath(): autopackdir = pref_path["autopackdir"] -class DATABASE_NAME(MetaEnum): - GITHUB = "github:" - FIREBASE = "firebase:" - - REPLACE_PATH = { "autoPACKserver": autoPACKserver, "autopackdir": autopackdir, "autopackdata": appdata, - DATABASE_NAME.GITHUB: autoPACKserver, - DATABASE_NAME.FIREBASE: None, + f"{DATABASE_IDS.GITHUB}:": autoPACKserver, + f"{DATABASE_IDS.FIREBASE}:": None, } + global CURRENT_RECIPE_PATH CURRENT_RECIPE_PATH = appdata # we keep the file here, it come with the distribution @@ -280,7 +279,7 @@ def is_remote_path(file_path): """ @param file_path: str """ - for ele in DATABASE_NAME: + for ele in DATABASE_IDS.with_colon(): if ele in file_path: return True @@ -384,10 +383,16 @@ def read_text_file(filename, destination="", cache="collisionTrees", force=None) def load_file(filename, destination="", cache="geometries", force=None): if is_remote_path(filename): database_name, file_path = convert_db_shortname_to_url(filename) + # command example: `pack -r firebase:recipes/[FIREBASE-RECIPE-ID] -c [CONFIG-FILE-PATH]` if database_name == "firebase": - # TODO: read from firebase - # return data - pass + db = DATABASE_IDS.handlers().get(database_name) + db_handler = DBRecipeLoader(db) + recipe_id = file_path.split("/")[-1] + db_doc, _ = db_handler.collect_docs_by_id( + collection="recipes", id=recipe_id + ) + downloaded_recipe_data = db_handler.prep_db_doc_for_download(db_doc) + return downloaded_recipe_data, database_name else: local_file_path = get_local_file_location( file_path, destination=destination, cache=cache, force=force @@ -396,7 +401,7 @@ def load_file(filename, destination="", cache="geometries", force=None): local_file_path = get_local_file_location( filename, destination=destination, cache=cache, force=force ) - return json.load(open(local_file_path, "r")) + return 
json.load(open(local_file_path, "r")), None def fixPath(adict): # , k, v): @@ -532,16 +537,27 @@ def clearCaches(*args): print("problem cleaning ", cache_dir[k]) +def write_username_to_creds(): + username = getpass.getuser() + creds = read_json_file("./.creds") + if creds is None or "username" not in creds: + creds = {} + creds["username"] = username + write_json_file("./.creds", creds) + + # we should read a file to fill the RECIPE Dictionary # so we can add some and write/save setup # afdir or user_pref - if checkAtstartup: checkPath() # updatePathJSON() # checkRecipeAvailable() log.info("path are updated ") +# write username to creds +write_username_to_creds() + log.info(f"currently number recipes is {len(RECIPES)}") # check cache directory create if doesnt exit.abs//should be in user pref? # ? diff --git a/cellpack/autopack/interface_objects/database_ids.py b/cellpack/autopack/interface_objects/database_ids.py new file mode 100644 index 000000000..c9a7520e6 --- /dev/null +++ b/cellpack/autopack/interface_objects/database_ids.py @@ -0,0 +1,28 @@ +from .meta_enum import MetaEnum +from cellpack.autopack.AWSHandler import AWSHandler +from cellpack.autopack.FirebaseHandler import FirebaseHandler + + +class DATABASE_IDS(MetaEnum): + FIREBASE = "firebase" + GITHUB = "github" + AWS = "aws" + + @classmethod + def with_colon(cls): + return [f"{ele}:" for ele in cls.values()] + + @classmethod + def handlers(cls): + def create_aws_handler(bucket_name, sub_folder_name, region_name): + return AWSHandler( + bucket_name=bucket_name, + sub_folder_name=sub_folder_name, + region_name=region_name, + ) + + handlers_dict = { + cls.FIREBASE: FirebaseHandler(), + cls.AWS: create_aws_handler, + } + return handlers_dict diff --git a/cellpack/autopack/loaders/recipe_loader.py b/cellpack/autopack/loaders/recipe_loader.py index b04303ba0..c8677ba0a 100644 --- a/cellpack/autopack/loaders/recipe_loader.py +++ b/cellpack/autopack/loaders/recipe_loader.py @@ -17,6 +17,7 @@ ) from cellpack.autopack.loaders.migrate_v1_to_v2 import convert as convert_v1_to_v2 from cellpack.autopack.loaders.migrate_v2_to_v2_1 import convert as convert_v2_to_v2_1 +from cellpack.autopack.DBRecipeHandler import DBRecipeLoader encoder.FLOAT_REPR = lambda o: format(o, ".8g") CURRENT_VERSION = "2.1" @@ -165,8 +166,15 @@ def _migrate_version(self, old_recipe): f"{old_recipe['format_version']} is not a format version we support" ) - def _read(self): - new_values = autopack.load_file(self.file_path, cache="recipes") + def _read(self, resolve_inheritance=True): + new_values, database_name = autopack.load_file(self.file_path, cache="recipes") + if database_name == "firebase": + objects, gradients, composition = DBRecipeLoader.collect_and_sort_data( + new_values["composition"] + ) + new_values = DBRecipeLoader.compile_db_recipe_data( + new_values, objects, gradients, composition + ) recipe_data = RecipeLoader.default_values.copy() recipe_data = deep_merge(recipe_data, new_values) recipe_data["format_version"] = RecipeLoader._sanitize_format_version( @@ -177,9 +185,10 @@ def _read(self): # TODO: request any external data before returning if "objects" in recipe_data: - recipe_data["objects"] = RecipeLoader.resolve_inheritance( - recipe_data["objects"] - ) + if resolve_inheritance: + recipe_data["objects"] = RecipeLoader.resolve_inheritance( + recipe_data["objects"] + ) for _, obj in recipe_data["objects"].items(): reps = obj["representations"] if "representations" in obj else {} obj["representations"] = Representations( @@ -187,14 +196,25 @@ def 
_read(self): atomic=reps.get("atomic", None), packing=reps.get("packing", None), ) - partner_settings = obj["partners"] if "partners" in obj else [] - if len(partner_settings): - obj["partners"] = Partners(partner_settings) + # the key "all_partners" exists in obj["partners"] if the recipe is downloaded from a remote db + partner_settings = ( + [] + if ( + "partners" in obj + and "all_partners" in obj["partners"] + and not obj["partners"]["all_partners"] + ) + else obj.get("partners", []) + ) + obj["partners"] = Partners(partner_settings) if "type" in obj and not INGREDIENT_TYPE.is_member(obj["type"]): raise TypeError(f"{obj['type']} is not an allowed type") # handle gradients - if "gradients" in recipe_data: + # gradients in firebase recipes are already stored as a list of dicts + if "gradients" in recipe_data and not isinstance( + recipe_data["gradients"], list + ): gradients = [] for gradient_name, gradient_dict in recipe_data["gradients"].items(): gradients.append(GradientData(gradient_dict, gradient_name).data) diff --git a/cellpack/autopack/loaders/utils.py b/cellpack/autopack/loaders/utils.py index 997506f36..93e45fa1d 100644 --- a/cellpack/autopack/loaders/utils.py +++ b/cellpack/autopack/loaders/utils.py @@ -1,4 +1,5 @@ import os +import json from pathlib import Path @@ -21,3 +22,16 @@ def create_output_dir(out_base_folder, recipe_name, sub_dir=None): output_folder = Path(output_folder, sub_dir) os.makedirs(output_folder, exist_ok=True) return output_folder + + +def read_json_file(path): + if not Path(path).exists(): + return None + with open(path, "r") as file_name: + return json.load(file_name) + + +def write_json_file(path, data): + Path(path).parent.mkdir(exist_ok=True, parents=True) + with open(path, "w") as file_name: + json.dump(data, file_name) diff --git a/cellpack/autopack/upy/simularium/simularium_helper.py b/cellpack/autopack/upy/simularium/simularium_helper.py index 1965c28c8..031dccd88 100644 --- a/cellpack/autopack/upy/simularium/simularium_helper.py +++ b/cellpack/autopack/upy/simularium/simularium_helper.py @@ -23,7 +23,8 @@ from simulariumio.constants import DISPLAY_TYPE, VIZ_TYPE from cellpack.autopack.upy import hostHelper -from cellpack.autopack.AWSHandler import AWSHandler +from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS import collada @@ -1390,7 +1391,7 @@ def post_and_open_file(self, file_name, open_results_in_browser=True): simularium_file = Path(f"{file_name}.simularium") url = None try: - url = simulariumHelper.store_results_to_s3(simularium_file) + _, url = simulariumHelper.store_result_file(simularium_file, storage="aws") except Exception as e: aws_readme_url = ( "https://github.com/mesoscope/cellpack/blob/main/README.md#aws-s3" @@ -1407,14 +1408,23 @@ def post_and_open_file(self, file_name, open_results_in_browser=True): simulariumHelper.open_in_simularium(url) @staticmethod - def store_results_to_s3(file_path): - handler = AWSHandler( - bucket_name="cellpack-results", - sub_folder_name="simularium/", - region_name="us-west-2", - ) - url = handler.save_file(file_path) - return url + def store_result_file(file_path, storage=None): + if storage == "aws": + handler = DATABASE_IDS.handlers().get(storage) + initialized_handler = handler( + bucket_name="cellpack-results", + sub_folder_name="simularium/", + region_name="us-west-2", + ) + file_name, url = initialized_handler.save_file(file_path) + simulariumHelper.store_metadata(file_name, url, db="firebase") + return 
file_name, url + + @staticmethod + def store_metadata(file_name, url, db=None): + if db == "firebase": + db_handler = DBUploader(DATABASE_IDS.handlers().get(db)) + db_handler.upload_result_metadata(file_name, url) @staticmethod def open_in_simularium(aws_url): diff --git a/cellpack/bin/upload.py b/cellpack/bin/upload.py index 3f6fede2f..d7deb061d 100644 --- a/cellpack/bin/upload.py +++ b/cellpack/bin/upload.py @@ -1,19 +1,13 @@ -from enum import Enum import fire from cellpack.autopack.FirebaseHandler import FirebaseHandler -from cellpack.autopack.DBRecipeHandler import DBRecipeHandler +from cellpack.autopack.DBRecipeHandler import DBUploader +from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.loaders.recipe_loader import RecipeLoader -class DATABASE_IDS(Enum): - FIREBASE = "firebase" - GITHUB = "github" - - def upload( recipe_path, - cred_path, db_id=DATABASE_IDS.FIREBASE, ): """ @@ -22,13 +16,12 @@ def upload( :return: void """ if db_id == DATABASE_IDS.FIREBASE: - cred_path = cred_path # fetch the service key json file - db_handler = FirebaseHandler(cred_path) + db_handler = FirebaseHandler() recipe_loader = RecipeLoader(recipe_path) - recipe_full_data = recipe_loader.recipe_data + recipe_full_data = recipe_loader._read(resolve_inheritance=False) recipe_meta_data = recipe_loader.get_only_recipe_metadata() - recipe_db_handler = DBRecipeHandler(db_handler) + recipe_db_handler = DBUploader(db_handler) recipe_db_handler.upload_recipe(recipe_meta_data, recipe_full_data) diff --git a/cellpack/tests/test_data_doc.py b/cellpack/tests/test_data_doc.py new file mode 100644 index 000000000..e6a1f0ba1 --- /dev/null +++ b/cellpack/tests/test_data_doc.py @@ -0,0 +1,38 @@ +from cellpack.autopack.DBRecipeHandler import DataDoc +from cellpack.tests.mocks.mock_db import MockDB + +mock_db = MockDB({}) + +object_example = { + "count": 121, + "object": { + "gradient": { + "mode": "surface", + "name": "nucleus_surface_gradient", + }, + "name": "peroxisome", + }, +} + +composition_example = { + "count": 1, + "regions": {"interior": []}, + "object": { + "name": "mean_nucleus", + "partners": {"all_partners": []}, + }, + "name": "nucleus", +} + + +def test_is_nested_list(): + assert DataDoc.is_nested_list([]) is False + assert DataDoc.is_nested_list([[], []]) is True + assert DataDoc.is_nested_list([[1, 2], [3, 4]]) is True + assert DataDoc.is_nested_list([1, [1, 2]]) is True + assert DataDoc.is_nested_list([[1, 2], 1]) is True + + +def test_is_obj(): + assert DataDoc.is_obj(object_example) is True + assert DataDoc.is_obj(composition_example) is False diff --git a/cellpack/tests/test_db_recipe_loader.py b/cellpack/tests/test_db_recipe_loader.py new file mode 100644 index 000000000..7d87cdd4f --- /dev/null +++ b/cellpack/tests/test_db_recipe_loader.py @@ -0,0 +1,192 @@ +import pytest +from cellpack.autopack.DBRecipeHandler import DBRecipeLoader +from cellpack.tests.mocks.mock_db import MockDB + +mock_db = MockDB({}) + +downloaded_data_from_firebase = { + "version": "linear", + "format_version": "2.1", + "composition": { + "membrane": { + "count": 1, + "regions": { + "interior": [ + { + "count": 121, + "object": { + "gradient": { + "mode": "surface", + "name": "nucleus_surface_gradient", + }, + "name": "peroxisome", + }, + }, + { + "count": 1, + "regions": {"interior": []}, + "object": { + "name": "mean_nucleus", + "partners": {"all_partners": []}, + }, + "name": "nucleus", + }, + ] + }, + "object": { + "name": "mean_membrane", + "type": "mesh", + }, + "name": 
"membrane", + }, + "nucleus": { + "count": 1, + "regions": {"interior": []}, + "object": { + "name": "mean_nucleus", + "partners": {"all_partners": []}, + }, + "name": "nucleus", + }, + "bounding_area": { + "count": None, + "regions": { + "interior": [ + { + "count": 1, + "regions": { + "interior": [ + { + "count": 121, + "object": { + "gradient": { + "mode": "surface", + "name": "nucleus_surface_gradient", + }, + "name": "peroxisome", + }, + }, + { + "count": 1, + "regions": {"interior": []}, + "object": { + "name": "mean_nucleus", + "partners": {"all_partners": []}, + }, + "name": "nucleus", + }, + ] + }, + "object": { + "name": "mean_membrane", + "type": "mesh", + }, + "name": "membrane", + } + ] + }, + "name": "bounding_area", + }, + }, + "version": "linear", + "bounding_box": [[-110, -45, -62], [110, 45, 62]], + "name": "test_recipe", +} + + +compiled_firebase_recipe_example = { + "name": "test_recipe", + "format_version": "2.1", + "version": "linear", + "bounding_box": [[-110, -45, -62], [110, 45, 62]], + "objects": { + "mean_membrane": { + "name": "mean_membrane", + "type": "mesh", + }, + "peroxisome": { + "name": "peroxisome", + "gradient": "nucleus_surface_gradient", + }, + "mean_nucleus": { + "name": "mean_nucleus", + "partners": {"all_partners": []}, + }, + }, + "gradients": [ + { + "name": "nucleus_surface_gradient", + "mode": "surface", + } + ], + "composition": { + "bounding_area": {"regions": {"interior": ["membrane"]}}, + "membrane": { + "count": 1, + "object": "mean_membrane", + "regions": { + "interior": [{"object": "peroxisome", "count": 121}, "nucleus"] + }, + }, + "nucleus": { + "count": 1, + "object": "mean_nucleus", + "regions": {"interior": []}, + }, + }, +} + + +def test_get_grad_and_obj(): + obj_data = downloaded_data_from_firebase["composition"]["membrane"]["regions"][ + "interior" + ][0]["object"] + obj_dict = { + "peroxisome": { + "gradient": { + "mode": "surface", + "name": "nucleus_surface_gradient", + }, + "name": "peroxisome", + } + } + grad_dict = {} + obj_dict, grad_dict = DBRecipeLoader._get_grad_and_obj( + obj_data, obj_dict, grad_dict + ) + assert obj_dict == { + "peroxisome": {"gradient": "nucleus_surface_gradient", "name": "peroxisome"} + } + assert grad_dict == { + "nucleus_surface_gradient": { + "mode": "surface", + "name": "nucleus_surface_gradient", + } + } + + +@pytest.fixture +def sort_data_from_composition(): + return DBRecipeLoader.collect_and_sort_data( + downloaded_data_from_firebase["composition"] + ) + + +def test_collect_and_sort_data(sort_data_from_composition): + objects, gradients, composition = sort_data_from_composition + assert objects == compiled_firebase_recipe_example["objects"] + assert gradients == { + "nucleus_surface_gradient": { + "name": "nucleus_surface_gradient", + "mode": "surface", + } + } + assert composition == compiled_firebase_recipe_example["composition"] + + +def test_compile_db_recipe_data(sort_data_from_composition): + objects, gradients, composition = sort_data_from_composition + compiled_recipe = DBRecipeLoader.compile_db_recipe_data( + downloaded_data_from_firebase, objects, gradients, composition + ) + assert compiled_recipe == compiled_firebase_recipe_example diff --git a/cellpack/tests/test_db_recipe_handler.py b/cellpack/tests/test_db_uploader.py similarity index 87% rename from cellpack/tests/test_db_recipe_handler.py rename to cellpack/tests/test_db_uploader.py index b2dd591f0..89a02e690 100644 --- a/cellpack/tests/test_db_recipe_handler.py +++ b/cellpack/tests/test_db_uploader.py @@ -1,16 +1,10 @@ 
-from cellpack.autopack.DBRecipeHandler import DBRecipeHandler +from cellpack.autopack.DBRecipeHandler import DBUploader from cellpack.tests.mocks.mock_db import MockDB from unittest.mock import MagicMock, patch mock_db = MockDB({}) -def test_is_nested_list(): - assert DBRecipeHandler.is_nested_list([]) is False - assert DBRecipeHandler.is_nested_list([[], []]) is True - assert DBRecipeHandler.is_nested_list([[1, 2], [3, 4]]) is True - - def test_prep_data_for_db(): input_data = { "bounding_box": [[0, 0, 0], [1000, 1000, 1]], @@ -25,7 +19,7 @@ def test_prep_data_for_db(): }, "max_jitter": [1, 1, 0], } - new_data = DBRecipeHandler.prep_data_for_db(input_data) + new_data = DBUploader.prep_data_for_db(input_data) assert new_data == converted_data @@ -38,7 +32,7 @@ def test_upload_data_with_recipe_and_id(): "composition": {"test": {"inherit": "firebase:test_collection/test_id"}}, } id = "test_id" - recipe_doc = DBRecipeHandler(mock_db) + recipe_doc = DBUploader(mock_db) expected_result = recipe_doc.upload_data(collection, data, id) assert expected_result[0] == "test_id" @@ -51,7 +45,7 @@ def test_upload_data_with_object(): "name": "test", "test_key": "test_value", } - object_doc = DBRecipeHandler(mock_db) + object_doc = DBUploader(mock_db) expected_result = object_doc.upload_data(collection, data) assert expected_result[0] == "test_id" @@ -60,14 +54,14 @@ def test_upload_data_with_object(): def test_upload_objects(): data = {"test": {"test_key": "test_value"}} - object_doc = DBRecipeHandler(mock_db) + object_doc = DBUploader(mock_db) object_doc.upload_objects(data) assert object_doc.objects_to_path_map == {"test": "firebase:objects/test_id"} def test_upload_objects_with_gradient(): data = {"test": {"test_key": "test_value", "gradient": "test_grad_name"}} - object_handler = DBRecipeHandler(mock_db) + object_handler = DBUploader(mock_db) object_handler.grad_to_path_map = {"test_grad_name": "firebase:gradients/test_id"} with patch( @@ -102,7 +96,7 @@ def test_upload_compositions(): }, } - composition_doc = DBRecipeHandler(mock_db) + composition_doc = DBUploader(mock_db) references_to_update = composition_doc.upload_compositions( composition, recipe_to_save, recipe_data ) @@ -116,7 +110,7 @@ def test_upload_compositions(): def test_upload_gradients(): data = [{"name": "test_grad_name", "test_key": "test_value"}] - gradient_doc = DBRecipeHandler(mock_db) + gradient_doc = DBUploader(mock_db) gradient_doc.upload_gradients(data) assert gradient_doc.grad_to_path_map == { "test_grad_name": "firebase:gradients/test_id" @@ -130,8 +124,8 @@ def test_get_recipe_id(): "objects": None, "composition": {}, } - recipe_doc = DBRecipeHandler(mock_db) - assert recipe_doc.get_recipe_id(recipe_data) == "test_v-1.0.0" + recipe_doc = DBUploader(mock_db) + assert recipe_doc._get_recipe_id(recipe_data) == "test_v_1.0.0" def test_upload_collections(): @@ -154,7 +148,7 @@ def test_upload_collections(): }, } - recipe_doc = DBRecipeHandler(mock_db) + recipe_doc = DBUploader(mock_db) expected_result = { "name": "one_sphere", "version": "1.0.0", @@ -188,7 +182,7 @@ def test_upload_recipe(): }, } - recipe_doc = DBRecipeHandler(mock_db) + recipe_doc = DBUploader(mock_db) recipe_doc.upload_recipe(recipe_meta_data, recipe_data) assert recipe_doc.comp_to_path_map == { "space": {"path": "firebase:composition/test_id", "id": "test_id"},
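
Usage notes for the APIs introduced in this patch. The sketches below are illustrative rather than part of the diff: identifiers that do not appear above (object names, document ids, local paths) are hypothetical, and every firestore call assumes valid credentials are available.

FirebaseHandler is now a process-wide singleton. Initialization state and the firestore client live in class attributes, and credentials are read from a local ./.creds JSON file (written by write_creds_path() on first use, with the OS username added at import time by write_username_to_creds()) rather than passed as a cred_path argument, so the expected file shape is {"firebase": {<service account fields>}, "username": "<os user>"}. A minimal sketch of the singleton behavior:

    from cellpack.autopack.FirebaseHandler import FirebaseHandler

    # repeated construction reuses one firebase_admin app and one
    # firestore client, since both are cached on the class
    db_a = FirebaseHandler()
    db_b = FirebaseHandler()
    assert db_a.db is db_b.db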
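Firestore does not accept directly nested arrays, so DBUploader.prep_data_for_db flattens 2d lists into index-keyed dicts before upload; on download, DataDoc.is_db_dict spots these and prep_db_doc_for_download unpacks them back into lists. A sketch mirroring test_prep_data_for_db:

    from cellpack.autopack.DBRecipeHandler import DBUploader

    data = {"bounding_box": [[0, 0, 0], [1000, 1000, 1]], "max_jitter": [1, 1, 0]}
    print(DBUploader.prep_data_for_db(data))
    # {'bounding_box': {'0': [0, 0, 0], '1': [1000, 1000, 1]}, 'max_jitter': [1, 1, 0]}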
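From the inherited-objects work (#198), upload_objects defers any object carrying an "inherit" key (queued in objects_with_inherit_key) until its base object has been uploaded, then swaps the base object's name for its firebase path before uploading the dependent. A condensed sketch with hypothetical object names:

    from cellpack.autopack.DBRecipeHandler import DBUploader
    from cellpack.autopack.FirebaseHandler import FirebaseHandler

    uploader = DBUploader(FirebaseHandler())
    uploader.upload_objects(
        {
            "sphere_base": {"radius": 10},
            # uploaded in a second pass, with "inherit" rewritten to
            # uploader.objects_to_path_map["sphere_base"],
            # e.g. "firebase:objects/<doc-id>"
            "sphere_child": {"inherit": "sphere_base"},
        }
    )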
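Recipe documents are keyed by a customized id, now built by the private _get_recipe_id with an underscore separator ("{name}_v_{version}" instead of "{name}_v-{version}"), and the upload entry point no longer takes cred_path. A programmatic upload sketch, assuming a local recipe file (the path shown is hypothetical):

    from cellpack.bin.upload import upload

    # credentials come from ./.creds (write_creds_path prompts once if absent);
    # a recipe named "one_sphere" at version "1.0.0" lands at
    # recipes/one_sphere_v_1.0.0
    upload(recipe_path="examples/recipes/one_sphere.json")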
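On the packing side, autopack.load_file now recognizes a firebase: path (as in the quoted command example, pack -r firebase:recipes/[FIREBASE-RECIPE-ID] -c [CONFIG-FILE-PATH]), fetches the recipe doc, and resolves its references; RecipeLoader._read then recompiles the flattened doc through DBRecipeLoader.collect_and_sort_data and compile_db_recipe_data, the path exercised by test_db_recipe_loader.py. The equivalent direct calls, with a hypothetical recipe id:

    from cellpack.autopack.DBRecipeHandler import DBRecipeLoader
    from cellpack.autopack.FirebaseHandler import FirebaseHandler

    loader = DBRecipeLoader(FirebaseHandler())
    db_doc, _ = loader.collect_docs_by_id(collection="recipes", id="one_sphere_v_1.0.0")
    # nested references resolved, index-keyed dicts unpacked back to lists
    recipe_data = loader.prep_db_doc_for_download(db_doc)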
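The per-module database enums are consolidated into DATABASE_IDS, whose handlers() mapping is deliberately asymmetric: the firebase entry is an already-initialized handler (cheap, thanks to the singleton), while the aws entry is a factory that still needs bucket details. A sketch using the values simularium_helper passes:

    from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS

    handlers = DATABASE_IDS.handlers()
    firebase_db = handlers.get(DATABASE_IDS.FIREBASE)  # ready-to-use FirebaseHandler
    make_aws = handlers.get(DATABASE_IDS.AWS)          # factory function
    s3 = make_aws(
        bucket_name="cellpack-results",
        sub_folder_name="simularium/",
        region_name="us-west-2",
    )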
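Finally, AWSHandler.save_file now returns both the uploaded file's name and its presigned URL, which is what lets store_result_file hand the name to store_metadata: upload_result_metadata writes the user (from .creds), a server timestamp, and the URL with its presigned query string stripped into the results collection via update_or_create. A sketch (the output path is hypothetical):

    from pathlib import Path
    from cellpack.autopack.upy.simularium.simularium_helper import simulariumHelper

    file_name, url = simulariumHelper.store_result_file(
        Path("out/one_sphere.simularium"), storage="aws"
    )
    # firestore doc at results/<file_name>:
    # {"user": <username>, "timestamp": SERVER_TIMESTAMP, "url": url.split("?")[0]}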