diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index 977b0608..0bbecf0b 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -19,7 +19,7 @@ def __init__( self, bucket_name, sub_folder_name=None, - region_name=None, + region_name="us-west-2", ): self.bucket_name = bucket_name self.folder_name = sub_folder_name diff --git a/cellpack/autopack/Analysis.py b/cellpack/autopack/Analysis.py index ddbf7343..5a219734 100644 --- a/cellpack/autopack/Analysis.py +++ b/cellpack/autopack/Analysis.py @@ -667,12 +667,13 @@ def pack( self, seed=20, show_plotly_plot=True, + **kwargs, ): if show_plotly_plot: self.plotly.update_title(self.env.place_method) t1 = time() - results = self.env.pack_grid(seedNum=seed) + results = self.env.pack_grid(seedNum=seed, **kwargs) self.seed_to_results[seed] = results t2 = time() run_time = t2 - t1 @@ -994,6 +995,7 @@ def pack_one_seed( show_grid=False, plot_figures=False, save_gradient_data_as_image=False, + clean_grid_cache=False, ): """ Packs one seed of a recipe and returns the recipe object @@ -1009,6 +1011,7 @@ def pack_one_seed( seed=seed, # TODO: fix this to disable plotly if using simularium show_plotly_plot=(show_grid and two_d) and not use_simularium, + clean_grid_cache=clean_grid_cache, ) self.center = self.env.grid.getCenter() @@ -1198,6 +1201,7 @@ def doloop( save_gradient_data_as_image = packing_config_data.get( "save_gradient_data_as_image", False ) + clean_grid_cache = packing_config_data.get("clean_grid_cache", False) seed_list = get_seed_list(packing_config_data, recipe_data) if seed_list is None: @@ -1260,6 +1264,7 @@ def doloop( get_distance_distribution=get_distance_distribution, image_export_options=image_export_options, save_gradient_data_as_image=save_gradient_data_as_image, + clean_grid_cache=clean_grid_cache, ) ) for future in concurrent.futures.as_completed(futures): @@ -1302,6 +1307,7 @@ def doloop( show_grid=show_grid, plot_figures=plot_figures, 
save_gradient_data_as_image=save_gradient_data_as_image, + clean_grid_cache=clean_grid_cache, ) self.writeJSON(center_distance_file, center_distance_dict) diff --git a/cellpack/autopack/Environment.py b/cellpack/autopack/Environment.py index 10bc5d19..13048cfa 100644 --- a/cellpack/autopack/Environment.py +++ b/cellpack/autopack/Environment.py @@ -76,7 +76,7 @@ from .Compartment import CompartmentList, Compartment from .Recipe import Recipe from .ingredient import GrowIngredient, ActinIngredient -from cellpack.autopack import IOutils +from cellpack.autopack import IOutils, get_cache_location, get_local_file_location from .octree import Octree from .Gradient import Gradient from .transformation import signed_angle_between_vectors @@ -153,13 +153,16 @@ def __init__(self, config=None, recipe=None): self.grid_file_out = ( f"{self.out_folder}/{self.name}_{config['name']}_{self.version}_grid.dat" ) - if recipe.get("grid_file_path") is not None: - self.grid_file_out = recipe["grid_file_path"] - - should_load_grid_file = ( - os.path.isfile(self.grid_file_out) and self.load_from_grid_file - ) - self.previous_grid_file = self.grid_file_out if should_load_grid_file else None + self.previous_grid_file = None + if self.load_from_grid_file: + # first check if grid file path is specified in recipe + if recipe.get("grid_file_path") is not None: + self.grid_file_out = get_local_file_location( + recipe["grid_file_path"], cache="grids" + ) + # check if grid file is already present in the output folder + if os.path.isfile(self.grid_file_out): + self.previous_grid_file = self.grid_file_out self.setupfile = "" self.current_path = None # the path of the recipe file self.custom_paths = None @@ -282,6 +285,17 @@ def _setup(self): for gradient_data in self.recipe_data["gradients"]: self.set_gradient(gradient_data) + def clean_grid_cache(self, grid_file_name): + """ + Clean the grid cache + """ + local_file_path = get_cache_location( + name=grid_file_name, cache="grids", destination="" + ) + 
if os.path.exists(local_file_path): + print(f"Removing grid cache file: {local_file_path}") # TODO: change to log + os.remove(local_file_path) + def get_compartment_object_by_name(self, compartment_name): """ Returns compartment object by name @@ -502,7 +516,7 @@ def save_result( if not os.path.isfile(self.grid_file_out) and self.load_from_grid_file: # do not overwrite if grid was loaded from file self.grid.result_filename = self.grid_file_out - self.saveGridToFile(self.grid_file_out) + self.save_grids_to_pickle(self.grid_file_out) if save_grid_logs: self.saveGridLogsAsJson(self.result_file + "_grid-data.json") self.collectResultPerIngredient() @@ -2184,6 +2198,11 @@ def pack_grid( distances=distances, all_objects=all_objects, ) + + if kw.get("clean_grid_cache", False): + grid_file_name = str(self.previous_grid_file).split(os.path.sep)[-1] + self.clean_grid_cache(grid_file_name=grid_file_name) + return all_objects def restore_molecules_array(self, ingr): diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 5c3e6ea4..3878458a 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -34,25 +34,26 @@ AF @author: Ludovic Autin with editing by Graham Johnson """ +import getpass +import json import logging import logging.config -import sys import os import re import shutil -from os import path, environ -import getpass -from pathlib import Path +import ssl +import sys import urllib.request as urllib from collections import OrderedDict -import ssl -import json +from pathlib import Path + +import boto3 +import botocore + from cellpack.autopack.DBRecipeHandler import DBRecipeLoader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS - from cellpack.autopack.loaders.utils import read_json_file, write_json_file - packageContainsVFCommands = 1 ssl._create_default_https_context = ssl._create_unverified_context use_json_hook = True @@ -60,7 +61,9 @@ os.environ["NUMEXPR_MAX_THREADS"] = "32" 
############################################################################### -log_file_path = path.join(path.dirname(path.abspath(__file__)), "../logging.conf") +log_file_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "../logging.conf" +) logging.config.fileConfig(log_file_path, disable_existing_loggers=False) log = logging.getLogger("autopack") log.propagate = False @@ -76,24 +79,9 @@ def make_directory_if_needed(directory): # #Setup autopack data directory. # ============================================================================== # the dir will have all the recipe + cache. - -APPNAME = "autoPACK" - - -if sys.platform == "darwin": - # from AppKit import NSSearchPathForDirectoriesInDomains - # http://developer.apple.com/DOCUMENTATION/Cocoa/Reference/Foundation/Miscellaneous/Foundation_Functions/Reference/reference.html#//apple_ref/c/func/NSSearchPathForDirectoriesInDomains - # NSApplicationSupportDirectory = 14 - # NSUserDomainMask = 1 - # True for expanding the tilde into a fully qualified path - # appdata = path.join(NSSearchPathForDirectoriesInDomains(14, 1, True)[0], APPNAME) - appdata = os.path.expanduser("~") + "/Library/Application Support/autoPACK" -elif sys.platform == "win32": - appdata = path.join(environ["APPDATA"], APPNAME) -else: - appdata = path.expanduser(path.join("~", "." 
+ APPNAME)) +appdata = Path(__file__).parents[2] / ".cache" make_directory_if_needed(appdata) -log.info(f"autoPACK data dir created {appdata}") +log.info(f"cellPACK data dir created {appdata}") appdata = Path(appdata) @@ -109,23 +97,24 @@ def url_exists(url): # setup the cache directory inside the app data folder # ============================================================================== - -cache_results = appdata / "cache_results" -cache_geoms = appdata / "cache_geometries" -cache_sphere = appdata / "cache_collisionTrees" -cache_recipes = appdata / "cache_recipes" +cache_results = appdata / "results" +cache_geoms = appdata / "geometries" +cache_sphere = appdata / "collisionTrees" +cache_recipes = appdata / "recipes" +cache_grids = appdata / "grids" preferences = appdata / "preferences" # we can now use some json/xml file for storing preferences and options. # need others ? -cache_dir = { +CACHE_DIR = { "geometries": cache_geoms, "results": cache_results, "collisionTrees": cache_sphere, "recipes": cache_recipes, + "grids": cache_grids, "prefs": preferences, } -for _, dir in cache_dir.items(): +for _, dir in CACHE_DIR.items(): make_directory_if_needed(dir) usePP = False @@ -261,8 +250,49 @@ def updateReplacePath(newPaths): REPLACE_PATH[w[0]] = w[1] +def download_file_from_s3(s3_uri, local_file_path): + s3_client = boto3.client("s3") + bucket_name, folder, key = parse_s3_uri(s3_uri) + + try: + s3_client.download_file(bucket_name, f"{folder}/{key}", local_file_path) + print("File downloaded successfully.") + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") + + +def parse_s3_uri(s3_uri): + # Remove the "s3://" prefix and split the remaining string into bucket name, folder, and key + s3_uri = s3_uri.replace("s3://", "") + parts = s3_uri.split("/") + bucket_name = parts[0] + folder = "/".join(parts[1:-1]) + key = parts[-1] + + return bucket_name, folder, 
key + + def download_file(url, local_file_path, reporthook): - if url_exists(url): + if is_s3_url(url): + # download from s3 + # bucket_name, folder, key = parse_s3_uri(url) + # s3_handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) + # s3_handler = s3_handler(bucket_name, folder) + s3_client = boto3.client("s3") + bucket_name, folder, key = parse_s3_uri(url) + try: + s3_client.download_file(bucket_name, f"{folder}/{key}", local_file_path) + print("File downloaded successfully.") + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") + + elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) except Exception as e: @@ -272,7 +302,14 @@ def download_file(url, local_file_path, reporthook): def is_full_url(file_path): - return file_path.find("http") != -1 or file_path.find("ftp") != -1 + url_regex = re.compile( + r"^(?:http|https|ftp|s3)://", re.IGNORECASE + ) # check http, https, ftp, s3 + return re.match(url_regex, file_path) is not None + + +def is_s3_url(file_path): + return file_path.find("s3://") != -1 def is_remote_path(file_path): @@ -300,7 +337,7 @@ def get_cache_location(name, cache, destination): name: str destination: str """ - local_file_directory = cache_dir[cache] / destination + local_file_directory = CACHE_DIR[cache] / destination local_file_path = local_file_directory / name make_directory_if_needed(local_file_directory) return local_file_path @@ -340,8 +377,8 @@ def get_local_file_location( # not url, use pathlib input_file_location = Path(input_file_location) - if os.path.isfile(cache_dir[cache] / input_file_location): - return cache_dir[cache] / input_file_location + if os.path.isfile(CACHE_DIR[cache] / input_file_location): + return CACHE_DIR[cache] / input_file_location if os.path.isfile(CURRENT_RECIPE_PATH / input_file_location): # if no folder provided, use the 
current_recipe_folder return CURRENT_RECIPE_PATH / input_file_location @@ -353,7 +390,7 @@ def get_local_file_location( if helper is not None: reporthook = helper.reporthook name = input_file_location - local_file_path = cache_dir[cache] / destination / name + local_file_path = CACHE_DIR[cache] / destination / name download_file(url, local_file_path, reporthook) return local_file_path return input_file_location @@ -536,12 +573,12 @@ def saveRecipeAvailableJSON(recipe_dictionary, filename): def clearCaches(*args): # can't work if file are open! - for k in cache_dir: + for k in CACHE_DIR: try: - shutil.rmtree(cache_dir[k]) - os.makedirs(cache_dir[k]) + shutil.rmtree(CACHE_DIR[k]) + os.makedirs(CACHE_DIR[k]) except: # noqa: E722 - print("problem cleaning ", cache_dir[k]) + print("problem cleaning ", CACHE_DIR[k]) def write_username_to_creds(): diff --git a/cellpack/autopack/loaders/config_loader.py b/cellpack/autopack/loaders/config_loader.py index 647cb248..d77b5bfd 100644 --- a/cellpack/autopack/loaders/config_loader.py +++ b/cellpack/autopack/loaders/config_loader.py @@ -23,6 +23,7 @@ class Inner_Grid_Methods(MetaEnum): class ConfigLoader(object): default_values = { + "clean_grid_cache": False, "format": "simularium", "load_from_grid_file": False, "inner_grid_method": "trimesh", diff --git a/cellpack/bin/clean.py b/cellpack/bin/clean.py new file mode 100644 index 00000000..6a6327d6 --- /dev/null +++ b/cellpack/bin/clean.py @@ -0,0 +1,32 @@ +# cleans the local cache directory +import shutil +from cellpack.autopack import CACHE_DIR +import fire +import os + + +def clean(): + """ + Cleans the local cache directory + :return: void + """ + for _, folder in CACHE_DIR.items(): + for filename in os.listdir(folder): + file_path = os.path.join(folder, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print(f"Failed to delete {file_path}. 
Exception: {e}") + print("Cache cleaned") + + +# Run directly from command line +def main(): + fire.Fire(clean) + + +if __name__ == "__main__": + main() diff --git a/cellpack/tests/packing-configs/test_url_load_config.json b/cellpack/tests/packing-configs/test_url_load_config.json new file mode 100644 index 00000000..0039a028 --- /dev/null +++ b/cellpack/tests/packing-configs/test_url_load_config.json @@ -0,0 +1,7 @@ +{ + "name": "test_url_load_config", + "clean_grid_cache": false, + "load_from_grid_file": true, + "out": "cellpack/tests/outputs", + "save_analyze_result": true +} \ No newline at end of file diff --git a/cellpack/tests/recipes/v2/test_url_load.json b/cellpack/tests/recipes/v2/test_url_load.json new file mode 100644 index 00000000..b1abb816 --- /dev/null +++ b/cellpack/tests/recipes/v2/test_url_load.json @@ -0,0 +1,89 @@ +{ + "version": "1.0.0", + "format_version": "2.1", + "name": "test_url_loading", + "bounding_box": [ + [ + -5, + -5, + -5 + ], + [ + 5, + 5, + 5 + ] + ], + "objects": { + "membrane_mesh": { + "type": "mesh", + "color": [ + 1, + 0, + 1 + ], + "representations": { + "mesh": { + "path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/meshes", + "name": "sphere_4.obj", + "format": "obj" + } + } + }, + "nucleus_mesh": { + "type": "mesh", + "color": [ + 0, + 1, + 1 + ], + "representations": { + "mesh": { + "path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/meshes", + "name": "sphere_2.obj", + "format": "obj" + } + } + }, + "primitive_sphere": { + "type": "single_sphere", + "color": [ + 0.2, + 0.7, + 0.1 + ], + "radius": 0.5, + "packing_mode": "random" + } + }, + "composition": { + "bounding_area": { + "regions": { + "interior": [ + "membrane" + ] + } + }, + "membrane": { + "object": "membrane_mesh", + "count": 1, + "regions": { + "interior": [ + "nucleus", + { + "object": "primitive_sphere", + "count": 100 + } + ] + } + }, + "nucleus": { + "object": "nucleus_mesh", + "count": 1, + "regions": { + "interior": [] + } 
+ } + }, + "grid_file_path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/grids/nested_mesh_grid.dat" +} \ No newline at end of file