Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Load grid from URL #256

Merged
merged 19 commits into from
Jun 13, 2024
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion cellpack/autopack/AWSHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def __init__(
self,
bucket_name,
sub_folder_name=None,
region_name=None,
region_name="us-west-2",
):
self.bucket_name = bucket_name
self.folder_name = sub_folder_name
Expand Down
8 changes: 7 additions & 1 deletion cellpack/autopack/Analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -667,12 +667,13 @@ def pack(
self,
seed=20,
show_plotly_plot=True,
**kwargs,
):
if show_plotly_plot:
self.plotly.update_title(self.env.place_method)

t1 = time()
results = self.env.pack_grid(seedNum=seed)
results = self.env.pack_grid(seedNum=seed, **kwargs)
self.seed_to_results[seed] = results
t2 = time()
run_time = t2 - t1
Expand Down Expand Up @@ -994,6 +995,7 @@ def pack_one_seed(
show_grid=False,
plot_figures=False,
save_gradient_data_as_image=False,
clean_grid_cache=False,
):
"""
Packs one seed of a recipe and returns the recipe object
Expand All @@ -1009,6 +1011,7 @@ def pack_one_seed(
seed=seed,
# TODO: fix this to disable plotly if using simularium
show_plotly_plot=(show_grid and two_d) and not use_simularium,
clean_grid_cache=clean_grid_cache,
)

self.center = self.env.grid.getCenter()
Expand Down Expand Up @@ -1198,6 +1201,7 @@ def doloop(
save_gradient_data_as_image = packing_config_data.get(
"save_gradient_data_as_image", False
)
clean_grid_cache = packing_config_data.get("clean_grid_cache", False)

seed_list = get_seed_list(packing_config_data, recipe_data)
if seed_list is None:
Expand Down Expand Up @@ -1260,6 +1264,7 @@ def doloop(
get_distance_distribution=get_distance_distribution,
image_export_options=image_export_options,
save_gradient_data_as_image=save_gradient_data_as_image,
clean_grid_cache=clean_grid_cache,
)
)
for future in concurrent.futures.as_completed(futures):
Expand Down Expand Up @@ -1302,6 +1307,7 @@ def doloop(
show_grid=show_grid,
plot_figures=plot_figures,
save_gradient_data_as_image=save_gradient_data_as_image,
clean_grid_cache=clean_grid_cache,
)

self.writeJSON(center_distance_file, center_distance_dict)
Expand Down
37 changes: 28 additions & 9 deletions cellpack/autopack/Environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
from .Compartment import CompartmentList, Compartment
from .Recipe import Recipe
from .ingredient import GrowIngredient, ActinIngredient
from cellpack.autopack import IOutils
from cellpack.autopack import IOutils, get_cache_location, get_local_file_location
from .octree import Octree
from .Gradient import Gradient
from .transformation import signed_angle_between_vectors
Expand Down Expand Up @@ -153,13 +153,16 @@ def __init__(self, config=None, recipe=None):
self.grid_file_out = (
f"{self.out_folder}/{self.name}_{config['name']}_{self.version}_grid.dat"
)
if recipe.get("grid_file_path") is not None:
self.grid_file_out = recipe["grid_file_path"]

should_load_grid_file = (
os.path.isfile(self.grid_file_out) and self.load_from_grid_file
)
self.previous_grid_file = self.grid_file_out if should_load_grid_file else None
self.previous_grid_file = None
if self.load_from_grid_file:
# first check if grid file path is specified in recipe
if recipe.get("grid_file_path") is not None:
self.grid_file_out = get_local_file_location(
recipe["grid_file_path"], cache="grids"
)
# check if grid file is already present in the output folder
if os.path.isfile(self.grid_file_out):
self.previous_grid_file = self.grid_file_out
self.setupfile = ""
self.current_path = None # the path of the recipe file
self.custom_paths = None
Expand Down Expand Up @@ -282,6 +285,17 @@ def _setup(self):
for gradient_data in self.recipe_data["gradients"]:
self.set_gradient(gradient_data)

def clean_grid_cache(self, grid_file_name):
    """
    Delete the cached grid file with the given name from the local
    "grids" cache folder, if one is present.
    """
    local_file_path = get_cache_location(grid_file_name, "grids", "")
    if not os.path.exists(local_file_path):
        return
    print(f"Removing grid cache file: {local_file_path}")  # TODO: change to log
    os.remove(local_file_path)

def get_compartment_object_by_name(self, compartment_name):
"""
Returns compartment object by name
Expand Down Expand Up @@ -502,7 +516,7 @@ def save_result(
if not os.path.isfile(self.grid_file_out) and self.load_from_grid_file:
# do not overwrite if grid was loaded from file
self.grid.result_filename = self.grid_file_out
self.saveGridToFile(self.grid_file_out)
self.save_grids_to_pickle(self.grid_file_out)
if save_grid_logs:
self.saveGridLogsAsJson(self.result_file + "_grid-data.json")
self.collectResultPerIngredient()
Expand Down Expand Up @@ -2184,6 +2198,11 @@ def pack_grid(
distances=distances,
all_objects=all_objects,
)

if kw.get("clean_grid_cache", False):
grid_file_name = str(self.previous_grid_file).split(os.path.sep)[-1]
self.clean_grid_cache(grid_file_name=grid_file_name)

return all_objects

def restore_molecules_array(self, ingr):
Expand Down
123 changes: 80 additions & 43 deletions cellpack/autopack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,33 +34,36 @@
AF
@author: Ludovic Autin with editing by Graham Johnson
"""
import getpass
import json
import logging
import logging.config
import sys
import os
import re
import shutil
from os import path, environ
import getpass
from pathlib import Path
import ssl
import sys
import urllib.request as urllib
from collections import OrderedDict
import ssl
import json
from pathlib import Path

import boto3
import botocore
mogres marked this conversation as resolved.
Show resolved Hide resolved

from cellpack.autopack.DBRecipeHandler import DBRecipeLoader
from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS

from cellpack.autopack.loaders.utils import read_json_file, write_json_file


packageContainsVFCommands = 1
ssl._create_default_https_context = ssl._create_unverified_context
use_json_hook = True
afdir = Path(os.path.abspath(__path__[0]))
os.environ["NUMEXPR_MAX_THREADS"] = "32"

###############################################################################
log_file_path = path.join(path.dirname(path.abspath(__file__)), "../logging.conf")
log_file_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "../logging.conf"
)
logging.config.fileConfig(log_file_path, disable_existing_loggers=False)
log = logging.getLogger("autopack")
log.propagate = False
Expand All @@ -76,24 +79,9 @@ def make_directory_if_needed(directory):
# #Setup autopack data directory.
# ==============================================================================
# the dir will have all the recipe + cache.

APPNAME = "autoPACK"


if sys.platform == "darwin":
# from AppKit import NSSearchPathForDirectoriesInDomains
# http://developer.apple.com/DOCUMENTATION/Cocoa/Reference/Foundation/Miscellaneous/Foundation_Functions/Reference/reference.html#//apple_ref/c/func/NSSearchPathForDirectoriesInDomains
# NSApplicationSupportDirectory = 14
# NSUserDomainMask = 1
# True for expanding the tilde into a fully qualified path
# appdata = path.join(NSSearchPathForDirectoriesInDomains(14, 1, True)[0], APPNAME)
appdata = os.path.expanduser("~") + "/Library/Application Support/autoPACK"
elif sys.platform == "win32":
appdata = path.join(environ["APPDATA"], APPNAME)
else:
appdata = path.expanduser(path.join("~", "." + APPNAME))
appdata = Path(__file__).parents[2] / ".cache"
make_directory_if_needed(appdata)
log.info(f"autoPACK data dir created {appdata}")
log.info(f"cellPACK data dir created {appdata}")
appdata = Path(appdata)


Expand All @@ -109,23 +97,24 @@ def url_exists(url):
# setup the cache directory inside the app data folder
# ==============================================================================


cache_results = appdata / "cache_results"
cache_geoms = appdata / "cache_geometries"
cache_sphere = appdata / "cache_collisionTrees"
cache_recipes = appdata / "cache_recipes"
cache_results = appdata / "results"
cache_geoms = appdata / "geometries"
cache_sphere = appdata / "collisionTrees"
meganrm marked this conversation as resolved.
Show resolved Hide resolved
cache_recipes = appdata / "recipes"
cache_grids = appdata / "grids"
preferences = appdata / "preferences"
# we can now use some json/xml file for storing preferences and options.
# need others ?
cache_dir = {
CACHE_DIR = {
"geometries": cache_geoms,
"results": cache_results,
"collisionTrees": cache_sphere,
"recipes": cache_recipes,
"grids": cache_grids,
"prefs": preferences,
}

for _, dir in cache_dir.items():
for _, dir in CACHE_DIR.items():
make_directory_if_needed(dir)

usePP = False
Expand Down Expand Up @@ -261,8 +250,49 @@ def updateReplacePath(newPaths):
REPLACE_PATH[w[0]] = w[1]


def download_file_from_s3(s3_uri, local_file_path):
    """
    Download a single object from S3 to a local file.

    Parameters
    ----------
    s3_uri: str
        Full S3 URI of the object, e.g. "s3://bucket/folder/file.dat".
    local_file_path: str or Path
        Destination path on the local filesystem.
    """
    s3_client = boto3.client("s3")
    # BUG FIX: parse_s3_uri returns (bucket_name, folder, key); the previous
    # two-value unpack raised ValueError before any download could start.
    bucket_name, folder, key = parse_s3_uri(s3_uri)
    # The S3 object key includes the folder prefix when one is present.
    object_key = f"{folder}/{key}" if folder else key
    try:
        s3_client.download_file(bucket_name, object_key, local_file_path)
        print("File downloaded successfully.")
    except botocore.exceptions.ClientError as e:
        if e.response["Error"]["Code"] == "404":
            print("The object does not exist.")
        else:
            print("An error occurred while downloading the file.")


def parse_s3_uri(s3_uri):
    """
    Split an "s3://bucket/folder/.../key" URI into its parts.

    Returns a (bucket_name, folder, key) tuple, where folder is the
    "/"-joined path between the bucket and the final key segment
    (empty string when the object sits at the bucket root).
    """
    segments = s3_uri.replace("s3://", "").split("/")
    return segments[0], "/".join(segments[1:-1]), segments[-1]


def download_file(url, local_file_path, reporthook):
if url_exists(url):
if is_s3_url(url):
# download from s3
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These steps you've commented out are indeed the correct way to initiate the s3 client, we have functionality in AWSHandler that handles the initiation of clients and manages multiple existing clients. I'd suggest moving either download_file or download_file_from_s3 to AWSHandler to keep aws related util functions more organized and avoid potential client conflicts in the future. What do you think?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense! It looks like you have moved these files in your branch already? In that case, I will leave these S3 related functions here so the refactoring in your branch doesn't have merge conflicts.

# bucket_name, folder, key = parse_s3_uri(url)
# s3_handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS)
# s3_handler = s3_handler(bucket_name, folder)
s3_client = boto3.client("s3")
bucket_name, folder, key = parse_s3_uri(url)
try:
s3_client.download_file(bucket_name, f"{folder}/{key}", local_file_path)
print("File downloaded successfully.")
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] == "404":
print("The object does not exist.")
else:
print("An error occurred while downloading the file.")

elif url_exists(url):
try:
urllib.urlretrieve(url, local_file_path, reporthook=reporthook)
except Exception as e:
Expand All @@ -272,7 +302,14 @@ def download_file(url, local_file_path, reporthook):


def is_full_url(file_path):
    """
    Return True when *file_path* begins with a recognized remote
    scheme: http, https, ftp, or s3 (case-insensitive).
    """
    scheme_pattern = r"^(?:http|https|ftp|s3)://"
    return re.match(scheme_pattern, file_path, re.IGNORECASE) is not None


def is_s3_url(file_path):
    """
    Return True when *file_path* is an S3 URI, i.e. it starts with the
    "s3://" scheme.
    """
    # Anchor at the start of the string: the previous substring search
    # (find("s3://") != -1) misclassified any path that merely contained
    # "s3://" somewhere in the middle.
    return file_path.startswith("s3://")


def is_remote_path(file_path):
Expand Down Expand Up @@ -300,7 +337,7 @@ def get_cache_location(name, cache, destination):
name: str
destination: str
"""
local_file_directory = cache_dir[cache] / destination
local_file_directory = CACHE_DIR[cache] / destination
local_file_path = local_file_directory / name
make_directory_if_needed(local_file_directory)
return local_file_path
Expand Down Expand Up @@ -340,8 +377,8 @@ def get_local_file_location(

# not url, use pathlib
input_file_location = Path(input_file_location)
if os.path.isfile(cache_dir[cache] / input_file_location):
return cache_dir[cache] / input_file_location
if os.path.isfile(CACHE_DIR[cache] / input_file_location):
return CACHE_DIR[cache] / input_file_location
if os.path.isfile(CURRENT_RECIPE_PATH / input_file_location):
# if no folder provided, use the current_recipe_folder
return CURRENT_RECIPE_PATH / input_file_location
Expand All @@ -353,7 +390,7 @@ def get_local_file_location(
if helper is not None:
reporthook = helper.reporthook
name = input_file_location
local_file_path = cache_dir[cache] / destination / name
local_file_path = CACHE_DIR[cache] / destination / name
download_file(url, local_file_path, reporthook)
return local_file_path
return input_file_location
Expand Down Expand Up @@ -536,12 +573,12 @@ def saveRecipeAvailableJSON(recipe_dictionary, filename):

def clearCaches(*args):
# can't work if file are open!
for k in cache_dir:
for k in CACHE_DIR:
try:
shutil.rmtree(cache_dir[k])
os.makedirs(cache_dir[k])
shutil.rmtree(CACHE_DIR[k])
os.makedirs(CACHE_DIR[k])
except: # noqa: E722
print("problem cleaning ", cache_dir[k])
print("problem cleaning ", CACHE_DIR[k])


def write_username_to_creds():
Expand Down
1 change: 1 addition & 0 deletions cellpack/autopack/loaders/config_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ class Inner_Grid_Methods(MetaEnum):

class ConfigLoader(object):
default_values = {
"clean_grid_cache": False,
"format": "simularium",
"load_from_grid_file": False,
"inner_grid_method": "trimesh",
Expand Down
32 changes: 32 additions & 0 deletions cellpack/bin/clean.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# cleans the local cache directory
import shutil
from cellpack.autopack import CACHE_DIR
import fire
import os


def clean():
    """
    Empty every local cache folder registered in CACHE_DIR, deleting
    files, symlinks, and subdirectories while leaving the cache
    folders themselves in place.
    :return: void
    """
    for folder in CACHE_DIR.values():
        for entry in os.listdir(folder):
            file_path = os.path.join(folder, entry)
            try:
                # Files and symlinks are unlinked; real directories are
                # removed recursively.
                if os.path.isfile(file_path) or os.path.islink(file_path):
                    os.unlink(file_path)
                elif os.path.isdir(file_path):
                    shutil.rmtree(file_path)
            except Exception as e:
                print(f"Failed to delete {file_path}. Exception: {e}")
    print("Cache cleaned")


# Run directly from command line
def main():
    """CLI entry point: expose the ``clean`` command through Google Fire."""
    fire.Fire(clean)


if __name__ == "__main__":
    main()
Loading
Loading