Skip to content

Commit

Permalink
Merge branch 'main' of github.com:mesoscope/cellpack into feature/mov…
Browse files Browse the repository at this point in the history
…e_analyses
  • Loading branch information
mogres committed May 8, 2024
2 parents 0f020c5 + a8e71d8 commit 7db43dd
Show file tree
Hide file tree
Showing 22 changed files with 451 additions and 51 deletions.
9 changes: 5 additions & 4 deletions .github/workflows/analyze.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,16 @@ on:

jobs:
Analyze:
if: ${{ !contains(github.event.head_commit.message, 'Bump version') }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
python-version: [3.9]
os: [ubuntu-latest]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install Dependencies
Expand All @@ -41,9 +42,9 @@ jobs:
runs-on: ubuntu-latest
needs: [Analyze]
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/build-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ on:

jobs:
docs:
if: ${{ !contains(github.event.head_commit.message, 'Bump version') }}
runs-on: ubuntu-latest
steps:
- uses: actions/[email protected]
Expand Down
18 changes: 11 additions & 7 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ on:

jobs:
test:
if: ${{ !contains(github.event.head_commit.message, 'Bump version') }}
runs-on: ${{ matrix.os }}
strategy:
matrix:
Expand All @@ -46,11 +47,11 @@ jobs:

lint:
runs-on: ubuntu-latest

if: ${{ !contains(github.event.head_commit.message, 'Bump version') }}
steps:
- uses: actions/checkout@v1
- uses: actions/checkout@v4
- name: Set up Python
uses: actions/setup-python@v1
uses: actions/setup-python@v5
with:
python-version: 3.9
- name: Install Dependencies
Expand All @@ -68,6 +69,11 @@ jobs:
if: success() && startsWith(github.ref, 'refs/tags/')
needs: [lint, test]
runs-on: ubuntu-latest
environment:
name: pypi
url: https://pypi.org/p/cellpack
permissions:
id-token: write

steps:
- uses: actions/checkout@v1
Expand All @@ -83,7 +89,5 @@ jobs:
run: |
python setup.py sdist bdist_wheel
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@master
with:
user: meganrm
password: ${{ secrets.PYPI_TOKEN }}
uses: pypa/gh-action-pypi-publish@release/v1

29 changes: 29 additions & 0 deletions .github/workflows/cleanup-firebase.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Weekly maintenance job: runs cellpack/bin/cleanup_tasks.py against Firebase.
name: Cleanup Firebase Metadata

on:
  schedule:
    - cron: "24 18 * * 1"  # Runs at 18:24 UTC every Monday

jobs:
  cleanup:
    # This job modifies shared database state, so it runs on a single runner.
    # Fanning it out over several operating systems would only repeat the same
    # cleanup pass concurrently against the same database.
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        # Quoted so the version is read as a string, not a float.
        python-version: ["3.9"]
        os: [ubuntu-latest]
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install .[all]
      - name: Cleanup Firebase Metadata
        # Credentials are passed to the script through the environment.
        env:
          FIREBASE_TOKEN: ${{ secrets.FIREBASE_TOKEN }}
          FIREBASE_EMAIL: ${{ secrets.FIREBASE_EMAIL }}
        run: |
          python cellpack/bin/cleanup_tasks.py
4 changes: 2 additions & 2 deletions .github/workflows/cleanup.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ jobs:
cleanup:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v1-node16
uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
Expand Down
2 changes: 1 addition & 1 deletion cellpack/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
__email__ = "[email protected]"
# Do not edit this string manually, always use bumpversion
# Details in CONTRIBUTING.md
__version__ = "1.0.3"
__version__ = "1.0.8"

from .autopack.loaders.recipe_loader import RecipeLoader # noqa: F401

Expand Down
42 changes: 33 additions & 9 deletions cellpack/autopack/AWSHandler.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
from pathlib import Path
from urllib.parse import parse_qs, urlparse, urlunparse

import boto3
from botocore.exceptions import ClientError
Expand Down Expand Up @@ -40,7 +41,7 @@ def _create_session(self, region_name):

def get_aws_object_key(self, object_name):
if self.folder_name is not None:
object_name = self.folder_name + object_name
object_name = f"{self.folder_name}/{object_name}"
else:
object_name = object_name
return object_name
Expand Down Expand Up @@ -76,23 +77,46 @@ def create_presigned_url(self, object_name, expiration=3600):
"""
object_name = self.get_aws_object_key(object_name)
# Generate a presigned URL for the S3 object
# The response contains the presigned URL
# https://{self.bucket_name}.s3.{region}.amazonaws.com/{object_key}
try:
url = self.s3_client.generate_presigned_url(
"get_object",
Params={"Bucket": self.bucket_name, "Key": object_name},
ExpiresIn=expiration,
)
base_url = urlunparse(urlparse(url)._replace(query="", fragment=""))
return base_url
except ClientError as e:
logging.error(e)
logging.error(f"Error generating presigned URL: {e}")
return None
# The response contains the presigned URL
# https://{self.bucket_name}.s3.{region}.amazonaws.com/{object_key}
return url

def save_file(self, file_path):
def is_url_valid(self, url):
    """
    Validate that `url` is an unsigned https URL pointing into this handler's bucket.

    The bucket may appear either as the first path segment
    (https://s3.{region}.amazonaws.com/{bucket}/{key}) or as the first host label
    (https://{bucket}.s3.{region}.amazonaws.com/{key}).

    Returns True when the scheme is https, the bucket matches `self.bucket_name`,
    and the query string carries no request-signing parameters; False otherwise.
    """
    parsed_url = urlparse(url)
    # Check the scheme
    if parsed_url.scheme != "https":
        return False
    # Check the bucket name (path-style or virtual-hosted-style addressing)
    bucket_in_path = parsed_url.path.startswith(f"/{self.bucket_name}/")
    bucket_in_host = (parsed_url.hostname or "").startswith(f"{self.bucket_name}.")
    if not (bucket_in_path or bucket_in_host):
        return False
    # Check unwanted query parameters: both the legacy (SigV2) names and the
    # X-Amz-* (SigV4) names mark a URL that still carries signing material
    unwanted_query_params = {
        "AWSAccessKeyId",
        "Signature",
        "Expires",
        "X-Amz-Algorithm",
        "X-Amz-Credential",
        "X-Amz-Date",
        "X-Amz-Expires",
        "X-Amz-SignedHeaders",
        "X-Amz-Signature",
        "X-Amz-Security-Token",
    }
    # keep_blank_values so a parameter with an empty value is still detected
    query_params = parse_qs(parsed_url.query, keep_blank_values=True)
    return unwanted_query_params.isdisjoint(query_params)

def save_file_and_get_url(self, file_path):
"""
Uploads a file to S3 and returns the presigned url
Uploads a file to S3 and returns the base url
"""
file_name = self.upload_file(file_path)
if file_name:
return file_name, self.create_presigned_url(file_name)
base_url = self.create_presigned_url(file_name)
if file_name and base_url:
if self.is_url_valid(base_url):
return file_name, base_url
2 changes: 1 addition & 1 deletion cellpack/autopack/Compartment.py
Original file line number Diff line number Diff line change
Expand Up @@ -1218,7 +1218,7 @@ def BuildGrid_trimesh(

off_grid_surface_points = surface_points_in_bounding_box

ex = False # True if nbGridPoints == len(idarray) else False
ex = True # True if nbGridPoints == len(idarray) else False

surfacePoints, surfacePointsNormals = self.extendGridArrays(
nbGridPoints,
Expand Down
67 changes: 66 additions & 1 deletion cellpack/autopack/DBRecipeHandler.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import copy
from datetime import datetime, timezone
from enum import Enum

from deepdiff import DeepDiff
import requests

from cellpack.autopack.utils import deep_merge

Expand Down Expand Up @@ -375,6 +378,36 @@ def should_write(self, db, grad_name):
return None, None


class ResultDoc:
    """
    Maintenance helper for the documents stored in the "results" collection.
    """

    def __init__(self, db):
        # db is the database handler used for every read and delete below
        self.db = db

    def handle_expired_results(self, max_age_days=180):
        """
        Delete result documents that are older than `max_age_days` and whose
        linked object is no longer reachable.

        Parameters
        ----------
        max_age_days: int
            Age threshold in days; only results strictly older than this are
            considered for deletion. Defaults to 180.
        """
        current_utc = datetime.now(timezone.utc)
        results = self.db.get_all_docs("results")
        if not results:
            print("No results found in the database.")
            return
        for result in results:
            result_data = self.db.doc_to_dict(result)
            result_age = current_utc - result_data["timestamp"]
            # Recent results are kept without issuing any network request
            if result_age.days <= max_age_days:
                continue
            url = result_data["url"]
            try:
                still_exists = self.validate_existence(url)
            except requests.RequestException as e:
                # A failed check says nothing about whether the object is gone,
                # so keep the document and carry on with the remaining results
                print(f"Could not check {url}: {e}. Keeping the result.")
                continue
            if not still_exists:
                self.db.delete_doc("results", self.db.doc_id(result))
        print("Results cleanup complete.")

    def validate_existence(self, url, timeout=10):
        """
        Validate the existence of an S3 object by checking if the URL is accessible.

        Parameters
        ----------
        url: str
            URL to send a HEAD request to
        timeout: float
            Seconds to wait for the server before giving up, so an unresponsive
            host cannot stall the caller indefinitely. Defaults to 10.

        Returns
        -------
        bool
            True if the response status is 200 (OK)
        """
        return requests.head(url, timeout=timeout).status_code == requests.codes.ok


class DBUploader(object):
"""
Handles the uploading of data to the database.
Expand Down Expand Up @@ -404,6 +437,9 @@ def prep_data_for_db(data):
modified_data[key] = unpacked_value
if isinstance(unpacked_value, dict):
modified_data[key] = DBUploader.prep_data_for_db(unpacked_value)
# If the value is an enum, convert it to a string. e.g. during a version migration process where "type" in a v1 recipe is an enum
elif isinstance(value, Enum):
modified_data[key] = value.name
# If the value is a dictionary, recursively convert its nested lists to dictionaries
elif isinstance(value, dict):
modified_data[key] = DBUploader.prep_data_for_db(value)
Expand Down Expand Up @@ -572,6 +608,7 @@ def upload_recipe(self, recipe_meta_data, recipe_data):
print(f"{recipe_id} is already in firestore")
return
recipe_to_save = self.upload_collections(recipe_meta_data, recipe_data)
recipe_to_save["recipe_path"] = self.db.create_path("recipes", recipe_id)
self.upload_data("recipes", recipe_to_save, recipe_id)

def upload_result_metadata(self, file_name, url):
Expand All @@ -584,7 +621,7 @@ def upload_result_metadata(self, file_name, url):
self.db.update_or_create(
"results",
file_name,
{"user": username, "timestamp": timestamp, "url": url.split("?")[0]},
{"user": username, "timestamp": timestamp, "url": url},
)


Expand Down Expand Up @@ -630,6 +667,18 @@ def prep_db_doc_for_download(self, db_doc):
def collect_docs_by_id(self, collection, id):
return self.db.get_doc_by_id(collection, id)

def validate_input_recipe_path(self, path):
    """
    Confirm that the given path refers to a recipe stored in the database.
    A recipe path has the form: firebase:recipes/[RECIPE-ID]

    Raises ValueError when the document at that path has no "recipe_path" value.
    """
    collection, recipe_id = self.db.get_collection_id_from_path(path)
    # A stored "recipe_path" value is the marker that the recipe exists
    if self.db.get_value(collection, recipe_id, "recipe_path"):
        return
    raise ValueError(
        f"No recipe found at the input path: '{path}'. Please ensure the recipe exists in the database and is spelled correctly. Expected path format: 'firebase:recipes/[RECIPE-ID]'"
    )

@staticmethod
def _get_grad_and_obj(obj_data, obj_dict, grad_dict):
"""
Expand Down Expand Up @@ -706,3 +755,19 @@ def compile_db_recipe_data(db_recipe_data, obj_dict, grad_dict, comp_dict):
if grad_dict:
recipe_data["gradients"] = [{**v} for v in grad_dict.values()]
return recipe_data


class DBMaintenance:
    """
    Entry point for database housekeeping tasks.
    """

    def __init__(self, db_handler):
        # Keep the handler and build the results helper on top of it
        self.db = db_handler
        self.result_doc = ResultDoc(self.db)

    def cleanup_results(self):
        """
        Run the expired-results pass: delegates to the ResultDoc helper, which
        removes old result documents whose linked object is no longer accessible.
        """
        self.result_doc.handle_expired_results()
Loading

0 comments on commit 7db43dd

Please sign in to comment.