From abe3de09045c6dd96febe738bc965175036f460d Mon Sep 17 00:00:00 2001 From: Khoroshevskyi Date: Tue, 3 Dec 2024 15:57:03 -0500 Subject: [PATCH] Updated bedms to new version + stability improvements --- pephub/_version.py | 2 +- pephub/const.py | 4 +++ pephub/routers/api/v1/project.py | 43 +++++++++++++++++++++---------- requirements/requirements-all.txt | 2 +- 4 files changed, 36 insertions(+), 15 deletions(-) diff --git a/pephub/_version.py b/pephub/_version.py index f075dd36..745162e7 100644 --- a/pephub/_version.py +++ b/pephub/_version.py @@ -1 +1 @@ -__version__ = "0.14.1" +__version__ = "0.14.2" diff --git a/pephub/const.py b/pephub/const.py index 18c1ad76..89af9111 100644 --- a/pephub/const.py +++ b/pephub/const.py @@ -142,3 +142,7 @@ ARCHIVE_URL_PATH = "https://cloud2.databio.org/pephub/" MAX_PROCESSED_PROJECT_SIZE = 5000 + +MAX_STANDARDIZED_PROJECT_SIZE = 100 + +BEDMS_REPO_URL = "databio/attribute-standardizer-model6" diff --git a/pephub/routers/api/v1/project.py b/pephub/routers/api/v1/project.py index 5e7a44d7..5d873229 100644 --- a/pephub/routers/api/v1/project.py +++ b/pephub/routers/api/v1/project.py @@ -55,7 +55,11 @@ ConfigResponseModel, StandardizerResponse, ) -from ....const import MAX_PROCESSED_PROJECT_SIZE +from ....const import ( + MAX_PROCESSED_PROJECT_SIZE, + BEDMS_REPO_URL, + MAX_STANDARDIZED_PROJECT_SIZE, +) from .helpers import verify_updated_project from bedms import AttrStandardizer @@ -1182,33 +1186,46 @@ def delete_full_history( response_model=StandardizerResponse, ) async def get_standardized_cols( - pep: peppy.Project = Depends(get_project), + pep: dict = Depends(get_project), schema: str = "", ): """ - Standardize PEP metadata column headers using BEDmess. + Standardize PEP metadata column headers using BEDms. - :param namespace: pep: PEP string to be standardized + :param pep: PEP string to be standardized :param schema: Schema for AttrStandardizer :return dict: Standardized results """ - if schema == "": + if schema == "" or schema not in ["ENCODE", "BEDBASE", "FAIRTRACKS"]: raise HTTPException( - code=500, - detail="Schema is required! Available schemas are ENCODE and Fairtracks", + status_code=404, + detail="Schema not available! Available schemas are ENCODE, BEDBASE and FAIRTRACKS.", + ) + + if len(pep["_sample_dict"]) > MAX_STANDARDIZED_PROJECT_SIZE: + # raise HTTPException( + # status_code=400, + # detail=f"Project is too large. Cannot standardize. " + # f"Limit is {MAX_STANDARDIZED_PROJECT_SIZE} samples.", + # ) + prj = peppy.Project.from_dict( + { + "_config": pep["_config"], + "_sample_dict": pep["_sample_dict"][:50], + } ) - return {} - - prj = peppy.Project.from_dict(pep) - model = AttrStandardizer(schema) + else: + prj = peppy.Project.from_dict(pep) + model = AttrStandardizer(repo_id=BEDMS_REPO_URL, model_name=schema.lower()) try: results = model.standardize(pep=prj) - except Exception: + except Exception as e: + _LOGGER.error(f"Error standardizing PEP. {e}") raise HTTPException( - code=400, + status_code=400, detail=f"Error standardizing PEP.", ) diff --git a/requirements/requirements-all.txt b/requirements/requirements-all.txt index d7903965..25631478 100644 --- a/requirements/requirements-all.txt +++ b/requirements/requirements-all.txt @@ -16,4 +16,4 @@ fastembed numpy<2.0.0 slowapi cachetools>=4.2.4 -bedms>=0.1.0 \ No newline at end of file +bedms>=0.2.0 \ No newline at end of file