Merge branch 'celery-integration' into feature/new-celery-image-req
linuxpizzacats committed Aug 24, 2023
2 parents 70d314f + 8def78e commit 0fe343d
Showing 10 changed files with 119 additions and 17 deletions.
2 changes: 1 addition & 1 deletion DEPLOYING.md
@@ -257,7 +257,7 @@ In order to create a database dump you can execute the following command **inside
That command will restore the database using a compressed dump as source. You can use the flags `--numInsertionWorkersPerCollection [number of workers]` to increase importing speed or `-vvvv` to check importing status.
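The flags above can be combined in a single invocation; a sketch (the archive path and worker count are placeholders, assuming a gzip-compressed `--archive` dump):

```shell
# Hypothetical invocation: restore a compressed dump with 4 insertion
# workers per collection and maximum verbosity. Paths are placeholders.
mongorestore --gzip --archive=/backups/multiomix_dump.gz \
    --numInsertionWorkersPerCollection 4 -vvvv
```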


### Importing media file
### Importing _media_ folder

To import a `media` folder backup inside a new environment you must (from the root project folder):

4 changes: 2 additions & 2 deletions Dockerfile
@@ -43,7 +43,7 @@ RUN apt-get update && apt-get install -y python3-pip curl libcurl4-openssl-dev l
&& curl -fsSL https://deb.nodesource.com/setup_16.x | bash - && apt-get install -y nodejs && mkdir /config \
&& mkdir /src

# Installs Python dependencies
# Installs Python dependencies and compiles the frontend
ADD config/requirements.txt /config/
WORKDIR /src
ADD src .
@@ -53,7 +53,7 @@ RUN pip3 install -r /config/requirements.txt && npm --prefix /src/frontend/stati
# Media folder
VOLUME /src/media

# Healtchecker and Housekeeping
# Healthcheck and Housekeeping
HEALTHCHECK --interval=5m --timeout=30s CMD ["/bin/bash", "-c", "/src/tools/checks.sh"]

ENTRYPOINT ["/bin/bash", "-c", "/src/entrypoint.sh"]
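The `HEALTHCHECK` above delegates to `/src/tools/checks.sh`. A minimal sketch of what such a script could look like — the endpoint, port, and check itself are assumptions, not the project's actual script:

```shell
#!/bin/bash
# Hypothetical healthcheck: exit 0 when the web server answers, non-zero
# otherwise, so Docker marks the container unhealthy. Port/path are assumed.
curl -fsS http://localhost:8000/ > /dev/null || exit 1
```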
7 changes: 6 additions & 1 deletion Dockerfile-celery
@@ -1,4 +1,3 @@
# TODO: try to change to alpine image
FROM python:3.8.17-slim-bullseye

# Docker Files Vars
@@ -43,8 +42,14 @@ RUN apt-get update && apt-get install -y python3-pip curl libcurl4-openssl-dev l
&& mkdir /config \
&& mkdir /src

# Installs Python dependencies
ADD config/requirements_celery.txt /config/requirements.txt
WORKDIR /src
ADD src .
RUN pip install --upgrade pip && pip3 install -r /config/requirements.txt
5 changes: 3 additions & 2 deletions README.md
@@ -58,8 +58,9 @@ Every time you want to work with Multiomix, you need to follow the below steps:
- `npm run prod`: compiles code in production mode.
1. Run Celery tasks queue to run experiments:
1. `cd src`
2. `python3 -m celery -A multiomics_intermediate worker -l info -Q correlation_analysis` <!-- TODO: add the other queues when implemented -->
2. If you want to check Task in the GUI you can run [Flower](https://flower.readthedocs.io/en/latest/index.html) `python3 -m celery -A multiomics_intermediate flower`
1. `python3 -m celery -A multiomics_intermediate worker -l info -Q correlation_analysis`
1. `python3 -m celery -A multiomics_intermediate worker -l info -Q feature_selection`
1. If you want to inspect tasks in a GUI, you can run [Flower](https://flower.readthedocs.io/en/latest/index.html): `python3 -m celery -A multiomics_intermediate flower`
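Each worker above consumes one dedicated queue; in a Celery project this pairing is typically declared in the app configuration via `task_routes`. A sketch — the dotted task paths are hypothetical placeholders, not the project's real module names:

```python
# Hypothetical routing table: send each task to the queue its worker consumes.
# The task paths are illustrative placeholders.
task_routes = {
    'api_service.tasks.run_correlation': {'queue': 'correlation_analysis'},
    'feature_selection.tasks.run_fs': {'queue': 'feature_selection'},
}
```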

### Linter and Typescript

1 change: 1 addition & 0 deletions config/requirements.txt
@@ -10,6 +10,7 @@ scipy==1.8.1
channels==2.4.0
channels-redis==3.1.0
daphne==2.5.0
django-object-actions==4.1.0
scikit-survival==0.20.0
django-generate-secret-key==1.0.2
requests==2.23.0
2 changes: 1 addition & 1 deletion src/api_service/exceptions.py
@@ -9,7 +9,7 @@ class NoSamplesInCommon(Exception):


class CouldNotDeleteInMongo(Exception):
"""Raised when could not deleted the associated tuples in MongoDB for a particular experiment's result"""
"""Raised when could not delete the associated tuples in MongoDB for a particular experiment's result"""
pass


4 changes: 2 additions & 2 deletions src/common/functions.py
@@ -88,9 +88,9 @@ def close_db_connection():
def check_if_stopped(is_aborted: AbortEvent, exception: Type[Exception]):
"""
Check if the event is set raising the corresponding exception.
@param is_aborted: Stop event to check if the experiment was stopped
@param is_aborted: Stop event to check if it's set.
@param exception: Exception to raise if the event is set.
@raise ExperimentStopped If the stop event is set
@raise: The specified exception, if the stop event is set.
"""
if is_aborted():
raise exception
102 changes: 98 additions & 4 deletions src/feature_selection/admin.py
@@ -1,11 +1,26 @@
import csv
from typing import Optional
from django.contrib import admin
from django.db.models import QuerySet
from django.http import StreamingHttpResponse
from django.utils import timezone
from django_object_actions import DjangoObjectActions
from biomarkers.models import BiomarkerState
from feature_selection.models import FSExperiment, SVMParameters, ClusteringParameters, TrainedModel, ClusterLabelsSet, \
ClusterLabel, SVMTimesRecord, RFTimesRecord, ClusteringTimesRecord, RFParameters, CoxRegressionParameters, \
BBHAParameters


class Echo:
"""
An object that implements just the write method of the file-like interface.
Taken from https://docs.djangoproject.com/en/4.2/howto/outputting-csv/#streaming-csv-files
"""
def write(self, value):
"""Write the value by returning it, instead of storing in a buffer."""
return value
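The streaming trick used by the `Echo` class above can be exercised on its own: because `Echo.write` returns the formatted line instead of buffering it, each `writer.writerow(...)` call yields one ready-to-stream CSV line.

```python
import csv

class Echo:
    """File-like object whose write() returns the value instead of buffering it."""
    def write(self, value):
        return value

writer = csv.writer(Echo())
# Each writerow() call returns one formatted CSV line (csv uses \r\n endings),
# so header and rows can be collected (or streamed) lazily, line by line.
lines = [writer.writerow(('col_a', 'col_b'))] + \
        [writer.writerow(row) for row in [('x', 1), ('y', 2)]]
print(''.join(lines))
```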


class FSExperimentAdmin(admin.ModelAdmin):
@staticmethod
@admin.display(description='Origin Biomarker')
@@ -42,18 +57,97 @@ def biomarker(obj: TrainedModel) -> Optional[str]:
common_time_record_fields = ('pk', 'number_of_features', 'number_of_samples', 'execution_time', 'fitness')


class SVMTimesRecordAdmin(admin.ModelAdmin):
class SVMTimesRecordAdmin(DjangoObjectActions, admin.ModelAdmin):
list_display = common_time_record_fields + ('test_time', 'number_of_iterations', 'time_by_iteration',
'max_iterations', 'optimizer', 'kernel')

def export(self, _request, queryset: QuerySet[SVMTimesRecord]):
"""
Returns the QuerySet data as a CSV in a StreamingResponse.
Taken from https://docs.djangoproject.com/en/4.2/howto/outputting-csv/#streaming-csv-files
"""
data = queryset.values_list('number_of_features', 'number_of_samples', 'execution_time', 'fitness',
'train_score', 'test_time', 'number_of_iterations', 'time_by_iteration',
'max_iterations', 'optimizer', 'kernel')
rows = (elem for elem in data)
pseudo_buffer = Echo()
writer = csv.writer(pseudo_buffer)

class RFTimesRecordAdmin(admin.ModelAdmin):
list_display = common_time_record_fields + ('test_time', 'number_of_trees')
# Sets CSV header
header = writer.writerow(('Number of features', 'Number of samples', 'Execution time', 'Fitness',
'Train score', 'Test time', 'Number of iterations', 'Time by iteration',
'Max iterations', 'Optimizer', 'Kernel'))

# Returns the CSV as a StreamingResponse with the current date (only) in the filename
today = timezone.now().strftime("%Y-%m-%d")
return StreamingHttpResponse(
[header] + [writer.writerow(row) for row in rows],
content_type="text/csv",
headers={"Content-Disposition": f'attachment; filename="SVMTimesRecord-{today}.csv"'}
)

changelist_actions = ('export', )


class ClusteringTimesRecordAdmin(admin.ModelAdmin):

class RFTimesRecordAdmin(DjangoObjectActions, admin.ModelAdmin):
list_display = common_time_record_fields + ('test_time', 'number_of_trees')

def export(self, _request, queryset: QuerySet[RFTimesRecord]):
"""
Returns the QuerySet data as a CSV in a StreamingResponse.
Taken from https://docs.djangoproject.com/en/4.2/howto/outputting-csv/#streaming-csv-files
"""
data = queryset.values_list('number_of_features', 'number_of_samples', 'execution_time', 'fitness',
'train_score', 'test_time', 'number_of_trees')
rows = (elem for elem in data)
pseudo_buffer = Echo()
writer = csv.writer(pseudo_buffer)

# Sets CSV header
header = writer.writerow(('Number of features', 'Number of samples', 'Execution time', 'Fitness',
'Train score', 'Test time', 'Number of trees'))

# Returns the CSV as a StreamingResponse with the current date (only) in the filename
today = timezone.now().strftime("%Y-%m-%d")
return StreamingHttpResponse(
[header] + [writer.writerow(row) for row in rows],
content_type="text/csv",
headers={"Content-Disposition": f'attachment; filename="RFTimesRecord-{today}.csv"'}
)

changelist_actions = ('export', )


class ClusteringTimesRecordAdmin(DjangoObjectActions, admin.ModelAdmin):
list_display = common_time_record_fields + ('number_of_clusters', 'algorithm', 'scoring_method', 'fs_experiment')

def export(self, _request, queryset: QuerySet[ClusteringTimesRecord]):
"""
Returns the QuerySet data as a CSV in a StreamingResponse.
Taken from https://docs.djangoproject.com/en/4.2/howto/outputting-csv/#streaming-csv-files
"""
data = queryset.values_list('number_of_features', 'number_of_samples', 'execution_time', 'fitness',
'train_score', 'number_of_clusters', 'algorithm', 'scoring_method')

rows = (elem for elem in data)
pseudo_buffer = Echo()
writer = csv.writer(pseudo_buffer)

# Sets CSV header
header = writer.writerow(('Number of features', 'Number of samples', 'Execution time', 'Fitness',
'Train score', 'Number of clusters', 'Algorithm', 'Scoring method'))

# Returns the CSV as a StreamingResponse with the current date (only) in the filename
today = timezone.now().strftime("%Y-%m-%d")
return StreamingHttpResponse(
[header] + [writer.writerow(row) for row in rows],
content_type="text/csv",
headers={"Content-Disposition": f'attachment; filename="ClusteringTimesRecord-{today}.csv"'}
)

changelist_actions = ('export', )


class BBHAParametersAdmin(admin.ModelAdmin):
list_display = ('n_stars', 'n_iterations', 'version_used', 'fs_experiment')
Expand Up @@ -148,7 +148,7 @@ export class BiomarkersPanel extends React.Component<{}, BiomarkersPanelState> {
}
}

/** Makes a request to delete an Experiment. */
/** Makes a request to stop an FSExperiment. */
stopFSExperiment = () => {
if (this.state.biomarkerToStop === null) {
return
@@ -162,15 +162,15 @@ export class BiomarkersPanel extends React.Component<{}, BiomarkersPanelState> {
headers: myHeaders,
searchParams: { biomarkerId }
}).then((response) => {
// If OK is returned refresh the experiments
// If OK is returned, closes the modal
if (response.ok) {
this.setState({ biomarkerToStop: null })
} else {
alertGeneralError()
}
}).catch((err) => {
alertGeneralError()
console.log('Error deleting experiment ->', err)
console.log('Error stopping FSExperiment ->', err)
}).finally(() => {
this.setState({ stoppingExperiment: false })
})
3 changes: 2 additions & 1 deletion src/multiomics_intermediate/settings.py
@@ -36,6 +36,7 @@
'django.contrib.sessions',
'django.contrib.messages',
'django.contrib.staticfiles',
'django_object_actions',
'django_filters',
'api_service.apps.ApiServiceConfig',
'frontend',
@@ -218,7 +219,7 @@

# Number of threads used in ThreadPool to run experiments. Please take memory into consideration
# IMPORTANT: needs a server restart
THREAD_POOL_SIZE: int = int(os.getenv('THREAD_POOL_SIZE', 5)) # TODO: remove and document in DEPLOYING to set in CELERY
THREAD_POOL_SIZE: int = int(os.getenv('THREAD_POOL_SIZE', 5))  # TODO: remove once the Celery integration is finished, and document in DEPLOYING how to set this in Celery
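The setting above follows a common pattern: an integer read from the environment with an in-code default. A minimal, self-contained sketch of the same line:

```python
import os

# Mirror of the settings pattern above: read an integer setting from the
# environment, falling back to 5 when THREAD_POOL_SIZE is unset.
THREAD_POOL_SIZE: int = int(os.getenv('THREAD_POOL_SIZE', 5))
```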

# Time limit in seconds for a correlation analysis to be computed. If the experiment is not finished in this time, it is
# marked as TIMEOUT_EXCEEDED
