updated docker image version back to 3.3.2 (#32)
* updated back to 3.3.2

The latest version includes PDF-related packages, which we don't need now.

* deleted unnecessary dependencies

* lint

* removed extra_file in tests

* added help info
qchiujunhao authored Dec 11, 2024
1 parent 66fd012 commit ebbeba1
Showing 9 changed files with 141 additions and 278 deletions.
12 changes: 0 additions & 12 deletions Dockerfile
@@ -7,22 +7,10 @@ RUN apt-get update && \
apt-get install -y --no-install-recommends git unzip libgomp1 && \
rm -rf /var/lib/apt/lists/*

RUN apt-get update && apt-get install -y \
libgdk-pixbuf2.0-0 \
libpangocairo-1.0-0 \
libcairo2 \
libpango-1.0-0 \
libglib2.0-0 \
libfontconfig1 \
libfreetype6 \
&& rm -rf /var/lib/apt/lists/*

# Install Python packages
RUN pip install -U pip && \
pip install --no-cache-dir --no-compile joblib && \
pip install --no-cache-dir --no-compile h5py && \
pip install --no-cache-dir --no-compile weasyprint && \
pip install --no-cache-dir --no-compile markdown2 && \
pip install --no-cache-dir --no-compile pycaret[analysis,models]==${VERSION} && \
pip install --no-cache-dir --no-compile explainerdashboard

127 changes: 0 additions & 127 deletions tools/generate_md.py

This file was deleted.

2 changes: 1 addition & 1 deletion tools/pycaret_macros.xml
@@ -5,7 +5,7 @@
<token name="@PROFILE@">21.05</token>
<xml name="python_requirements">
<requirements>
<container type="docker">quay.io/goeckslab/galaxy-pycaret:latest</container>
<container type="docker">quay.io/goeckslab/galaxy-pycaret:3.3.2</container>
</requirements>
</xml>
<xml name="macro_citations">
17 changes: 0 additions & 17 deletions tools/pycaret_predict.py
@@ -1,11 +1,7 @@
import argparse
import logging
import os
import shutil
import tempfile

from generate_md import generate_report_from_path

import h5py

import joblib
@@ -131,19 +127,6 @@ def evaluate(self, data_path):
return predictions, metrics, plot_paths


def generate_md(plots, metrics):
LOG.error(plots)
if not os.path.exists("markdown"):
os.mkdir("markdown")
if not os.path.exists("markdown/Evaluation"):
os.mkdir("markdown/Evaluation")
for plot, path in plots.items():
shutil.copy(path, "markdown/Evaluation/")
LOG.error(type(metrics))
metrics.to_csv("markdown/Evaluation/metrics.csv", index=False)
generate_report_from_path("markdown", "evaluation.pdf", format="pdf")


def generate_html_report(plots, metrics):
"""Generate an HTML evaluation report."""
plots_html = ""
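With the PDF path removed, the remaining `generate_html_report` helper builds the evaluation report as HTML. The diff above is truncated, so as a rough, non-authoritative sketch of that kind of helper — assuming, as in the deleted `generate_md`, that `plots` maps plot names to image paths and `metrics` is a pandas DataFrame (the function name and structure here are illustrative, not the actual implementation):

```python
import base64


def generate_html_report_sketch(plots, metrics, out_path="evaluation_report.html"):
    """Assemble a single self-contained HTML report from plot images and a metrics DataFrame."""
    parts = ["<html><body>", "<h1>Model Evaluation</h1>", "<h2>Metrics</h2>"]

    # A pandas DataFrame renders directly to an HTML table.
    parts.append(metrics.to_html(index=False))

    parts.append("<h2>Plots</h2>")
    for name, path in plots.items():
        # Embed each plot as a base64 data URI so the report needs no side files.
        with open(path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode("utf-8")
        parts.append(f"<h3>{name}</h3>")
        parts.append(f'<img src="data:image/png;base64,{encoded}" alt="{name}">')

    parts.append("</body></html>")
    with open(out_path, "w") as fh:
        fh.write("\n".join(parts))
    return out_path
```

Embedding the images as data URIs keeps the report a single self-contained file, which suits a Galaxy HTML output dataset.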
8 changes: 8 additions & 0 deletions tools/pycaret_predict.xml
@@ -48,6 +48,14 @@
</tests>
<help>
This tool uses PyCaret to evaluate a machine learning model or make predictions.

**Outputs**:

- **prediction**: The prediction results on the dataset, in CSV format.

- **report**: The evaluation report, in HTML format. It is generated only
if you upload a dataset with a target column and select that column in the target_feature input field.

</help>
<expand macro="macro_citations" />
</tool>
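To make the help text above concrete, the flow it describes — load a trained model, predict on the uploaded dataset, and write a CSV — might be sketched as below. File names are placeholders and the actual tool code lives in pycaret_predict.py; `predict_model` is PyCaret's helper for attaching prediction columns.

```python
import joblib
import pandas as pd
from pycaret.classification import predict_model

# Placeholder paths; the Galaxy wrapper supplies these from its XML parameters.
model = joblib.load("pycaret_model.h5")
data = pd.read_csv("input.csv")

# predict_model appends prediction columns; when the target column is present
# in `data`, PyCaret also scores the predictions, which feeds the HTML report.
predictions = predict_model(model, data=data)
predictions.to_csv("predictions.csv", index=False)
```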
39 changes: 25 additions & 14 deletions tools/pycaret_train.xml
@@ -49,14 +49,16 @@
#if $test_file
--test_file $test_file
#end if
--model_type $model_type &&
mkdir -p $comparison_result.extra_files_path &&
cp -r best_model.csv $comparison_result.extra_files_path
--model_type $model_type
]]>
</command>
<inputs>
<param name="input_file" type="data" format="csv,tabular" label="Input Dataset (CSV or TSV)" />
<param name="test_file" type="data" format="csv,tabular" optional="true" label="Test Dataset (CSV or TSV)" />
<param name="input_file" type="data" format="csv,tabular" label="Train Dataset (CSV or TSV)" />
<param name="test_file" type="data" format="csv,tabular" optional="true" label="Test Dataset (CSV or TSV)"
help="If a test set is not provided,
the selected training set will be split into training, validation, and test sets.
If a test set is provided, the training set will only be split into training and validation sets.
Note that cross-validation is always applied by default." />
<param name="target_feature" multiple="false" type="data_column" use_header_names="true" data_ref="input_file" label="Select the target column:" />
<conditional name="model_selection">
<param name="model_type" type="select" label="Task">
@@ -150,6 +152,7 @@
<outputs>
<data name="model" format="h5" from_work_dir="pycaret_model.h5" label="${tool.name} best model on ${on_string}" />
<data name="comparison_result" format="html" from_work_dir="comparison_result.html" label="${tool.name} Comparison result on ${on_string}"/>
<data name="best_model_csv" format="csv" from_work_dir="best_model.csv" label="${tool.name} The prams of the best model on ${on_string}" hidden="true" />
</outputs>
<tests>
<test>
@@ -166,33 +169,41 @@
<param name="remove_outliers" value="true"/>
<param name="remove_multicollinearity" value="true"/>
<output name="model" file="expected_model_classification_customized.h5" compare="sim_size"/>
<output name="comparison_result" file="expected_comparison_result_classification_customized.html" compare="sim_size">
<extra_files type="file" name="best_model.csv" value="expected_best_model_classification_customized.csv" />
</output>
<output name="comparison_result" file="expected_comparison_result_classification_customized.html" compare="sim_size" />
<output name="best_model_csv" value="expected_best_model_classification_customized.csv" />
</test>
<test>
<param name="input_file" value="pcr.tsv"/>
<param name="target_feature" value="11"/>
<param name="model_type" value="classification"/>
<param name="random_seed" value="42"/>
<output name="model" file="expected_model_classification.h5" compare="sim_size"/>
<output name="comparison_result" file="expected_comparison_result_classification.html" compare="sim_size">
<extra_files type="file" name="best_model.csv" value="expected_best_model_classification.csv" />
</output>
<output name="comparison_result" file="expected_comparison_result_classification.html" compare="sim_size" />
<output name="best_model_csv" value="expected_best_model_classification.csv" />
</test>
<test>
<param name="input_file" value="auto-mpg.tsv"/>
<param name="target_feature" value="1"/>
<param name="model_type" value="regression"/>
<param name="random_seed" value="42"/>
<output name="model" file="expected_model_regression.h5" compare="sim_size" />
<output name="comparison_result" file="expected_comparison_result_regression.html" compare="sim_size">
<extra_files type="file" name="best_model.csv" value="expected_best_model_regression.csv" />
</output>
<output name="comparison_result" file="expected_comparison_result_regression.html" compare="sim_size" />
<output name="best_model_csv" value="expected_best_model_regression.csv" />
</test>
</tests>
<help>
This tool uses PyCaret to train and evaluate machine learning models.
It compares different models on a dataset and provides the best model based on the performance metrics.

**Outputs**

- **Model**: The best model trained on the dataset, in h5 format.

- **Comparison Result**: The comparison of the evaluated models, in HTML format.
It contains the performance metrics of each model, plots of the best model
on the test set (or on a held-out part of the training set if a separate test set is not uploaded), and feature analysis plots.

</help>
<expand macro="macro_citations" />
</tool>
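The splitting behaviour described in the test_file help above maps onto PyCaret's `setup` call. A minimal illustrative sketch follows; the column and file names are placeholders, not taken from this repository.

```python
import pandas as pd
from pycaret.classification import compare_models, save_model, setup

train_df = pd.read_csv("train.csv")  # placeholder file names
test_df = pd.read_csv("test.csv")

# Without test_data, PyCaret holds out part of train_df as its own test split;
# with test_data, all of train_df goes to training/validation and test_df is
# reserved for final evaluation. Cross-validation runs during compare_models
# in both cases.
setup(data=train_df, target="label", test_data=test_df, session_id=42)

best = compare_models()            # ranks candidate models by CV metrics
save_model(best, "pycaret_model")  # persists the best pipeline
```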