# Reproduce Paper #419
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Reproduce Paper

# This action runs the Jupyter notebooks in the examples folder that use astartes to generate
# train/val/test splits on two datasets (RDB7 and QM9). These dynamically generated splits
# are then compared to the splits which were created at the time of paper writing, to ensure
# that subsequent releases of astartes have backwards compatibility.
on:
  schedule:
    # Monday through Friday at 08:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 8 * * 1-5"
  # Also allow the workflow to be started manually.
  workflow_dispatch:

# Only one run of this workflow at a time: a newly triggered run cancels the
# one still in progress.
concurrency:
  group: actions-id-${{ github.workflow }}
  cancel-in-progress: true

jobs:
  build:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Login shell (-l) so the micromamba environment created below is
        # activated in every `run` step; -e aborts a step on the first error.
        shell: bash -el {0}
    name: Reproduce Paper Data Splits
    steps:
      # Release tags are used instead of moving branches / retired majors:
      # upload-artifact@v3 is no longer accepted on github.com, and checkout@v3
      # targets a retired Node runtime.
      - uses: actions/checkout@v4
      - uses: mamba-org/setup-micromamba@v2
        with:
          environment-name: temp
          condarc: |
            channels:
              - defaults
              - conda-forge
            channel_priority: flexible
          create-args: |
            python=3.11

      - name: Install Dependencies
        # NOTE(review): the scikit-learn and numpy pins presumably match the
        # environment in which the reference splits were created - confirm
        # before loosening them.
        run: |
          python -m pip install -e ".[molecules]"
          python -m pip install scikit-learn==1.2.2
          python -m pip install notebook
          python -m pip install "numpy<2"

      - name: Backup Reference Splits
        # Copy the committed split files to REFERENCE_* names before the
        # notebooks run (the notebooks presumably overwrite the originals in
        # QM9_splits/ and RDB7_splits/ - verify against the notebooks).
        run: |
          cd examples/reproduce_paper_splits
          cp QM9_splits/QM9_splits_kmeans.pkl REFERENCE_QM9_splits_kmeans.pkl
          cp QM9_splits/QM9_splits_random.pkl REFERENCE_QM9_splits_random.pkl
          cp QM9_splits/QM9_splits_scaffold.pkl REFERENCE_QM9_splits_scaffold.pkl
          cp RDB7_splits/RDB7_splits_kmeans.pkl REFERENCE_RDB7_splits_kmeans.pkl
          cp RDB7_splits/RDB7_splits_random.pkl REFERENCE_RDB7_splits_random.pkl
          cp RDB7_splits/RDB7_splits_scaffold.pkl REFERENCE_RDB7_splits_scaffold.pkl

      - name: Execute Splitting Notebooks
        run: |
          cd examples/reproduce_paper_splits
          ipython QM9_make_splits.ipynb
          ipython RDB7_make_splits.ipynb

      # Uploaded before the comparison step so the generated splits are
      # available for inspection even when validation fails.
      - name: Upload New Splits as Artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dynamically-generated-splits
          path: |
            examples/reproduce_paper_splits/QM9_splits/
            examples/reproduce_paper_splits/RDB7_splits/

      - name: Compare Reference to Current
        # NOTE(review): validate_splits.py is expected to compare the
        # REFERENCE_* copies with the regenerated files - confirm in the script.
        run: |
          cd examples/reproduce_paper_splits
          python validate_splits.py