diff --git a/data/mp/get_mp_traj.py b/data/mp/get_mp_traj.py
index 17954994..478356af 100644
--- a/data/mp/get_mp_traj.py
+++ b/data/mp/get_mp_traj.py
@@ -118,7 +118,7 @@
# %% use gzip CLI to check all files for archive corruption
for path in tqdm(glob(f"{module_dir}/mp-tasks/*.json.gz")):
try:
- subprocess.run(["gzip", "--test", path], check=True) # noqa: S607
+ subprocess.run(["gzip", "--test", path], check=True)
except subprocess.CalledProcessError as exc:
print(f"{path} raised {exc.stderr}")
# ask user to delete corrupted file
diff --git a/models/orb/orb.yml b/models/orb/orb.yml
index 102f80d9..d5b305f4 100644
--- a/models/orb/orb.yml
+++ b/models/orb/orb.yml
@@ -25,7 +25,7 @@ authors: # required (only name, other keys are optional)
- name: Arthur Hussey
affiliation: Orbital Materials
-repo: https://github.com/orbital-materials/orb-models/
+repo: https://github.com/orbital-materials/orb-models
url: "#" # placeholder
doi: "#" # placeholder
paper: "#" # placeholder
diff --git a/pyproject.toml b/pyproject.toml
index 47aef4b8..cb744287 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -106,7 +106,8 @@ ignore = [
"S108",
"S310",
"S311",
- "S603",
+ "S603", # subprocess-without-shell-equals-true
+ "S607", # start-process-with-partial-path
"T201",
"TD",
"TRY003",
diff --git a/readme.md b/readme.md
index 9e055acd..8ce69c98 100644
--- a/readme.md
+++ b/readme.md
@@ -25,7 +25,11 @@ Our results show that ML models have become robust enough to deploy them as tria
-We welcome contributions that add new models to the leaderboard through GitHub PRs. See the [contributing guide](https://janosh.github.io/matbench-discovery/contribute) for details.
+If you'd like to refer to Matbench Discovery in a publication, please cite the [preprint](https://doi.org/10.48550/arXiv.2308.14920):
+
+> Riebesell, Janosh, Rhys E. A. Goodall, Philipp Benner, Yuan Chiang, Bowen Deng, Alpha A. Lee, Anubhav Jain, and Kristin A. Persson. "Matbench Discovery -- A Framework to Evaluate Machine Learning Crystal Stability Predictions." arXiv, August 28, 2023. https://doi.org/10.48550/arXiv.2308.14920.
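+
+For convenience, here is the same reference as a BibTeX entry (the entry key and field layout are illustrative, derived from the citation above):
+
+```bib
+@misc{riebesell2023matbench,
+  title = {Matbench Discovery -- A Framework to Evaluate Machine Learning Crystal Stability Predictions},
+  author = {Riebesell, Janosh and Goodall, Rhys E. A. and Benner, Philipp and Chiang, Yuan and Deng, Bowen and Lee, Alpha A. and Jain, Anubhav and Persson, Kristin A.},
+  year = {2023},
+  eprint = {2308.14920},
+  archiveprefix = {arXiv},
+  doi = {10.48550/arXiv.2308.14920},
+}
+```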
+
+We welcome new model additions to the leaderboard through GitHub PRs. See the [contributing guide](https://janosh.github.io/matbench-discovery/contribute) for details.
If you're interested in joining this work, please reach out via [GitHub discussion](https://github.com/janosh/matbench-discovery/discussions) or [email](mailto:janosh.riebesell@gmail.com?subject=Collaborate%20on%20Matbench%20Discovery).
diff --git a/scripts/model_figs/metrics_tables.py b/scripts/model_figs/metrics_tables.py
index 3d905a02..91646f73 100644
--- a/scripts/model_figs/metrics_tables.py
+++ b/scripts/model_figs/metrics_tables.py
@@ -1,4 +1,8 @@
# %%
+import itertools
+import subprocess
+from glob import glob
+
import numpy as np
import pandas as pd
import yaml
@@ -7,7 +11,7 @@
from pymatviz.utils import si_fmt
from sklearn.dummy import DummyClassifier
-from matbench_discovery import DATA_DIR, PDF_FIGS, SCRIPTS, SITE_FIGS
+from matbench_discovery import DATA_DIR, PDF_FIGS, ROOT, SCRIPTS, SITE_FIGS
from matbench_discovery.data import DataFiles, df_wbm
from matbench_discovery.enums import MbdKey, Open
from matbench_discovery.metrics import stable_metrics
@@ -33,11 +37,11 @@
date_added_col = "Date Added"
df_met.loc[Key.train_set.label] = df_met.loc[date_added_col] = ""
-hide_closed = False # hide proprietary models (openness != OSOD)
-closed_models = [
+non_compliant_models = [
key
for key, meta in MODEL_METADATA.items()
- if meta.get("openness", Open.OSOD) != Open.OSOD
+ if meta.get("openness", Open.OSOD)
+ != Open.OSOD # TODO add `or uses_extra_training_data`
]
with open(f"{DATA_DIR}/training-sets.yml") as file:
@@ -51,7 +55,7 @@
# Add model version as hover tooltip to model name
model_version = model_metadata.get("model_version", "")
- css_cls = "proprietary" if model in closed_models else ""
+ css_cls = "non-compliant" if model in non_compliant_models else ""
attrs = {"title": f"Version: {model_version}", "class": css_cls}
html_attr_str = " ".join(f'{k}="{v}"' for k, v in attrs.items() if v)
df_met.loc[Key.model_name.label, model] = f"<span {html_attr_str}>{model}</span>"
@@ -205,18 +209,19 @@
]
show_cols = [*f"F1,DAF,Prec,Acc,TPR,TNR,MAE,RMSE,{R2_col}".split(","), *meta_cols]
-for label, df_met in (
- ("", df_metrics),
- ("-first-10k", df_metrics_10k),
- ("-uniq-protos", df_metrics_uniq_protos),
+for (label, df_met), show_non_compliant in itertools.product(
+ (
+ ("", df_metrics),
+ ("-first-10k", df_metrics_10k),
+ ("-uniq-protos", df_metrics_uniq_protos),
+ ),
+ (True, False),
):
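+ # render each (label, table) pair twice: once including and once excluding non-compliant models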
# abbreviate long column names
df_met = df_met.rename(index={"R2": R2_col, "Precision": "Prec", "Accuracy": "Acc"})
df_met.index.name = "Model"
# only keep columns we want to show
- df_table = df_met.drop(columns=closed_models if hide_closed else []).T.filter(
- show_cols
- )
+ df_table = df_met.T.filter(show_cols)
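+ # keep all models here; non-compliant rows are hidden at render time via styler.hide below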
df_table = df_table.set_index(Key.model_name.label)
df_table.index.name = None
@@ -276,15 +281,37 @@
# draw line between classification and regression metrics
styles=f"{col_selector} {{ border-left: 1px solid white; }}{hide_scroll_bar}",
)
- suffix = "" if hide_closed else "-with-closed"
+ suffix = "" if show_non_compliant else "-only-compliant"
+ non_compliant_idx = [ # get index HTML strings of non-compliant models
+ idx
+ for idx in styler.index
+ if any(f">{model_name}<" in idx for model_name in non_compliant_models)
+ ]
try:
- df_to_pdf(styler, f"{PDF_FIGS}/metrics-table{label}{suffix}.pdf")
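+ # write each table to both the PDF figures dir and the site's static figs (converted to SVG below)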
+ for pdf_path in (PDF_FIGS, f"{ROOT}/site/static/figs"):
+ df_to_pdf(
+ styler.hide([] if show_non_compliant else non_compliant_idx),
+ f"{pdf_path}/metrics-table{label}{suffix}.pdf",
+ )
except (ImportError, RuntimeError) as exc:
print(f"df_to_pdf failed: {exc}")
display(styler.set_caption(df_met.attrs.get("title")))
+try:
+ # convert PDFs in site/static/figs to SVGs
+ for pdf_path in glob(f"{ROOT}/site/static/figs/metrics-table*.pdf"):
+ subprocess.run(
+ ["pdf2svg", pdf_path, pdf_path.replace(".pdf", ".svg")], check=False
+ )
+
+ # svgo compress SVGs
+ subprocess.run(["svgo", "--multipass", f"{ROOT}/site/static/figs"], check=False)
+except FileNotFoundError: # skip in CI, where pdf2svg and svgo aren't installed
+ pass
+
+
# %% PNG metrics table unused
if False:
try:
diff --git a/site/src/figs/metrics-table-first-10k.svelte b/site/src/figs/metrics-table-first-10k.svelte
index 06d10e79..efcd2169 100644
--- a/site/src/figs/metrics-table-first-10k.svelte
+++ b/site/src/figs/metrics-table-first-10k.svelte
@@ -358,7 +358,7 @@
- <th><span class="proprietary">MatterSim</span></th>
+ <th><span class="non-compliant">MatterSim</span></th>
<td>0.989</td>
<td>6.401</td>
<td>0.978</td>
@@ -368,7 +368,7 @@
<td>0.906</td>
- <th><span class="proprietary">ORB</span></th>
+ <th><span class="non-compliant">ORB</span></th>
<td>0.988</td>
<td>6.390</td>
<td>0.977</td>
@@ -378,7 +378,7 @@
<td>0.908</td>
- <th><span class="proprietary">GNoME</span></th>
+ <th><span class="non-compliant">GNoME</span></th>
<td>0.967</td>
<td>6.127</td>
<td>0.937</td>
diff --git a/site/src/figs/metrics-table-uniq-protos.svelte b/site/src/figs/metrics-table-uniq-protos.svelte
index 1cc20981..5b228066 100644
--- a/site/src/figs/metrics-table-uniq-protos.svelte
+++ b/site/src/figs/metrics-table-uniq-protos.svelte
@@ -412,7 +412,7 @@
- <th><span class="proprietary">ORB</span></th>
+ <th><span class="non-compliant">ORB</span></th>
<td>0.867</td>
<td>6.020</td>
<td>0.920</td>
@@ -428,7 +428,7 @@
<td>2024-09-02</td>
- <th><span class="proprietary">MatterSim</span></th>
+ <th><span class="non-compliant">MatterSim</span></th>
<td>0.859</td>
<td>5.646</td>
<td>0.863</td>
@@ -444,7 +444,7 @@
<td>2024-06-16</td>
- <th><span class="proprietary">GNoME</span></th>
+ <th><span class="non-compliant">GNoME</span></th>
<td>0.829</td>
<td>5.523</td>
<td>0.844</td>
diff --git a/site/src/figs/metrics-table.svelte b/site/src/figs/metrics-table.svelte
index bda8dbb5..cdf94348 100644
--- a/site/src/figs/metrics-table.svelte
+++ b/site/src/figs/metrics-table.svelte
@@ -432,7 +432,7 @@
- <th><span class="proprietary">ORB</span></th>
+ <th><span class="non-compliant">ORB</span></th>
<td>0.845</td>
<td>5.263</td>
<td>0.903</td>
@@ -444,7 +444,7 @@
<td>0.804</td>
- <th><span class="proprietary">MatterSim</span></th>
+ <th><span class="non-compliant">MatterSim</span></th>
<td>0.832</td>
<td>4.838</td>
<td>0.830</td>
@@ -456,7 +456,7 @@
<td>0.809</td>
- <th><span class="proprietary">GNoME</span></th>
+ <th><span class="non-compliant">GNoME</span></th>
<td>0.810</td>
<td>4.810</td>
<td>0.825</td>
diff --git a/site/src/lib/CaptionedMetricsTable.svelte b/site/src/lib/CaptionedMetricsTable.svelte
index a43ba1f7..f0d3bff8 100644
--- a/site/src/lib/CaptionedMetricsTable.svelte
+++ b/site/src/lib/CaptionedMetricsTable.svelte
@@ -2,14 +2,23 @@
import MetricsTable from '$figs/metrics-table-uniq-protos.svelte'
import { pretty_num } from 'elementari'
- export let show_proprietary = false
+ export let show_non_compliant = false
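+ // when false, models that don't meet the benchmark's compliance criteria are hidden from the table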
let n_wbm_stable_uniq_protos = 32_942
let n_wbm_uniq_protos = 215_488
-