diff --git a/docs/core/datasets/oc20dense.md b/docs/core/datasets/oc20dense.md index fb07a09ad0..64639889cc 100644 --- a/docs/core/datasets/oc20dense.md +++ b/docs/core/datasets/oc20dense.md @@ -11,7 +11,7 @@ The OC20Dense dataset is a validation dataset which was used to assess model per |ASE Trajectories |29G |112G | [ee937e5290f8f720c914dc9a56e0281f](https://dl.fbaipublicfiles.com/opencatalystproject/data/adsorbml/oc20_dense_trajectories.tar.gz) | The following files are also provided to be used for evaluation and general information: -* `oc20dense_mapping.pkl` : Mapping of the LMDB `sid` to general metadata information - +* `oc20dense_mapping.pkl` : Mapping of the LMDB `sid` to general metadata information. If this file is not present, run the command `python src/fairchem/core/scripts/download_large_files.py adsorbml` from the root of the fairchem repo to download it. - * `system_id`: Unique system identifier for an adsorbate, bulk, surface combination. * `config_id`: Unique configuration identifier, where `rand` and `heur` correspond to random and heuristic initial configurations, respectively. * `mpid`: Materials Project bulk identifier. diff --git a/docs/tutorials/NRR/NRR_example.md b/docs/tutorials/NRR/NRR_example.md index b69e078d1a..cc5ab6d074 100644 --- a/docs/tutorials/NRR/NRR_example.md +++ b/docs/tutorials/NRR/NRR_example.md @@ -62,7 +62,7 @@ To do this, we will enumerate adsorbate-slab configurations and run ML relaxatio +++ -Be sure to set the path in `fairchem/data/oc/configs/paths.py` to point to the correct place or pass the paths as an argument. The database pickles can be found in `fairchem/data/oc/databases/pkls`. We will show one explicitly here as an example and then run all of them in an automated fashion for brevity. +Be sure to set the path in `fairchem/data/oc/configs/paths.py` to point to the correct place or pass the paths as an argument. The database pickles can be found in `fairchem/data/oc/databases/pkls` (some pkl files are only downloaded by running the command `python src/fairchem/core/scripts/download_large_files.py oc` from the root of the fairchem repo). We will show one explicitly here as an example and then run all of them in an automated fashion for brevity. ```{code-cell} ipython3 import fairchem.data.oc diff --git a/src/fairchem/applications/AdsorbML/README.md b/src/fairchem/applications/AdsorbML/README.md index ca5be57379..700c06b67c 100644 --- a/src/fairchem/applications/AdsorbML/README.md +++ b/src/fairchem/applications/AdsorbML/README.md @@ -21,7 +21,7 @@ NOTE - ASE trajectories exclude systems that were not converged or had invalid c |ASE Trajectories |29G |112G | [ee937e5290f8f720c914dc9a56e0281f](https://dl.fbaipublicfiles.com/opencatalystproject/data/adsorbml/oc20_dense_trajectories.tar.gz) | The following files are also provided to be used for evaluation and general information: -* `oc20dense_mapping.pkl` : Mapping of the LMDB `sid` to general metadata information - +* `oc20dense_mapping.pkl` : Mapping of the LMDB `sid` to general metadata information. If this file is not present, run the command `python src/fairchem/core/scripts/download_large_files.py adsorbml` from the root of the fairchem repo to download it. - * `system_id`: Unique system identifier for an adsorbate, bulk, surface combination. * `config_id`: Unique configuration identifier, where `rand` and `heur` correspond to random and heuristic initial configurations, respectively. * `mpid`: Materials Project bulk identifier. 
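The documentation changes above point users at the CLI entry point (`python src/fairchem/core/scripts/download_large_files.py adsorbml`). The same download can also be triggered from Python via the helper this PR adds, as `challenge_eval.py` does below; a minimal sketch, assuming an editable checkout of the repo so the files land inside it:

```python
# Programmatic equivalent of:
#   python src/fairchem/core/scripts/download_large_files.py adsorbml
from fairchem.core.scripts import download_large_files

# The "adsorbml" group covers oc20dense_mapping.pkl and ml_relaxed_dft_targets.pkl;
# files that already exist in the checkout are skipped.
download_large_files.download_file_group("adsorbml")
```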
diff --git a/src/fairchem/applications/AdsorbML/adsorbml/2023_neurips_challenge/challenge_eval.py b/src/fairchem/applications/AdsorbML/adsorbml/2023_neurips_challenge/challenge_eval.py index d7e801fe0b..01c492bbae 100644 --- a/src/fairchem/applications/AdsorbML/adsorbml/2023_neurips_challenge/challenge_eval.py +++ b/src/fairchem/applications/AdsorbML/adsorbml/2023_neurips_challenge/challenge_eval.py @@ -7,6 +7,8 @@ import numpy as np +from fairchem.core.scripts import download_large_files + def is_successful(best_pred_energy, best_dft_energy, SUCCESS_THRESHOLD=0.1): """ @@ -161,6 +163,11 @@ def main(): # targets and metadata are expected to be in # the same directory as this script + if ( + not Path(__file__).with_name("oc20dense_val_targets.pkl").exists() + or not Path(__file__).with_name("ml_relaxed_dft_targets.pkl").exists() + ): + download_large_files.download_file_group("adsorbml") targets = pickle.load( open(Path(__file__).with_name("oc20dense_val_targets.pkl"), "rb") ) diff --git a/src/fairchem/core/models/equiformer_v2/transformer_block.py b/src/fairchem/core/models/equiformer_v2/transformer_block.py index bdb97ea468..e7669d301b 100755 --- a/src/fairchem/core/models/equiformer_v2/transformer_block.py +++ b/src/fairchem/core/models/equiformer_v2/transformer_block.py @@ -652,7 +652,7 @@ def forward( batch, # for GraphDropPath node_offset: int = 0, ): - output_embedding = x + output_embedding = x.clone() x_res = output_embedding.embedding output_embedding.embedding = self.norm_1(output_embedding.embedding) diff --git a/src/fairchem/core/scripts/download_large_files.py b/src/fairchem/core/scripts/download_large_files.py new file mode 100644 index 0000000000..f79fa21561 --- /dev/null +++ b/src/fairchem/core/scripts/download_large_files.py @@ -0,0 +1,76 @@ +from __future__ import annotations + +import argparse +from pathlib import Path +from urllib.request import urlretrieve + +from fairchem.core.common.tutorial_utils import fairchem_root + +S3_ROOT = "https://dl.fbaipublicfiles.com/opencatalystproject/data/large_files/" + +FILE_GROUPS = { + "odac": [ + Path("configs/odac/s2ef/scaling_factors/painn.pt"), + Path("src/fairchem/data/odac/force_field/data_w_oms.json"), + Path( + "src/fairchem/data/odac/promising_mof/promising_mof_features/JmolData.jar" + ), + Path( + "src/fairchem/data/odac/promising_mof/promising_mof_energies/adsorption_energy.txt" + ), + Path("src/fairchem/data/odac/supercell_info.csv"), + ], + "oc": [Path("src/fairchem/data/oc/databases/pkls/bulks.pkl")], + "adsorbml": [ + Path( + "src/fairchem/applications/AdsorbML/adsorbml/2023_neurips_challenge/oc20dense_mapping.pkl" + ), + Path( + "src/fairchem/applications/AdsorbML/adsorbml/2023_neurips_challenge/ml_relaxed_dft_targets.pkl" + ), + ], + "cattsunami": [ + Path("tests/applications/cattsunami/tests/autoframe_inputs_dissociation.pkl"), + Path("tests/applications/cattsunami/tests/autoframe_inputs_transfer.pkl"), + ], + "docs": [ + Path("docs/tutorials/NRR/NRR_example_bulks.pkl"), + Path("docs/core/fine-tuning/supporting-information.json"), + ], +} + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument( + "file_group", + type=str, + help="Group of files to download", + default="ALL", + choices=["ALL", *list(FILE_GROUPS)], + ) + return parser.parse_args() + + +def download_file_group(file_group): + if file_group in FILE_GROUPS: + files_to_download = FILE_GROUPS[file_group] + elif file_group == "ALL": + files_to_download = [item for group in FILE_GROUPS.values() for item in group] + else: + raise 
ValueError( + f'Requested file group {file_group} not recognized. Please select one of {["ALL", *list(FILE_GROUPS)]}' + ) + + fc_root = fairchem_root().parents[1] + for file in files_to_download: + if not (fc_root / file).exists(): + print(f"Downloading {file}...") + urlretrieve(S3_ROOT + file.name, fc_root / file) + else: + print(f"{file} already exists") + + +if __name__ == "__main__": + args = parse_args() + download_file_group(args.file_group) diff --git a/src/fairchem/data/oc/README.md b/src/fairchem/data/oc/README.md index 20205d1d5a..06aba8887f 100644 --- a/src/fairchem/data/oc/README.md +++ b/src/fairchem/data/oc/README.md @@ -9,6 +9,7 @@ This repository hosts the adsorbate-catalyst input generation workflow used in t To install just run in your favorite environment with python >= 3.9 * `pip install fairchem-data-oc` +* `python src/fairchem/core/scripts/download_large_files.py oc` ## Workflow @@ -155,7 +156,7 @@ python structure_generator.py \ ### Bulks -A database of bulk materials taken from existing databases (i.e. Materials Project) and relaxed with consistent RPBE settings may be found in `ocdata/databases/pkls/bulks.pkl`. To preview what bulks are available, view the corresponding mapping between indices and bulks (bulk id and composition): https://dl.fbaipublicfiles.com/opencatalystproject/data/input_generation/mapping_bulks_2021sep20.txt +A database of bulk materials taken from existing databases (i.e. Materials Project) and relaxed with consistent RPBE settings may be found in `databases/pkls/bulks.pkl` (if not, run the command `python src/fairchem/core/scripts/download_large_files.py oc` from the root of the fairchem repo). To preview what bulks are available, view the corresponding mapping between indices and bulks (bulk id and composition): https://dl.fbaipublicfiles.com/opencatalystproject/data/input_generation/mapping_bulks_2021sep20.txt ### Adsorbates diff --git a/src/fairchem/data/oc/core/bulk.py b/src/fairchem/data/oc/core/bulk.py index 9568ad3622..6710b43880 100644 --- a/src/fairchem/data/oc/core/bulk.py +++ b/src/fairchem/data/oc/core/bulk.py @@ -9,6 +9,8 @@ from fairchem.data.oc.core.slab import Slab from fairchem.data.oc.databases.pkls import BULK_PKL_PATH +from fairchem.core.scripts import download_large_files + if TYPE_CHECKING: import ase @@ -51,6 +53,8 @@ def __init__( self.src_id = None else: if bulk_db is None: + if bulk_db_path == BULK_PKL_PATH and not os.path.exists(BULK_PKL_PATH): + download_large_files.download_file_group("oc") with open(bulk_db_path, "rb") as fp: bulk_db = pickle.load(fp) diff --git a/src/fairchem/data/oc/databases/update.py b/src/fairchem/data/oc/databases/update.py index f9ca1f6452..bab75709c3 100644 --- a/src/fairchem/data/oc/databases/update.py +++ b/src/fairchem/data/oc/databases/update.py @@ -6,12 +6,15 @@ from __future__ import annotations import pickle +from pathlib import Path import ase.io from ase.atoms import Atoms from ase.calculators.singlepoint import SinglePointCalculator as SPC from tqdm import tqdm +from fairchem.core.scripts import download_large_files + # Monkey patch fix def pbc_patch(self): @@ -29,7 +32,7 @@ def set_pbc_patch(self, pbc): def update_pkls(): with open( - "ocdata/databases/pkls/adsorbates.pkl", + "oc/databases/pkls/adsorbates.pkl", "rb", ) as fp: data = pickle.load(fp) @@ -38,13 +41,15 @@ def update_pkls(): pbc = data[idx][0].cell._pbc data[idx][0]._pbc = pbc with open( - "ocdata/databases/pkls/adsorbates_new.pkl", + "oc/databases/pkls/adsorbates_new.pkl", "wb", ) as fp: pickle.dump(data, fp) + if 
not Path("oc/databases/pkls/bulks.pkl").exists(): + download_large_files.download_file_group("oc") with open( - "ocdata/databases/pkls/bulks.pkl", + "oc/databases/pkls/bulks.pkl", "rb", ) as fp: data = pickle.load(fp) @@ -64,7 +69,7 @@ def update_pkls(): bulks.append((atoms, bulk_id)) with open( - "ocdata/databases/pkls/bulks_new.pkl", + "oc/databases/pkls/bulks_new.pkl", "wb", ) as f: pickle.dump(bulks, f) @@ -73,7 +78,7 @@ def update_pkls(): def update_dbs(): for db_name in ["adsorbates", "bulks"]: db = ase.io.read( - f"ocdata/databases/ase/{db_name}.db", + f"oc/databases/ase/{db_name}.db", ":", ) new_data = [] @@ -90,7 +95,7 @@ def update_dbs(): new_data.append(atoms) ase.io.write( - f"ocdata/databases/ase/{db_name}_new.db", + f"oc/databases/ase/{db_name}_new.db", new_data, ) diff --git a/src/fairchem/data/odac/README.md b/src/fairchem/data/odac/README.md index d6529edd74..f46ababd05 100644 --- a/src/fairchem/data/odac/README.md +++ b/src/fairchem/data/odac/README.md @@ -4,9 +4,11 @@ To download the ODAC23 dataset, please see the links [here](https://fair-chem.gi Pre-trained ML models and configs are available [here](https://fair-chem.github.io/core/model_checkpoints.html#open-direct-air-capture-2023-odac23). +Large ODAC files can be downloaded by running the command `python src/fairchem/core/scripts/download_large_files.py odac` from the root of the fairchem repo. + This repository contains the list of [promising MOFs](https://github.com/FAIR-Chem/fairchem/tree/main/src/fairchem/data/odac/promising_mof) discovered in the ODAC23 paper, as well as details of the [classifical force field calculations](https://github.com/FAIR-Chem/fairchem/tree/main/src/fairchem/data/odac/force_field). -Information about supercells can be found in [supercell_info.csv](https://github.com/FAIR-Chem/fairchem/blob/main/src/fairchem/data/odac/supercell_info.csv) for each example. +Information about supercells can be found in [supercell_info.csv](https://dl.fbaipublicfiles.com/opencatalystproject/data/large_files/supercell_info.csv) for each example (this file is downloaded to the local repo only when the above script is run). ## Citing diff --git a/src/fairchem/data/odac/force_field/README.md b/src/fairchem/data/odac/force_field/README.md index debe565bda..25714603f3 100644 --- a/src/fairchem/data/odac/force_field/README.md +++ b/src/fairchem/data/odac/force_field/README.md @@ -2,7 +2,7 @@ This folder contains data and scripts related to the classical FF analysis performed in this work. -- The `data_w_oms.json` file contains all successful FF interaction energy calculations with both system information and DFT-computed interaction energies. Calculations were performed across the in-domain training, validation, and test sets. +- The `data_w_oms.json` file contains all successful FF interaction energy calculations with both system information and DFT-computed interaction energies. Calculations were performed across the in-domain training, validation, and test sets. If this file is not present, run the command `python src/fairchem/core/scripts/download_large_files.py odac` from the root of the fairchem repo to download it. - The `data_w_ml.json` file contains the same information for systems with successful ML interaction energy predictions. Only systems in the in-domain test set are included here. - The `FF_analysis.py` script performs the error calculations discussed in the paper and generates the four panels of Figure 5. 
All of the data used in this analysis is contained in `data_w_oms.json` for reproducibility. - The `FF_calcs` folder contains example calculations for classical FF interaction energy predictions. diff --git a/src/fairchem/data/odac/promising_mof/promising_mof_energies/energy.py b/src/fairchem/data/odac/promising_mof/promising_mof_energies/energy.py index 6a9d37924e..547806cc01 100644 --- a/src/fairchem/data/odac/promising_mof/promising_mof_energies/energy.py +++ b/src/fairchem/data/odac/promising_mof/promising_mof_energies/energy.py @@ -1,8 +1,14 @@ from __future__ import annotations +import os + import matplotlib.pyplot as plt import pandas as pd +from fairchem.core.scripts import download_large_files + +if not os.path.exists("adsorption_energy.txt"): + download_large_files.download_file_group("odac") raw_ads_energy_data = pd.read_csv("adsorption_energy.txt", header=None, sep=" ") complete_data = pd.DataFrame( index=range(raw_ads_energy_data.shape[0]), @@ -170,12 +176,12 @@ current_lowest_energy < lowest_energy_data_co2.loc[index_this_case, "ads_energy_ev"] ): - lowest_energy_data_co2.loc[index_this_case, "ads_energy_ev"] = ( - current_lowest_energy - ) - lowest_energy_data_co2.loc[index_this_case, "configuration_index"] = ( - current_configuration_index - ) + lowest_energy_data_co2.loc[ + index_this_case, "ads_energy_ev" + ] = current_lowest_energy + lowest_energy_data_co2.loc[ + index_this_case, "configuration_index" + ] = current_configuration_index lowest_energy_data_co2.loc[index_this_case, "Name"] = current_name @@ -212,12 +218,12 @@ current_lowest_energy < lowest_energy_data_h2o.loc[index_this_case, "ads_energy_ev"] ): - lowest_energy_data_h2o.loc[index_this_case, "ads_energy_ev"] = ( - current_lowest_energy - ) - lowest_energy_data_h2o.loc[index_this_case, "configuration_index"] = ( - current_configuration_index - ) + lowest_energy_data_h2o.loc[ + index_this_case, "ads_energy_ev" + ] = current_lowest_energy + lowest_energy_data_h2o.loc[ + index_this_case, "configuration_index" + ] = current_configuration_index lowest_energy_data_h2o.loc[index_this_case, "Name"] = current_name lowest_energy_data_co_ads = pd.DataFrame( @@ -254,12 +260,12 @@ current_lowest_energy < lowest_energy_data_co_ads.loc[index_this_case, "ads_energy_ev"] ): - lowest_energy_data_co_ads.loc[index_this_case, "ads_energy_ev"] = ( - current_lowest_energy - ) - lowest_energy_data_co_ads.loc[index_this_case, "configuration_index"] = ( - current_configuration_index - ) + lowest_energy_data_co_ads.loc[ + index_this_case, "ads_energy_ev" + ] = current_lowest_energy + lowest_energy_data_co_ads.loc[ + index_this_case, "configuration_index" + ] = current_configuration_index lowest_energy_data_co_ads.loc[index_this_case, "Name"] = current_name @@ -298,12 +304,12 @@ current_lowest_energy < lowest_energy_data_co_ads_2.loc[index_this_case, "ads_energy_ev"] ): - lowest_energy_data_co_ads_2.loc[index_this_case, "ads_energy_ev"] = ( - current_lowest_energy - ) - lowest_energy_data_co_ads_2.loc[index_this_case, "configuration_index"] = ( - current_configuration_index - ) + lowest_energy_data_co_ads_2.loc[ + index_this_case, "ads_energy_ev" + ] = current_lowest_energy + lowest_energy_data_co_ads_2.loc[ + index_this_case, "configuration_index" + ] = current_configuration_index lowest_energy_data_co_ads_2.loc[index_this_case, "Name"] = current_name @@ -439,9 +445,9 @@ current_lowest_energy < lowest_energy_data_co2_defective.loc[index_this_case, "ads_energy_ev"] ): - lowest_energy_data_co2_defective.loc[index_this_case, 
"ads_energy_ev"] = ( - current_lowest_energy - ) + lowest_energy_data_co2_defective.loc[ + index_this_case, "ads_energy_ev" + ] = current_lowest_energy lowest_energy_data_co2_defective.loc[ index_this_case, "configuration_index" ] = current_configuration_index @@ -485,9 +491,9 @@ current_lowest_energy < lowest_energy_data_h2o_defective.loc[index_this_case, "ads_energy_ev"] ): - lowest_energy_data_h2o_defective.loc[index_this_case, "ads_energy_ev"] = ( - current_lowest_energy - ) + lowest_energy_data_h2o_defective.loc[ + index_this_case, "ads_energy_ev" + ] = current_lowest_energy lowest_energy_data_h2o_defective.loc[ index_this_case, "configuration_index" ] = current_configuration_index @@ -542,9 +548,9 @@ lowest_energy_data_co_ads_defective.loc[ index_this_case, "configuration_index" ] = current_configuration_index - lowest_energy_data_co_ads_defective.loc[index_this_case, "Name"] = ( - current_name - ) + lowest_energy_data_co_ads_defective.loc[ + index_this_case, "Name" + ] = current_name lowest_energy_data_co_ads_2_defective = pd.DataFrame( columns=complete_data_merged_defective_co_ads_2.columns @@ -600,9 +606,9 @@ lowest_energy_data_co_ads_2_defective.loc[ index_this_case, "configuration_index" ] = current_configuration_index - lowest_energy_data_co_ads_2_defective.loc[index_this_case, "Name"] = ( - current_name - ) + lowest_energy_data_co_ads_2_defective.loc[ + index_this_case, "Name" + ] = current_name adsorption_data_defective = pd.DataFrame( @@ -646,136 +652,132 @@ # adsorption_data_defective_defective.iloc[count,0]=mof_name - adsorption_data_defective.loc[count, "n_converged_CO2"] = ( - complete_data_merged_defective[ - (complete_data_merged_defective["MOF"] == mof_name) - & (complete_data_merged_defective["defect_conc"] == current_defect_conc) - & (complete_data_merged_defective["defect_index"] == current_defect_index) - & (complete_data_merged_defective["n_CO2"] == 1) - & (complete_data_merged_defective["n_H2O"] == 0) - ].shape[0] - ) - adsorption_data_defective.loc[count, "n_converged_H2O"] = ( - complete_data_merged_defective[ - (complete_data_merged_defective["MOF"] == mof_name) - & (complete_data_merged_defective["defect_conc"] == current_defect_conc) - & (complete_data_merged_defective["defect_index"] == current_defect_index) - & (complete_data_merged_defective["n_CO2"] == 0) - & (complete_data_merged_defective["n_H2O"] == 1) - ].shape[0] - ) - adsorption_data_defective.loc[count, "n_converged_co"] = ( - complete_data_merged_defective[ - (complete_data_merged_defective["MOF"] == mof_name) - & (complete_data_merged_defective["defect_conc"] == current_defect_conc) - & (complete_data_merged_defective["defect_index"] == current_defect_index) - & (complete_data_merged_defective["n_CO2"] == 1) - & (complete_data_merged_defective["n_H2O"] == 1) - ].shape[0] - ) - adsorption_data_defective.loc[count, "n_converged_co_2"] = ( - complete_data_merged_defective[ - (complete_data_merged_defective["MOF"] == mof_name) - & (complete_data_merged_defective["defect_conc"] == current_defect_conc) - & (complete_data_merged_defective["defect_index"] == current_defect_index) - & (complete_data_merged_defective["n_CO2"] == 1) - & (complete_data_merged_defective["n_H2O"] == 2) - ].shape[0] - ) + adsorption_data_defective.loc[ + count, "n_converged_CO2" + ] = complete_data_merged_defective[ + (complete_data_merged_defective["MOF"] == mof_name) + & (complete_data_merged_defective["defect_conc"] == current_defect_conc) + & (complete_data_merged_defective["defect_index"] == current_defect_index) + & 
(complete_data_merged_defective["n_CO2"] == 1) + & (complete_data_merged_defective["n_H2O"] == 0) + ].shape[ + 0 + ] + adsorption_data_defective.loc[ + count, "n_converged_H2O" + ] = complete_data_merged_defective[ + (complete_data_merged_defective["MOF"] == mof_name) + & (complete_data_merged_defective["defect_conc"] == current_defect_conc) + & (complete_data_merged_defective["defect_index"] == current_defect_index) + & (complete_data_merged_defective["n_CO2"] == 0) + & (complete_data_merged_defective["n_H2O"] == 1) + ].shape[ + 0 + ] + adsorption_data_defective.loc[ + count, "n_converged_co" + ] = complete_data_merged_defective[ + (complete_data_merged_defective["MOF"] == mof_name) + & (complete_data_merged_defective["defect_conc"] == current_defect_conc) + & (complete_data_merged_defective["defect_index"] == current_defect_index) + & (complete_data_merged_defective["n_CO2"] == 1) + & (complete_data_merged_defective["n_H2O"] == 1) + ].shape[ + 0 + ] + adsorption_data_defective.loc[ + count, "n_converged_co_2" + ] = complete_data_merged_defective[ + (complete_data_merged_defective["MOF"] == mof_name) + & (complete_data_merged_defective["defect_conc"] == current_defect_conc) + & (complete_data_merged_defective["defect_index"] == current_defect_index) + & (complete_data_merged_defective["n_CO2"] == 1) + & (complete_data_merged_defective["n_H2O"] == 2) + ].shape[ + 0 + ] if not lowest_energy_data_co2_defective[ (lowest_energy_data_co2_defective["MOF"] == mof_name) & (lowest_energy_data_co2_defective["defect_conc"] == current_defect_conc) & (lowest_energy_data_co2_defective["defect_index"] == current_defect_index) ].empty: - adsorption_data_defective.loc[count, "ads_CO2"] = ( - lowest_energy_data_co2_defective[ - (lowest_energy_data_co2_defective["MOF"] == mof_name) - & ( - lowest_energy_data_co2_defective["defect_conc"] - == current_defect_conc - ) - & ( - lowest_energy_data_co2_defective["defect_index"] - == current_defect_index - ) - ].iloc[0, 6] - ) - adsorption_data_defective.loc[count, "config_CO2"] = ( - lowest_energy_data_co2_defective[ - (lowest_energy_data_co2_defective["MOF"] == mof_name) - & ( - lowest_energy_data_co2_defective["defect_conc"] - == current_defect_conc - ) - & ( - lowest_energy_data_co2_defective["defect_index"] - == current_defect_index - ) - ].iloc[0, 5] - ) + adsorption_data_defective.loc[ + count, "ads_CO2" + ] = lowest_energy_data_co2_defective[ + (lowest_energy_data_co2_defective["MOF"] == mof_name) + & (lowest_energy_data_co2_defective["defect_conc"] == current_defect_conc) + & (lowest_energy_data_co2_defective["defect_index"] == current_defect_index) + ].iloc[ + 0, 6 + ] + adsorption_data_defective.loc[ + count, "config_CO2" + ] = lowest_energy_data_co2_defective[ + (lowest_energy_data_co2_defective["MOF"] == mof_name) + & (lowest_energy_data_co2_defective["defect_conc"] == current_defect_conc) + & (lowest_energy_data_co2_defective["defect_index"] == current_defect_index) + ].iloc[ + 0, 5 + ] if not lowest_energy_data_h2o_defective[ (lowest_energy_data_h2o_defective["MOF"] == mof_name) & (lowest_energy_data_h2o_defective["defect_conc"] == current_defect_conc) & (lowest_energy_data_h2o_defective["defect_index"] == current_defect_index) ].empty: - adsorption_data_defective.loc[count, "ads_H2O"] = ( - lowest_energy_data_h2o_defective[ - (lowest_energy_data_h2o_defective["MOF"] == mof_name) - & ( - lowest_energy_data_h2o_defective["defect_conc"] - == current_defect_conc - ) - & ( - lowest_energy_data_h2o_defective["defect_index"] - == current_defect_index - ) - 
].iloc[0, 6] - ) - adsorption_data_defective.loc[count, "config_H2O"] = ( - lowest_energy_data_h2o_defective[ - (lowest_energy_data_h2o_defective["MOF"] == mof_name) - & ( - lowest_energy_data_h2o_defective["defect_conc"] - == current_defect_conc - ) - & ( - lowest_energy_data_h2o_defective["defect_index"] - == current_defect_index - ) - ].iloc[0, 5] - ) + adsorption_data_defective.loc[ + count, "ads_H2O" + ] = lowest_energy_data_h2o_defective[ + (lowest_energy_data_h2o_defective["MOF"] == mof_name) + & (lowest_energy_data_h2o_defective["defect_conc"] == current_defect_conc) + & (lowest_energy_data_h2o_defective["defect_index"] == current_defect_index) + ].iloc[ + 0, 6 + ] + adsorption_data_defective.loc[ + count, "config_H2O" + ] = lowest_energy_data_h2o_defective[ + (lowest_energy_data_h2o_defective["MOF"] == mof_name) + & (lowest_energy_data_h2o_defective["defect_conc"] == current_defect_conc) + & (lowest_energy_data_h2o_defective["defect_index"] == current_defect_index) + ].iloc[ + 0, 5 + ] if not lowest_energy_data_co_ads_defective[ (lowest_energy_data_co_ads_defective["MOF"] == mof_name) & (lowest_energy_data_co_ads_defective["defect_conc"] == current_defect_conc) & (lowest_energy_data_co_ads_defective["defect_index"] == current_defect_index) ].empty: - adsorption_data_defective.loc[count, "ads_co"] = ( - lowest_energy_data_co_ads_defective[ - (lowest_energy_data_co_ads_defective["MOF"] == mof_name) - & ( - lowest_energy_data_co_ads_defective["defect_conc"] - == current_defect_conc - ) - & ( - lowest_energy_data_co_ads_defective["defect_index"] - == current_defect_index - ) - ].iloc[0, 6] - ) - adsorption_data_defective.loc[count, "config_co"] = ( - lowest_energy_data_co_ads_defective[ - (lowest_energy_data_co_ads_defective["MOF"] == mof_name) - & ( - lowest_energy_data_co_ads_defective["defect_conc"] - == current_defect_conc - ) - & ( - lowest_energy_data_co_ads_defective["defect_index"] - == current_defect_index - ) - ].iloc[0, 5] - ) + adsorption_data_defective.loc[ + count, "ads_co" + ] = lowest_energy_data_co_ads_defective[ + (lowest_energy_data_co_ads_defective["MOF"] == mof_name) + & ( + lowest_energy_data_co_ads_defective["defect_conc"] + == current_defect_conc + ) + & ( + lowest_energy_data_co_ads_defective["defect_index"] + == current_defect_index + ) + ].iloc[ + 0, 6 + ] + adsorption_data_defective.loc[ + count, "config_co" + ] = lowest_energy_data_co_ads_defective[ + (lowest_energy_data_co_ads_defective["MOF"] == mof_name) + & ( + lowest_energy_data_co_ads_defective["defect_conc"] + == current_defect_conc + ) + & ( + lowest_energy_data_co_ads_defective["defect_index"] + == current_defect_index + ) + ].iloc[ + 0, 5 + ] if not lowest_energy_data_co_ads_2_defective[ (lowest_energy_data_co_ads_2_defective["MOF"] == mof_name) & (lowest_energy_data_co_ads_2_defective["defect_conc"] == current_defect_conc) @@ -784,32 +786,36 @@ == current_defect_index ) ].empty: - adsorption_data_defective.loc[count, "ads_co_2"] = ( - lowest_energy_data_co_ads_2_defective[ - (lowest_energy_data_co_ads_2_defective["MOF"] == mof_name) - & ( - lowest_energy_data_co_ads_2_defective["defect_conc"] - == current_defect_conc - ) - & ( - lowest_energy_data_co_ads_2_defective["defect_index"] - == current_defect_index - ) - ].iloc[0, 6] - ) - adsorption_data_defective.loc[count, "config_co_2"] = ( - lowest_energy_data_co_ads_2_defective[ - (lowest_energy_data_co_ads_2_defective["MOF"] == mof_name) - & ( - lowest_energy_data_co_ads_2_defective["defect_conc"] - == current_defect_conc - ) - & ( - 
lowest_energy_data_co_ads_2_defective["defect_index"] - == current_defect_index - ) - ].iloc[0, 5] - ) + adsorption_data_defective.loc[ + count, "ads_co_2" + ] = lowest_energy_data_co_ads_2_defective[ + (lowest_energy_data_co_ads_2_defective["MOF"] == mof_name) + & ( + lowest_energy_data_co_ads_2_defective["defect_conc"] + == current_defect_conc + ) + & ( + lowest_energy_data_co_ads_2_defective["defect_index"] + == current_defect_index + ) + ].iloc[ + 0, 6 + ] + adsorption_data_defective.loc[ + count, "config_co_2" + ] = lowest_energy_data_co_ads_2_defective[ + (lowest_energy_data_co_ads_2_defective["MOF"] == mof_name) + & ( + lowest_energy_data_co_ads_2_defective["defect_conc"] + == current_defect_conc + ) + & ( + lowest_energy_data_co_ads_2_defective["defect_index"] + == current_defect_index + ) + ].iloc[ + 0, 5 + ] # read the mofs missing DDEC charges diff --git a/src/fairchem/data/odac/promising_mof/promising_mof_features/readme b/src/fairchem/data/odac/promising_mof/promising_mof_features/readme index afb41617a0..4910e85eae 100644 --- a/src/fairchem/data/odac/promising_mof/promising_mof_features/readme +++ b/src/fairchem/data/odac/promising_mof/promising_mof_features/readme @@ -7,10 +7,10 @@ Three criterias have to be satisfied: 1. 2 rings are parallel; 2. the distance o 2. metal-oxygen-metal bridges: [$(select {metal})]~[$(select oxygen)]~[$(select {metal})] 3. uncoordinated nitrogen atoms: [$([#7X2r5])] -We recommend using the jmolData.jar for high-throughput calculations. jmol.jar, which takes more time to run, is good for visualization and debug. +We recommend using the JmolData.jar for high-throughput calculations. jmol.jar, which takes more time to run, is good for visualization and debug. Steps: 1. Change the content of 'list_MOF.txt' to the paths of the MOFs -2. Use 'java -jar JmolData.jar -on -s features.txt' to run the script +2. Use 'java -jar JmolData.jar -on -s features.txt' to run the script. If JmolData.jar is missing, run the command `python src/fairchem/core/scripts/download_large_files.py odac` from the root of the fairchem repo to download it. 3. The output will be saved in the 'output.txt' in the same directory by default, and it can be modified at the last line of the code. 'output.txt' has 10 columns: 1. ID is the index in 'list_MOF.txt'. 
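For scripts that consume these large files directly, the check-then-download pattern this PR adds elsewhere (e.g. in `bulk.py` and the CatTSunami test fixtures below) can be reused; a minimal sketch, assuming it is run from the repo root so the relative path resolves, with the JmolData.jar path taken from the `odac` entry of `FILE_GROUPS`:

```python
# Sketch of the existence-check-then-download pattern added elsewhere in this PR.
from pathlib import Path

from fairchem.core.scripts import download_large_files

jar = Path("src/fairchem/data/odac/promising_mof/promising_mof_features/JmolData.jar")
if not jar.exists():
    # Fetches the whole "odac" group, which includes JmolData.jar.
    download_large_files.download_file_group("odac")
```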
diff --git a/tests/applications/cattsunami/tests/conftest.py b/tests/applications/cattsunami/tests/conftest.py index 24222d9cf7..9afdc0a963 100644 --- a/tests/applications/cattsunami/tests/conftest.py +++ b/tests/applications/cattsunami/tests/conftest.py @@ -1,6 +1,9 @@ -from pathlib import Path +import os import pickle +from pathlib import Path + import pytest +from fairchem.core.scripts import download_large_files @pytest.fixture(scope="class") @@ -17,11 +20,17 @@ def desorption_inputs(request): @pytest.fixture(scope="class") def dissociation_inputs(request): - with open(Path(__file__).parent / "autoframe_inputs_dissociation.pkl", "rb") as fp: + pkl_path = Path(__file__).parent / "autoframe_inputs_dissociation.pkl" + if not pkl_path.exists(): + download_large_files.download_file_group("cattsunami") + with open(pkl_path, "rb") as fp: request.cls.inputs = pickle.load(fp) @pytest.fixture(scope="class") def transfer_inputs(request): - with open(Path(__file__).parent / "autoframe_inputs_transfer.pkl", "rb") as fp: + pkl_path = Path(__file__).parent / "autoframe_inputs_transfer.pkl" + if not pkl_path.exists(): + download_large_files.download_file_group("cattsunami") + with open(pkl_path, "rb") as fp: request.cls.inputs = pickle.load(fp) diff --git a/tests/core/test_download_large_files.py b/tests/core/test_download_large_files.py new file mode 100644 index 0000000000..991f8ce348 --- /dev/null +++ b/tests/core/test_download_large_files.py @@ -0,0 +1,16 @@ +import os +from unittest.mock import patch + +from fairchem.core.scripts import download_large_files as dl_large + + +@patch.object(dl_large, "urlretrieve") +def test_download_large_files(url_mock): + def urlretrieve_mock(x, y): + if not os.path.exists(os.path.dirname(y)): + raise ValueError( + f"The path to {y} does not exist. fairchem directory structure has changed," + ) + + url_mock.side_effect = urlretrieve_mock + dl_large.download_file_group("ALL")
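The test above only verifies that every target directory in the repo exists; it does not hit the network. To see which files a given group would fetch and where they would land, the script's own `FILE_GROUPS` table and root resolution can be inspected; a minimal sketch, assuming the package is importable from a repo checkout:

```python
# Destinations mirror download_file_group: the repo root is fairchem_root().parents[1].
from fairchem.core.common.tutorial_utils import fairchem_root
from fairchem.core.scripts.download_large_files import FILE_GROUPS

repo_root = fairchem_root().parents[1]
for rel_path in FILE_GROUPS["docs"]:
    status = "exists" if (repo_root / rel_path).exists() else "missing"
    print(repo_root / rel_path, status)
```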