Merge pull request #42 from usnistgov/develop
Develop
knc6 authored Nov 14, 2021
2 parents 4e3a405 + d4f8c08 commit e1415d2
Showing 15 changed files with 285 additions and 253 deletions.
46 changes: 42 additions & 4 deletions README.md
@@ -26,6 +26,26 @@ The line graph convolution updates the triplet representations and the pair repr

![ALIGNN layer schematic](https://github.com/usnistgov/alignn/blob/main/alignn/tex/alignn2.png)

Performances
-------------------------

On the QM9 dataset

![QM9](https://github.com/usnistgov/alignn/blob/develop/alignn/tex/qm9.PNG)

On the Materials Project dataset

![MP](https://github.com/usnistgov/alignn/blob/develop/alignn/tex/MP.PNG)

On the JARVIS-DFT dataset (classification)

![JV-class](https://github.com/usnistgov/alignn/blob/develop/alignn/tex/jvclass.PNG)

On the JARVIS-DFT dataset (regression)

![JV-reg1](https://github.com/usnistgov/alignn/blob/develop/alignn/tex/jv.PNG)
![JV-reg2](https://github.com/usnistgov/alignn/blob/develop/alignn/tex/jv2.PNG)

Installation
-------------------------
First create a conda environment:
@@ -61,7 +81,7 @@ Examples
---------

#### Dataset
Users can keep their structure files in `POSCAR`, `.cif`, or `.xyz` files in a directory. In the examples below we will use POSCAR format files. In the same directory, there should be an `id_prop.csv` file.
Users can keep their structure files in `POSCAR`, `.cif`, `.xyz`, or `.pdb` format in a directory. In the examples below we will use POSCAR format files. In the same directory, there should be an `id_prop.csv` file.

In this directory, an `id_prop.csv` file lists the filenames and the corresponding target values in comma-separated values (csv) format.
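For illustration, a minimal `id_prop.csv` might look like the following (the filenames and target values here are hypothetical placeholders):

```
POSCAR-001.vasp,-0.42
POSCAR-002.vasp,0.17
POSCAR-003.vasp,-1.05
```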

@@ -94,13 +114,31 @@ An example is given below for training formation energy per atom, bandgap and to
python alignn/scripts/train_folder.py --root_dir "alignn/examples/sample_data_multi_prop" --config "alignn/examples/sample_data/config_example.json" --output_dir=temp
```
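For multi-property training as in the command above, each row of `id_prop.csv` carries one comma-separated column per target, as suggested by the `sample_data_multi_prop` example folder. A hedged sketch with hypothetical filenames and values (the number of target columns should match the properties being trained):

```
POSCAR-001.vasp,-0.42,1.10,-12.3
POSCAR-002.vasp,0.17,0.00,-9.8
```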
#### Automated model training
Users can try training using multiple example scripts to run multiple dataset (such as JARVIS-DFT, Materials project, QM9_JCTC etc.). Look into the [alignn/scripts'](https://github.com/usnistgov/alignn/tree/main/alignn/scripts) folder. This is done primarily to make the trainings more automated rather than making folder/ csv files etc.
These scripts automatically download datasets from [Databases](https://jarvis-tools.readthedocs.io/en/master/databases.html) in [jarvis-tools] (https://github.com/usnistgov/jarvis) package and train several models. Make sure you specify your specific queuing system details in the scripts.
Users can try training with multiple example scripts that run on several datasets (such as JARVIS-DFT, Materials Project, QM9_JCTC, etc.). Look into the [alignn/scripts/train_*.py](https://github.com/usnistgov/alignn/tree/main/alignn/scripts) scripts. This is done primarily to automate the trainings rather than manually preparing folders, csv files, etc.
These scripts automatically download datasets from [Databases in jarvis-tools](https://jarvis-tools.readthedocs.io/en/master/databases.html) and train several models. Make sure you specify your queuing-system details in the scripts.
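For reference, a minimal sketch of how such a dataset can be pulled with the jarvis-tools figshare interface (the `dft_3d` dataset name is used here only as an illustration; the automated scripts handle this download themselves):

```
from jarvis.db.figshare import data

# Download (and cache) the JARVIS-DFT 3D dataset as a list of dictionaries.
d = data("dft_3d")
print(len(d), "entries; example keys:", list(d[0].keys())[:5])
```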

Using pretrained models
-------------------------

All the trained models are distributed on [figshare](https://figshare.com/projects/ALIGNN_models/126478) and this [pretrained_model.py script](https://github.com/usnistgov/alignn/blob/develop/alignn/scripts/pretrained_model.py) can be applied to use them.
All the trained models are distributed on [figshare](https://figshare.com/projects/ALIGNN_models/126478), and the [pretrained.py script](https://github.com/usnistgov/alignn/blob/develop/alignn/pretrained.py) can be used to run them.

A brief help section can be displayed with:

```
python alignn/pretrained.py -h
```
An example of predicting the formation energy per atom with a model trained on the JARVIS-DFT dataset is shown below:

```
python alignn/pretrained.py --model_name jv_formation_energy_peratom_alignn --file_format poscar --file_path alignn/examples/sample_data/POSCAR-JVASP-10.vasp
```
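The same prediction can be made programmatically. A minimal sketch based on the `get_prediction` helper and the `jarvis.core.atoms.Atoms` reader from `alignn/pretrained.py` (the new module shown later in this diff), assuming alignn and its dependencies are installed:

```
from jarvis.core.atoms import Atoms
from alignn.pretrained import get_prediction

# Read a POSCAR structure and predict formation energy per atom.
atoms = Atoms.from_poscar("alignn/examples/sample_data/POSCAR-JVASP-10.vasp")
result = get_prediction(
    model_name="jv_formation_energy_peratom_alignn",
    atoms=atoms,
    cutoff=8,
)
print("Predicted value:", result)
```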

Web-app
------------

A basic web app for direct prediction is available at the [JARVIS-ALIGNN app](https://jarvis.nist.gov/jalignn/).

![JARVIS-ALIGNN](https://github.com/usnistgov/alignn/blob/develop/alignn/tex/jalignn.PNG)

Notes:
1) If you are using GPUs, make sure you have a compatible dgl-cuda version installed, for example: dgl-cu101 or dgl-cu111.
2 changes: 1 addition & 1 deletion alignn/__init__.py
@@ -1,2 +1,2 @@
"""Version number."""
__version__ = "2021.11.11"
__version__ = "2021.11.12"
176 changes: 0 additions & 176 deletions alignn/alignn_train_folder.py

This file was deleted.

136 changes: 136 additions & 0 deletions alignn/pretrained.py
@@ -0,0 +1,136 @@
"""Module to download and load pre-trained ALIGNN models."""
import requests
import os
import zipfile
from tqdm import tqdm
from alignn.models.alignn import ALIGNN, ALIGNNConfig
import tempfile
import torch
import sys
from jarvis.db.jsonutils import loadjson
import argparse
from jarvis.core.atoms import Atoms
from jarvis.core.graphs import Graph

# Registry of pretrained models: each name maps to a download URL
# and the number of output features of the trained model.
all_models = loadjson(
    str(os.path.join(os.path.dirname(__file__), "pretrained_models.json"))
)


parser = argparse.ArgumentParser(
    description="Atomistic Line Graph Neural Network Pretrained Models"
)
parser.add_argument(
    "--model_name",
    default="jv_formation_energy_peratom_alignn",
    help="Choose a model from these "
    + str(len(list(all_models.keys())))
    + " models:"
    + ", ".join(list(all_models.keys())),
)

parser.add_argument(
    "--file_format", default="poscar", help="poscar/cif/xyz/pdb file format."
)

parser.add_argument(
    "--file_path",
    default="alignn/examples/sample_data/POSCAR-JVASP-10.vasp",
    help="Path to file.",
)

parser.add_argument(
    "--cutoff",
    default=8,
    help="Distance cut-off for graph construction"
    + ", usually 8 for solids and 5 for molecules.",
)


device = "cpu"
if torch.cuda.is_available():
    device = torch.device("cuda")


def get_prediction(
    model_name="jv_formation_energy_peratom_alignn",
    atoms=None,
    cutoff=8,
):
    """Get model with progress bar."""
    tmp = all_models[model_name]
    url = tmp[0]
    output_features = tmp[1]
    zfile = model_name + ".zip"
    path = str(os.path.join(os.path.dirname(__file__), zfile))
    # Download and cache the zipped checkpoint next to this module if needed.
    if not os.path.isfile(path):
        response = requests.get(url, stream=True)
        total_size_in_bytes = int(response.headers.get("content-length", 0))
        block_size = 1024  # 1 Kibibyte
        progress_bar = tqdm(
            total=total_size_in_bytes, unit="iB", unit_scale=True
        )
        with open(path, "wb") as file:
            for data in response.iter_content(block_size):
                progress_bar.update(len(data))
                file.write(data)
        progress_bar.close()
    # Find the serialized checkpoint (checkpoint_*.pt) inside the archive.
    zp = zipfile.ZipFile(path)
    names = zp.namelist()
    for i in names:
        if "checkpoint_" in i and "pt" in i:
            tmp = i
            # print("chk", i)
    # print("Loading the zipfile...", zipfile.ZipFile(path).namelist())
    data = zipfile.ZipFile(path).read(tmp)
    model = ALIGNN(
        ALIGNNConfig(name="alignn", output_features=output_features)
    )
    new_file, filename = tempfile.mkstemp()
    with open(filename, "wb") as f:
        f.write(data)
    model.load_state_dict(torch.load(filename, map_location=device)["model"])
    model.to(device)
    model.eval()
    if os.path.exists(filename):
        os.remove(filename)

    # print("Loading completed.")
    # Build the atom graph and its line graph, then run a single forward pass.
    g, lg = Graph.atom_dgl_multigraph(atoms, cutoff=float(cutoff))
    out_data = (
        model([g.to(device), lg.to(device)])
        .detach()
        .cpu()
        .numpy()
        .flatten()
        .tolist()
    )
    return out_data


if __name__ == "__main__":
    args = parser.parse_args(sys.argv[1:])
    model_name = args.model_name
    file_path = args.file_path
    file_format = args.file_format
    cutoff = args.cutoff
    if file_format == "poscar":
        atoms = Atoms.from_poscar(file_path)
    elif file_format == "cif":
        atoms = Atoms.from_cif(file_path)
    elif file_format == "xyz":
        atoms = Atoms.from_xyz(file_path, box_size=500)
    elif file_format == "pdb":
        atoms = Atoms.from_pdb(file_path, max_lat=500)
    else:
        raise NotImplementedError("File format not implemented", file_format)

    out_data = get_prediction(
        model_name=model_name, cutoff=float(cutoff), atoms=atoms
    )

    print("Predicted value:", model_name, file_path, out_data)


# x = get_model()
# print(x)