Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add predict.py script #92

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions alignn/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Apply a trained ALIGNN model to a folder of structure files.

Reads structure ids from ``id_prop.csv`` inside ``sample_data_folder``,
builds atom/line graphs for each structure, runs the trained checkpoint,
and writes one prediction row per structure to ``output.csv``.
"""
import csv
import os

import torch

from alignn.graphs import Graph
from alignn.models.alignn import ALIGNN, ALIGNNConfig
from jarvis.core.atoms import Atoms

# Number of regression targets the checkpoint was trained with.
output_features = 1
# Path of the optimized model checkpoint.
filename = 'DataSet_A_Model/checkpoint_150.pt'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ALIGNN(ALIGNNConfig(name="alignn", output_features=output_features))
model.load_state_dict(torch.load(filename, map_location=device)["model"])
model.eval()

# Graph-construction hyperparameters -- presumably these must match the
# values used during training; confirm against the training config.
cutoff = 8.0
max_neighbors = 12

# Directory holding the structure files the model is applied to.
sample_data_folder = '/Users/habibur/Habibur_Python_Scripts/alignn/alignn/data/'

# id_prop.csv: first column lists the structure file ids, second column
# the corresponding property values.
csv_file = 'id_prop.csv'
# Output CSV: one row per input file with its predicted value(s).
output_file = 'output.csv'

with open(os.path.join(sample_data_folder, csv_file), newline='') as f:
    # Skip blank rows so an empty trailing line cannot raise IndexError.
    file_list = [row[0] for row in csv.reader(f) if row]

# NOTE(review): only CIF parsing is used here although the surrounding
# comments mention POSCAR files too -- confirm the input format.
atoms_list = [
    Atoms.from_cif(os.path.join(sample_data_folder, name))
    for name in file_list
]

graph_pairs = []
for atoms in atoms_list:
    g, lg = Graph.atom_dgl_multigraph(
        atoms, cutoff=float(cutoff), max_neighbors=max_neighbors
    )
    graph_pairs.append((g, lg))

out_data_list = []
with torch.no_grad():  # inference only; no gradients needed
    for g, lg in graph_pairs:
        prediction = (
            model([g.to(device), lg.to(device)])
            .detach()
            .cpu()
            .numpy()
            .flatten()
            .tolist()
        )
        # Join the raw values directly instead of the fragile
        # str(list) + regex round-trip used previously; the result
        # matches the old "inside the brackets" format exactly.
        out_data_list.append(", ".join(str(v) for v in prediction))

with open(os.path.join(sample_data_folder, output_file), mode='w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Filename', 'Output'])
    for name, out_data in zip(file_list, out_data_list):
        writer.writerow([name, out_data])
37 changes: 18 additions & 19 deletions alignn/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,26 +1012,25 @@ def es_score(engine):
mean_absolute_error(np.array(targets), np.array(predictions)),
)
if config.store_outputs and not classification:
x = []
y = []
for i in history["EOS"]:
x.append(i[0].cpu().numpy().tolist())
y.append(i[1].cpu().numpy().tolist())
x = np.array(x, dtype="float").flatten()
y = np.array(y, dtype="float").flatten()
f = open(
os.path.join(
config.output_dir, "prediction_results_train_set.csv"
),
"w",
)
# save training targets and predictions here
# TODO: Add IDs
f.write("target,prediction\n")
for i, j in zip(x, y):
f.write("%6f, %6f\n" % (j, i))
line = str(i) + "," + str(j) + "\n"
f.write(line)
f.close()
resultsfile = os.path.join(
config.output_dir, "prediction_results_train_set.csv"
)

target_vals, predictions = [], []

for tgt, pred in history["trainEOS"]:
target_vals.append(tgt.cpu().numpy().tolist())
predictions.append(pred.cpu().numpy().tolist())

target_vals = np.array(target_vals, dtype="float").flatten()
predictions = np.array(predictions, dtype="float").flatten()

with open(resultsfile, "w") as f:
print("target,prediction", file=f)
for target_val, predicted_val in zip(target_vals, predictions):
print(f"{target_val}, {predicted_val}", file=f)

# TODO: Fix IDs for train loader
"""
Expand Down
51 changes: 51 additions & 0 deletions move.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Randomly move a percentage of .cif files into a holdout folder.

Selects ``percent_to_select`` percent of the ``.cif`` files found in
``folder_path`` at random, moves them into ``new_folder_path``, and
records their ids and property values in ``moved_files.csv`` there.
"""
import os
import random
import shutil

import pandas as pd

# Folder containing the .cif files and the id_prop.csv file.
folder_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/DataSet_B"

# Destination folder for the selected files.
# NOTE(review): folder is named "20%" but percent_to_select is 10 -- confirm.
new_folder_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/20%/"

# Percentage of .cif files to select.
percent_to_select = 10

# Read the id -> property table, indexed by file id.
csv_file_path = os.path.join(folder_path, "id_prop.csv")
df = pd.read_csv(csv_file_path, index_col=0)

# All candidate .cif files in the source folder.
file_list = [f for f in os.listdir(folder_path) if f.endswith(".cif")]

# Number of files to move (fraction truncated toward zero).
num_to_select = int(len(file_list) * (percent_to_select / 100))

# Randomly choose which files to move.
files_to_move = random.sample(file_list, num_to_select)

# Create the destination first: shutil.move raises FileNotFoundError
# when the target directory does not exist.
os.makedirs(new_folder_path, exist_ok=True)

moved_files = []
for file_name in files_to_move:
    file_path = os.path.join(folder_path, file_name)
    new_file_path = os.path.join(new_folder_path, file_name)
    shutil.move(file_path, new_file_path)
    moved_files.append(file_name)

# Record the moved files with their property values. Select only ids
# that actually exist in id_prop.csv so a .cif without a row cannot
# raise KeyError.
moved_df = df.loc[[f for f in moved_files if f in df.index]]

# Write the moved ids and values next to the moved files.
moved_csv_path = os.path.join(new_folder_path, "moved_files.csv")
moved_df.to_csv(moved_csv_path)










22 changes: 22 additions & 0 deletions update_id_prop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Prune id_prop.csv: drop the rows for files that were moved away."""
import pandas as pd

# Master id -> property table.
id_prop_csv_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/id_prop.csv"

# Table listing the ids of files that were moved elsewhere.
moved_files_csv_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/moved_files.csv"

# Load both tables keyed by file id.
id_prop_df = pd.read_csv(id_prop_csv_path, index_col=0)
moved_files_df = pd.read_csv(moved_files_csv_path, index_col=0)

# Ids to remove from the master table.
moved_files = list(moved_files_df.index)

# errors="ignore" keeps this idempotent: ids already absent from the
# master table are skipped rather than raising KeyError.
id_prop_df = id_prop_df.drop(moved_files, errors="ignore")

# Persist the pruned table in place.
id_prop_df.to_csv(id_prop_csv_path)
Loading