diff --git a/alignn/predict.py b/alignn/predict.py
new file mode 100644
index 00000000..c688df12
--- /dev/null
+++ b/alignn/predict.py
@@ -0,0 +1,73 @@
+import csv
+import os
+
+import torch
+from jarvis.core.atoms import Atoms
+
+from alignn.graphs import Graph
+from alignn.models.alignn import ALIGNN, ALIGNNConfig
+
+# Path to the checkpoint file, i.e. the trained model you want to apply.
+filename = 'DataSet_A_Model/checkpoint_150.pt'
+output_features = 1
+
+device = "cpu"
+if torch.cuda.is_available():
+    device = torch.device("cuda")
+
+model = ALIGNN(ALIGNNConfig(name="alignn", output_features=output_features))
+model.load_state_dict(torch.load(filename, map_location=device)["model"])
+model.to(device)  # the graphs are moved to `device` below; the model must live there too
+model.eval()
+
+cutoff = 8.0
+max_neighbors = 12
+
+# Directory containing the CIF files you want to run the trained model on.
+sample_data_folder = '/Users/habibur/Habibur_Python_Scripts/alignn/alignn/data/'
+
+# id_prop.csv: headerless CSV with the CIF file names (ids) in the first
+# column and the corresponding properties in the second column.
+csv_file = 'id_prop.csv'
+
+# output.csv: every id and its predicted property will be written here.
+output_file = 'output.csv'
+
+with open(os.path.join(sample_data_folder, csv_file), newline='') as f:
+    reader = csv.reader(f)
+    file_list = [row[0] for row in reader]
+
+atoms_list = []
+for file in file_list:
+    atoms = Atoms.from_cif(os.path.join(sample_data_folder, file))
+    atoms_list.append(atoms)
+
+# Build the atomistic graph and the line graph for every structure.
+g_list = []
+lg_list = []
+for atoms in atoms_list:
+    g, lg = Graph.atom_dgl_multigraph(
+        atoms, cutoff=float(cutoff), max_neighbors=max_neighbors
+    )
+    g_list.append(g)
+    lg_list.append(lg)
+
+out_data_list = []
+for g, lg in zip(g_list, lg_list):
+    with torch.no_grad():
+        out_data = (
+            model([g.to(device), lg.to(device)])
+            .detach()
+            .cpu()
+            .numpy()
+            .flatten()
+            .tolist()
+        )
+    # Join the predicted values (one per output feature) into a single field.
+    out_data_list.append(";".join(str(val) for val in out_data))
+
+with open(os.path.join(sample_data_folder, output_file), mode='w', newline='') as f:
+    writer = csv.writer(f)
+    writer.writerow(['Filename', 'Output'])
+    for file, out_data in zip(file_list, out_data_list):
+        writer.writerow([file, out_data])
diff --git a/alignn/train.py b/alignn/train.py
index 4fa072e4..bcda0d64 100644
--- a/alignn/train.py
+++ b/alignn/train.py
@@ -1012,26 +1012,25 @@ def es_score(engine):
                 mean_absolute_error(np.array(targets), np.array(predictions)),
             )
         if config.store_outputs and not classification:
-            x = []
-            y = []
-            for i in history["EOS"]:
-                x.append(i[0].cpu().numpy().tolist())
-                y.append(i[1].cpu().numpy().tolist())
-            x = np.array(x, dtype="float").flatten()
-            y = np.array(y, dtype="float").flatten()
-            f = open(
-                os.path.join(
-                    config.output_dir, "prediction_results_train_set.csv"
-                ),
-                "w",
-            )
+            # save training targets and predictions here
             # TODO: Add IDs
-            f.write("target,prediction\n")
-            for i, j in zip(x, y):
-                f.write("%6f, %6f\n" % (j, i))
-                line = str(i) + "," + str(j) + "\n"
-                f.write(line)
-            f.close()
+            resultsfile = os.path.join(
+                config.output_dir, "prediction_results_train_set.csv"
+            )
+
+            target_vals, predictions = [], []
+
+            for tgt, pred in history["trainEOS"]:
+                target_vals.append(tgt.cpu().numpy().tolist())
+                predictions.append(pred.cpu().numpy().tolist())
+
+            target_vals = np.array(target_vals, dtype="float").flatten()
+            predictions = np.array(predictions, dtype="float").flatten()
+
+            with open(resultsfile, "w") as f:
+                print("target,prediction", file=f)
+                for target_val, predicted_val in zip(target_vals, predictions):
+                    print(f"{target_val},{predicted_val}", file=f)
 
         # TODO: Fix IDs for train loader
         """
diff --git a/move.py b/move.py
new file mode 100644
index 00000000..32a5b522
--- /dev/null
+++ b/move.py
@@ -0,0 +1,45 @@
+import os
+import random
+import shutil
+
+import pandas as pd
+
+# set the path to the folder containing the .cif files and the id_prop.csv file
+folder_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/DataSet_B"
+
+# set the path to the new folder where you want to move the selected files
+new_folder_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/20%/"
+
+# set the percentage of files you want to select
+percent_to_select = 10
+
+# read the csv file into a pandas dataframe; id_prop.csv has no header row
+csv_file_path = os.path.join(folder_path, "id_prop.csv")
+df = pd.read_csv(csv_file_path, index_col=0, header=None)
+
+# get a list of all the files in the folder with the .cif extension
+file_list = [f for f in os.listdir(folder_path) if f.endswith(".cif")]
+
+# calculate the number of files to select
+num_to_select = int(len(file_list) * (percent_to_select / 100))
+
+# randomly select the files to move
+files_to_move = random.sample(file_list, num_to_select)
+
+# make sure the destination folder exists before moving files into it
+os.makedirs(new_folder_path, exist_ok=True)
+
+# move the selected files to the new folder
+moved_files = []
+for file_name in files_to_move:
+    file_path = os.path.join(folder_path, file_name)
+    new_file_path = os.path.join(new_folder_path, file_name)
+    shutil.move(file_path, new_file_path)
+    moved_files.append(file_name)
+
+# create a new dataframe with the moved files and their corresponding values
+moved_df = df.loc[moved_files]
+
+# write the moved files and values to a new csv file (again without a header)
+moved_csv_path = os.path.join(new_folder_path, "moved_files.csv")
+moved_df.to_csv(moved_csv_path, header=False)
diff --git a/update_id_prop.py b/update_id_prop.py
new file mode 100644
index 00000000..843d3f3f
--- /dev/null
+++ b/update_id_prop.py
@@ -0,0 +1,22 @@
+import pandas as pd
+
+# Set the path to the id_prop.csv file
+id_prop_csv_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/id_prop.csv"
+
+# Set the path to the moved_files.csv file
+moved_files_csv_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/moved_files.csv"
+
+# Read in the id_prop.csv file (headerless) as a pandas DataFrame
+id_prop_df = pd.read_csv(id_prop_csv_path, index_col=0, header=None)
+
+# Read in the moved_files.csv file (headerless) as a pandas DataFrame
+moved_files_df = pd.read_csv(moved_files_csv_path, index_col=0, header=None)
+
+# Get a list of the files that were moved
+moved_files = moved_files_df.index.tolist()
+
+# Drop the rows in id_prop_df that correspond to the moved files
+id_prop_df = id_prop_df.drop(moved_files, errors="ignore")
+
+# Write the updated id_prop.csv file back to disk (again without a header)
+id_prop_df.to_csv(id_prop_csv_path, header=False)
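For reference, a minimal sketch of how the output.csv produced by predict.py could be checked against the target values. It assumes the layout used above (a headerless data/id_prop.csv and the Filename/Output columns that predict.py writes); the paths are placeholders.

import numpy as np
import pandas as pd

# Targets: headerless id_prop.csv (file name, property).
targets = pd.read_csv(
    "data/id_prop.csv", header=None, names=["Filename", "Target"]
)
# Predictions: output.csv written by predict.py above.
preds = pd.read_csv("data/output.csv")

# Join on file name and compute the hold-out MAE; with output_features=1
# the Output field holds a single number per structure.
merged = targets.merge(preds, on="Filename")
mae = np.mean(np.abs(merged["Target"] - merged["Output"].astype(float)))
print(f"hold-out MAE over {len(merged)} structures: {mae:.4f}")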
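Similarly, a quick sanity check, under the same headerless-CSV assumption, that move.py and update_id_prop.py left no id in both the training set and the held-out set (again with placeholder paths):

import pandas as pd

# First column of each file is the .cif file name.
train_ids = set(pd.read_csv("id_prop.csv", index_col=0, header=None).index)
held_out = set(pd.read_csv("20%/moved_files.csv", index_col=0, header=None).index)

overlap = train_ids & held_out
assert not overlap, f"{len(overlap)} ids are in both sets, e.g. {sorted(overlap)[:5]}"
print(f"{len(train_ids)} training ids, {len(held_out)} held-out ids, no overlap")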