Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add predict.py script #92

Open
wants to merge 8 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
72 changes: 72 additions & 0 deletions alignn/predict.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Apply a trained ALIGNN model to a folder of structure files.

Reads structure ids from ``id_prop.csv`` inside ``sample_data_folder``,
builds atom/line graphs for each structure, runs the trained checkpoint,
and writes one prediction row per structure to ``output.csv``.
"""
import csv
import os

import torch

from alignn.graphs import Graph
from alignn.models.alignn import ALIGNN, ALIGNNConfig
from jarvis.core.atoms import Atoms

# Number of regression targets the checkpoint was trained with.
output_features = 1
# Path of the optimized model checkpoint.
filename = 'DataSet_A_Model/checkpoint_150.pt'

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ALIGNN(ALIGNNConfig(name="alignn", output_features=output_features))
model.load_state_dict(torch.load(filename, map_location=device)["model"])
model.eval()

# Graph-construction hyperparameters -- presumably these must match the
# values used during training; confirm against the training config.
cutoff = 8.0
max_neighbors = 12

# Directory holding the structure files the model is applied to.
sample_data_folder = '/Users/habibur/Habibur_Python_Scripts/alignn/alignn/data/'

# id_prop.csv: first column lists the structure file ids, second column
# the corresponding property values.
csv_file = 'id_prop.csv'
# Output CSV: one row per input file with its predicted value(s).
output_file = 'output.csv'

with open(os.path.join(sample_data_folder, csv_file), newline='') as f:
    # Skip blank rows so an empty trailing line cannot raise IndexError.
    file_list = [row[0] for row in csv.reader(f) if row]

# NOTE(review): only CIF parsing is used here although the surrounding
# comments mention POSCAR files too -- confirm the input format.
atoms_list = [
    Atoms.from_cif(os.path.join(sample_data_folder, name))
    for name in file_list
]

graph_pairs = []
for atoms in atoms_list:
    g, lg = Graph.atom_dgl_multigraph(
        atoms, cutoff=float(cutoff), max_neighbors=max_neighbors
    )
    graph_pairs.append((g, lg))

out_data_list = []
with torch.no_grad():  # inference only; no gradients needed
    for g, lg in graph_pairs:
        prediction = (
            model([g.to(device), lg.to(device)])
            .detach()
            .cpu()
            .numpy()
            .flatten()
            .tolist()
        )
        # Join the raw values directly instead of the fragile
        # str(list) + regex round-trip used previously; the result
        # matches the old "inside the brackets" format exactly.
        out_data_list.append(", ".join(str(v) for v in prediction))

with open(os.path.join(sample_data_folder, output_file), mode='w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Filename', 'Output'])
    for name, out_data in zip(file_list, out_data_list):
        writer.writerow([name, out_data])
37 changes: 18 additions & 19 deletions alignn/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -1012,26 +1012,25 @@ def es_score(engine):
mean_absolute_error(np.array(targets), np.array(predictions)),
)
if config.store_outputs and not classification:
x = []
y = []
for i in history["EOS"]:
x.append(i[0].cpu().numpy().tolist())
y.append(i[1].cpu().numpy().tolist())
x = np.array(x, dtype="float").flatten()
y = np.array(y, dtype="float").flatten()
f = open(
os.path.join(
config.output_dir, "prediction_results_train_set.csv"
),
"w",
)
# save training targets and predictions here
# TODO: Add IDs
f.write("target,prediction\n")
for i, j in zip(x, y):
f.write("%6f, %6f\n" % (j, i))
line = str(i) + "," + str(j) + "\n"
f.write(line)
f.close()
resultsfile = os.path.join(
config.output_dir, "prediction_results_train_set.csv"
)

target_vals, predictions = [], []

for tgt, pred in history["trainEOS"]:
target_vals.append(tgt.cpu().numpy().tolist())
predictions.append(pred.cpu().numpy().tolist())

target_vals = np.array(target_vals, dtype="float").flatten()
predictions = np.array(predictions, dtype="float").flatten()

with open(resultsfile, "w") as f:
print("target,prediction", file=f)
for target_val, predicted_val in zip(target_vals, predictions):
print(f"{target_val}, {predicted_val}", file=f)

# TODO: Fix IDs for train loader
"""
Expand Down
51 changes: 51 additions & 0 deletions move.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""Randomly move a percentage of .cif files into a holdout folder.

Selects ``percent_to_select`` percent of the ``.cif`` files found in
``folder_path`` at random, moves them into ``new_folder_path``, and
records their ids and property values in ``moved_files.csv`` there.
"""
import os
import random
import shutil

import pandas as pd

# Folder containing the .cif files and the id_prop.csv file.
folder_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/DataSet_B"

# Destination folder for the selected files.
# NOTE(review): folder is named "20%" but percent_to_select is 10 -- confirm.
new_folder_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/20%/"

# Percentage of .cif files to select.
percent_to_select = 10

# Read the id -> property table, indexed by file id.
csv_file_path = os.path.join(folder_path, "id_prop.csv")
df = pd.read_csv(csv_file_path, index_col=0)

# All candidate .cif files in the source folder.
file_list = [f for f in os.listdir(folder_path) if f.endswith(".cif")]

# Number of files to move (fraction truncated toward zero).
num_to_select = int(len(file_list) * (percent_to_select / 100))

# Randomly choose which files to move.
files_to_move = random.sample(file_list, num_to_select)

# Create the destination first: shutil.move raises FileNotFoundError
# when the target directory does not exist.
os.makedirs(new_folder_path, exist_ok=True)

moved_files = []
for file_name in files_to_move:
    file_path = os.path.join(folder_path, file_name)
    new_file_path = os.path.join(new_folder_path, file_name)
    shutil.move(file_path, new_file_path)
    moved_files.append(file_name)

# Record the moved files with their property values. Select only ids
# that actually exist in id_prop.csv so a .cif without a row cannot
# raise KeyError.
moved_df = df.loc[[f for f in moved_files if f in df.index]]

# Write the moved ids and values next to the moved files.
moved_csv_path = os.path.join(new_folder_path, "moved_files.csv")
moved_df.to_csv(moved_csv_path)










22 changes: 22 additions & 0 deletions update_id_prop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
"""Prune id_prop.csv: drop the rows for files that were moved away."""
import pandas as pd

# Master id -> property table.
id_prop_csv_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/id_prop.csv"

# Table listing the ids of files that were moved elsewhere.
moved_files_csv_path = "/Users/habibur/Habibur_Python_Scripts/alignn/alignn/moved_files.csv"

# Load both tables keyed by file id.
id_prop_df = pd.read_csv(id_prop_csv_path, index_col=0)
moved_files_df = pd.read_csv(moved_files_csv_path, index_col=0)

# Ids to remove from the master table.
moved_files = list(moved_files_df.index)

# errors="ignore" keeps this idempotent: ids already absent from the
# master table are skipped rather than raising KeyError.
id_prop_df = id_prop_df.drop(moved_files, errors="ignore")

# Persist the pruned table in place.
id_prop_df.to_csv(id_prop_csv_path)
Loading