From a3eb54109fe6af456917665d3e6da217638ea09f Mon Sep 17 00:00:00 2001
From: Logan Ward
Date: Thu, 14 Dec 2023 10:52:59 -0500
Subject: [PATCH] Started a per-element SOAP model

Not yet sure why the forces are broken
---
 jitterbug/model/dscribe/local.py | 95 +++++++++++++++++++++++++-------
 tests/models/test_soap.py        | 75 +++++++++++++++++--------
 2 files changed, 127 insertions(+), 43 deletions(-)

diff --git a/jitterbug/model/dscribe/local.py b/jitterbug/model/dscribe/local.py
index 70f4c79..17f51de 100644
--- a/jitterbug/model/dscribe/local.py
+++ b/jitterbug/model/dscribe/local.py
@@ -43,12 +43,37 @@ def forward(self, x) -> torch.Tensor:
         return torch.tensordot(self.alpha, esd, dims=([0], [0]))
 
 
-def make_gpr_model(train_descriptors: np.ndarray, num_inducing_points: int, use_ard_kernel: bool = False) -> InducedKernelGPR:
+class PerElementModel(torch.nn.Module):
+    """Model which uses a separate machine learning model for each element"""
+
+    def __init__(self, models: list[InducedKernelGPR], dtype=torch.float32):
+        super().__init__()
+        self.dtype = dtype
+        self.models = torch.nn.ModuleList(models)
+
+    def forward(self, element: torch.IntTensor, x: torch.Tensor) -> torch.Tensor:
+        output = torch.zeros(x.shape[0], dtype=self.dtype, device=x.device)
+
+        # Evaluate each submodel only on the atoms of its element
+        for i, model in enumerate(self.models):
+            mask = element == i
+            energies = model(x[mask, :])
+            output[mask] = energies
+
+        return output
+
+
+def make_gpr_model(
+        num_elements: int,
+        train_descriptors: np.ndarray,
+        num_inducing_points: int,
+        use_ard_kernel: bool = False
+) -> PerElementModel:
     """Make the GPR model for a certain atomic system
 
     Assumes that the descriptors have already been scaled
 
     Args:
+        num_elements: Number of elements to include in the model
         train_descriptors: 3D array of all training points (num_configurations, num_atoms, num_descriptors)
         num_inducing_points: Number of inducing points to use in the kernel. More points yield a more flexible model
         use_ard_kernel: Whether to use a different length scale parameter for each descriptor
@@ -62,15 +87,21 @@ def make_gpr_model(train_descriptors: np.ndarray, num_inducing_points: int, use_
     inducing_inds = np.random.choice(descriptors.shape[0], size=(num_inducing_points,), replace=False)
     inducing_points = descriptors[inducing_inds, :]
 
-    # Make the model
-    return InducedKernelGPR(
-        inducing_x=torch.from_numpy(inducing_points),
-        use_ard=use_ard_kernel,
-    )
+    # Make a model for each element
+    models = [
+        InducedKernelGPR(
+            inducing_x=torch.from_numpy(inducing_points),
+            use_ard=use_ard_kernel,
+        ) for _ in range(num_elements)
+    ]
+
+    # Combine to form a big model
+    return PerElementModel(models)
 
 
 def train_model(model: torch.nn.Module,
-                train_x: np.ndarray,
+                train_elem: np.ndarray,
+                train_desc: np.ndarray,
                 train_y: np.ndarray,
                 steps: int,
                 batch_size: int = 4,
@@ -79,11 +110,12 @@ def train_model(model: torch.nn.Module,
                 verbose: bool = False) -> pd.DataFrame:
     """Train the kernel model over many iterations
 
-    Assumes that the descriptors have already been scaled
+    Assumes that the descriptors and energies have already been scaled
 
     Args:
         model: Model to be trained
-        train_x: 3D array of all training points (num_configurations, num_atoms, num_descriptors)
+        train_elem: 2D array of atomic numbers (num_configurations, num_atoms)
+        train_desc: 3D array of the descriptors of all training points (num_configurations, num_atoms, num_descriptors)
         train_y: Energies for each training point
         steps: Number of iterations over all training points
         batch_size: Number of conformers per batch
@@ -94,13 +126,20 @@ def train_model(model: torch.nn.Module,
         Mean loss over each iteration
     """
 
+    # Convert element types to indices
+    elem_types = np.unique(train_elem)
+    elem_ind_x = np.empty_like(train_elem)
+    for i, z in enumerate(elem_types):
+        elem_ind_x[train_elem == z] = i
+
     # Convert the data to Torch tensors
-    n_conf, n_atoms = train_x.shape[:2]
-    train_x = torch.from_numpy(train_x)
+    n_conf, n_atoms = train_desc.shape[:2]
+    train_desc = torch.from_numpy(train_desc)
     train_y = torch.from_numpy(train_y)
+    elem_ind_x = torch.from_numpy(elem_ind_x)
 
     # Make the data loader
-    dataset = TensorDataset(train_x, train_y)
+    dataset = TensorDataset(elem_ind_x, train_desc, train_y)
     loader = DataLoader(dataset, batch_size=batch_size, shuffle=True, drop_last=True)
 
     # Convert the model to training mode on the device
@@ -117,15 +156,17 @@ def train_model(model: torch.nn.Module,
     losses = []
     for _ in tqdm(range(steps), disable=not verbose, leave=False):
         epoch_loss = 0
-        for batch_x, batch_y in loader:
+        for batch_elem, batch_desc, batch_y in loader:
             # Prepare at the beginning of each batch
             opt.zero_grad()
-            batch_x = batch_x.to(device)
+            batch_elem = batch_elem.to(device)
+            batch_desc = batch_desc.to(device)
             batch_y = batch_y.to(device)
 
             # Predict on all configurations
-            batch_x = torch.reshape(batch_x, (-1, batch_x.shape[-1]))  # Flatten from (n_confs, n_atoms, n_desc) -> (n_confs * n_atoms, n_desc)
-            pred_y_per_atoms_flat = model(batch_x)
+            batch_elem = torch.reshape(batch_elem, (-1,))
+            batch_desc = torch.reshape(batch_desc, (-1, batch_desc.shape[-1]))  # Flatten from (n_confs, n_atoms, n_desc) -> (n_confs * n_atoms, n_desc)
+            pred_y_per_atoms_flat = model(batch_elem, batch_desc)
 
             # Sum the per-atom energies to get the energy of each configuration
             pred_y_per_atoms = torch.reshape(pred_y_per_atoms_flat, (batch_size, n_atoms))
@@ -151,6 +192,7 @@ class DScribeLocalCalculator(Calculator):
     """Calculator which uses descriptors for each atom and PyTorch
     to compute energy
 
     Keyword Args:
+        element_list (list[int]): Atomic numbers of the elements used in the model
         model (torch.nn.Module): A machine learning model which takes descriptors as inputs
         desc (DescriptorLocal): Tool used to compute the descriptors
         desc_scaling (tuple[np.ndarray, np.ndarray]): An offset and factor with which to adjust the descriptors,
@@ -166,11 +208,18 @@ class DScribeLocalCalculator(Calculator):
         'desc': None,
         'desc_scaling': (0., 1.),
         'energy_scaling': (0., 1.),
+        'element_list': (),
         'device': 'cpu'
     }
 
     def calculate(self, atoms=None, properties=('energy', 'forces', 'energies'),
                   system_changes=all_changes):
+        # Check that all elements are supported, then map atomic numbers to model indices
+        elem_list = self.parameters['element_list']
+        if any(x not in elem_list for x in atoms.get_atomic_numbers()):
+            raise ValueError('Atoms object contains elements not included in the potential')
+        elems = np.array([elem_list.index(z) for z in atoms.get_atomic_numbers()])
+
         # Compute the descriptors for the atoms
         d_desc_d_pos, desc = self.parameters['desc'].derivatives(atoms, attach=True)
 
@@ -183,6 +232,7 @@ def calculate(self, atoms=None, properties=('energy', 'forces', 'energies'),
         desc = torch.from_numpy(desc.astype(np.float32))
         desc.requires_grad = True
         d_desc_d_pos = torch.from_numpy(d_desc_d_pos.astype(np.float32))
+        elems = torch.from_numpy(elems)
 
         # Move the model to device if need be
         model: torch.nn.Module = self.parameters['model']
@@ -192,8 +242,9 @@ def calculate(self, atoms=None, properties=('energy', 'forces', 'energies'),
         # Run inference
         offset, scale = self.parameters['energy_scaling']
         model.eval()  # Ensure we're in eval mode
+        elems = elems.to(device)
         desc = desc.to(device)
-        pred_energies_dist = model(desc)
+        pred_energies_dist = model(elems, desc)
         pred_energies = pred_energies_dist * scale + offset
         pred_energy = torch.sum(pred_energies)
         self.results['energy'] = pred_energy.item()
@@ -250,22 +301,26 @@ def __init__(self,
 
     def train_calculator(self, data: list[Atoms]) -> Calculator:
         # Train it using the user-provided options
-        train_x = self.descriptors.create(data)
+        train_x = self.descriptors.create(data).astype(np.float32)
         offset_x = train_x.mean(axis=(0, 1))
         scale_x = np.clip(train_x.std(axis=(0, 1)), a_min=1e-6, a_max=None)
         train_x -= offset_x
         train_x /= scale_x
 
-        train_y = np.array([a.get_potential_energy() for a in data])
+        train_y = np.array([a.get_potential_energy() for a in data]).astype(np.float32)
         scale_y, offset_y = np.std(train_y), np.mean(train_y)
         train_y = (train_y - offset_y) / scale_y
 
+        # Get the element for each atom
+        elements = np.array([x.get_atomic_numbers() for x in data])
+
         # Make then train the model
         model = self.model_fn(train_x)
-        train_model(model, train_x, train_y, device=self.device, **self.train_options)
+        train_model(model, elements, train_x, train_y, device=self.device, **self.train_options)
 
         # Make the calculator
         return DScribeLocalCalculator(
+            element_list=np.unique(elements).tolist(),
             model=model,
             desc=self.descriptors,
             desc_scaling=(offset_x, scale_x),
diff --git a/tests/models/test_soap.py b/tests/models/test_soap.py
index d920137..2764205 100644
--- a/tests/models/test_soap.py
+++ b/tests/models/test_soap.py
@@ -19,73 +19,102 @@
 
 @fixture
 def descriptors(train_set, soap):
-    return soap.create(train_set)
+    return soap.create(train_set).astype(np.float32)
+
+
+@fixture
+def elements(train_set):
+    return np.array([atoms.get_atomic_numbers() for atoms in train_set])
 
 
 @mark.parametrize('use_adr', [True, False])
-def test_make_model(use_adr, descriptors, train_set):
-    model = make_gpr_model(descriptors, 4, use_ard_kernel=use_adr)
+def test_make_model(use_adr, elements, descriptors, train_set, soap):
+    model = make_gpr_model(len(soap.species), descriptors, 4, use_ard_kernel=use_adr)
 
     # Evaluate on a single point
     model.eval()
-    pred_y = model(torch.from_numpy(descriptors[0, :, :]))
+    element_inds = np.searchsorted(np.unique(elements), elements)  # Map atomic numbers to 0-based model indices
+    pred_y = model(torch.from_numpy(element_inds[0, :]), torch.from_numpy(descriptors[0, :, :]))
     assert pred_y.shape == (3,)  # 3 Atoms
 
 
 @mark.parametrize('use_adr', [True, False])
-def test_train(descriptors, train_set, use_adr):
+def test_train(elements, descriptors, train_set, use_adr):
+    # Convert atomic numbers to element indices
+    elem_types = np.unique(elements)
+    element_inds = np.empty_like(elements)
+    for i, z in enumerate(elem_types):
+        element_inds[elements == z] = i
+
     # Make the model and the training set
-    train_y = np.array([a.get_potential_energy() for a in train_set])
+    train_y = np.array([a.get_potential_energy() for a in train_set]).astype(np.float32)
     train_y -= train_y.min()
 
-    model = make_gpr_model(descriptors, 4, use_ard_kernel=use_adr)
-    model.inducing_x.requires_grad = False
+    model = make_gpr_model(len(elem_types), descriptors, 4, use_ard_kernel=use_adr)
+    for submodel in model.models:
+        submodel.inducing_x.requires_grad = False
 
     # Evaluate the untrained model
     model.eval()
-    pred_y = model(torch.from_numpy(descriptors.reshape((-1, descriptors.shape[-1]))))
-    assert pred_y.dtype == torch.float64
-    error_y = pred_y.sum(axis=-1).detach().numpy() - train_y
+    pred_y = model(
+        torch.from_numpy(element_inds.flatten()),
+        torch.from_numpy(descriptors.reshape((-1, descriptors.shape[-1])))
+    )
+    assert pred_y.dtype == torch.float32
+    error_y = pred_y.reshape(descriptors.shape[:2]).sum(axis=1).detach().numpy() - train_y
     mae_untrained = np.abs(error_y).mean()
 
     # Train
-    losses = train_model(model, descriptors, train_y, 64)
+    losses = train_model(model, elements, descriptors, train_y, 64)
     assert len(losses) == 64
 
     # Run the evaluation
     model.eval()
-    pred_y = model(torch.from_numpy(descriptors.reshape((-1, descriptors.shape[-1]))))
-    error_y = pred_y.sum(axis=-1).detach().numpy() - train_y
+    pred_y = model(
+        torch.from_numpy(element_inds.flatten()),
+        torch.from_numpy(descriptors.reshape((-1, descriptors.shape[-1])))
+    )
+    error_y = pred_y.reshape(descriptors.shape[:2]).sum(axis=1).detach().numpy() - train_y
     mae_trained = np.abs(error_y).mean()
     assert mae_trained < mae_untrained
 
 
-def test_calculator(descriptors, soap, train_set):
+def test_calculator(elements, descriptors, soap, train_set):
     # Scale the input and outputs
-    train_y = np.array([a.get_potential_energy() for a in train_set])
-    train_y -= train_y.mean()
+    train_y = np.array([a.get_potential_energy() for a in train_set]).astype(np.float32)
+    offset_y = train_y.mean()
+    scale_y = train_y.std()
+    train_y -= offset_y
+    train_y /= scale_y
 
     offset_x = descriptors.mean(axis=(0, 1))
     scale_x = np.clip(descriptors.std(axis=(0, 1)), a_min=1e-6, a_max=None)
     descriptors = (descriptors - offset_x) / scale_x
 
     # Assemble and train for a few instances so that we get nonzero forces
-    model = make_gpr_model(descriptors, 32)
-    train_model(model, descriptors, train_y, 32)
+    model = make_gpr_model(len(np.unique(elements)), descriptors, 32)
+    train_model(model, elements, descriptors, train_y, 32)
 
     # Make the calculator
     calc = DScribeLocalCalculator(
         model=model,
         desc=soap,
         desc_scaling=(offset_x, scale_x),
+        energy_scaling=(offset_y / len(train_set[0]), scale_y / len(train_set[0])),
+        element_list=[1, 8],
     )
-    energies = []
+    true_energies = []
+    pred_energies = []
     for atoms in train_set:
+        # Get the true result
+        true_energies.append(atoms.get_potential_energy())
+
+        # Test the potential
         atoms.calc = calc
         forces = atoms.get_forces()
-        energies.append(atoms.get_potential_energy())
+        pred_energies.append(atoms.get_potential_energy())
         numerical_forces = calc.calculate_numerical_forces(atoms, d=1e-4)
-        assert np.isclose(forces[:, :2], numerical_forces[:, :2], rtol=5e-1).all()  # Make them agree w/i 50% (PES is not smooth)
-    assert np.std(energies) > 1e-6
+        assert np.isclose(forces[:, :2], numerical_forces[:, :2], atol=1e-2).all()
+    assert np.std(pred_energies) > 1e-6
+    assert np.abs(np.subtract(pred_energies, true_energies)).mean() < 0.1
 
 
 def test_model(soap, train_set):
@@ -93,7 +122,7 @@ def test_model(soap, train_set):
     model = DScribeLocalEnergyModel(
         reference=train_set[0],
         descriptors=soap,
-        model_fn=lambda x: make_gpr_model(x, num_inducing_points=32),
+        model_fn=lambda x: make_gpr_model(2, x, num_inducing_points=32),  # Two element types (H, O) in the training set
        num_calculators=4,
    )
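
For readers trying out the new interface, the per-element dispatch and the element-index convention can be exercised outside of jitterbug. The snippet below is an illustrative sketch, not part of the patch: ToySubmodel is a hypothetical stand-in for InducedKernelGPR, but the PerElementModel routing and the np.unique-based mapping from atomic numbers to 0-based model indices mirror the code above (the same ordering a calculator's element_list gets from np.unique(elements).tolist()).

    import numpy as np
    import torch


    class ToySubmodel(torch.nn.Module):
        """Hypothetical stand-in for InducedKernelGPR: descriptors -> per-atom energy"""

        def __init__(self, n_desc: int):
            super().__init__()
            self.linear = torch.nn.Linear(n_desc, 1)

        def forward(self, x: torch.Tensor) -> torch.Tensor:
            return self.linear(x).squeeze(-1)


    class PerElementModel(torch.nn.Module):
        """Route each atom to the submodel for its element, as in the patch"""

        def __init__(self, models: list[torch.nn.Module]):
            super().__init__()
            self.models = torch.nn.ModuleList(models)

        def forward(self, element: torch.Tensor, x: torch.Tensor) -> torch.Tensor:
            output = torch.zeros(x.shape[0], dtype=x.dtype, device=x.device)
            for i, model in enumerate(self.models):
                mask = element == i  # Expects 0-based element indices, not atomic numbers
                output[mask] = model(x[mask, :])
            return output


    # Map atomic numbers to 0-based indices; np.unique sorts, so the ordering
    # matches an element_list built with np.unique(...).tolist()
    atomic_numbers = np.array([8, 1, 1])  # One water molecule
    elem_inds = np.searchsorted(np.unique(atomic_numbers), atomic_numbers)  # [1, 0, 0]

    model = PerElementModel([ToySubmodel(4) for _ in np.unique(atomic_numbers)])
    desc = torch.randn(3, 4)  # One 4-dimensional descriptor vector per atom
    energies = model(torch.from_numpy(elem_inds), desc)
    print(energies.shape)  # torch.Size([3]): one energy per atom, in the original atom order

The masked scatter keeps the per-atom outputs aligned with the input ordering, which is what lets the calculator sum them into a total energy without any reindexing.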