From 0a41f491cddc12ac86e2929cdbe4779897ea65a3 Mon Sep 17 00:00:00 2001 From: Muammar El Khatib Date: Sun, 19 Jan 2020 12:29:53 -0800 Subject: [PATCH] Use client.submit instead of dask.compute for SVM computations Gaussian. --- ml4chem/data/visualization.py | 7 +++---- ml4chem/features/gaussian.py | 8 +++----- 2 files changed, 6 insertions(+), 9 deletions(-) diff --git a/ml4chem/data/visualization.py b/ml4chem/data/visualization.py index 9ce7a47..7fc7d71 100644 --- a/ml4chem/data/visualization.py +++ b/ml4chem/data/visualization.py @@ -5,7 +5,6 @@ import matplotlib.pyplot as plt from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error from ml4chem.data.serialization import load -import time def parity(predictions, true, scores=False, filename=None, **kwargs): @@ -30,7 +29,7 @@ def parity(predictions, true, scores=False, filename=None, **kwargs): min_val = min(true) max_val = max(true) - fig = plt.figure(figsize=(6.0, 6.0)) + fig = plt.figure(figsize=(6, 6)) ax = fig.add_subplot(111) ax.plot(true, predictions, "r.") ax.plot([min_val, max_val], [min_val, max_val], "k-", lw=0.3) @@ -284,7 +283,7 @@ def plot_atomic_features( labels = {str(axis[i]): "t-SNE-{}".format(i + 1) for i in range(len(axis))} - tsne = manifold.TSNE(n_components=dimensions) + tsne = manifold.TSNE(n_components=dimensions, perplexity=5) tsne_result = tsne.fit_transform(full_ls) @@ -324,4 +323,4 @@ def plot_atomic_features( except: pass - return plt + return plt, df diff --git a/ml4chem/features/gaussian.py b/ml4chem/features/gaussian.py index 6eace27..edffd9c 100644 --- a/ml4chem/features/gaussian.py +++ b/ml4chem/features/gaussian.py @@ -365,7 +365,6 @@ def calculate(self, images=None, purpose="training", data=None, svm=False): scaled_feature_space.append(features) - # scaled_feature_space = client.gather(scaled_feature_space) else: scaled_feature_space = [] @@ -397,13 +396,12 @@ def calculate(self, images=None, purpose="training", data=None, svm=False): # image = (hash, ase_image) -> tuple for atom in image[1]: - reference_space.append( - self.restack_atom(i, atom, scaled_feature_space) - ) + restacked_atom = client.submit(self.restack_atom, *(i, atom, scaled_feature_space)) + reference_space.append(restacked_atom) feature_space.append(restacked) - reference_space = dask.compute(*reference_space, scheduler=self.scheduler) + reference_space = client.gather(reference_space) elif svm is False and purpose == "training": for i, image in enumerate(images.items()):