From 7b9bba85036f2d49f4639a66d6a08261f0ba89cb Mon Sep 17 00:00:00 2001 From: James Beilsten-Edmands <30625594+jbeilstenedmands@users.noreply.github.com> Date: Wed, 25 Sep 2024 15:44:35 +0100 Subject: [PATCH] Suggested modifications --- src/dials/algorithms/correlation/analysis.py | 21 ++- .../algorithms/symmetry/cosym/__init__.py | 136 +++++++----------- src/dials/algorithms/symmetry/cosym/target.py | 30 ++-- src/dials/command_line/correlation_matrix.py | 5 +- 4 files changed, 90 insertions(+), 102 deletions(-) diff --git a/src/dials/algorithms/correlation/analysis.py b/src/dials/algorithms/correlation/analysis.py index b7461f046a..10e96bebdb 100644 --- a/src/dials/algorithms/correlation/analysis.py +++ b/src/dials/algorithms/correlation/analysis.py @@ -12,6 +12,7 @@ import iotbx.phil from dxtbx.model import ExperimentList +from libtbx import Auto from libtbx.phil import scope_extract from scitbx.array_family import flex @@ -49,6 +50,15 @@ """, process_includes=True, ) +phil_overrides = phil_scope.fetch( + source=iotbx.phil.parse( + """\ +cc_weights=sigma +weights=standard_error +""" + ) +) +working_phil = phil_scope.fetch(sources=[phil_overrides]) class CorrelationMatrix: @@ -125,9 +135,8 @@ def __init__( # If dimensions are optimised for clustering, need cc_weights=sigma # Otherwise results end up being nonsensical even for high-quality data # Outlier rejection was also found to be beneficial for optimising clustering dimensionality - if self.params.clustering.optimise_dimensions: - self.params.cc_weights = "sigma" - self.params.clustering.outlier_rejection = True + if self.params.dimensions is Auto and self.params.cc_weights != "sigma": + raise ValueError("To optimise dimensions, cc_weights=sigma is required.") self.cosym_analysis = CosymAnalysis(self.datasets, self.params) @@ -189,7 +198,11 @@ def calculate_matrices(self): self.cosym_analysis._intialise_target() # Cosym proceedures to calculate the cos-angle matrix - self.cosym_analysis._determine_dimensions() + if self.params.dimensions is Auto: + self.cosym_analysis._determine_dimensions( + self.cosym_analysis.number_of_datasets, + outlier_rejection=self.params.outlier_rejection, + ) self.cosym_analysis._optimise( self.cosym_analysis.params.minimization.engine, max_iterations=self.cosym_analysis.params.minimization.max_iterations, diff --git a/src/dials/algorithms/symmetry/cosym/__init__.py b/src/dials/algorithms/symmetry/cosym/__init__.py index 9db18bfe59..88779dd8a6 100644 --- a/src/dials/algorithms/symmetry/cosym/__init__.py +++ b/src/dials/algorithms/symmetry/cosym/__init__.py @@ -74,17 +74,6 @@ .type = int(value_min=2) .short_caption = "Dimensions" -clustering - .short_caption = "Options for clustering behaviour" -{ - optimise_dimensions = False - .type = bool - .short_caption = "Set to True for optimisation of dimensions during dataset clustering" - outlier_rejection = False - .type = bool - .short_caption = "Set to True for outlier rejection when optimising dimensions during dataset clustering" -} - use_curvatures = True .type = bool .short_caption = "Use curvatures" @@ -252,8 +241,6 @@ def _map_space_group_to_input_cell(intensities, space_group): else: params.nproc = 1 - self.number_of_datasets = len(intensities) - def _intialise_target(self): if self.params.dimensions is Auto: dimensions = None @@ -276,94 +263,76 @@ def _intialise_target(self): weights=self.params.weights, cc_weights=self.params.cc_weights, nproc=self.params.nproc, - outlier_rejection=self.params.clustering.outlier_rejection, ) - def _determine_dimensions(self): - if ( - self.params.dimensions is Auto - and self.target.dim == 2 - and not self.params.clustering.optimise_dimensions - ): - self.params.dimensions = 2 - - elif self.params.dimensions is Auto: - if self.params.clustering.optimise_dimensions: - dims_to_test = self.number_of_datasets - else: - dims_to_test = self.target.dim - - logger.info("=" * 80) - logger.info( - "\nAutomatic determination of number of dimensions for analysis" + def _determine_dimensions(self, dims_to_test, outlier_rejection=False): + + logger.info("=" * 80) + logger.info("\nAutomatic determination of number of dimensions for analysis") + dimensions = [] + functional = [] + for dim in range(1, dims_to_test + 1): + logger.debug("Testing dimension: %i", dim) + self.target.set_dimensions(dim) + max_calls = self.params.minimization.max_calls + self._optimise( + self.params.minimization.engine, + max_iterations=self.params.minimization.max_iterations, + max_calls=min(20, max_calls) if max_calls else max_calls, ) - dimensions = [] - functional = [] - for dim in range(1, dims_to_test + 1): - logger.debug("Testing dimension: %i", dim) - self.target.set_dimensions(dim) - max_calls = self.params.minimization.max_calls - self._optimise( - self.params.minimization.engine, - max_iterations=self.params.minimization.max_iterations, - max_calls=min(20, max_calls) if max_calls else max_calls, - ) - if self.params.clustering.optimise_dimensions: - score = self.target.compute_functional( - self.minimizer.x, score_mode=True - ) - else: - score = self.minimizer.fun + dimensions.append(dim) + functional.append( + self.target.compute_functional_score_for_dimension_assessment( + self.minimizer.x, outlier_rejection + ) + ) - dimensions.append(dim) - functional.append(score) + # Find the elbow point of the curve, in the same manner as that used by + # distl spotfinder for resolution method 1 (Zhang et al 2006). + # See also dials/algorithms/spot_finding/per_image_analysis.py - # Find the elbow point of the curve, in the same manner as that used by - # distl spotfinder for resolution method 1 (Zhang et al 2006). - # See also dials/algorithms/spot_finding/per_image_analysis.py + x = np.array(dimensions) + y = np.array(functional) + slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1]) + p_m = slopes.argmin() - x = np.array(dimensions) - y = np.array(functional) - slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1]) - p_m = slopes.argmin() + x1 = matrix.col((x[p_m], y[p_m])) + x2 = matrix.col((x[-1], y[-1])) - x1 = matrix.col((x[p_m], y[p_m])) - x2 = matrix.col((x[-1], y[-1])) + gaps = [] + v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize() - gaps = [] - v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize() + for i in range(p_m, len(x)): + x0 = matrix.col((x[i], y[i])) + r = x1 - x0 + g = abs(v.dot(r)) + gaps.append(g) - for i in range(p_m, len(x)): - x0 = matrix.col((x[i], y[i])) - r = x1 - x0 - g = abs(v.dot(r)) - gaps.append(g) + p_g = np.array(gaps).argmax() - p_g = np.array(gaps).argmax() + x_g = x[p_g + p_m] - x_g = x[p_g + p_m] + logger.info( + dials.util.tabulate( + zip(dimensions, functional), headers=("Dimensions", "Functional") + ) + ) + logger.info("Best number of dimensions: %i", x_g) + if int(x_g) < 2: logger.info( - dials.util.tabulate( - zip(dimensions, functional), headers=("Dimensions", "Functional") - ) + "As a minimum of 2-dimensions is required, dimensions have been set to 2." ) - - # Dimensions must be >= 2 for clustering - logger.info("Best number of dimensions: %i", x_g) - if self.params.clustering.optimise_dimensions and int(x_g) < 2: - logger.info( - "As a minimum of 2-dimensions is required for cosine-angle clustering, dimensions have been set to 2." - ) - self.target.set_dimensions(2) - else: - self.target.set_dimensions(int(x_g)) - logger.info("Using %i dimensions for analysis", self.target.dim) + self.target.set_dimensions(2) + else: + self.target.set_dimensions(int(x_g)) + logger.info("Using %i dimensions for analysis", self.target.dim) def run(self): self._intialise_target() - self._determine_dimensions() + if self.params.dimensions is Auto and self.target.dim != 2: + self._determine_dimensions(self.target.dim) self._optimise( self.params.minimization.engine, max_iterations=self.params.minimization.max_iterations, @@ -375,6 +344,7 @@ def run(self): @Subject.notify_event(event="optimised") def _optimise(self, engine, max_iterations=None, max_calls=None): + np.random.seed(self.params.seed) NN = len(set(self.dataset_ids)) n_sym_ops = len(self.target.sym_ops) diff --git a/src/dials/algorithms/symmetry/cosym/target.py b/src/dials/algorithms/symmetry/cosym/target.py index 5334049a86..863bdeb660 100644 --- a/src/dials/algorithms/symmetry/cosym/target.py +++ b/src/dials/algorithms/symmetry/cosym/target.py @@ -164,7 +164,6 @@ def __init__( dimensions=None, nproc=1, cc_weights=None, - outlier_rejection=False, ): r"""Initialise a Target object. @@ -245,8 +244,6 @@ def __init__( else: self.rij_matrix, self.wij_matrix = self._compute_rij_wij() - self.outlier_rejection = outlier_rejection - def set_dimensions(self, dimensions): """Set the number of dimensions for analysis. @@ -458,7 +455,7 @@ def sample_size(x, y): return rij, wij - def compute_functional(self, x: np.ndarray, score_mode=False) -> float: + def compute_functional(self, x: np.ndarray) -> float: """Compute the target function at coordinates `x`. Args: @@ -475,18 +472,23 @@ def compute_functional(self, x: np.ndarray, score_mode=False) -> float: elements = np.square(self.rij_matrix - x.T @ x) if self.wij_matrix is not None: np.multiply(self.wij_matrix, elements, out=elements) - if score_mode: - if self.outlier_rejection: - q1, q2, q3 = np.quantile(elements, (0.25, 0.5, 0.75)) - inliers = elements[elements < q2 + (q3 - q1)] - else: - inliers = elements - else: - inliers = elements - - f = 0.5 * inliers.sum() + f = 0.5 * elements.sum() return f + def compute_functional_score_for_dimension_assessment( + self, x: np.ndarray, outlier_rejection: bool = True + ) -> float: + if not outlier_rejection: + return self.compute_functional(x) + x = x.reshape((self.dim, x.size // self.dim)) + elements = np.square(self.rij_matrix - x.T @ x) + if self.wij_matrix is not None: + np.multiply(self.wij_matrix, elements, out=elements) + + q1, q2, q3 = np.quantile(elements, (0.25, 0.5, 0.75)) + inliers = elements[elements < q2 + (q3 - q1)] + return 0.5 * inliers.sum() + def compute_gradients_fd(self, x: np.ndarray, eps=1e-6) -> np.ndarray: """Compute the gradients at coordinates `x` using finite differences. diff --git a/src/dials/command_line/correlation_matrix.py b/src/dials/command_line/correlation_matrix.py index d19c78c65e..3e68c46efe 100644 --- a/src/dials/command_line/correlation_matrix.py +++ b/src/dials/command_line/correlation_matrix.py @@ -23,11 +23,14 @@ phil_scope = iotbx.phil.parse( """\ -include scope dials.algorithms.correlation.analysis.phil_scope +include scope dials.algorithms.correlation.analysis.working_phil seed = 42 .type = int(value_min=0) +outlier_rejection = True + .type = bool + .help = "Use outlier rejection when optimising dimensions of analysis." output { log = dials.correlation_matrix.log .type = str