Skip to content

Commit

Permalink
Suggested modifications
Browse files Browse the repository at this point in the history
  • Loading branch information
jbeilstenedmands committed Sep 25, 2024
1 parent 71ca53a commit 7b9bba8
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 102 deletions.
21 changes: 17 additions & 4 deletions src/dials/algorithms/correlation/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import iotbx.phil
from dxtbx.model import ExperimentList
from libtbx import Auto
from libtbx.phil import scope_extract
from scitbx.array_family import flex

Expand Down Expand Up @@ -49,6 +50,15 @@
""",
process_includes=True,
)
phil_overrides = phil_scope.fetch(
source=iotbx.phil.parse(
"""\
cc_weights=sigma
weights=standard_error
"""
)
)
working_phil = phil_scope.fetch(sources=[phil_overrides])


class CorrelationMatrix:
Expand Down Expand Up @@ -125,9 +135,8 @@ def __init__(
# If dimensions are optimised for clustering, need cc_weights=sigma
# Otherwise results end up being nonsensical even for high-quality data
# Outlier rejection was also found to be beneficial for optimising clustering dimensionality
if self.params.clustering.optimise_dimensions:
self.params.cc_weights = "sigma"
self.params.clustering.outlier_rejection = True
if self.params.dimensions is Auto and self.params.cc_weights != "sigma":
raise ValueError("To optimise dimensions, cc_weights=sigma is required.")

self.cosym_analysis = CosymAnalysis(self.datasets, self.params)

Expand Down Expand Up @@ -189,7 +198,11 @@ def calculate_matrices(self):
self.cosym_analysis._intialise_target()

# Cosym proceedures to calculate the cos-angle matrix
self.cosym_analysis._determine_dimensions()
if self.params.dimensions is Auto:
self.cosym_analysis._determine_dimensions(
self.cosym_analysis.number_of_datasets,
outlier_rejection=self.params.outlier_rejection,
)
self.cosym_analysis._optimise(
self.cosym_analysis.params.minimization.engine,
max_iterations=self.cosym_analysis.params.minimization.max_iterations,
Expand Down
136 changes: 53 additions & 83 deletions src/dials/algorithms/symmetry/cosym/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,6 @@
.type = int(value_min=2)
.short_caption = "Dimensions"
clustering
.short_caption = "Options for clustering behaviour"
{
optimise_dimensions = False
.type = bool
.short_caption = "Set to True for optimisation of dimensions during dataset clustering"
outlier_rejection = False
.type = bool
.short_caption = "Set to True for outlier rejection when optimising dimensions during dataset clustering"
}
use_curvatures = True
.type = bool
.short_caption = "Use curvatures"
Expand Down Expand Up @@ -252,8 +241,6 @@ def _map_space_group_to_input_cell(intensities, space_group):
else:
params.nproc = 1

self.number_of_datasets = len(intensities)

def _intialise_target(self):
if self.params.dimensions is Auto:
dimensions = None
Expand All @@ -276,94 +263,76 @@ def _intialise_target(self):
weights=self.params.weights,
cc_weights=self.params.cc_weights,
nproc=self.params.nproc,
outlier_rejection=self.params.clustering.outlier_rejection,
)

def _determine_dimensions(self):
if (
self.params.dimensions is Auto
and self.target.dim == 2
and not self.params.clustering.optimise_dimensions
):
self.params.dimensions = 2

elif self.params.dimensions is Auto:
if self.params.clustering.optimise_dimensions:
dims_to_test = self.number_of_datasets
else:
dims_to_test = self.target.dim

logger.info("=" * 80)
logger.info(
"\nAutomatic determination of number of dimensions for analysis"
def _determine_dimensions(self, dims_to_test, outlier_rejection=False):

logger.info("=" * 80)
logger.info("\nAutomatic determination of number of dimensions for analysis")
dimensions = []
functional = []
for dim in range(1, dims_to_test + 1):
logger.debug("Testing dimension: %i", dim)
self.target.set_dimensions(dim)
max_calls = self.params.minimization.max_calls
self._optimise(
self.params.minimization.engine,
max_iterations=self.params.minimization.max_iterations,
max_calls=min(20, max_calls) if max_calls else max_calls,
)
dimensions = []
functional = []
for dim in range(1, dims_to_test + 1):
logger.debug("Testing dimension: %i", dim)
self.target.set_dimensions(dim)
max_calls = self.params.minimization.max_calls
self._optimise(
self.params.minimization.engine,
max_iterations=self.params.minimization.max_iterations,
max_calls=min(20, max_calls) if max_calls else max_calls,
)

if self.params.clustering.optimise_dimensions:
score = self.target.compute_functional(
self.minimizer.x, score_mode=True
)
else:
score = self.minimizer.fun
dimensions.append(dim)
functional.append(
self.target.compute_functional_score_for_dimension_assessment(
self.minimizer.x, outlier_rejection
)
)

dimensions.append(dim)
functional.append(score)
# Find the elbow point of the curve, in the same manner as that used by
# distl spotfinder for resolution method 1 (Zhang et al 2006).
# See also dials/algorithms/spot_finding/per_image_analysis.py

# Find the elbow point of the curve, in the same manner as that used by
# distl spotfinder for resolution method 1 (Zhang et al 2006).
# See also dials/algorithms/spot_finding/per_image_analysis.py
x = np.array(dimensions)
y = np.array(functional)
slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
p_m = slopes.argmin()

x = np.array(dimensions)
y = np.array(functional)
slopes = (y[-1] - y[:-1]) / (x[-1] - x[:-1])
p_m = slopes.argmin()
x1 = matrix.col((x[p_m], y[p_m]))
x2 = matrix.col((x[-1], y[-1]))

x1 = matrix.col((x[p_m], y[p_m]))
x2 = matrix.col((x[-1], y[-1]))
gaps = []
v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()

gaps = []
v = matrix.col(((x2[1] - x1[1]), -(x2[0] - x1[0]))).normalize()
for i in range(p_m, len(x)):
x0 = matrix.col((x[i], y[i]))
r = x1 - x0
g = abs(v.dot(r))
gaps.append(g)

for i in range(p_m, len(x)):
x0 = matrix.col((x[i], y[i]))
r = x1 - x0
g = abs(v.dot(r))
gaps.append(g)
p_g = np.array(gaps).argmax()

p_g = np.array(gaps).argmax()
x_g = x[p_g + p_m]

x_g = x[p_g + p_m]
logger.info(
dials.util.tabulate(
zip(dimensions, functional), headers=("Dimensions", "Functional")
)
)

logger.info("Best number of dimensions: %i", x_g)
if int(x_g) < 2:
logger.info(
dials.util.tabulate(
zip(dimensions, functional), headers=("Dimensions", "Functional")
)
"As a minimum of 2-dimensions is required, dimensions have been set to 2."
)

# Dimensions must be >= 2 for clustering
logger.info("Best number of dimensions: %i", x_g)
if self.params.clustering.optimise_dimensions and int(x_g) < 2:
logger.info(
"As a minimum of 2-dimensions is required for cosine-angle clustering, dimensions have been set to 2."
)
self.target.set_dimensions(2)
else:
self.target.set_dimensions(int(x_g))
logger.info("Using %i dimensions for analysis", self.target.dim)
self.target.set_dimensions(2)
else:
self.target.set_dimensions(int(x_g))
logger.info("Using %i dimensions for analysis", self.target.dim)

def run(self):
self._intialise_target()
self._determine_dimensions()
if self.params.dimensions is Auto and self.target.dim != 2:
self._determine_dimensions(self.target.dim)
self._optimise(
self.params.minimization.engine,
max_iterations=self.params.minimization.max_iterations,
Expand All @@ -375,6 +344,7 @@ def run(self):

@Subject.notify_event(event="optimised")
def _optimise(self, engine, max_iterations=None, max_calls=None):
np.random.seed(self.params.seed)
NN = len(set(self.dataset_ids))
n_sym_ops = len(self.target.sym_ops)

Expand Down
30 changes: 16 additions & 14 deletions src/dials/algorithms/symmetry/cosym/target.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,6 @@ def __init__(
dimensions=None,
nproc=1,
cc_weights=None,
outlier_rejection=False,
):
r"""Initialise a Target object.
Expand Down Expand Up @@ -245,8 +244,6 @@ def __init__(
else:
self.rij_matrix, self.wij_matrix = self._compute_rij_wij()

self.outlier_rejection = outlier_rejection

def set_dimensions(self, dimensions):
"""Set the number of dimensions for analysis.
Expand Down Expand Up @@ -458,7 +455,7 @@ def sample_size(x, y):

return rij, wij

def compute_functional(self, x: np.ndarray, score_mode=False) -> float:
def compute_functional(self, x: np.ndarray) -> float:
"""Compute the target function at coordinates `x`.
Args:
Expand All @@ -475,18 +472,23 @@ def compute_functional(self, x: np.ndarray, score_mode=False) -> float:
elements = np.square(self.rij_matrix - x.T @ x)
if self.wij_matrix is not None:
np.multiply(self.wij_matrix, elements, out=elements)
if score_mode:
if self.outlier_rejection:
q1, q2, q3 = np.quantile(elements, (0.25, 0.5, 0.75))
inliers = elements[elements < q2 + (q3 - q1)]
else:
inliers = elements
else:
inliers = elements

f = 0.5 * inliers.sum()
f = 0.5 * elements.sum()
return f

def compute_functional_score_for_dimension_assessment(
self, x: np.ndarray, outlier_rejection: bool = True
) -> float:
if not outlier_rejection:
return self.compute_functional(x)
x = x.reshape((self.dim, x.size // self.dim))
elements = np.square(self.rij_matrix - x.T @ x)
if self.wij_matrix is not None:
np.multiply(self.wij_matrix, elements, out=elements)

q1, q2, q3 = np.quantile(elements, (0.25, 0.5, 0.75))
inliers = elements[elements < q2 + (q3 - q1)]
return 0.5 * inliers.sum()

def compute_gradients_fd(self, x: np.ndarray, eps=1e-6) -> np.ndarray:
"""Compute the gradients at coordinates `x` using finite differences.
Expand Down
5 changes: 4 additions & 1 deletion src/dials/command_line/correlation_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,14 @@

phil_scope = iotbx.phil.parse(
"""\
include scope dials.algorithms.correlation.analysis.phil_scope
include scope dials.algorithms.correlation.analysis.working_phil
seed = 42
.type = int(value_min=0)
outlier_rejection = True
.type = bool
.help = "Use outlier rejection when optimising dimensions of analysis."
output {
log = dials.correlation_matrix.log
.type = str
Expand Down

0 comments on commit 7b9bba8

Please sign in to comment.