Implement resolution fitting on cc_half significance level
jbeilstenedmands committed Jan 18, 2024
1 parent 7b19b5f commit 682cac2
Showing 4 changed files with 79 additions and 10 deletions.
1 change: 1 addition & 0 deletions newsfragments/XXX.feature
@@ -0,0 +1 @@
``dials.estimate_resolution``: Add resolution estimate based on limit of cc1/2 significance.
5 changes: 3 additions & 2 deletions src/dials/algorithms/scaling/scaling_library.py
@@ -493,6 +493,7 @@ def merging_stats_from_scaled_array(
use_internal_variance=False,
anomalous=True,
additional_stats=False,
cc_one_half_significance_level=0.01,
):
"""Calculate the normal and anomalous merging statistics."""

@@ -509,7 +510,7 @@ def merging_stats_from_scaled_array(
sigma_filtering=None,
eliminate_sys_absent=False,
use_internal_variance=use_internal_variance,
-cc_one_half_significance_level=0.01,
+cc_one_half_significance_level=cc_one_half_significance_level,
additional_stats=additional_stats,
)
except (RuntimeError, Sorry) as e:
@@ -536,7 +537,7 @@ def merging_stats_from_scaled_array(
n_bins=n_bins,
anomalous=True,
sigma_filtering=None,
-cc_one_half_significance_level=0.01,
+cc_one_half_significance_level=cc_one_half_significance_level,
eliminate_sys_absent=False,
use_internal_variance=use_internal_variance,
additional_stats=additional_stats,
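
For illustration only (this sketch is not in the commit): a minimal usage example of the new keyword argument. It assumes the function's leading arguments, which are not visible in these hunks, are the scaled miller array and n_bins, and that the normal and anomalous statistics objects are returned.

from dials.algorithms.scaling.scaling_library import merging_stats_from_scaled_array

# Relax the CC1/2 significance test from the default 0.01 to 0.05; the same level
# is now forwarded to both dataset_statistics calls patched above.
stats, anom_stats = merging_stats_from_scaled_array(
    scaled_array,  # an existing scaled cctbx miller array (assumed)
    n_bins=20,
    cc_one_half_significance_level=0.05,
)
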
74 changes: 68 additions & 6 deletions src/dials/util/resolution_analysis.py
@@ -11,6 +11,8 @@

import numpy as np
import scipy.optimize
from scipy.optimize import curve_fit
from scipy.special import expit

import iotbx.merging_statistics
import iotbx.mtz
@@ -43,6 +45,7 @@ class metrics(enum.Enum):
I_MEAN_OVER_SIGMA_MEAN = "i_mean_over_sigi_mean"
RMERGE = "r_merge"
COMPLETENESS = "completeness"
CC_HALF_SIGNIFICANCE_LEVEL = "cc_half_significance_level"


def polynomial_fit(x, y, degree=5, n_obs=None):
@@ -248,6 +251,53 @@ def _get_cc_half_critical_values(merging_stats, cc_half_method):
return flex.double(critical).reversed()


def resolution_cc_half_significance(merging_stats, cc_half_method="half_dataset"):
    """Fit the crossover from statistically significant to not significant cc half"""
    metric = "cc_one_half_sigma_tau" if cc_half_method == "sigma_tau" else "cc_one_half"
    critical_values = _get_cc_half_critical_values(merging_stats, cc_half_method)
    y_obs = flex.double(getattr(b, metric) for b in merging_stats.bins).reversed()
    d_star_sq = flex.double(
        uctbx.d_as_d_star_sq(b.d_min) for b in merging_stats.bins
    ).reversed()

    data = [0 if c > y else 1 for c, y in zip(critical_values, y_obs)]
    y_fit = None
    if all(data):
        d_min = 1.0 / math.sqrt(flex.max(d_star_sq))
    elif not any(data):
        d_min = None
    else:
        # fit a logistic curve to find when crosses over threshold.

        def f(x, r, res):
            xpr = r * (x - res)
            return 1.0 - expit(xpr)

        start_res = None
        for x, v in zip(d_star_sq, data):
            if not v:
                start_res = x  # finds the highest d-value where cc1/2 not significant.
        assert start_res
        params = [100.0, start_res]  # try to use a sensible starting point
        try:
            fit_params, _ = curve_fit(f, d_star_sq, data, params)
        except Exception as e:
            logger.debug(f"Error fitting curve: {e}")
            d_min = None
        else:
            if fit_params[0] < 0:  # fitted the 'wrong way', not reliable!
                d_min = None
            elif (
                fit_params[1] <= 0.0
            ):  # negative resolution limit! something went badly wrong.
                d_min = None
            else:
                d_min = 1.0 / math.sqrt(fit_params[1])
                y_fit = flex.double(f(d_star_sq, fit_params[0], fit_params[1]))

    return ResolutionResult(d_star_sq, flex.double(data), y_fit, d_min)


def resolution_cc_half(
merging_stats, limit, cc_half_method="half_dataset", model=tanh_fit
):
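
For illustration only (this sketch is not in the commit): a self-contained example of the logistic-crossover fit that resolution_cc_half_significance performs, on invented data. Bins are flagged 1 while cc1/2 exceeds its critical value and 0 once it does not; fitting 1 - expit(r * (x - res)) to those flags gives res, the crossover in d*², which converts back to a d-spacing.

import math

import numpy as np
from scipy.optimize import curve_fit
from scipy.special import expit


def f(x, r, res):
    # logistic step from 1 (significant) down to 0 (not significant), centred at x = res
    return 1.0 - expit(r * (x - res))


# Invented bins in d*^2 = 1/d^2; cc1/2 is taken to stop being significant beyond ~0.5 (d ~ 1.4 Å)
d_star_sq = np.linspace(0.05, 0.6, 20)
data = np.array([0.0 if x > 0.5 else 1.0 for x in d_star_sq])  # 1 = significant bin

# sensible starting point: the first bin that fails the significance test
start_res = min(x for x, v in zip(d_star_sq, data) if not v)

try:
    fit_params, _ = curve_fit(f, d_star_sq, data, p0=[100.0, start_res])
except RuntimeError as e:
    print(f"fit failed: {e}")
else:
    r, res = fit_params
    if r > 0 and res > 0:
        print(f"significance crossover at d_min ≈ {1.0 / math.sqrt(res):.2f} Å")
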
@@ -446,6 +496,7 @@ def plot_result(metric, result):
metrics.I_MEAN_OVER_SIGMA_MEAN: "<I>/<σ(I)>",
metrics.RMERGE: "R<sub>merge</sub> ",
metrics.COMPLETENESS: "Completeness",
metrics.CC_HALF_SIGNIFICANCE_LEVEL: "CC<sub>½</sub> significance",
}
d_star_sq_tickvals, d_star_sq_ticktext = plots.d_star_sq_to_d_ticks(
result.d_star_sq, 5
@@ -591,15 +642,21 @@ def from_reflections_and_experiments(cls, reflection_tables, experiments, params
variances = flex.double()
for table in reflection_tables:
if "intensity.scale.value" in table:
-table = filter_reflection_table(
-    table, ["scale"], partiality_threshold=0.4
-)
+try:
+    table = filter_reflection_table(
+        table, ["scale"], partiality_threshold=0.4
+    )
+except ValueError:
+    continue
intensities.extend(table["intensity.scale.value"])
variances.extend(table["intensity.scale.variance"])
else:
-table = filter_reflection_table(
-    table, ["profile"], partiality_threshold=0.4
-)
+try:
+    table = filter_reflection_table(
+        table, ["profile"], partiality_threshold=0.4
+    )
+except ValueError:
+    continue
intensities.extend(table["intensity.prf.value"])
variances.extend(table["intensity.prf.variance"])
indices.extend(table["miller_index"])
@@ -637,6 +694,11 @@ def resolution(self, metric, limit=None):
if self._params.cc_half_fit == "tanh"
else polynomial_fit,
)
elif metric == metrics.CC_HALF_SIGNIFICANCE_LEVEL:
return resolution_cc_half_significance(
self._merging_statistics,
cc_half_method=self._params.cc_half_method,
)
elif metric == metrics.CC_REF:
return self._resolution_cc_ref(limit=self._params.cc_ref)
else:
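
For illustration only (this sketch is not in the commit): a hypothetical call-site for the new metric. It assumes resolutionizer is an existing Resolutionizer built via from_reflections_and_experiments above, and that the returned ResolutionResult exposes the fitted d_min.

from dials.util.resolution_analysis import metrics

# Dispatches to resolution_cc_half_significance(); d_min is None if no
# significance crossover could be fitted.
result = resolutionizer.resolution(metrics.CC_HALF_SIGNIFICANCE_LEVEL)
print(result.d_min)
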
9 changes: 7 additions & 2 deletions tests/command_line/test_estimate_resolution.py
@@ -47,13 +47,15 @@ def test_x4wide(input_files, dials_data, run_in_tmp_path, capsys):
"Resolution I/sig: 1.53",
"Resolution Mn(I/sig): 1.51",
"Resolution Mn(I)/Mn(sig): 1.49",
"Resolution cc_half_significance_level: 1.20",
)
for line in expected_output:
assert line in captured.out
assert run_in_tmp_path.joinpath("resolutionizer.html").is_file()
expected_keys = {
"cc_half",
"cc_ref",
"cc_half_significance_level",
"isigma",
"misigma",
"i_mean_over_sigma_mean",
@@ -77,8 +79,10 @@ def test_multi_sequence_with_batch_range(dials_data, run_in_tmp_path, capsys):
["batch_range=1900,3600", str(refls), str(expts)],
)
captured = capsys.readouterr()

-expected_output = "Resolution cc_half: 0.61"
+expected_output = (
+    "Resolution cc_half: 0.59",
+    "Resolution cc_half_significance_level: 0.59",
+)
for line in expected_output:
assert line in captured.out
assert run_in_tmp_path.joinpath("dials.estimate_resolution.html").is_file()
@@ -111,6 +115,7 @@ def test_handle_fit_failure(dials_data, run_in_tmp_path, capsys):
expected_output = (
"Resolution fit against cc_half failed: Not enough reflections for fitting",
"Resolution Mn(I/sig): 0.62",
"Resolution cc_half_significance_level: 0.62",
)
for line in expected_output:
assert line in captured.out
