From 5328db6721179b4b05f6ec555e846d1a607b516b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:41:27 +0200 Subject: [PATCH 01/22] added more possible length combinations for getting parameters --- src/pygama/math/peak_fitting.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/pygama/math/peak_fitting.py b/src/pygama/math/peak_fitting.py index 9c60e2c8c..67e07409d 100644 --- a/src/pygama/math/peak_fitting.py +++ b/src/pygama/math/peak_fitting.py @@ -918,6 +918,8 @@ def get_mu_func(func, pars, cov = None, errors=None): n_sig, mu, sigma, n_bkg, hstep = pars elif len(pars) ==7: n_sig, mu, sigma, n_bkg, hstep, low_range, high_range = pars + elif len(pars) ==8: + n_sig, mu, sigma, n_bkg, hstep, low_range, high_range, components = pars if errors is not None: return mu, errors[1] elif cov is not None: @@ -930,6 +932,8 @@ def get_mu_func(func, pars, cov = None, errors=None): n_sig, mu, sigma, htail, tau, n_bkg, hstep = pars elif len(pars) ==9: n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range = pars + elif len(pars) ==10: + n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range, components = pars if errors is not None: return mu, errors[1] elif cov is not None: @@ -948,6 +952,8 @@ def get_fwhm_func(func, pars, cov = None): n_sig, mu, sigma, n_bkg, hstep = pars elif len(pars) ==7: n_sig, mu, sigma, n_bkg, hstep, low_range, high_range = pars + elif len(pars) ==8: + n_sig, mu, sigma, n_bkg, hstep, low_range, high_range, components = pars if cov is None: return sigma*2*np.sqrt(2*np.log(2)) else: @@ -958,6 +964,12 @@ def get_fwhm_func(func, pars, cov = None): n_sig, mu, sigma, htail, tau, n_bkg, hstep = pars elif len(pars) ==9: n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range = pars + if cov is not None: + cov = cov[:7,:][:,:7] + elif len(pars) ==10: + n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range, components = pars + if cov is not None: + cov = cov[:7,:][:,:7] return radford_fwhm(sigma, htail, tau, cov) else: @@ -971,6 +983,8 @@ def get_total_events_func(func, pars, cov = None, errors=None): n_sig, mu, sigma, n_bkg, hstep = pars elif len(pars) ==7: n_sig, mu, sigma, n_bkg, hstep, low_range, high_range = pars + elif len(pars) ==8: + n_sig, mu, sigma, n_bkg, hstep, low_range, high_range, components = pars if errors is not None: return n_sig+n_bkg, np.sqrt(errors[0]**2 + errors[3]**2) elif cov is not None: @@ -983,6 +997,8 @@ def get_total_events_func(func, pars, cov = None, errors=None): n_sig, mu, sigma, htail, tau, n_bkg, hstep = pars elif len(pars) ==9: n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range = pars + elif len(pars) ==10: + n_sig, mu, sigma, htail, tau, n_bkg, hstep, low_range, high_range, components = pars if errors is not None: return n_sig+n_bkg, np.sqrt(errors[0]**2 + errors[5]**2) elif cov is not None: From fc8b1299bd365b05f9f6a92a6130007bf7938e73 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:42:50 +0200 Subject: [PATCH 02/22] updated for changes to calibrations --- src/pygama/pargen/energy_optimisation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 1354f74cf..893a0a93f 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -1038,11 +1038,12 @@ def event_selection( e_upper_lim = peak_loc + (1.5 * kev_width[1]) / rough_adc_to_kev e_ranges = (int(peak_loc - e_lower_lim), int(e_upper_lim - peak_loc)) - params, errors, covs, bins, ranges, p_val, valid_pks = pgc.hpge_fit_E_peaks( + params, errors, covs, bins, ranges, p_val, valid_pks, pk_funcs = pgc.hpge_fit_E_peaks( energy, [peak_loc], [e_ranges], n_bins=(np.nanmax(energy) - np.nanmin(energy)) // 1, + uncal_is_int=True ) if params[0] is None: log.debug("Fit failed, using max guess") From 56136ff2f0beee0447ca5a6571ebf794d3c11b5f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:44:28 +0200 Subject: [PATCH 03/22] rewrote fitting to fit in stages dropping tail if unnecessary with prior on tail to remove degeneracy, added check on guess so no guess params are none, bounded mu to be in fit range, improved validity checks --- src/pygama/pargen/energy_cal.py | 294 +++++++++++++++++++++++--------- 1 file changed, 215 insertions(+), 79 deletions(-) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 6f520505c..8a54f3bfd 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -18,6 +18,7 @@ import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf import pygama.math.utils as pgu +from pygama.pargen.utils import * log = logging.getLogger(__name__) @@ -246,7 +247,7 @@ def hpge_fit_E_peak_tops( return np.array(pars_list, dtype=object), np.array(cov_list, dtype=object) -def get_hpge_E_peak_par_guess(hist, bins, var, func): +def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): """Get parameter guesses for func fit to peak in hist Parameters @@ -263,7 +264,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func): or func == pgf.extended_gauss_step_pdf ): # get mu and height from a gauss fit, also sigma as fallback - pars, cov = pgf.gauss_mode_width_max(hist, bins, var) + pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10) bin_centres = pgh.get_bin_centers(bins) if pars is None: log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed") @@ -316,7 +317,13 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func): n_bkg = np.sum(hist) - n_sig hstep = step / (bg + np.mean(hist[:10])) - return [n_sig, mu, sigma / 2, n_bkg, hstep, bins[0], bins[-1], 0] + + parguess = [n_sig, mu, sigma / 2, n_bkg, hstep, bins[0], bins[-1], 0] + for i, guess in enumerate(parguess): + if np.isnan(guess): + parguess[i]=0 + + return parguess if ( func == pgf.radford_cdf @@ -324,7 +331,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func): or func == pgf.extended_radford_pdf ): # guess mu, height - pars, cov = pgf.gauss_mode_width_max(hist, bins, var) + pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10) bin_centres = pgh.get_bin_centers(bins) if pars is None: log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed") @@ -386,6 +393,10 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func): parguess = [n_sig, mu, sigma, htail, tau, n_bkg, hstep, bins[0], bins[-1], 0] + for i, guess in enumerate(parguess): + if np.isnan(guess): + parguess[i]=0 + return parguess else: @@ -422,7 +433,7 @@ def get_hpge_E_fixed(func): return None -def get_hpge_E_bounds(func): +def get_hpge_E_bounds(func, parguess): if ( func == pgf.radford_cdf or func == pgf.radford_pdf @@ -430,7 +441,7 @@ def get_hpge_E_bounds(func): ): return [ (0, None), - (None, None), + (parguess[-3], parguess[-2]), (0, None), (0, 1), (None, None), @@ -448,7 +459,7 @@ def get_hpge_E_bounds(func): ): return [ (0, None), - (None, None), + (parguess[-3], parguess[-2]), (0, None), (0, None), (-1, 1), @@ -461,6 +472,98 @@ def get_hpge_E_bounds(func): log.error(f"get_hpge_E_bounds not implemented for {func.__name__}") return [] +class tail_prior: + """ + Generic least-squares cost function with error. + """ + verbose=0 + errordef = Minuit.LIKELIHOOD # for Minuit to compute errors correctly + + def __init__(self, data, model): + self.model = model # model predicts y for given x + self.data=data + #self.x = np.asarray(x) + + def _call(self, *pars): + return self.__call__( *pars[0]) + + def __call__(self, n_sig, mu, sigma, htail, + tau, n_bkg, hstep, + lower_range ,upper_range, components): + return 100 * np.log(htail+0.1) #len(self.data)/ + +def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess): + par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i, mode_guess) + bounds = get_hpge_E_bounds(func_i, par_guesses) + fixed, mask = get_hpge_E_fixed(func_i) + + if func_i == pgf.extended_radford_pdf or func_i == pgf.radford_pdf: + cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) +tail_prior(energies, func_i) + m = Minuit(cost_func, *par_guesses) + m.limits = bounds + for fix in fixed: + m.fixed[fix] = True + + m.values["htail"] = 0 + m.values["tau"] = 0 + m.fixed["htail"] = True + m.fixed["tau"] = True + if simplex == True: + m.simplex().migrad() + else: + m.migrad() + try: + #set htail to guess + m.values["htail"] = par_guesses[3] + m.values["tau"] = par_guesses[4] + m.fixed = False + for fix in fixed: + m.fixed[fix] = True + + if simplex == True: + m.simplex().migrad() + else: + m.migrad() + m.hesse() + pars_i = m.values + errs_i = m.errors + cov_i = m.covariance + valid_fit = m.valid + if valid_fit == False: + raise RuntimeError + except: + func_i = pgf.extended_gauss_step_pdf + gof_func_i = pgf.gauss_step_pdf + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, + func_i, gof_func_i, simplex, mode_guess) + + #check htail + if m.values["htail"]<0.01 or m.values["htail"]<2*m.errors["htail"] or np.isnan(m.values).any():# or + func_i = pgf.extended_gauss_step_pdf + gof_func_i = pgf.gauss_step_pdf + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, + func_i, gof_func_i, simplex, mode_guess) + + else: + cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) + m = Minuit(cost_func, *par_guesses) + m.limits = bounds + for fix in fixed: + m.fixed[fix] = True + if simplex == True: + m.simplex().migrad() + else: + m.migrad() + + m.hesse() + + pars_i = m.values + errs_i = m.errors + cov_i = m.covariance + + valid_fit = m.valid + + return pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit def hpge_fit_E_peaks( E_uncal, @@ -512,15 +615,16 @@ def hpge_fit_E_peaks( ranges: list of array a list of [Euc_min, Euc_max] used for each peak fit """ - pars = [] - covs = [] - binws = [] - ranges = [] - errors = [] - p_vals = [] - valid_pks = [] - - for i_peak in range(len(mode_guesses)): + pars = np.zeros(len(mode_guesses), dtype='object') + errors = np.zeros(len(mode_guesses), dtype='object') + covs = np.zeros(len(mode_guesses), dtype='object') + binws = np.zeros(len(mode_guesses)) + ranges = np.zeros(len(mode_guesses), dtype='object') + p_vals = np.zeros(len(mode_guesses)) + valid_pks = np.zeros(len(mode_guesses),dtype=bool) + out_funcs= np.zeros(len(mode_guesses), dtype='object') + + for i_peak, mode_guess in enumerate(mode_guesses): # get args for this peak wwidth_i = wwidths if not isinstance(wwidths, list) else wwidths[i_peak] n_bins_i = n_bins if np.isscalar(n_bins) else n_bins[i_peak] @@ -538,40 +642,51 @@ def hpge_fit_E_peaks( # bin a histogram Euc_min = mode_guesses[i_peak] - wleft_i Euc_max = mode_guesses[i_peak] + wright_i - Euc_min, Euc_max, n_bins_i = pgh.better_int_binning( - x_lo=Euc_min, x_hi=Euc_max, n_bins=n_bins_i - ) + if uncal_is_int ==True: + Euc_min, Euc_max, n_bins_i = pgh.better_int_binning( + x_lo=Euc_min, x_hi=Euc_max, n_bins=n_bins_i + ) if method == "unbinned": energies = E_uncal[(E_uncal > Euc_min) & (E_uncal < Euc_max)][:n_events] hist, bins, var = pgh.get_hist( energies, bins=n_bins_i, range=(Euc_min, Euc_max) ) - par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i) - bounds = get_hpge_E_bounds(func_i) - fixed, mask = get_hpge_E_fixed(func_i) - - cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) - m = Minuit(cost_func, *par_guesses) - m.limits = bounds - for fix in fixed: - m.fixed[fix] = True - if simplex == True: - m.simplex().migrad() + if func_i == pgf.extended_radford_pdf or pgf.extended_gauss_step_pdf: + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, + func_i, gof_func_i, simplex, mode_guess) else: - m.migrad() - m.minos() - - pars_i = m.values - errs_i = m.errors - cov_i = m.covariance + + par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i) + bounds = get_hpge_E_bounds(func_i, par_guesses) + fixed, mask = get_hpge_E_fixed(func_i) + + cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) + m = Minuit(cost_func, *par_guesses) + m.limits = bounds + for fix in fixed: + m.fixed[fix] = True + if simplex == True: + m.simplex().migrad() + else: + m.migrad() + m.hesse() + + pars_i = m.values + errs_i = m.errors + cov_i = m.covariance + valid_fit = m.valid + + csqr = pgf.goodness_of_fit( + hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=True + ) else: hist, bins, var = pgh.get_hist( E_uncal, bins=n_bins_i, range=(Euc_min, Euc_max) ) par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i) - bounds = get_hpge_E_bounds(func_i) + bounds = get_hpge_E_bounds(func_i, par_guesses) fixed, mask = get_hpge_E_fixed(func_i) pars_i, errs_i, cov_i = pgf.fit_binned( func_i, @@ -585,72 +700,90 @@ def hpge_fit_E_peaks( simplex=simplex, bounds=bounds, ) + valid_fit=True - csqr = pgf.goodness_of_fit( - hist, bins, None, gof_func_i, pars_i, method="Pearson" - ) - p_val = scipy.stats.chi2.sf(csqr[0], csqr[1]) + csqr = pgf.goodness_of_fit( + hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=False + ) + + if np.isnan(pars_i).any(): + log.debug( + f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, par is nan : {pars_i}" + ) + raise RuntimeError - pars_i = np.array(pars_i)[mask] - errs_i = np.array(errs_i)[mask] - cov_i = np.array(cov_i)[mask, :][:, mask] + p_val = scipy.stats.chi2.sf(csqr[0], csqr[1]+ len(np.where(mask)[0])) - valid_pks.append(True) total_events = pgf.get_total_events_func(func_i, pars_i, errors=errs_i) if ( - sum(sum(c) if c is not None else 0 for c in cov_i) == np.inf - or sum(sum(c) if c is not None else 0 for c in cov_i) == 0 - or np.isnan(sum(sum(c) if c is not None else 0 for c in cov_i)) + sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == np.inf + or sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == 0 + or np.isnan(sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask])) ): log.debug( f"hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}" ) - valid_pks[-1] = False + valid_pks[i_peak] = False # pars_i, errs_i, cov_i, p_val = None, None, None, None - elif (np.abs(errs_i / pars_i) < 1e-7).any(): + elif valid_fit == False: log.debug( - f"hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" + f"hpge_fit_E_peaks: peak fitting failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}" ) - valid_pks[-1] = False + valid_pks[i_peak] = False + + elif ((np.abs(np.array(errs_i)[mask] / np.array(pars_i)[mask]) < 1e-7).any() + or np.isnan(np.array(errs_i)[mask]).any()): + log.debug( + f"hpge_fit_E_peaks: failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" + ) + valid_pks[i_peak] = False # pars_i, errs_i, cov_i, p_val = None, None, None, None elif np.abs(total_events[0] - np.sum(hist)) / np.sum(hist) > 0.1: log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, total_events is outside limit" ) - valid_pks[-1] = False + valid_pks[i_peak] = False # pars_i, errs_i, cov_i, p_val = None, None, None, None - elif p_val < allowed_p_val: + elif p_val < allowed_p_val or np.isnan(p_val): log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, p-value too low: {p_val}" ) - valid_pks[-1] = False + valid_pks[i_peak] = False # pars_i, errs_i, cov_i, p_val = None, None, None, None + else: + valid_pks[i_peak] = True except: - valid_pks.append(False) - pars_i, errs_i, cov_i, p_val = None, None, None, None + log.debug( + f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" + ) + valid_pks[i_peak] = False + pars_i, errs_i, cov_i = return_nans(func_i)#None, None, None, None + p_val = 0 # get binning binw_1 = (bins[-1] - bins[0]) / (len(bins) - 1) - pars.append(pars_i) - errors.append(errs_i) - covs.append(cov_i) - binws.append(binw_1) - ranges.append([Euc_min, Euc_max]) - p_vals.append(p_val) + pars[i_peak] = pars_i + errors[i_peak] = errs_i + covs[i_peak] = cov_i + binws[i_peak] =binw_1 + ranges[i_peak] =[Euc_min, Euc_max] + p_vals[i_peak] =p_val + out_funcs[i_peak] =func_i return ( - np.array(pars, dtype=object), - np.array(errors, dtype=object), - np.array(covs, dtype=object), - np.array(binws), - np.array(ranges), - np.array(p_vals), - np.array(valid_pks, dtype=bool), + pars, + errors, + covs, + binws, + ranges, + p_vals, + valid_pks, + out_funcs ) @@ -877,7 +1010,8 @@ def hpge_E_calibration( idx = [i for i, E in enumerate(peaks_keV) if E in got_peaks_keV] range_keV = [range_keV[i] for i in idx] funcs = [funcs[i] for i in idx] - + gof_funcs = [gof_funcs[i] for i in idx] + # Drop peaks to not be fitted tmp = zip( *[ @@ -910,13 +1044,13 @@ def hpge_E_calibration( derco = np.polyder(np.poly1d(roughpars)).coefficients der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV] range_uncal = [float(range_keV) / d for d in der] - n_bins = [range_keV / 0.5 / d for d in der] + n_bins = [int(range_keV / 0.5 / d) for d in der] elif isinstance(range_keV, tuple): rangeleft_keV, rangeright_keV = range_keV derco = np.polyder(np.poly1d(roughpars)).coefficients der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV] range_uncal = [(rangeleft_keV / d, rangeright_keV / d) for d in der] - n_bins = [sum(range_keV) / 0.5 / d for d in der] + n_bins = [int(sum(range_keV) / 0.5 / d) for d in der] elif isinstance(range_keV, list): derco = np.polyder(np.poly1d(roughpars)).coefficients der = [pgf.poly(Ei, derco) for Ei in got_peaks_keV] @@ -925,7 +1059,7 @@ def hpge_E_calibration( for r, d in zip(range_keV, der) ] n_bins = [ - sum(r) / 0.5 / d if isinstance(r, tuple) else r / 0.2 / d + int(sum(r) / 0.5 / d) if isinstance(r, tuple) else int(r / 0.2 / d) for r, d in zip(range_keV, der) ] @@ -937,6 +1071,7 @@ def hpge_E_calibration( pk_ranges, pk_pvals, valid_pks, + pk_funcs ) = hpge_fit_E_peaks( E_uncal, got_peaks_locs, @@ -957,6 +1092,7 @@ def hpge_E_calibration( results["pk_ranges"] = pk_ranges results["pk_pvals"] = pk_pvals results["pk_validities"] = valid_pks + results["pk_funcs"] = pk_funcs # Drop failed fits fitidx = [i == True for i in valid_pks] fitted_peaks_keV = results["fitted_keV"] = got_peaks_keV[fitidx] @@ -967,11 +1103,11 @@ def hpge_E_calibration( pk_binws = np.asarray(pk_binws)[fitidx] pk_ranges = np.asarray(pk_ranges)[fitidx] pk_pvals = np.asarray(pk_pvals)[fitidx] + pk_funcs = np.asarray(pk_funcs)[fitidx] log.info(f"{sum(fitidx)} peaks fitted:") - for i, (Ei, parsi, errorsi, covsi) in enumerate( - zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs) + for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( + zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs, pk_funcs) ): - func_i = funcs[i] if hasattr(funcs, "__len__") else funcs varnames = func_i.__code__.co_varnames[1 : len(pk_pars[-1]) + 1] parsi = np.asarray(parsi, dtype=float) errorsi = np.asarray(errorsi, dtype=float) @@ -988,7 +1124,7 @@ def hpge_E_calibration( # Do a second calibration to the results of the full peak fits mus = [ pgf.get_mu_func(func_i, pars_i, errors=errors_i) - for func_i, pars_i, errors_i in zip(funcs, pk_pars, pk_errors) + for func_i, pars_i, errors_i in zip(pk_funcs, pk_pars, pk_errors) ] mus, mu_vars = zip(*mus) mus = np.asarray(mus) @@ -1008,7 +1144,7 @@ def hpge_E_calibration( # Finally, calculate fwhms in keV uncal_fwhms = [ pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) - for func_i, pars_i, covs_i in zip(funcs, pk_pars, pk_covs) + for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs) ] uncal_fwhms, uncal_fwhm_errs = zip(*uncal_fwhms) uncal_fwhms = np.asarray(uncal_fwhms) From 25ce6a17006facea4a494c5d0d7d4101dff05805 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:46:05 +0200 Subject: [PATCH 04/22] changes for new cal fitting, added high stats fitting for super calibrations, wrote resolution fitting to include both linear and quadratic fits, changed results output for clarity --- src/pygama/pargen/ecal_th.py | 1040 ++++++++++++++++++++++------------ 1 file changed, 675 insertions(+), 365 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index eadc7b0ef..578d44867 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -28,6 +28,7 @@ import pygama.math.peak_fitting as pgf import pygama.pargen.cuts as cts import pygama.pargen.energy_cal as cal +from pygama.pargen.utils import * log = logging.getLogger(__name__) @@ -42,63 +43,6 @@ def fwhm_slope(x: np.array, m0: float, m1: float, m2: float = None) -> np.array: return np.sqrt(m0 + m1 * x + m2 * x**2) -def load_data( - files: list[str], - lh5_path: str, - energy_params: list[str], - hit_dict: dict = {}, - cut_parameters: list[str] = ["bl_mean", "bl_std", "pz_std"], -) -> pd.DataFrame: - df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) - pulser_props = cts.find_pulser_properties(df, energy="trapTmax") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (df.trapTmax.values < entry[0] + entry[1]) & ( - df.trapTmax.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") - - else: - ids = np.ones(len(df), dtype=bool) - log.debug(f"no pulser found") - - sto = lh5.LH5Store() - table = sto.read_object(lh5_path, files)[0] - - if len(hit_dict.keys()) == 0: - out_df = df.copy() - for param in energy_params: - try: - out_df[param] = table[param].nda - - except RuntimeError: - param = param.split("_")[0] - out_df[param] = table[param].nda - - else: - out_df = table.eval(hit_dict).get_dataframe() - out_df = pd.concat([df, out_df], axis=1) - out_df["is_not_pulser"] = ids - - cut_parameters = cts.get_keys(table, cut_parameters) - - for param in energy_params: - if param not in out_df: - out_df[param] = table[param].nda - if cut_parameters is not None: - for param in cut_parameters: - if param not in df: - out_df[param] = table[param].nda - log.debug("Data Loaded") - return out_df - - def apply_cuts( data: pd.DataFrame, hit_dict, @@ -112,13 +56,12 @@ def apply_cuts( ) mask = cts.get_cut_indexes(data, cut_dict) - data["is_valid_cal"] = mask + data[final_cut_field] = mask else: - data["is_valid_cal"] = np.ones(len(data), dtype=bool) - data["is_usable"] = data["is_valid_cal"] & data["is_not_pulser"] + data[final_cut_field] = np.ones(len(data), dtype=bool) - events_pqc = len(data.query("is_usable")) + events_pqc = len(data.query(f"{final_cut_field}&is_not_pulser")) log.debug(f"{events_pqc} events valid for calibration") return data, hit_dict @@ -146,6 +89,32 @@ def gen_pars_dict(pars, deg, energy_param): return out_dict +class fwhm_linear: + def func(x,a,b): + return np.sqrt(a + b * x) + + def string_func(input_param): + return f"(a+b*{input_param})**(0.5)" + + def guess(xs, ys, y_errs): + return [np.nanmin(ys), 10**-3] + + def bounds(): + return [(0,None),(0,None)] + +class fwhm_quadratic: + + def func(x, a, b, c): + return np.sqrt(a + b * x + c*x**2) + + def string_func(input_param): + return f"(a+b*{input_param}+c*{input_param}**2)**(0.5)" + + def guess(xs, ys, y_errs): + return [np.nanmin(ys), 10**-3, 10**-5] + + def bounds(): + return [(0,None),(0,None),(0,None)] class calibrate_parameter: glines = [ @@ -191,8 +160,8 @@ class calibrate_parameter: def __init__( self, - data, energy_param, + selection_string = "is_usable", plot_options: dict = None, guess_keV: float | None = None, threshold: int = 0, @@ -200,9 +169,14 @@ def __init__( n_events: int = None, simplex: bool = True, deg: int = 1, + cal_energy_param:str = None ): - self.data = data self.energy_param = energy_param + if cal_energy_param is None: + self.cal_energy_param = f"{self.energy_param}_cal" + else: + self.cal_energy_param = cal_energy_param + self.selection_string = selection_string self.guess_keV = guess_keV self.threshold = threshold self.p_val = p_val @@ -213,7 +187,6 @@ def __init__( self.output_dict = {} self.hit_dict = {} - self.plot_dict = {} def fit_energy_res(self): fitted_peaks = self.results["fitted_keV"] @@ -235,6 +208,10 @@ def fit_energy_res(self): log.info(f"Tl DEP found at index {i}") indexes.append(i) continue + elif peak == 511.0: + log.info(f"e annhilation found at index {i}") + indexes.append(i) + continue elif np.isnan(dfwhms[i]): log.info(f"{peak} failed") indexes.append(i) @@ -244,49 +221,130 @@ def fit_energy_res(self): fit_fwhms = np.delete(fwhms, [indexes]) fit_dfwhms = np.delete(dfwhms, [indexes]) ##### - param_guess = [2, 0.001] - param_bounds = (0, np.inf) for i, peak in enumerate(fwhm_peaks): log.info( f"FWHM of {peak} keV peak is: {fit_fwhms[i]:1.2f} +- {fit_dfwhms[i]:1.2f} keV" ) try: - self.fit_pars, self.fit_covs = curve_fit( - fwhm_slope, - fwhm_peaks, - fit_fwhms, - sigma=fit_dfwhms, - p0=param_guess, - bounds=param_bounds, - absolute_sigma=True, + if 2614.50 not in fwhm_peaks: + raise RuntimeError + + c_lin = cost.LeastSquares( + fwhm_peaks, fit_fwhms, fit_dfwhms, fwhm_linear.func ) - rng = np.random.default_rng(1) - pars_b = rng.multivariate_normal(self.fit_pars, self.fit_covs, size=1000) - fits = np.array([fwhm_slope(fwhm_peaks, *par_b) for par_b in pars_b]) - qbb_vals = np.array([fwhm_slope(2039.0, *par_b) for par_b in pars_b]) - self.qbb_err = np.nanstd(qbb_vals) - predicted_fwhms = fwhm_slope(fwhm_peaks, *self.fit_pars) - self.fit_qbb = fwhm_slope(2039.0, *self.fit_pars) + c_lin.loss = "soft_l1" + m_lin = Minuit(c_lin, *fwhm_linear.guess(fwhm_peaks, fit_fwhms, fit_dfwhms)) + m_lin.limits = fwhm_linear.bounds() + m_lin.simplex() + m_lin.migrad() + m_lin.hesse() - if 2614.50 not in fwhm_peaks: - self.fit_qbb = np.nan - self.qbb_err = np.nan - log.info(f"FWHM curve fit: {self.fit_pars}") + rng = np.random.default_rng(1) + pars_b = rng.multivariate_normal(m_lin.values, + m_lin.covariance, size=1000) + fits = np.array([fwhm_linear.func(fwhm_peaks, *par_b) for par_b in pars_b]) + qbb_vals = np.array([fwhm_linear.func(2039.0, *par_b) for par_b in pars_b]) + qbb_err = np.nanstd(qbb_vals) + predicted_fwhms = fwhm_linear.func(fwhm_peaks, *m_lin.values) + fit_qbb = fwhm_linear.func(2039.0, *m_lin.values) + + p_val = scipy.stats.chi2.sf(m_lin.fval, len(fwhm_peaks)-len(m_lin.values)) + + self.fwhm_fit_linear = {"function":fwhm_linear.__name__, + "module":fwhm_linear.__module__, + "expression":fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)":qbb_err, + "pars":m_lin.values, + "errors":m_lin.errors, + "cov":m_lin.covariance, + "csqr": (m_lin.fval, len(fwhm_peaks)-len(m_lin.values)), + "p_val":p_val} + + + log.info(f'FWHM linear fit: {self.fwhm_fit_linear["pars"].to_dict()}') log.info(f"FWHM fit values:") - for peak in fwhm_peaks: - log.info( - f"Predicted FWHM of {peak} keV peak is: {fwhm_slope(peak, *self.fit_pars):.2f} keV" - ) + log.info(f"\t Energy | FWHM (keV) | Predicted (keV)") + for i, (peak, fwhm, fwhme) in enumerate( + zip(fwhm_peaks, fit_fwhms, fit_dfwhms) + ): + log.info( + f"\t{i}".ljust(4) + + str(peak).ljust(9) + + f"| {fwhm:.2f}+-{fwhme:.2f} ".ljust(5) + +f"| {fwhm_linear.func(peak, *self.fwhm_fit_linear['pars']):.2f}".ljust(5) + ) + log.info( - f"FWHM energy resolution at Qbb: {self.fit_qbb:1.2f} +- {self.qbb_err:1.2f} keV" + f"FWHM energy resolution at Qbb (linear fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" + ) + except RuntimeError: + log.error(f"FWHM linear fit failed for {self.energy_param}") + pars, errs, cov = return_nans(fwhm_linear.func) + self.fwhm_fit_linear = {"function":fwhm_linear.__name__, + "module":fwhm_linear.__module__, + "expression":fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)":np.nan, + "pars":pars, + "errors":errs, + "cov":cov, + "csqr":(np.nan, np.nan), + "p_val":0} + log.error("FWHM linear fit failed to converge") + try: + if 2614.50 not in fwhm_peaks: + raise RuntimeError + c_quad = cost.LeastSquares( + fwhm_peaks, fit_fwhms, fit_dfwhms, fwhm_quadratic.func ) + c_quad.loss = "soft_l1" + m_quad = Minuit(c_quad, *fwhm_quadratic.guess(fwhm_peaks, fit_fwhms, fit_dfwhms)) + m_quad.limits = fwhm_quadratic.bounds() + m_quad.simplex() + m_quad.migrad() + m_quad.hesse() + + rng = np.random.default_rng(1) + pars_b = rng.multivariate_normal(m_quad.values, + m_quad.covariance, size=1000) + fits = np.array([fwhm_quadratic.func(fwhm_peaks, *par_b) for par_b in pars_b]) + qbb_vals = np.array([fwhm_quadratic.func(2039.0, *par_b) for par_b in pars_b]) + qbb_err = np.nanstd(qbb_vals) + predicted_fwhms = fwhm_quadratic.func(fwhm_peaks, *m_quad.values) + fit_qbb = fwhm_quadratic.func(2039.0, *m_quad.values) + + p_val = scipy.stats.chi2.sf(m_quad.fval, len(fwhm_peaks)-len(m_quad.values)) + + self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, + "module":fwhm_quadratic.__module__, + "expression":fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)":qbb_err, + "pars":m_quad.values, + "errors":m_quad.errors, + "cov":m_quad.covariance, + "csqr": (m_quad.fval, len(fwhm_peaks)-len(m_quad.values)), + "p_val":p_val + } + log.info(f'FWHM quadratic fit: {self.fwhm_fit_quadratic["pars"].to_dict()}') + log.info( + f"FWHM energy resolution at Qbb (quadratic fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" + ) except RuntimeError: - log.error(f"FWHM fit failed for {energy_param}") - self.fit_pars = np.array([np.nan, np.nan]) - self.fit_covs = np.array([[np.nan, np.nan], [np.nan, np.nan]]) - self.fit_qbb = np.nan - self.qbb_err = np.nan - log.error("FWHM fit failed to converge") + log.error(f"FWHM quadratic fit failed for {self.energy_param}") + pars, errs, cov = return_nans(fwhm_quadratic.func) + self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, + "module":fwhm_quadratic.__module__, + "expression":fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)":np.nan, + "pars":pars, + "errors":errs, + "cov":cov, + "csqr":(np.nan, np.nan), + "p_val":0} + log.error("FWHM quadratic fit failed to converge") def gen_pars_dict(self): if self.deg == 1: @@ -310,11 +368,66 @@ def gen_pars_dict(self): return out_dict - def calibrate_parameter(self): + def get_results_dict(self, data): + if np.isnan(self.pars).all(): + return {} + else: + fwhm_linear = self.fwhm_fit_linear.copy() + fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() + fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = self.fwhm_fit_quadratic.copy() + fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() + fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = {Ei:{"function":func_i.__name__, + "module":func_i.__module__, + "pars(uncal)":parsi.to_dict(), + "errs(uncal)":errorsi.to_dict(), + "p_val": pvali, + "fwhm (keV)": list(fwhmi)} + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip(self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs) + )} + + return { + "total_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624" + ) + ), + "total_dep": len( + data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597" + ) + ), + "pass_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624&{self.selection_string}" + ) + ), + "pass_dep": len( + data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597&{self.selection_string}" + ) + ), + "eres_linear": fwhm_linear, + "eres_quadratic":fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits":pk_dict + } + + def calibrate_parameter(self, data): kev_ranges = self.range_keV.copy() if self.guess_keV is None: self.guess_keV = 2620 / np.nanpercentile( - self.data.query(f"is_usable & {self.energy_param}>{self.threshold}")[ + data.query(f"{self.selection_string} & {self.energy_param}>{self.threshold}")[ self.energy_param ], 99, @@ -325,7 +438,7 @@ def calibrate_parameter(self): try: self.pars, self.cov, self.results = cal.hpge_E_calibration( - self.data.query("is_usable")[self.energy_param], + data.query(self.selection_string)[self.energy_param], self.glines, self.guess_keV, deg=self.deg, @@ -340,51 +453,49 @@ def calibrate_parameter(self): pk_pars = self.results["pk_pars"] found_peaks = self.results["got_peaks_locs"] fitted_peaks = self.results["fitted_keV"] + fitted_funcs = self.results["pk_funcs"] + if self.pars is None: + raise ValueError + + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak ==self.glines)[0][0] + self.funcs[idx] = fitted_funcs[i] + if fitted_funcs[i] == pgf.extended_radford_pdf: + self.gof_funcs[idx] = pgf.radford_pdf + else: + self.gof_funcs[idx] = pgf.gauss_step_pdf except: found_peaks = np.array([]) fitted_peaks = np.array([]) + fitted_funcs = np.array([]) + + if len(fitted_peaks) != len(self.glines) or self.gof_funcs[-1]==pgf.gauss_step_pdf: + if self.glines[-1] in fitted_peaks: + if fitted_funcs[-1] == pgf.extended_gauss_step_pdf: + self.funcs = [pgf.extended_gauss_step_pdf for entry in self.glines] + self.gof_funcs = [pgf.gauss_step_pdf for entry in self.glines] + + for i, peak in enumerate(self.glines): + if peak not in fitted_peaks: + kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) + for i, peak in enumerate(self.glines): + if peak not in fitted_peaks: + kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) + for i, peak in enumerate(fitted_peaks): + try: + if ( + self.results["pk_fwhms"][:, 1][i] + / self.results["pk_fwhms"][:, 0][i] + > 0.05 + ): + index = np.where(self.glines == peak)[0][0] + kev_ranges[i] = (kev_ranges[index][0] - 5, kev_ranges[index][1] - 5) + except: + pass - for i, peak in enumerate(self.glines): - if peak not in fitted_peaks: - kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) - for i, peak in enumerate(self.glines): - if peak not in fitted_peaks: - kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) - for i, peak in enumerate(fitted_peaks): - try: - if ( - self.results["pk_fwhms"][:, 1][i] - / self.results["pk_fwhms"][:, 0][i] - > 0.05 - ): - index = np.where(self.glines == peak)[0][0] - kev_ranges[i] = (kev_ranges[index][0] - 5, kev_ranges[index][1] - 5) - except: - pass - - try: - self.pars, self.cov, self.results = cal.hpge_E_calibration( - self.data.query("is_usable")[self.energy_param], - self.glines, - self.guess_keV, - deg=self.deg, - range_keV=kev_ranges, - funcs=self.funcs, - gof_funcs=self.gof_funcs, - n_events=self.n_events, - allowed_p_val=self.p_val, - simplex=self.simplex, - verbose=False, - ) - except: - self.pars = None - if self.pars is None: - log.error( - f"Calibration failed for {self.energy_param}, trying with 0 p_val" - ) try: self.pars, self.cov, self.results = cal.hpge_E_calibration( - self.data.query("is_usable")[self.energy_param], + data.query(self.selection_string)[self.energy_param], self.glines, self.guess_keV, deg=self.deg, @@ -392,147 +503,282 @@ def calibrate_parameter(self): funcs=self.funcs, gof_funcs=self.gof_funcs, n_events=self.n_events, - allowed_p_val=0, + allowed_p_val=self.p_val, simplex=self.simplex, verbose=False, ) + fitted_peaks = self.results["fitted_keV"] + fitted_funcs = self.results["pk_funcs"] + + log.debug("Calibrated found") + log.info(f"Calibration pars are {self.pars}") + + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak ==self.glines)[0][0] + self.funcs[idx] = fitted_funcs[i] + if fitted_funcs[i] == pgf.extended_radford_pdf: + self.gof_funcs[idx] = pgf.radford_pdf + else: + self.gof_funcs[idx] = pgf.gauss_step_pdf if self.pars is None: raise ValueError - - self.fit_energy_res() - self.data[f"{self.energy_param}_cal"] = pgf.poly( - self.data[self.energy_param], self.pars - ) - self.hit_dict[f"{self.energy_param}_cal"] = self.gen_pars_dict() - self.output_dict[f"{self.energy_param}_cal"] = { - "Qbb_fwhm": np.nan, - "Qbb_fwhm_err": np.nan, - "2.6_fwhm": np.nan, - "2.6_fwhm_err": np.nan, - "eres_pars": self.fit_pars.tolist(), - "fitted_peaks": np.nan, - "p_vals": np.nan, - "fwhms": np.nan, - "peak_fit_pars": np.nan, - "peak_fit_errs": np.nan, - "total_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624" - ) - ), - "total_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597" - ) - ), - "pass_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624&is_usable" - ) - ), - "pass_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597&is_usable" - ) - ), - } + except: - log.error( - f"Calibration failed completely for {self.energy_param} even with 0 p_val" - ) self.pars = np.full(self.deg + 1, np.nan) - - self.hit_dict[f"{self.energy_param}_cal"] = self.gen_pars_dict() - - self.output_dict[f"{self.energy_param}_cal"] = { - "Qbb_fwhm": np.nan, - "Qbb_fwhm_err": np.nan, - "2.6_fwhm": np.nan, - "2.6_fwhm_err": np.nan, - "eres_pars": [np.nan, np.nan], - "fitted_peaks": np.nan, - "fwhms": np.nan, - "peak_fit_pars": np.nan, - "peak_fit_errs": np.nan, - "p_vals": np.nan, - "total_fep": np.nan, - "total_dep": np.nan, - "pass_fep": np.nan, - "pass_dep": np.nan, - } - + self.results = None + + log.error( + f"Calibration failed completely for {self.energy_param}" + ) else: - log.debug("done") + log.debug("Calibrated found") log.info(f"Calibration pars are {self.pars}") + if ~np.isnan(self.pars).all(): + self.fit_energy_res() + self.hit_dict[self.cal_energy_param] = self.gen_pars_dict() + data[f"{self.energy_param}_cal"] = pgf.poly( + data[self.energy_param], self.pars + ) - self.data[f"{self.energy_param}_cal"] = pgf.poly( - self.data[self.energy_param], self.pars - ) + def fill_plot_dict(self, data, plot_dict={}): + for key, item in self.plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](self, data, **item["options"]) + else: + plot_dict[key] = item["function"](self, data) + return plot_dict - pk_rs_dict = { - peak: self.results["pk_pars"][self.results["pk_validities"]][i].tolist() - for i, peak in enumerate(self.results["fitted_keV"]) - } - pk_errs_dict = { - peak: self.results["pk_errors"][self.results["pk_validities"]][ - i - ].tolist() - for i, peak in enumerate(self.results["fitted_keV"]) - } - self.fit_energy_res() - self.hit_dict[f"{self.energy_param}_cal"] = self.gen_pars_dict() +class high_stats_fitting(calibrate_parameter): + glines = [ + 238.632, + 511, + 583.191, + 727.330, + 763, + 785, + 860.564, + 893, + 1079, + 1513, + 1592.53, + 1620.50, + 2103.53, + 2614.50, + 3125, + 3198, + 3474 + ] # gamma lines used for calibration + range_keV = [ + (10, 10), + (30,30), + (30, 30), + (30, 30), + (30, 15), + (15, 30), + (30, 25), + (25, 30), + (30, 30), + (30, 30), + (30, 20), + (20, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + (30, 30), + ] # side bands width + funcs = [ + pgf.extended_gauss_step_pdf, #probably should be gauss on exp + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_radford_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + pgf.extended_gauss_step_pdf, + ] + gof_funcs = [ + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.radford_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + pgf.gauss_step_pdf, + ] - if self.results["fitted_keV"][-1] == 2614.50: - fep_fwhm = round(self.results["pk_fwhms"][-1, 0], 2) - fep_dwhm = round(self.results["pk_fwhms"][-1, 1], 2) - else: - fep_fwhm = np.nan - fep_dwhm = np.nan - - self.output_dict[f"{self.energy_param}_cal"] = { - "Qbb_fwhm": round(self.fit_qbb, 2), - "Qbb_fwhm_err": round(self.qbb_err, 2), - "2.6_fwhm": fep_fwhm, - "2.6_fwhm_err": fep_dwhm, - "eres_pars": self.fit_pars.tolist(), - "fitted_peaks": self.results["fitted_keV"].tolist(), - "fwhms": self.results["pk_fwhms"].tolist(), - "peak_fit_pars": pk_rs_dict, - "peak_fit_errs": pk_errs_dict, - "p_vals": self.results["pk_pvals"].tolist(), - "total_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624" - ) - ), - "total_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597" - ) - ), - "pass_fep": len( - self.data.query( - f"{self.energy_param}_cal>2604&{self.energy_param}_cal<2624&is_usable" - ) - ), - "pass_dep": len( - self.data.query( - f"{self.energy_param}_cal>1587&{self.energy_param}_cal<1597&is_usable" - ) - ), + def __init__(self, energy_param, selection_string, threshold, p_val, + plot_options={}, simplex=False): + self.energy_param = energy_param + self.cal_energy_param = energy_param + self.selection_string = selection_string + self.threshold = threshold + self.p_val = p_val + self.plot_options = plot_options + self.simplex = simplex + self.results = {} + self.plot_dict = {} + self.n_events=None + self.output_dict = {} + self.pars=[1,0] + + def get_results_dict(self, data): + if self.results: + fwhm_linear = self.fwhm_fit_linear.copy() + fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() + fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["cov"] = fwhm_linear["cov"].tolist() + fwhm_quad = self.fwhm_fit_quadratic.copy() + fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() + fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["cov"] = fwhm_quad["cov"].tolist() + + pk_dict = {Ei:{"function":func_i.__name__, + "module":func_i.__module__, + "pars(cal)":parsi.to_dict(), + "errs(cal)":errorsi.to_dict(), + "p_val": pvali, + "fwhm (keV)": list(fwhmi)} + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip(self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs) + )} + + return { + "eres_linear": fwhm_linear, + "eres_quadratic":fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits":pk_dict } - log.info( - f"Results {self.energy_param}: {json.dumps(self.output_dict[f'{self.energy_param}_cal'], indent=2)}" + else: + return {} + + + def fit_peaks(self, data): + log.debug(f"Fitting {self.energy_param}") + try: + n_bins = [int((self.range_keV[i][1]+self.range_keV[i][0]) /0.2) for i in range(len(self.glines))] + pk_pars, pk_errors, pk_covs, pk_binws, pk_ranges, pk_pvals, valid_pks, pk_funcs = cal.hpge_fit_E_peaks( + data.query(self.selection_string)[self.energy_param], + self.glines, + self.range_keV, + n_bins=n_bins, + funcs=self.funcs, + method="unbinned", + gof_funcs=self.gof_funcs, + n_events=None, + allowed_p_val=self.p_val ) + for idx, peak in enumerate(self.glines): + #idx = np.where(peak ==self.glines)[0][0] + self.funcs[idx] = pk_funcs[idx] + if pk_funcs[idx] == pgf.extended_radford_pdf: + self.gof_funcs[idx] = pgf.radford_pdf + else: + self.gof_funcs[idx] = pgf.gauss_step_pdf + + self.results["got_peaks_keV"] = self.glines + self.results["pk_pars"] = pk_pars + self.results["pk_errors"] = pk_errors + self.results["pk_covs"] = pk_covs + self.results["pk_binws"] = pk_binws + self.results["pk_ranges"] = pk_ranges + self.results["pk_pvals"] = pk_pvals + + + for i, pk in enumerate(self.results["got_peaks_keV"]): + try: + if self.results["pk_pars"][i]["n_sig"]<10: + valid_pks[i] = False + elif 2*self.results["pk_errors"][i]["n_sig"]>self.results["pk_pars"][i]["n_sig"]: + valid_pks[i] = False + except: + pass + + self.results["pk_validities"] = valid_pks + + # Drop failed fits + fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[valid_pks] + pk_pars = np.asarray(pk_pars, dtype=object)[valid_pks] # ragged + pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks] + pk_covs = np.asarray(pk_covs, dtype=object)[valid_pks] + pk_binws = np.asarray(pk_binws)[valid_pks] + pk_ranges = np.asarray(pk_ranges)[valid_pks] + pk_pvals = np.asarray(pk_pvals)[valid_pks] + pk_funcs = np.asarray(pk_funcs)[valid_pks] + + + + log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") + for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( + zip(fitted_peaks_keV, pk_pars, pk_errors, pk_covs, pk_funcs) + ): + varnames = func_i.__code__.co_varnames[1 : len(pk_pars[-1]) + 1] + parsi = np.asarray(parsi, dtype=float) + errorsi = np.asarray(errorsi, dtype=float) + covsi = np.asarray(covsi, dtype=float) + # parsigsi = np.sqrt(covsi.diagonal()) + log.info(f"\tEnergy: {str(Ei)}") + log.info(f"\t\tParameter | Value +/- Sigma ") + for vari, pari, errorsi in zip(varnames, parsi, errorsi): + log.info( + f'\t\t{str(vari).ljust(10)} | {("%4.2f" % pari).rjust(8)} +/- {("%4.2f" % errorsi).ljust(8)}' + ) - def fill_plot_dict(self): - for key, item in self.plot_options.items(): - if item["options"] is not None: - self.plot_dict[key] = item["function"](self, **item["options"]) - else: - self.plot_dict[key] = item["function"](self) + cal_fwhms = [ + pgf.get_fwhm_func(func_i, pars_i, cov=covs_i) + for func_i, pars_i, covs_i in zip(pk_funcs, pk_pars, pk_covs) + ] + + cal_fwhms, cal_fwhms_errs = zip(*cal_fwhms) + cal_fwhms = np.asarray(cal_fwhms) + cal_fwhms_errs = np.asarray(cal_fwhms_errs) + self.results["pk_fwhms"] = np.asarray( + [(u, e) for u, e in zip(cal_fwhms, cal_fwhms_errs)] + ) + + log.info(f"{len(cal_fwhms)} FWHMs found:") + log.info(f"\t Energy | FWHM ") + for i, (Ei, fwhm, fwhme) in enumerate( + zip(fitted_peaks_keV, cal_fwhms, cal_fwhms_errs) + ): + log.info( + f"\t{i}".ljust(4) + + str(Ei).ljust(9) + + f"| {fwhm:.2f}+-{fwhme:.2f} keV".ljust(5) + ) + self.fit_energy_res() + log.debug(f"high stats fitting successful") + except: + self.results = {} + log.debug(f"high stats fitting failed") + def get_peak_labels( @@ -564,9 +810,11 @@ def get_peak_label(peak: float) -> str: return "Tl SEP" elif peak == 2614.5: return "Tl FEP" + else: + return "" -def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): +def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, binning_keV=5): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -580,6 +828,7 @@ def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): if peak in fitted_peaks: fitted_gof_funcs.append(ecal_class.gof_funcs[i]) + mus = [ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan for func_i, pars_i in zip(fitted_gof_funcs, pk_pars) @@ -590,43 +839,45 @@ def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): der = [pgf.poly(5, derco) for Ei in fitted_peaks] for i, peak in enumerate(mus): range_adu = 5 / der[i] - # plt.subplot(math.ceil((len(mus)) / 2), 2, i + 1) - plt.subplot(n_rows, ncols, i + 1) - binning = np.arange(pk_ranges[i][0], pk_ranges[i][1], 1) - bin_cs = (binning[1:] + binning[:-1]) / 2 - energies = ecal_class.data.query( - f"{ecal_class.energy_param}>{pk_ranges[i][0]}&{ecal_class.energy_param}<{pk_ranges[i][1]}&is_usable" - )[ecal_class.energy_param] - energies = energies.iloc[: ecal_class.n_events] - - counts, bs, bars = plt.hist(energies, bins=binning, histtype="step") - if pk_pars[i] is not None: - fit_vals = fitted_gof_funcs[i](bin_cs, *pk_pars[i]) * np.diff(bs) - plt.plot(bin_cs, fit_vals) - plt.step( - bin_cs, - [ - (fval - count) / count if count != 0 else (fval - count) - for count, fval in zip(counts, fit_vals) - ], - ) + plt.subplot(nrows, ncols, i + 1) + try: + binning = np.arange(pk_ranges[i][0], pk_ranges[i][1], 0.1/ der[i]) + bin_cs = (binning[1:] + binning[:-1]) / 2 + energies = data.query( + f"{ecal_class.energy_param}>{pk_ranges[i][0]}&{ecal_class.energy_param}<{pk_ranges[i][1]}&{ecal_class.selection_string}" + )[ecal_class.energy_param] + energies = energies.iloc[: ecal_class.n_events] + + counts, bs, bars = plt.hist(energies, bins=binning, histtype="step") + if pk_pars[i] is not None: + fit_vals = fitted_gof_funcs[i](bin_cs, *pk_pars[i][:-1], 0) * np.diff(bs)[0] + plt.plot(bin_cs, fit_vals) + plt.step( + bin_cs, + [ + (fval - count) / count if count != 0 else (fval - count) + for count, fval in zip(counts, fit_vals) + ], + ) - plt.annotate( - get_peak_label(fitted_peaks[i]), (0.02, 0.9), xycoords="axes fraction" - ) - plt.annotate( - f"{fitted_peaks[i]:.1f} keV", (0.02, 0.8), xycoords="axes fraction" - ) - plt.annotate( - f"p-value : {p_vals[i]:.4f}", (0.02, 0.7), xycoords="axes fraction" - ) - plt.xlabel("Energy (keV)") - plt.ylabel("Counts") - plt.legend(loc="upper left", frameon=False) - plt.xlim([peak - range_adu, peak + range_adu]) - locs, labels = plt.xticks() - new_locs, new_labels = get_peak_labels(locs, ecal_class.pars) - plt.xticks(ticks=new_locs, labels=new_labels) + plt.annotate( + get_peak_label(fitted_peaks[i]), (0.02, 0.9), xycoords="axes fraction" + ) + plt.annotate( + f"{fitted_peaks[i]:.1f} keV", (0.02, 0.8), xycoords="axes fraction" + ) + plt.annotate( + f"p-value : {p_vals[i]:.4f}", (0.02, 0.7), xycoords="axes fraction" + ) + plt.xlabel("Energy (keV)") + plt.ylabel("Counts") + plt.legend(loc="upper left", frameon=False) + plt.xlim([peak - range_adu, peak + range_adu]) + locs, labels = plt.xticks() + new_locs, new_labels = get_peak_labels(locs, ecal_class.pars) + plt.xticks(ticks=new_locs, labels=new_labels) + except: + pass plt.tight_layout() plt.close() @@ -634,28 +885,28 @@ def plot_fits(ecal_class, figsize=[12, 8], fontsize=12, ncols=3, n_rows=3): def plot_2614_timemap( - ecal_class, figsize=[12, 8], fontsize=12, erange=[2580, 2630], dx=1, time_dx=180 + ecal_class, data, figsize=[12, 8], fontsize=12, erange=[2580, 2630], dx=1, time_dx=180 ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - selection = ecal_class.data.query( - f"{ecal_class.energy_param}_cal>2560&{ecal_class.energy_param}_cal<2660&is_usable" + selection = data.query( + f"{ecal_class.cal_energy_param}>2560&{ecal_class.cal_energy_param}<2660&{ecal_class.selection_string}" ) + fig = plt.figure() if len(selection) == 0: pass else: time_bins = np.arange( - (np.amin(ecal_class.data["timestamp"]) // time_dx) * time_dx, - ((np.amax(ecal_class.data["timestamp"]) // time_dx) + 2) * time_dx, + (np.amin(data["timestamp"]) // time_dx) * time_dx, + ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx, time_dx, ) - fig = plt.figure() plt.hist2d( selection["timestamp"], - selection[f"{ecal_class.energy_param}_cal"], + selection[ecal_class.cal_energy_param], bins=[time_bins, np.arange(erange[0], erange[1] + dx, dx)], norm=LogNorm(), ) @@ -676,31 +927,31 @@ def plot_2614_timemap( def plot_pulser_timemap( - ecal_class, figsize=[12, 8], fontsize=12, dx=0.2, time_dx=180, n_spread=3 + ecal_class, data, figsize=[12, 8], fontsize=12, dx=0.2, time_dx=180, n_spread=3 ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize time_bins = np.arange( - (np.amin(ecal_class.data["timestamp"]) // time_dx) * time_dx, - ((np.amax(ecal_class.data["timestamp"]) // time_dx) + 2) * time_dx, + (np.amin(data["timestamp"]) // time_dx) * time_dx, + ((np.amax(data["timestamp"]) // time_dx) + 2) * time_dx, time_dx, ) - selection = ecal_class.data.query(f"~is_not_pulser") + selection = data.query(f"~is_not_pulser") fig = plt.figure() if len(selection) == 0: pass else: - mean = np.nanpercentile(selection[f"{ecal_class.energy_param}_cal"], 50) + mean = np.nanpercentile(selection[ecal_class.cal_energy_param], 50) spread = mean - np.nanpercentile( - selection[f"{ecal_class.energy_param}_cal"], 10 + selection[ecal_class.cal_energy_param], 10 ) plt.hist2d( selection["timestamp"], - selection[f"{ecal_class.energy_param}_cal"], + selection[ecal_class.cal_energy_param], bins=[ time_bins, np.arange(mean - n_spread * spread, mean + n_spread * spread + dx, dx), @@ -722,11 +973,11 @@ def plot_pulser_timemap( return fig -def bin_pulser_stability(ecal_class, time_slice=180): - selection = ecal_class.data.query(f"~is_not_pulser") +def bin_pulser_stability(ecal_class, data, time_slice=180): + selection = data.query(f"~is_not_pulser") - utime_array = ecal_class.data["timestamp"] - select_energies = selection[f"{ecal_class.energy_param}_cal"].to_numpy() + utime_array = data["timestamp"] + select_energies = selection[ecal_class.cal_energy_param].to_numpy() time_bins = np.arange( (np.amin(utime_array) // time_slice) * time_slice, @@ -762,13 +1013,13 @@ def bin_pulser_stability(ecal_class, time_slice=180): return {"time": times_average, "energy": par_average, "spread": par_error} -def bin_stability(ecal_class, time_slice=180, energy_range=[2585, 2660]): - selection = ecal_class.data.query( - f"{ecal_class.energy_param}_cal>{energy_range[0]}&{ecal_class.energy_param}_cal<{energy_range[1]}&is_usable" +def bin_stability(ecal_class, data, time_slice=180, energy_range=[2585, 2660]): + selection = data.query( + f"{ecal_class.cal_energy_param}>{energy_range[0]}&{ecal_class.cal_energy_param}<{energy_range[1]}&{ecal_class.selection_string}" ) - utime_array = ecal_class.data["timestamp"] - select_energies = selection[f"{ecal_class.energy_param}_cal"].to_numpy() + utime_array = data["timestamp"] + select_energies = selection[ecal_class.cal_energy_param].to_numpy() time_bins = np.arange( (np.amin(utime_array) // time_slice) * time_slice, @@ -804,7 +1055,7 @@ def bin_stability(ecal_class, time_slice=180, energy_range=[2585, 2660]): return {"time": times_average, "energy": par_average, "spread": par_error} -def plot_cal_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): +def plot_cal_fit(ecal_class, data, figsize=[12, 8], fontsize=12, erange=[200, 2700]): pk_pars = ecal_class.results["pk_pars"] fitted_peaks = ecal_class.results["got_peaks_keV"] pk_errs = ecal_class.results["pk_errors"] @@ -856,7 +1107,7 @@ def plot_cal_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): return fig -def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): +def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsize=12): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -881,6 +1132,10 @@ def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): log.info(f"{peak} failed") indexes.append(i) continue + elif peak == 511.0: + log.info(f"e annhilation found at index {i}") + indexes.append(i) + continue else: fwhm_peaks = np.append(fwhm_peaks, peak) fit_fwhms = np.delete(fwhms, [indexes]) @@ -889,32 +1144,37 @@ def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): fig, (ax1, ax2) = plt.subplots( 2, 1, sharex=True, gridspec_kw={"height_ratios": [3, 1]} ) - ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", lw=0, c="b") + ax1.errorbar(fwhm_peaks, fit_fwhms, yerr=fit_dfwhms, marker="x", lw=0, c="black") fwhm_slope_bins = np.arange(erange[0], erange[1], 10) qbb_line_vx = [2039.0, 2039.0] qbb_line_vy = [ - 0.9 * np.nanmin(fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars)), - ecal_class.fit_qbb, + 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), + np.nanmax([ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"],ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]]) ] qbb_line_hx = [erange[0], 2039.0] - qbb_line_hy = [ecal_class.fit_qbb, ecal_class.fit_qbb] ax1.plot( - fwhm_slope_bins, fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars), lw=1, c="g" + fwhm_slope_bins, fwhm_linear.func(fwhm_slope_bins, + *ecal_class.fwhm_fit_linear["pars"]), lw=1, c="g", + label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV' ) - ax1.plot(qbb_line_hx, qbb_line_hy, lw=1, c="r") - ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r") ax1.plot( - np.nan, - np.nan, - "-", - color="none", - label=f"Qbb fwhm: {ecal_class.fit_qbb:1.2f} +- {ecal_class.qbb_err:1.2f} keV", + fwhm_slope_bins, fwhm_quadratic.func(fwhm_slope_bins, + *ecal_class.fwhm_fit_quadratic["pars"]), lw=1, c="b", + label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV' ) + ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]], + lw=1, c="r", ls="--") + ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]], + lw=1, c="r", ls="--") + ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--") + ax1.legend(loc="upper left", frameon=False) - if np.isnan(ecal_class.fit_pars).all(): + if np.isnan(ecal_class.fwhm_fit_linear["pars"]).all(): [ 0.9 * np.nanmin(fit_fwhms), 1.1 * np.nanmax(fit_fwhms), @@ -922,56 +1182,62 @@ def plot_eres_fit(ecal_class, figsize=[12, 8], fontsize=12, erange=[200, 2700]): else: ax1.set_ylim( [ - 0.9 * np.nanmin(fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars)), - 1.1 * np.nanmax(fwhm_slope(fwhm_slope_bins, *ecal_class.fit_pars)), + 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), + 1.1 * np.nanmax(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), ] ) - ax1.set_xlim([200, 2700]) - ax1.grid() + ax1.set_xlim(erange) ax1.set_ylabel("FWHM energy resolution (keV)") ax2.plot( fwhm_peaks, - (fit_fwhms - fwhm_slope(fwhm_peaks, *ecal_class.fit_pars)) / fit_dfwhms, + (fit_fwhms - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["pars"])) / fit_dfwhms, + lw=0, + marker="x", + c="g", + ) + ax2.plot( + fwhm_peaks, + (fit_fwhms - fwhm_quadratic.func(fwhm_peaks, *ecal_class.fwhm_fit_quadratic["pars"])) / fit_dfwhms, lw=0, marker="x", c="b", ) + ax2.plot(erange,[0,0], color="black",lw=0.5) ax2.set_xlabel("Energy (keV)") ax2.set_ylabel("Normalised Residuals") - ax2.grid() plt.tight_layout() plt.close() return fig -def bin_spectrum(ecal_class, erange=[0, 3000], dx=2): +def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): bins = np.arange(erange[0], erange[1] + dx, dx) return { "bins": pgh.get_bin_centers(bins), "counts": np.histogram( - ecal_class.data.query("is_usable")[f"{ecal_class.energy_param}_cal"], bins + data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - ecal_class.data.query("~is_valid_cal&is_not_pulser")[ - f"{ecal_class.energy_param}_cal" + data.query("~is_valid_cal&is_not_pulser")[ + ecal_class.cal_energy_param ], bins, )[0], "pulser_counts": np.histogram( - ecal_class.data.query("~is_not_pulser")[f"{ecal_class.energy_param}_cal"], + data.query("~is_not_pulser")[ecal_class.cal_energy_param], bins, )[0], } -def bin_survival_fraction(ecal_class, erange=[0, 3000], dx=6): +def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): counts_pass, bins_pass, _ = pgh.get_hist( - ecal_class.data.query("is_usable")[f"{ecal_class.energy_param}_cal"], + data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins=np.arange(erange[0], erange[1] + dx, dx), ) counts_fail, bins_fail, _ = pgh.get_hist( - ecal_class.data.query("~is_valid_cal&is_not_pulser")[ - f"{ecal_class.energy_param}_cal" + data.query("~is_valid_cal&is_not_pulser")[ + ecal_class.cal_energy_param ], bins=np.arange(erange[0], erange[1] + dx, dx), ) @@ -981,35 +1247,36 @@ def bin_survival_fraction(ecal_class, erange=[0, 3000], dx=6): def energy_cal_th( files: list[str], - energy_params: list[str], + energy_params: list[str] , + lh5_path: str = "dsp", hit_dict: dict = {}, cut_parameters: dict[str, int] = {"bl_mean": 4, "bl_std": 4, "pz_std": 4}, - lh5_path: str = "dsp", plot_options: dict = None, - guess_keV: float | None = None, threshold: int = 0, p_val: float = 0, n_events: int = None, final_cut_field: str = "is_valid_cal", simplex: bool = True, + guess_keV: float | None = None, deg: int = 1, -) -> tuple(dict, dict): +) -> tuple(dict, dict, dict, dict): + data = load_data( files, lh5_path, - energy_params, hit_dict, - cut_parameters=list(cut_parameters) if cut_parameters is not None else None, + params = energy_params + list(cut_parameters) + ["timestamp"] ) data, hit_dict = apply_cuts(data, hit_dict, cut_parameters, final_cut_field) - output_dict = {} + results_dict = {} plot_dict = {} + full_object_dict = {} for energy_param in energy_params: ecal = calibrate_parameter( - data, energy_param, + f"{final_cut_field}&is_not_pulser", plot_options, guess_keV, threshold, @@ -1018,12 +1285,55 @@ def energy_cal_th( simplex, deg, ) - ecal.calibrate_parameter() - output_dict.update(ecal.output_dict) + ecal.calibrate_parameter(data) + results_dict[ecal.cal_energy_param] = ecal.get_results_dict(data) hit_dict.update(ecal.hit_dict) + full_object_dict[ecal.cal_energy_param] = ecal if ~np.isnan(ecal.pars).all(): - ecal.fill_plot_dict() - plot_dict[energy_param] = ecal.plot_dict + plot_dict[ecal.cal_energy_param] = ecal.fill_plot_dict(data) + + + log.info(f"Finished all calibrations") + return hit_dict, results_dict, plot_dict, full_object_dict + + +def partition_energy_cal_th( + files: list[str], + energy_params: list[str], + lh5_path: str = "dsp", + hit_dict: dict = {}, + plot_options: dict = None, + threshold: int = 0, + p_val: float = 0, + n_events: int = None, + final_cut_field: str = "is_valid_cal", + simplex: bool = True, +) -> tuple(dict, dict, dict, dict): + + data = load_data( + files, + lh5_path, + hit_dict, + params = energy_params + [final_cut_field] + ["timestamp"] + ) + + results_dict = {} + plot_dict = {} + full_object_dict = {} + for energy_param in energy_params: + ecal = high_stats_fitting( + energy_param, + f"{final_cut_field}&is_not_pulser", + threshold, + p_val, + plot_options, + simplex, + ) + ecal.fit_peaks(data) + results_dict[energy_param] = ecal.get_results_dict(data) + full_object_dict[energy_param] = ecal + if ecal.results: + plot_dict[energy_param] = ecal.fill_plot_dict(data) log.info(f"Finished all calibrations") - return hit_dict, output_dict, plot_dict + return results_dict, plot_dict, full_object_dict \ No newline at end of file From 011838ac9f039a3275590e882a223c5e1f33fec0 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:47:31 +0200 Subject: [PATCH 05/22] rewrite of aoe routines, better handling of guesses, improved clarity of results, split out plots into own functions --- src/pygama/pargen/AoE_cal.py | 3137 +++++++++++++++------------------- 1 file changed, 1395 insertions(+), 1742 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 114428d19..dc9f9ae3c 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -29,22 +29,12 @@ import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf -import pygama.pargen.cuts as cts -import pygama.pargen.ecal_th as thc -import pygama.pargen.energy_cal as pgc from pygama.math.peak_fitting import nb_erfc from pygama.pargen.energy_cal import get_i_local_maxima +from pygama.pargen.utils import * log = logging.getLogger(__name__) - -def return_nans(func): - args = func.__code__.co_varnames[: func.__code__.co_argcount][1:] - c = cost.UnbinnedNLL(np.array([0]), func) - m = Minuit(c, *[np.nan for arg in args]) - return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) - - class PDF: """ @@ -54,13 +44,7 @@ class PDF: def pdf(x): return - def return_nans(self): - args = self.pdf.__code__.co_varnames[: self.pdf.__code__.co_argcount][2:] - c = cost.UnbinnedNLL(np.array([0]), self.pdf) - m = Minuit(c, *[np.nan for arg in args]) - return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) - - def _replace_values(self, dic, **kwargs): + def _replace_values(dic, **kwargs): for item, value in kwargs.items(): dic[item] = value return dic @@ -68,7 +52,6 @@ def _replace_values(self, dic, **kwargs): class standard_aoe(PDF): def pdf( - self, x: np.array, n_sig: float, mu: float, @@ -97,7 +80,6 @@ def pdf( return sig, bkg def extended_pdf( - self, x: np.array, n_sig: float, mu: float, @@ -112,7 +94,7 @@ def extended_pdf( Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ if components == True: - sig, bkg = self.pdf( + sig, bkg = standard_aoe.pdf( x, n_sig, mu, @@ -125,7 +107,7 @@ def extended_pdf( ) return n_sig + n_bkg, sig, bkg else: - return n_sig + n_bkg, self.pdf( + return n_sig + n_bkg, standard_aoe.pdf( x, n_sig, mu, @@ -137,7 +119,7 @@ def extended_pdf( components, ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] @@ -162,10 +144,13 @@ def guess(self, hist, bins, var, **kwargs): "upper_range": np.nanmax(bins), "components": 0, } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return standard_aoe._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_sig": (0, None), "mu": (None, None), @@ -179,10 +164,10 @@ def bounds(self, guess, **kwargs): return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_sig": False, "mu": False, @@ -195,19 +180,18 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in standard_aoe._replace_values(fixed_dict, **kwargs).items() ] - def width(self, pars, errs, cov): + def width(pars, errs, cov): return pars["sigma"], errs["sigma"] - def centroid(self, pars, errs, cov): + def centroid(pars, errs, cov): return pars["mu"], errs["mu"] class standard_aoe_with_high_tail(PDF): def pdf( - self, x: np.array, n_sig: float, mu: float, @@ -242,7 +226,6 @@ def pdf( return sig, bkg def extended_pdf( - self, x: np.array, n_sig: float, mu: float, @@ -259,7 +242,7 @@ def extended_pdf( Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ if components == True: - sig, bkg = self.pdf( + sig, bkg = standard_aoe_with_high_tail.pdf( x, n_sig, mu, @@ -274,7 +257,7 @@ def extended_pdf( ) return n_sig + n_bkg, sig, bkg else: - return n_sig + n_bkg, self.pdf( + return n_sig + n_bkg, standard_aoe_with_high_tail.pdf( x, n_sig, mu, @@ -288,7 +271,7 @@ def extended_pdf( components, ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] try: @@ -314,10 +297,13 @@ def guess(self, hist, bins, var, **kwargs): "upper_range": np.nanmax(bins), "components": 0, } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return standard_aoe_with_high_tail._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_sig": (0, None), "mu": (None, None), @@ -333,10 +319,10 @@ def bounds(self, guess, **kwargs): return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_with_high_tail._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_sig": False, "mu": False, @@ -351,22 +337,21 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in standard_aoe_with_high_tail._replace_values(fixed_dict, **kwargs).items() ] - def width(self, pars, errs, cov): + def width(pars, errs, cov): fwhm, fwhm_err = pgf.radford_fwhm( pars[2], pars[3], np.abs(pars[4]), cov=cov[:7, :7] ) return fwhm / 2.355, fwhm_err / 2.355 - def centroid(self, pars, errs, cov): + def centroid(pars, errs, cov): return pars["mu"], errs["mu"] class standard_aoe_bkg(PDF): def pdf( - self, x: np.array, n_events: float, mu: float, @@ -388,7 +373,6 @@ def pdf( return sig def extended_pdf( - self, x: np.array, n_events: float, mu: float, @@ -400,11 +384,11 @@ def extended_pdf( """ Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ - return n_events, self.pdf( + return n_events, standard_aoe_bkg.pdf( x, n_events, mu, sigma, tau_bkg, lower_range, upper_range ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] @@ -427,10 +411,13 @@ def guess(self, hist, bins, var, **kwargs): "lower_range": np.nanmin(bins), "upper_range": np.nanmax(bins), } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return standard_aoe_bkg._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_events": (0, None), "mu": (None, None), @@ -442,10 +429,10 @@ def bounds(self, guess, **kwargs): return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_bkg._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_bkg": False, "mu": False, @@ -456,12 +443,12 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in standard_aoe_bkg._replace_values(fixed_dict, **kwargs).items() ] class gaussian(PDF): - def pdf(self, x: np.array, n_events: float, mu: float, sigma: float) -> np.array: + def pdf(x: np.array, n_events: float, mu: float, sigma: float) -> np.array: """ PDF for A/E consists of a gaussian signal with tail with gaussian tail background """ @@ -473,16 +460,16 @@ def pdf(self, x: np.array, n_events: float, mu: float, sigma: float) -> np.array return sig def extended_pdf( - self, x: np.array, n_events: float, mu: float, sigma: float + x: np.array, n_events: float, mu: float, sigma: float ) -> tuple(float, np.array): """ Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ - return n_events, self.pdf( - x, n_events, mu, sigma, tau_bkg, lower_range, upper_range + return n_events, gaussian.pdf( + x, n_events, mu, sigma ) - def guess(self, hist, bins, var, **kwargs): + def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 mu = bin_centers[np.argmax(hist)] try: @@ -497,18 +484,21 @@ def guess(self, hist, bins, var, **kwargs): ) guess_dict = {"n_events": ls_guess, "mu": mu, "sigma": sigma} + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return gaussian._replace_values(guess_dict, **kwargs) - def bounds(self, gpars, **kwargs): + def bounds(gpars, **kwargs): bounds_dict = {"n_events": (0, None), "mu": (None, None), "sigma": (0, None)} return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in gaussian._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_events": False, "mu": False, @@ -516,13 +506,12 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in gaussian._replace_values(fixed_dict, **kwargs).items() ] class drift_time_distribution(PDF): def pdf( - self, x, n_sig1, mu1, @@ -544,7 +533,6 @@ def pdf( return gauss1 + gauss2 def extended_pdf( - self, x, n_sig1, mu1, @@ -559,7 +547,7 @@ def extended_pdf( components, ): if components is True: - gauss1, gauss2 = self.pdf( + gauss1, gauss2 = drift_time_distribution.pdf( x, n_sig1, mu1, @@ -576,7 +564,7 @@ def extended_pdf( return n_sig1 + n_sig2, gauss1, gauss2 else: - return n_sig1 + n_sig2, self.pdf( + return n_sig1 + n_sig2, drift_time_distribution.pdf( x, n_sig1, mu1, @@ -591,7 +579,7 @@ def extended_pdf( components, ) - def guess(self, hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: + def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: """ Guess for fitting dt spectrum """ @@ -642,10 +630,13 @@ def guess(self, hist: np.array, bins: np.array, var: np.array, **kwargs) -> list "tau2": 0.1, "components": 0, } + for key, guess in guess_dict.items(): + if np.isnan(guess): + guess_dict[key]=0 - return self._replace_values(guess_dict, **kwargs) + return drift_time_distribution._replace_values(guess_dict, **kwargs) - def bounds(self, guess, **kwargs): + def bounds(guess, **kwargs): bounds_dict = { "n_sig1": (0, None), "mu1": (None, None), @@ -662,10 +653,10 @@ def bounds(self, guess, **kwargs): return [ bound - for field, bound in self._replace_values(bounds_dict, **kwargs).items() + for field, bound in drift_time_distribution._replace_values(bounds_dict, **kwargs).items() ] - def fixed(self, **kwargs): + def fixed(**kwargs): fixed_dict = { "n_sig1": False, "mu1": False, @@ -681,93 +672,39 @@ def fixed(self, **kwargs): } return [ - fixed for field, fixed in self._replace_values(fixed_dict, **kwargs).items() + fixed for field, fixed in drift_time_distribution._replace_values(fixed_dict, **kwargs).items() ] - -def tag_pulser(files, lh5_path): - pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) - pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (pulser_df.trapTmax.values < entry[0] + entry[1]) & ( - pulser_df.trapTmax.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") - else: - ids = np.ones(len(pulser_df), dtype=bool) - log.debug(f"no pulser found") - return ids - - -def load_aoe( - files: list, - lh5_path: str, - cal_dict: dict, - params: [ - A_max, - tp_0_est, - tp_99, - dt_eff, - A_max_tri, - cuspEmax, - cuspEmax_ctc_cal, - is_valid_cal, - ], - energy_param: str, - current_param: str, -) -> tuple(np.array, np.array, np.array, np.array): - """ - Loads in the A/E parameters needed and applies calibration constants to energy - """ - - # switch this to dataframes, include timestamp - - sto = lh5.LH5Store() - - if isinstance(files, dict): - df = [] - all_files = [] - for tstamp, tfiles in files.items(): - table = sto.read_object(lh5_path, tfiles)[0] - if tstamp in cal_dict: - file_df = table.eval(cal_dict[tstamp]).get_dataframe() - else: - file_df = table.eval(cal_dict).get_dataframe() - file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object) - params.append("timestamp") - df.append(file_df) - all_files += tfiles - - df = pd.concat(df) - - elif isinstance(files, list): - table = sto.read_object(lh5_path, files)[0] - df = table.eval(cal_dict).get_dataframe() - all_files = files - - ids = tag_pulser(all_files, lh5_path) - df["is_not_pulser"] = ids - params.append("is_not_pulser") - - for col in list(df.keys()): - if col not in params: - df.drop(col, inplace=True, axis=1) - - param_dict = {} - for param in params: - # add cuts in here - if param not in df: - df[param] = lh5.load_nda(all_files, [param], lh5_path)[param] - - df["AoE_uncorr"] = np.divide(df[current_param], df[energy_param]) - return df +class pol1: + + def func(x, a, b): + return x*a + b + + def string_func(input_param): + return f"{input_param}*a+b" + + def guess(bands, means, mean_errs): + return [-1e-06, 5e-01] + +class sigma_fit: + + def func(x, a, b, c): + return np.sqrt(a + (b / (x + 10**-99)) ** c) + + def string_func(input_param): + return f"(a+(b/({input_param}+10**-99))**c)**(0.5)" + + def guess(bands, sigmas, sigma_errs): + return [np.nanpercentile(sigmas, 50) ** 2, 2, 2] + +class sigmoid_fit: + + def func(x, a, b, c, d): + return (a + b * x) * nb_erfc(c * x + d) + + + def guess(xs, ys, y_errs): + return [np.nanmax(ys) / 2, 0, 1, 1.5] def unbinned_aoe_fit( @@ -779,13 +716,12 @@ def unbinned_aoe_fit( """ hist, bins, var = pgh.get_hist(aoe, bins=500) - gauss = gaussian() - gpars = gauss.guess(hist, bins, var) + gpars = gaussian.guess(hist, bins, var) c1_min = gpars["mu"] - 2 * gpars["sigma"] c1_max = gpars["mu"] + 3 * gpars["sigma"] # Initial fit just using Gaussian - c1 = cost.UnbinnedNLL(aoe[(aoe < c1_max) & (aoe > c1_min)], gauss.pdf) + c1 = cost.UnbinnedNLL(aoe[(aoe < c1_max) & (aoe > c1_min)], gaussian.pdf) m1 = Minuit(c1, **gpars) m1.limits = [ @@ -793,7 +729,7 @@ def unbinned_aoe_fit( (gpars["mu"] * 0.8, gpars["mu"] * 1.2), (0.8 * gpars["sigma"], gpars["sigma"] * 1.2), ] - m1.fixed = gauss.fixed() + m1.fixed = gaussian.fixed() m1.migrad() if verbose: @@ -808,8 +744,7 @@ def unbinned_aoe_fit( n_bkg_guess = len(aoe[(aoe < fmax) & (aoe > fmin)]) - m1.values["n_events"] - aoe_bkg = standard_aoe_bkg() - bkg_guess = aoe_bkg.guess( + bkg_guess = standard_aoe_bkg.guess( hist, bins, var, @@ -821,19 +756,18 @@ def unbinned_aoe_fit( ) c2 = cost.ExtendedUnbinnedNLL( - aoe[(aoe < fmax_bkg) & (aoe > fmin)], aoe_bkg.extended_pdf + aoe[(aoe < fmax_bkg) & (aoe > fmin)], standard_aoe_bkg.extended_pdf ) m2 = Minuit(c2, **bkg_guess) - m2.fixed = aoe_bkg.fixed(mu=True) - m2.limits = aoe_bkg.bounds( + m2.fixed = standard_aoe_bkg.fixed(mu=True) + m2.limits = standard_aoe_bkg.bounds( bkg_guess, n_events=(0, 2 * len(aoe[(aoe < fmax_bkg) & (aoe > fmin)])) ) m2.simplex().migrad() m2.hesse() - aoe_pdf = pdf() - x0 = aoe_pdf.guess( + x0 = pdf.guess( hist, bins, var, @@ -849,14 +783,14 @@ def unbinned_aoe_fit( print(x0) # Full fit using gaussian signal with gaussian tail background - c = cost.ExtendedUnbinnedNLL(aoe[(aoe < fmax) & (aoe > fmin)], aoe_pdf.extended_pdf) + c = cost.ExtendedUnbinnedNLL(aoe[(aoe < fmax) & (aoe > fmin)], pdf.extended_pdf) m = Minuit(c, **x0) - m.limits = aoe_pdf.bounds( + m.limits = pdf.bounds( x0, n_sig=(0, 2 * len(aoe[(aoe < fmax) & (aoe > fmin)])), n_bkg=(0, 2 * len(aoe[(aoe < fmax) & (aoe > fmin)])), ) - m.fixed = aoe_pdf.fixed() + m.fixed = pdf.fixed() m.migrad() m.hesse() @@ -868,7 +802,7 @@ def unbinned_aoe_fit( m.simplex.migrad() m.hesse() except: - return aoe_pdf.return_nans() + return return_nans(pdf) if display > 1: plt.figure() @@ -877,12 +811,12 @@ def unbinned_aoe_fit( aoe[(aoe < fmax) & (aoe > fmin)], bins=200, histtype="step", label="Data" ) dx = np.diff(bins) - plt.plot(xs, aoe_pdf.pdf(xs, *m.values) * dx[0], label="Full fit") - sig, bkg = aoe_pdf.pdf(xs, *m.values[:-1], True) + plt.plot(xs, pdf.pdf(xs, *m.values) * dx[0], label="Full fit") + sig, bkg = pdf.pdf(xs, *m.values[:-1], True) plt.plot(xs, sig * dx[0], label="Signal") plt.plot(xs, bkg * dx[0], label="Background") - plt.plot(xs, gauss.pdf(xs, *m1.values) * dx[0], label="Initial Gaussian") - plt.plot(xs, aoe_bkg.pdf(xs, *m2.values) * dx[0], label="Bkg guess") + plt.plot(xs, gaussian.pdf(xs, *m1.values) * dx[0], label="Initial Gaussian") + plt.plot(xs, standard_aoe_bkg.pdf(xs, *m2.values) * dx[0], label="Bkg guess") plt.xlabel("A/E") plt.ylabel("Counts") plt.legend(loc="upper left") @@ -890,7 +824,7 @@ def unbinned_aoe_fit( plt.figure() bin_centers = (bins[1:] + bins[:-1]) / 2 - res = (aoe_pdf.pdf(bin_centers, *m.values) * dx[0]) - counts + res = (pdf.pdf(bin_centers, *m.values) * dx[0]) - counts plt.plot( bin_centers, [re / count if count != 0 else re for re, count in zip(res, counts)], @@ -953,519 +887,13 @@ def fit_time_means(tstamps, means, reses): return out_dict -def aoe_timecorr( - df, energy_param, current_param, pdf=standard_aoe, plot_dict={}, display=0 -): - if "timestamp" in df: - tstamps = sorted(np.unique(df["timestamp"])) - if len(tstamps) > 1: - means = [] - errors = [] - reses = [] - res_errs = [] - final_tstamps = [] - for tstamp, time_df in df.groupby("timestamp", sort=True): - pars, errs, cov = unbinned_aoe_fit( - time_df.query( - f"is_usable_fits & cuspEmax_ctc_cal>1000 & cuspEmax_ctc_cal<1300" - )["AoE_uncorr"], - pdf=pdf, - display=display, - ) - final_tstamps.append(tstamp) - means.append(pars["mu"]) - errors.append(errs["mu"]) - reses.append(pars["sigma"] / pars["mu"]) - res_errs.append( - reses[-1] - * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"]) - ) - mean_dict = fit_time_means(tstamps, means, reses) - - df["AoE_timecorr"] = df["AoE_uncorr"] / np.array( - [mean_dict[tstamp] for tstamp in df["timestamp"]] - ) - out_dict = { - tstamp: { - "AoE_Timecorr": { - "expression": f"({current_param}/{energy_param})/a", - "parameters": {"a": mean_dict[tstamp]}, - } - } - for tstamp in mean_dict - } - res_dict = { - "times": tstamps, - "mean": means, - "mean_errs": errors, - "res": reses, - "res_errs": res_errs, - } - if display > 0: - fig1, ax = plt.subplots(1, 1) - ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in tstamps], - means, - yerr=errors, - linestyle=" ", - ) - ax.step( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in list(mean_dict) - ], - [mean_dict[tstamp] for tstamp in mean_dict], - where="post", - ) - ax.fill_between( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in list(mean_dict) - ], - y1=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - - 0.2 * np.array(reses), - y2=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - + 0.2 * np.array(reses), - color="green", - alpha=0.2, - ) - ax.fill_between( - [ - datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") - for tstamp in list(mean_dict) - ], - y1=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - - 0.4 * np.array(reses), - y2=np.array([mean_dict[tstamp] for tstamp in mean_dict]) - + 0.4 * np.array(reses), - color="yellow", - alpha=0.2, - ) - ax.set_xlabel("time") - ax.set_ylabel("A/E mean") - myFmt = mdates.DateFormatter("%b %d") - ax.xaxis.set_major_formatter(myFmt) - plot_dict["aoe_time"] = fig1 - if display > 1: - plt.show() - else: - plt.close() - fig2, ax = plt.subplots(1, 1) - ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in tstamps], - reses, - yerr=res_errs, - linestyle=" ", - ) - ax.set_xlabel("time") - ax.set_ylabel("A/E res") - myFmt = mdates.DateFormatter("%b %d") - ax.xaxis.set_major_formatter(myFmt) - plot_dict["aoe_res"] = fig2 - if display > 1: - plt.show() - else: - plt.close() - return df, out_dict, res_dict, plot_dict - else: - return df, out_dict, res_dict - else: - pars, errs, cov = unbinned_aoe_fit( - df.query("is_usable_fits & cuspEmax_ctc_cal>1000 & cuspEmax_ctc_cal<1300")[ - "AoE_uncorr" - ] - ) - df["AoE_timecorr"] = df["AoE_uncorr"] / pars["mu"] - out_dict = { - "AoE_Timecorr": { - "expression": f"({current_param}/{energy_param})/a", - "parameters": {"a": pars["mu"]}, - } - } - res_err = (pars["sigma"] / pars["mu"]) * np.sqrt( - errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"] - ) - fit_result = { - "times": [np.nan], - "mean": [pars["mu"]], - "mean_errs": [errs["mu"]], - "res": [pars["sigma"] / pars["mu"]], - "res_errs": [res_err], - } - if display > 0: - return df, out_dict, fit_result, plot_dict - else: - return df, out_dict, fit_result - - -def pol1(x: np.array, a: float, b: float) -> np.array: - """Basic Polynomial for fitting A/E centroid against energy""" - return a * x + b - - -def sigma_fit(x: np.array, a: float, b: float, c: float) -> np.array: - """Function definition for fitting A/E sigma against energy""" - return np.sqrt(a + (b / (x + 10**-99)) ** c) - - -def AoEcorrection( - energy: np.array, - aoe: np.array, - eres: list, - pdf=standard_aoe, - plot_dict: dict = {}, - display: int = 0, - comptBands_width=20, - sigma_func=sigma_fit, -) -> tuple(np.array, np.array): - """ - Calculates the corrections needed for the energy dependence of the A/E. - Does this by fitting the compton continuum in slices and then applies fits to the centroid and variance. - """ - - comptBands = np.arange(900, 2350, comptBands_width) - peaks = np.array([1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105]) - allowed = np.array([], dtype=bool) - for i, band in enumerate(comptBands): - allow = True - for peak in peaks: - if (peak - 5) > band and (peak - 5) < (band + comptBands_width): - allow = False - elif (peak + 5 > band) and (peak + 5) < (band + comptBands_width): - allow = False - allowed = np.append(allowed, allow) - comptBands = comptBands[allowed] - - results_dict = {} - comptBands = comptBands[::-1] # Flip so color gets darker when plotting - compt_aoe = np.zeros(len(comptBands)) - aoe_sigmas = np.zeros(len(comptBands)) - compt_aoe_err = np.zeros(len(comptBands)) - aoe_sigmas_err = np.zeros(len(comptBands)) - ratio = np.zeros(len(comptBands)) - ratio_err = np.zeros(len(comptBands)) - - copper = cm = plt.get_cmap("copper") - cNorm = mcolors.Normalize(vmin=0, vmax=len(comptBands)) - scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=copper) - - if display > 0: - fits_fig = plt.figure() - - # Fit each compton band - for i, band in enumerate(comptBands): - aoe_tmp = aoe[ - (energy > band) & (energy < band + comptBands_width) & (aoe > 0) - ] # [:20000] - try: - aoe_pdf = pdf() - pars, errs, cov = unbinned_aoe_fit(aoe_tmp, pdf=pdf, display=display) - compt_aoe[i], compt_aoe_err[i] = aoe_pdf.centroid(pars, errs, cov) - aoe_sigmas[i], aoe_sigmas_err[i] = aoe_pdf.width(pars, errs, cov) - - ratio[i] = pars["n_sig"] / pars["n_bkg"] - ratio_err[i] = ratio[i] * np.sqrt( - (errs["n_sig"] / pars["n_sig"]) ** 2 - + (errs["n_bkg"] / pars["n_bkg"]) ** 2 - ) - - except: - compt_aoe[i] = np.nan - aoe_sigmas[i] = np.nan - compt_aoe_err[i] = np.nan - aoe_sigmas_err[i] = np.nan - ratio[i] = np.nan - ratio_err[i] = np.nan - - if display > 0: - if ( - np.isnan(errs["mu"]) - | np.isnan(errs["sigma"]) - | (errs["mu"] == 0) - | (errs["sigma"] == 0) - ): - pass - else: - xs = np.arange( - pars["mu"] - 4 * pars["sigma"], - pars["mu"] + 3 * pars["sigma"], - pars["sigma"] / 10, - ) - colorVal = scalarMap.to_rgba(i) - aoe_pdf = pdf() - plt.plot(xs, aoe_pdf.pdf(xs, *pars), color=colorVal) - - if display > 0: - plt.xlabel("A/E") - plt.ylabel("Expected Counts") - plt.title("Compton Band Fits") - cbar = plt.colorbar( - cmx.ScalarMappable(norm=cNorm, cmap=plt.get_cmap("copper_r")), - orientation="horizontal", - label="Compton Band Energy", - ticks=[0, 16, 32, len(comptBands)], - ) # cax=ax, - cbar.ax.set_xticklabels( - [ - comptBands[::-1][0], - comptBands[::-1][16], - comptBands[::-1][32], - comptBands[::-1][-1], - ] - ) - plot_dict["band_fits"] = fits_fig - if display > 1: - plt.show() - else: - plt.close() - - ids = ( - np.isnan(compt_aoe_err) - | np.isnan(aoe_sigmas_err) - | (aoe_sigmas_err == 0) - | (compt_aoe_err == 0) - ) - results_dict["n_of_valid_fits"] = len(np.where(~ids)[0]) - # Fit mus against energy - p0_mu = [-1e-06, 5e-01] - c_mu = cost.LeastSquares( - comptBands[~ids], compt_aoe[~ids], compt_aoe_err[~ids], pol1 - ) - c_mu.loss = "soft_l1" - m_mu = Minuit(c_mu, *p0_mu) - m_mu.simplex() - m_mu.migrad() - m_mu.hesse() - - pars = m_mu.values - errs = m_mu.errors - - csqr_mu = np.sum( - ((compt_aoe[~ids] - pol1(comptBands[~ids], *pars)) ** 2) / compt_aoe_err[~ids] - ) - dof_mu = len(compt_aoe[~ids]) - len(pars) - results_dict["p_val_mu"] = chi2.sf(csqr_mu, dof_mu) - results_dict["csqr_mu"] = (csqr_mu, dof_mu) - - # Fit sigma against energy - p0_sig = [np.nanpercentile(aoe_sigmas[~ids], 50) ** 2, 2, 2] - c_sig = cost.LeastSquares( - comptBands[~ids], aoe_sigmas[~ids], aoe_sigmas_err[~ids], sigma_func - ) - c_sig.loss = "soft_l1" - m_sig = Minuit(c_sig, *p0_sig) - m_sig.simplex() - m_sig.migrad() - m_sig.hesse() - - sig_pars = m_sig.values - sig_errs = m_sig.errors - - csqr_sig = np.sum( - ((aoe_sigmas[~ids] - sigma_func(comptBands[~ids], *sig_pars)) ** 2) - / aoe_sigmas_err[~ids] - ) - dof_sig = len(aoe_sigmas[~ids]) - len(sig_pars) - results_dict["p_val_sig"] = chi2.sf(csqr_sig, dof_sig) - results_dict["csqr_sig"] = (csqr_sig, dof_sig) - - model = pol1(comptBands, *pars) - sig_model = sigma_func(comptBands, *sig_pars) - - # Get DEP fit - sigma = np.sqrt(eres[0] + 1592 * eres[1]) / 2.355 - n_sigma = 4 - peak = 1592 - emin = peak - n_sigma * sigma - emax = peak + n_sigma * sigma - try: - dep_pars, dep_err, dep_cov = unbinned_aoe_fit( - aoe[(energy > emin) & (energy < emax) & (aoe > 0)], pdf=pdf - ) - except: - dep_pars, dep_err, dep_cov = return_nans(pdf) - - if display > 0: - mean_fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) - ax1.errorbar( - comptBands[~ids] + 10, - compt_aoe[~ids], - yerr=compt_aoe_err[~ids], - xerr=10, - label="data", - linestyle=" ", - ) - ax1.plot(comptBands[~ids] + 10, model[~ids], label="linear model") - ax1.errorbar( - 1592, - dep_pars["mu"], - xerr=n_sigma * sigma, - yerr=dep_err["mu"], - label="DEP", - color="green", - linestyle=" ", - ) - - ax1.legend(title="A/E mu energy dependence", frameon=False) - - ax1.set_ylabel("raw A/E (a.u.)", ha="right", y=1) - ax2.scatter( - comptBands[~ids] + 10, - 100 * (compt_aoe[~ids] - model[~ids]) / model[~ids], - lw=1, - c="b", - ) - ax2.scatter( - 1592, - 100 * (dep_pars["mu"] - pol1(1592, *pars)) / pol1(1592, *pars), - lw=1, - c="g", - ) - ax2.set_ylabel("Residuals %", ha="right", y=1) - ax2.set_xlabel("Energy (keV)", ha="right", x=1) - plt.tight_layout() - plot_dict["mean_fit"] = mean_fig - if display > 1: - plt.show() - else: - plt.close() - - sig_fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) - ax1.errorbar( - comptBands[~ids] + 10, - aoe_sigmas[~ids], - yerr=aoe_sigmas_err[~ids], - xerr=10, - label="data", - linestyle=" ", - ) - if sigma_func == sigma_fit: - label = f"sqrt model: \nsqrt({sig_pars[0]:1.4f}+({sig_pars[1]:1.1f}/E)^{sig_pars[2]:1.1f})" - elif sigma_func == sigma_fit_quadratic: - label = f"quad model: \n({sig_pars[0]:1.4f}+({sig_pars[1]:1.6f}*E)+\n({sig_pars[2]:1.6f}*E)^2)" - else: - raise ValueError("unknown sigma function") - ax1.plot( - comptBands[~ids], - sig_model[~ids], - label=label, - ) - ax1.errorbar( - 1592, - dep_pars["sigma"], - xerr=n_sigma * sigma, - yerr=dep_err["sigma"], - label="DEP", - color="green", - ) - ax1.set_ylabel("A/E stdev (a.u.)", ha="right", y=1) - ax1.legend(title="A/E stdev energy dependence", frameon=False) - ax2.scatter( - comptBands[~ids] + 10, - 100 * (aoe_sigmas[~ids] - sig_model[~ids]) / sig_model[~ids], - lw=1, - c="b", - ) - ax2.scatter( - 1592, - 100 - * (dep_pars["sigma"] - sigma_func(1592, *sig_pars)) - / sigma_func(1592, *sig_pars), - lw=1, - c="g", - ) - ax2.set_ylabel("Residuals", ha="right", y=1) - ax2.set_xlabel("Energy (keV)", ha="right", x=1) - plt.tight_layout() - plot_dict["sigma_fit"] = sig_fig - if display > 1: - plt.show() - else: - plt.close() - return pars, sig_pars, results_dict, dep_pars, plot_dict - else: - return pars, sig_pars, results_dict, dep_pars - - -def plot_compt_bands_overlayed( - aoe: np.array, - energy: np.array, - eranges: list[tuple], - aoe_range: list[float] = None, - density=True, -) -> None: - """ - Function to plot various compton bands to check energy dependence and corrections - """ - - for erange in eranges: - range_idxs = (energy > erange - 10) & (energy < erange + 10) - hist, bins, var = pgh.get_hist( - aoe[range_idxs][ - (~np.isnan(aoe[range_idxs])) - & (aoe[range_idxs] > np.nanpercentile(aoe[range_idxs], 1)) - & (aoe[range_idxs] < np.nanpercentile(aoe[range_idxs], 99)) - ], - bins=100, - ) - bin_cs = (bins[1:] + bins[:-1]) / 2 - mu = bin_cs[np.argmax(hist)] - if aoe_range is not None: - idxs = ( - (energy > erange - 10) - & (energy < erange + 10) - & (aoe > aoe_range[0]) - & (aoe < aoe_range[1]) - & (~np.isnan(aoe)) - ) - bins = np.linspace(aoe_range[0], aoe_range[1], 50) - else: - idxs = (energy > erange - 10) & (energy < erange + 10) & (~np.isnan(aoe)) - bins = np.linspace(0.85, 1.05, 200) - plt.hist( - aoe[idxs], - bins=bins, - histtype="step", - label=f"{erange-10}-{erange+10}", - density=density, - ) - - -def plot_dt_dep( - aoe: np.array, energy: np.array, dt: np.array, erange: list[tuple], title: str -) -> None: - """ - Function to produce 2d histograms of A/E against drift time to check dependencies - """ - - hist, bins, var = pgh.get_hist( - aoe[(energy > erange[0]) & (energy < erange[1]) & (~np.isnan(aoe))], bins=500 - ) - bin_cs = (bins[1:] + bins[:-1]) / 2 - mu = bin_cs[np.argmax(hist)] - aoe_range = [mu * 0.9, mu * 1.1] - - idxs = ( - (energy > erange[0]) - & (energy < erange[1]) - & (aoe > aoe_range[0]) - & (aoe < aoe_range[1]) - & (dt < 2000) - ) - - plt.hist2d(aoe[idxs], dt[idxs], bins=[200, 100], norm=LogNorm()) - plt.ylabel("Drift Time (ns)") - plt.xlabel("A/E") - plt.title(title) - - -def energy_guess(hist, bins, var, func_i, peak, eres_pars, fit_range): +def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): """ Simple guess for peak fitting """ if func_i == pgf.extended_radford_pdf: bin_cs = (bins[1:] + bins[:-1]) / 2 - sigma = thc.fwhm_slope(peak, *eres_pars) / 2.355 + sigma = eres / 2.355 i_0 = np.nanargmax(hist) mu = peak height = hist[i_0] @@ -1496,12 +924,15 @@ def energy_guess(hist, bins, var, func_i, peak, eres_pars, fit_range): fit_range[0], fit_range[1], 0, - ] # + ] + for i, guess in enumerate(parguess): + if np.isnan(guess): + parguess[i]=0 return parguess elif func_i == pgf.extended_gauss_step_pdf: mu = peak - sigma = thc.fwhm_slope(peak, *eres_pars) / 2.355 + sigma = eres / 2.355 i_0 = np.argmax(hist) bg = np.mean(hist[-10:]) step = bg - np.mean(hist[:10]) @@ -1514,13 +945,18 @@ def energy_guess(hist, bins, var, func_i, peak, eres_pars, fit_range): nbkg_guess = 0 if nsig_guess < 0: nsig_guess = 0 - return [nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0] + + parguess=[nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0] + for i, guess in enumerate(parguess): + if np.isnan(guess): + parguess[i]=0 + return parguess def unbinned_energy_fit( energy: np.array, peak: float, - eres_pars: list = None, + eres: list, simplex=False, guess=None, display=0, @@ -1536,7 +972,7 @@ def unbinned_energy_fit( except ValueError: pars, errs, cov = return_nans(pgf.radford_pdf) return pars, errs - sigma = thc.fwhm_slope(peak, *eres_pars) / 2.355 + sigma = eres / 2.355 if guess is None: x0 = energy_guess( hist, @@ -1544,7 +980,7 @@ def unbinned_energy_fit( var, pgf.extended_gauss_step_pdf, peak, - eres_pars, + eres, (np.nanmin(energy), np.nanmax(energy)), ) c = cost.ExtendedUnbinnedNLL(energy, pgf.extended_gauss_step_pdf) @@ -1588,7 +1024,7 @@ def unbinned_energy_fit( var, pgf.extended_radford_pdf, peak, - eres_pars, + eres, (np.nanmin(energy), np.nanmax(energy)), ) x0[0] = x1[0] @@ -1661,7 +1097,7 @@ def get_peak_label(peak: float) -> str: def get_survival_fraction( energy, - aoe, + cut_param, cut_val, peak, eres_pars, @@ -1669,17 +1105,23 @@ def get_survival_fraction( guess_pars_cut=None, guess_pars_surv=None, dt_mask=None, + mode= "greater", display=0, ): if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) + dt_mask = np.full(len(cut_param), True, dtype=bool) - nan_idxs = np.isnan(aoe) + nan_idxs = np.isnan(cut_param) if high_cut is not None: - idxs = (aoe > cut_val) & (aoe < high_cut) & dt_mask + idxs = (cut_param > cut_val) & (cut_param < high_cut) & dt_mask else: - idxs = (aoe > cut_val) & dt_mask - + if mode == "greater": + idxs = (cut_param > cut_val) & dt_mask + elif mode == "less": + idxs = (cut_param < cut_val) & dt_mask + else: + raise ValueError("mode not recognised") + if guess_pars_cut is None or guess_pars_surv is None: pars, errs = unbinned_energy_fit(energy, peak, eres_pars, simplex=True) guess_pars_cut = pars @@ -1717,1167 +1159,1378 @@ def get_survival_fraction( return sf, err, cut_pars, surv_pars -def get_aoe_cut_fit( - energy: np.array, - aoe: np.array, - peak: float, - ranges: tuple(int, int), - dep_acc: float, - eres_pars: list, - display: int = 1, - dep_correct: bool = False, - dep_mu: Callable = None, - sig_func: Callable = None, - plot_dict={}, -) -> float: - """ - Determines A/E cut by sweeping through values and for each one fitting the DEP to determine how many events survive. - Then interpolates to get cut value at desired DEP survival fraction (typically 90%) - """ - - min_range, max_range = ranges - - peak_energy = energy[ - (energy > peak - min_range) & (energy < peak + max_range) - ] # [:20000] - peak_aoe = aoe[ - (energy > peak - min_range) & (energy < peak + max_range) - ] # [:20000] - - if dep_correct is True: - peak_aoe = (peak_aoe / dep_mu(peak_energy)) - 1 - peak_aoe = peak_aoe / sig_func(peak_energy) - - cut_vals = np.arange(-8, 0, 0.2) - sfs = [] - sf_errs = [] - for cut_val in cut_vals: - sf, err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, - peak_aoe, - cut_val, - peak, - eres_pars, - guess_pars_cut=None, - guess_pars_surv=None, - ) - sfs.append(sf) - sf_errs.append(err) - - # return cut_vals, sfs, sf_errs - ids = (sf_errs < (1.5 * np.nanpercentile(sf_errs, 85))) & (~np.isnan(sf_errs)) - - def fit_func(x, a, b, c, d): - return (a + b * x) * nb_erfc(c * x + d) - - c = cost.LeastSquares( - cut_vals[ids], np.array(sfs)[ids], np.array(sf_errs)[ids], fit_func - ) - c.loss = "soft_l1" - m1 = Minuit(c, np.nanmax(sfs) / 2, 0, 1, 1.5) - m1.simplex().migrad() - xs = np.arange(np.nanmin(cut_vals[ids]), np.nanmax(cut_vals[ids]), 0.01) - p = fit_func(xs, *m1.values) - cut_val = round(xs[np.argmin(np.abs(p - (100 * 0.9)))], 3) - - if display > 0: - fig = plt.figure() - plt.errorbar( - cut_vals[ids], - np.array(sfs)[ids], - yerr=np.array(sf_errs)[ids], - linestyle=" ", - ) - - plt.plot(xs, p) - plt.hlines((100 * dep_acc), -8.1, cut_val, color="red", linestyle="--") - plt.vlines( - cut_val, - np.nanmin(np.array(sfs)[ids]) * 0.9, - (100 * dep_acc), - color="red", - linestyle="--", - ) - plt.xlabel("cut value") - plt.ylabel("survival percentage") - plt.xlim([-8.1, 0.1]) - plt.ylim([np.nanmin(np.array(sfs)[ids]) * 0.9, 102]) - plot_dict["cut_determination"] = fig - if display > 1: - plt.show() - else: - plt.close() - return cut_val, plot_dict - else: - return cut_val - - -def get_sf( +def get_sf_sweep( energy: np.array, - aoe: np.array, + cut_param: np.array, + final_cut_value: float, peak: float, - fit_width: tuple(int, int), - aoe_cut_val: float, eres_pars: list, - dt_mask: np.array = None, - display: int = 0, -) -> tuple(np.array, np.array, np.array, float, float): + dt_mask = None, + cut_range = (-5,5), + n_samples = 51, + mode= "greater" +) -> tuple(pd.DataFrame, float, float): """ Calculates survival fraction for gamma lines using fitting method as in cut determination """ if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) + dt_mask = np.full(len(cut_param), True, dtype=bool) - min_range = peak - fit_width[0] - max_range = peak + fit_width[1] - if peak == "1592.5": - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - peak_dt_mask = dt_mask[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - else: - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - peak_dt_mask = dt_mask[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - # if len(peak_aoe)>50000: - # rng = np.random.default_rng(10) - # rands = rng.choice(len(peak_aoe),50000,replace=False) - # rands.sort() - # peak_energy = peak_energy[rands] - # peak_aoe = peak_aoe[rands] - # peak_dt_mask = peak_dt_mask[rands] - - pars, errors = unbinned_energy_fit(peak_energy, peak, eres_pars, simplex=False) - pc_n = pars["n_sig"] - pc_err = errors["n_sig"] - sfs = [] - sf_errs = [] - - cut_vals = np.arange(-5, 5, 0.2) - final_cut_vals = [] + cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) + out_df = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) for cut_val in cut_vals: try: sf, err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, peak_aoe, cut_val, peak, eres_pars, dt_mask=peak_dt_mask + energy, cut_param, cut_val, peak, eres_pars, dt_mask=dt_mask, mode=mode ) - if np.isnan(cut_pars).all() == False and np.isnan(surv_pars).all() == False: - guess_pars_cut = cut_pars - guess_pars_surv = surv_pars + out_df = pd.concat([out_df, + pd.DataFrame([{"cut_val":cut_val, + "sf":sf, + "sf_err":err}])]) except: - sf = np.nan - err = np.nan - sfs.append(sf) - sf_errs.append(err) - final_cut_vals.append(cut_val) - ids = ( - (sf_errs < (5 * np.nanpercentile(sf_errs, 50))) - & (~np.isnan(sf_errs)) - & (np.array(sfs) < 100) - ) + pass + out_df.set_index("cut_val", inplace=True) sf, sf_err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, peak_aoe, aoe_cut_val, peak, eres_pars, dt_mask=peak_dt_mask + energy, cut_param, final_cut_value, peak, eres_pars, dt_mask=dt_mask, mode=mode ) - - if display > 0: - plt.figure() - plt.errorbar(cut_vals, sfs, sf_errs) - plt.show() - return ( - np.array(final_cut_vals)[ids], - np.array(sfs)[ids], - np.array(sf_errs)[ids], + out_df.query(f'sf_err<5*{np.nanpercentile(out_df["sf_err"], 50)}& sf_err==sf_err & sf<=100'), sf, sf_err, - ) + ) +def compton_sf(cut_param, low_cut_val, high_cut_val = None, mode="greater", dt_mask=None): + + if dt_mask is None: + dt_mask = np.full(len(cut_param), True, dtype=bool) + + if high_cut_val is not None: + mask = (cut_param > low_cut_val) & (cut_param < high_cut_val) & dt_mask + else: + if mode == "greater": + mask = (cut_param > low_cut_val) & dt_mask + elif mode == "less": + mask = (cut_param < low_cut_val) & dt_mask + else: + raise ValueError("mode not recognised") + + sf = 100 * len(cut_param[mask]) / len(cut_param) + sf_err = sf* np.sqrt( + (1 / len(cut_param)) + 1 / (len(cut_param[mask]) + 10**-99) + ) + return {"low_cut":low_cut_val, "sf":sf, "sf_err":sf_err, "high_cut": high_cut_val} -def compton_sf( +def compton_sf_sweep( energy: np.array, - aoe: np.array, - cut: float, + cut_param: np.array, + final_cut_value: float, peak: float, eres: list[float, float], dt_mask: np.array = None, - display: int = 1, + cut_range = (-5,5), + n_samples = 51, + mode= "greater" ) -> tuple(float, np.array, list): """ Determines survival fraction for compton continuum by basic counting """ - fwhm = np.sqrt(eres[0] + peak * eres[1]) - - emin = peak - 2 * fwhm - emax = peak + 2 * fwhm - sfs = [] - sf_errs = [] - ids = (energy > emin) & (energy < emax) & (~np.isnan(aoe)) - aoe = aoe[ids] - if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) - else: - dt_mask = dt_mask[ids] - cut_vals = np.arange(-5, 5, 0.1) + cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) + out_df = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) + for cut_val in cut_vals: - sfs.append(100 * len(aoe[(aoe > cut_val) & dt_mask]) / len(aoe)) - sf_errs.append( - sfs[-1] - * np.sqrt( - (1 / len(aoe)) + 1 / (len(aoe[(aoe > cut_val) & dt_mask]) + 10**-99) + ct_dict = compton_sf(cut_param, cut_val, mode=mode, dt_mask=dt_mask) + df = pd.DataFrame([{"cut_val":ct_dict["low_cut"], "sf":ct_dict["sf"], "sf_err":ct_dict["sf_err"]}]) + out_df = pd.concat([out_df, df]) + out_df.set_index("cut_val", inplace=True) + + sf_dict = compton_sf(cut_param, final_cut_value, mode=mode, dt_mask=dt_mask) + + return out_df, sf_dict["sf"], sf_dict["sf_err"] + +class cal_aoe: + + def __init__(self, + cal_dicts: dict, + cal_energy_param: str, + eres_func: callable, + pdf=standard_aoe, + selection_string: str = "is_valid_cal&is_not_pulser", + dt_corr: bool = False, + dep_acc:float = 0.9, + dep_correct: bool = False, + dt_cut:dict = None, + dt_param:str = "dt_eff", + high_cut_val: int = 3, + mean_func:Callable=pol1, + sigma_func:Callable=sigma_fit, + comptBands_width:int=20, + plot_options:dict={} + ): + self.cal_dicts = cal_dicts + self.cal_energy_param = cal_energy_param + self.eres_func = eres_func + self.pdf =pdf + self.selection_string = selection_string + self.dt_corr = dt_corr + self.dt_param = "dt_eff" + self.dep_correct= dep_correct + self.dt_cut = dt_cut + self.dep_acc = dep_acc + if self.dt_cut is not None: + self.update_cal_dicts(dt_cut["cut"]) + self.dt_cut_param = dt_cut["out_param"] + self.fit_selection = f"{self.selection_string} & {self.dt_cut_param}" + self.dt_cut_hard = dt_cut["hard"] + else: + self.dt_cut_param = None + self.dt_cut_hard = False + self.fit_selection = self.selection_string + self.high_cut_val = high_cut_val + self.mean_func= mean_func + self.sigma_func=sigma_func + self.comptBands_width = comptBands_width + self.plot_options=plot_options + + def update_cal_dicts(self, update_dict): + if re.match(r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0]): + for tstamp in self.cal_dicts: + if tstamp in update_dict: + self.cal_dicts[tstamp].update(update_dict[tstamp]) + else: + self.cal_dicts[tstamp].update(update_dict) + else: + self.cal_dicts.update(update_dict) + + def aoe_timecorr( + self, + df, + aoe_param, + output_name = "AoE_Timecorr", + display=0 + ): + log.info("Starting A/E time correction") + self.timecorr_df = pd.DataFrame(columns=["timestamp", "mean", "mean_err", "res", "res_err"]) + try: + if "timestamp" in df: + tstamps = sorted(np.unique(df["timestamp"])) + means = [] + errors = [] + reses = [] + res_errs = [] + final_tstamps = [] + for tstamp, time_df in df.groupby("timestamp", sort=True): + try: + pars, errs, cov = unbinned_aoe_fit( + time_df.query( + f"{self.fit_selection} & ({self.cal_energy_param}>1000) & ({self.cal_energy_param}<1300)" + )[aoe_param], + pdf=self.pdf, + display=display, + ) + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + {"timestamp": tstamp, + "mean":pars["mu"], + "mean_err":errs["mu"], + "res":pars["sigma"] / pars["mu"], + "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} + ]), + ]) + except: + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + {"timestamp": tstamp, + "mean":np.nan, + "mean_err":np.nan, + "res":np.nan, + "res_err":np.nan} + ]), + ]) + self.timecorr_df.set_index("timestamp", inplace=True) + time_dict = fit_time_means(np.array(self.timecorr_df.index), + np.array(self.timecorr_df["mean"]), + np.array(self.timecorr_df["res"])) + + df[output_name] = df[aoe_param] / np.array( + [time_dict[tstamp] for tstamp in df["timestamp"]] + ) + self.update_cal_dicts({ + tstamp: { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": t_dict}, + } + } + for tstamp, t_dict in time_dict.items() + }) + log.info("A/E time correction finished") + else: + try: + pars, errs, cov = unbinned_aoe_fit( + df.query(f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300")[ + aoe_param + ], + pdf=self.pdf, + display=display, + ) + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + { + "mean":pars["mu"], + "mean_err":errs["mu"], + "res":pars["sigma"] / pars["mu"], + "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} + ]), + ]) + except: + self.timecorr_df = pd.concat( + [ + self.timecorr_df, + pd.DataFrame([ + { + "mean":np.nan, + "mean_err":np.nan, + "res":np.nan, + "res_err":np.nan} + ]), + ]) + df[output_name] = df[aoe_param] / pars["mu"] + self.update_cal_dicts({ + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": pars["mu"]}, + } + }) + log.info("A/E time correction finished") + except: + log.error("A/E time correction failed") + self.update_cal_dicts({ + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": np.nan}, + } + }) + + def drift_time_correction( + self, + data:pd.DataFrame, + aoe_param, + display: int = 0, + ): + """ + Calculates the correction needed to align the two drift time regions for ICPC detectors + """ + log.info("Starting A/E drift time correction") + self.dt_res_dict = {} + try: + dep_events = data.query(f"{self.fit_selection}&{self.cal_energy_param}>1582&{self.cal_energy_param}<1602&{self.cal_energy_param}=={self.cal_energy_param}&{aoe_param}=={aoe_param}") + + hist, bins, var = pgh.get_hist( + dep_events[aoe_param], + bins=500, ) - ) - sf = 100 * len(aoe[(aoe > cut) & dt_mask]) / len(aoe) - sf_err = sf * np.sqrt( - 1 / len(aoe) + 1 / (len(aoe[(aoe > cut) & dt_mask]) + 10**-99) - ) - return cut_vals, sfs, sf_errs, sf, sf_err + bin_cs = (bins[1:] + bins[:-1]) / 2 + mu = bin_cs[np.argmax(hist)] + aoe_range = [mu * 0.9, mu * 1.1] + dt_range = [np.nanpercentile(dep_events[self.dt_param], 1) , np.nanpercentile(dep_events[self.dt_param], 99)] -def get_sf_no_sweep( - energy: np.array, - aoe: np.array, - peak: float, - fit_width: tuple(int, int), - eres_pars: list, - aoe_low_cut_val: float, - aoe_high_cut_val: float = None, - dt_mask: np.array = None, - display: int = 1, -) -> tuple(float, float): - """ - Calculates survival fraction for gamma line without sweeping through values - """ + self.dt_res_dict['final_selection'] = f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}&{self.dt_param}>{dt_range[0]}&{self.dt_param}<{dt_range[1]}&{self.dt_param}=={self.dt_param}" - if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) + final_df = dep_events.query(self.dt_res_dict['final_selection']) - min_range = peak - fit_width[0] - max_range = peak + fit_width[1] - if peak == "1592.5": - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - else: - peak_energy = energy[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - peak_aoe = aoe[(energy > min_range) & (energy < max_range) & (~np.isnan(aoe))] - peak_dt_mask = dt_mask[ - (energy > min_range) & (energy < max_range) & (~np.isnan(aoe)) - ] - # if len(peak_aoe)>50000: - # rng = np.random.default_rng(10) - # rands = rng.choice(len(peak_aoe),50000,replace=False) - # rands.sort() - # peak_energy = peak_energy[rands] - # peak_aoe = peak_aoe[rands] - # peak_dt_mask = peak_dt_mask[rands] + hist, bins, var = pgh.get_hist( + final_df[self.dt_param], dx=10, range=(np.nanmin(final_df[self.dt_param]), np.nanmax(final_df[self.dt_param])) + ) + + gpars = self.dt_res_dict['dt_guess'] =drift_time_distribution.guess(hist, bins, var) + cost_func = cost.ExtendedUnbinnedNLL(final_df[self.dt_param], drift_time_distribution.extended_pdf) + m = Minuit(cost_func, **gpars) + m.limits = drift_time_distribution.bounds(gpars) + m.fixed = drift_time_distribution.fixed() + m.simplex().migrad() + m.hesse() + + self.dt_res_dict["dt_fit"]={"pars": m.values,"errs":m.errors, "object":m} + aoe_grp1 = self.dt_res_dict["aoe_grp1"] = f'{self.dt_param}>{m.values["mu1"] - 2 * m.values["sigma1"]} & {self.dt_param}<{m.values["mu1"] + 2 * m.values["sigma1"]}' + aoe_grp2 = self.dt_res_dict["aoe_grp2"] = f'{self.dt_param}>{m.values["mu2"] - 2 * m.values["sigma2"]} & {self.dt_param}<{m.values["mu2"] + 2 * m.values["sigma2"]}' + + aoe_pars, aoe_errs, _ = unbinned_aoe_fit(final_df.query(aoe_grp1)[aoe_param], + pdf=self.pdf, display=display) - sf, sf_err, cut_pars, surv_pars = get_survival_fraction( - peak_energy, - peak_aoe, - aoe_low_cut_val, - peak, - eres_pars, - high_cut=aoe_high_cut_val, - dt_mask=peak_dt_mask, - ) - return sf, sf_err + self.dt_res_dict["aoe_fit1"] = {"pars":aoe_pars, "errs": aoe_errs} + aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit(final_df.query(aoe_grp2)[aoe_param], + pdf=self.pdf, display=display) -def compton_sf_no_sweep( - energy: np.array, - aoe: np.array, - peak: float, - eres: list[float, float], - aoe_low_cut_val: float, - aoe_high_cut_val: float = None, - dt_mask: np.array = None, - display: int = 1, -) -> float: - """ - Calculates survival fraction for compton contiuum without sweeping through values - """ + self.dt_res_dict["aoe_fit2"] = {"pars":aoe_pars2, "errs": aoe_errs2} - fwhm = np.sqrt(eres[0] + peak * eres[1]) + try: + self.alpha = (aoe_pars["mu"] - aoe_pars2["mu"]) / ( + (m.values["mu2"] * aoe_pars2["mu"]) - (m.values["mu1"] * aoe_pars["mu"]) + ) + except ZeroDivisionError: + self.alpha = 0 + self.dt_res_dict["alpha"] = self.alpha + log.info(f"dtcorr successful alpha:{self.alpha}") + data["AoE_DTcorr"] = data[aoe_param] * (1 + self.alpha * data[self.dt_param]) + except: + log.error("Drift time correction failed") + self.alpha=np.nan - emin = peak - 2 * fwhm - emax = peak + 2 * fwhm - sfs = [] - ids = (energy > emin) & (energy < emax) & (~np.isnan(aoe)) - aoe = aoe[ids] - if dt_mask is None: - dt_mask = np.full(len(aoe), True, dtype=bool) - else: - dt_mask = dt_mask[ids] - if aoe_high_cut_val is None: - sf = 100 * len(aoe[(aoe > aoe_low_cut_val)]) / len(aoe) - sf_err = sf * np.sqrt( - 1 / len(aoe) + 1 / len(aoe[(aoe > aoe_low_cut_val) & dt_mask]) - ) - else: - sf = ( - 100 - * len(aoe[(aoe > aoe_low_cut_val) & (aoe < aoe_high_cut_val) & dt_mask]) - / len(aoe) - ) - sf_err = sf * np.sqrt( - 1 / len(aoe) - + 1 / len(aoe[(aoe > aoe_low_cut_val) & (aoe < aoe_high_cut_val) & dt_mask]) - ) - return sf, sf_err + self.update_cal_dicts({ + "AoE_DTcorr": { + "expression": f"{aoe_param}*(1+a*{self.dt_param})", + "parameters": {"a": self.alpha}, + } + }) + def AoEcorrection( + self, + data:pd.DataFrame, + aoe_param:str, + display:int=0 + ): + """ + Calculates the corrections needed for the energy dependence of the A/E. + Does this by fitting the compton continuum in slices and then applies fits to the centroid and variance. + """ -def apply_dtcorr(aoe: np.array, dt: np.array, alpha: float) -> np.array: - """Aligns dt regions""" - return aoe * (1 + alpha * dt) + log.info("Starting A/E energy correction") + self.energy_corr_res_dict = {} + + comptBands = np.arange(900, 2350, self.comptBands_width) + peaks = np.array([1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105]) + allowed = np.array([], dtype=bool) + for i, band in enumerate(comptBands): + allow = True + for peak in peaks: + if (peak - 5) > band and (peak - 5) < (band + self.comptBands_width): + allow = False + elif (peak + 5 > band) and (peak + 5) < (band +self.comptBands_width): + allow = False + allowed = np.append(allowed, allow) + comptBands = comptBands[allowed] + + self.energy_corr_fits = pd.DataFrame(columns=["compt_bands", "mean", "mean_err", + "sigma", "sigma_err", "ratio", "ratio_err"], dtype=float) + try: + select_df = data.query(f"{self.fit_selection} & {aoe_param}>0") + # Fit each compton band + for band in comptBands: + try: + pars, errs, cov = unbinned_aoe_fit( + select_df.query(f"{self.cal_energy_param}>{band}&{self.cal_energy_param}< {self.comptBands_width+band}")[aoe_param], + pdf=self.pdf, display=display) -def drift_time_correction( - aoe: np.array, - energy: np.array, - dt: np.array, - pdf=standard_aoe, - display: int = 0, - plot_dict: dict = {}, -) -> tuple(np.array, float): - """ - Calculates the correction needed to align the two drift time regions for ICPC detectors - """ - hist, bins, var = pgh.get_hist( - aoe[(energy > 1582) & (energy < 1602) & (~np.isnan(energy)) & (~np.isnan(aoe))], - bins=500, - ) - bin_cs = (bins[1:] + bins[:-1]) / 2 - mu = bin_cs[np.argmax(hist)] - aoe_range = [mu * 0.9, mu * 1.1] - - idxs = ( - (energy > 1582) - & (energy < 1602) - & (aoe > aoe_range[0]) - & (aoe < aoe_range[1]) - & (dt > np.nanpercentile(dt, 1)) - & (dt < np.nanpercentile(dt, 99)) - & (~np.isnan(dt)) - & (~np.isnan(aoe)) - & (~np.isnan(energy)) - ) + mean, mean_err = self.pdf.centroid(pars, errs, cov) + sigma, sigma_err = self.pdf.width(pars, errs, cov) - hist, bins, var = pgh.get_hist( - dt[idxs], dx=10, range=(np.nanmin(dt[idxs]), np.nanmax(dt[idxs])) - ) - dt_distrib = drift_time_distribution() - - gpars = dt_distrib.guess(hist, bins, var) - cost_func = cost.ExtendedUnbinnedNLL(dt[idxs], dt_distrib.extended_pdf) - m = Minuit(cost_func, **gpars) - m.limits = dt_distrib.bounds(gpars) - m.fixed = dt_distrib.fixed() - m.simplex().migrad() - m.hesse() + self.energy_corr_fits = pd.concat( + [ + self.energy_corr_fits, + pd.DataFrame([ + {"compt_bands": band+self.comptBands_width/2, + "mean":mean, + "mean_err":mean_err, + "sigma":sigma, + "sigma_err":sigma_err, + "ratio":pars["n_sig"] / pars["n_bkg"], + "ratio_err":(pars["n_sig"] / pars["n_bkg"]) *np.sqrt( + (errs["n_sig"] / pars["n_sig"]) ** 2 + + (errs["n_bkg"] / pars["n_bkg"]) ** 2 + ) + }] + ), + ]) + + except: + self.energy_corr_fits = pd.concat( + [ + self.energy_corr_fits, + pd.DataFrame( + [{"compt_bands": band, + "mean":np.nan, + "mean_err":np.nan, + "sigma":np.nan, + "sigma_err":np.nan, + "ratio":np.nan, + "ratio_err":np.nan + }] + ), + ]) + self.energy_corr_fits.set_index("compt_bands", inplace=True) + valid_fits = self.energy_corr_fits.query("mean_err==mean_err&sigma_err==sigma_err & sigma_err!=0 & mean_err!=0") + self.energy_corr_res_dict["n_of_valid_fits"] = len(valid_fits) + log.info(f"{len(valid_fits)} compton bands fit successfully") + # Fit mus against energy + p0_mu = self.mean_func.guess(valid_fits.index, valid_fits["mean"], valid_fits["mean_err"]) + c_mu = cost.LeastSquares( + valid_fits.index, valid_fits["mean"], valid_fits["mean_err"], self.mean_func.func + ) + c_mu.loss = "soft_l1" + m_mu = Minuit(c_mu, *p0_mu) + m_mu.simplex() + m_mu.migrad() + m_mu.hesse() - aoe_mask = ( - (idxs) - & (dt > m.values["mu1"] - 2 * m.values["sigma1"]) - & (dt < m.values["mu1"] + 2 * m.values["sigma1"]) - ) - aoe_pars, aoe_errs, _ = unbinned_aoe_fit(aoe[aoe_mask], pdf=pdf, display=display) + mu_pars = m_mu.values + mu_errs = m_mu.errors - aoe_mask2 = ( - (idxs) - & (dt > m.values["mu2"] - 2 * m.values["sigma2"]) - & (dt < m.values["mu2"] + 2 * m.values["sigma2"]) - ) + csqr_mu = np.sum( + ((valid_fits["mean"] - self.mean_func.func(valid_fits.index, *mu_pars)) ** 2) / valid_fits["mean_err"] + ) + dof_mu = len(valid_fits["mean"]) - len(pars) + p_val_mu = chi2.sf(csqr_mu, dof_mu) + self.mean_fit_obj = m_mu + + # Fit sigma against energy + p0_sig = self.sigma_func.guess(valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"]) + c_sig = cost.LeastSquares( + valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"], self.sigma_func.func + ) + c_sig.loss = "soft_l1" + m_sig = Minuit(c_sig, *p0_sig) + m_sig.simplex() + m_sig.migrad() + m_sig.hesse() + + sig_pars = m_sig.values + sig_errs = m_sig.errors + + csqr_sig = np.sum( + ((valid_fits["sigma"] - self.sigma_func.func(valid_fits.index, *sig_pars)) ** 2) + / valid_fits["sigma_err"] + ) + dof_sig = len(valid_fits["sigma"]) - len(sig_pars) + p_val_sig = chi2.sf(csqr_sig, dof_sig) + + self.sigma_fit_obj = m_sig + + # Get DEP fit + n_sigma = 4 + peak = 1592 + sigma = self.eres_func(peak) / 2.355 + emin = peak - n_sigma * sigma + emax = peak + n_sigma * sigma + try: + dep_pars, dep_err, _ = unbinned_aoe_fit( + select_df.query(f"{self.cal_energy_param}>{emin}&{self.cal_energy_param}<{emax}")[aoe_param], + pdf=self.pdf, + display=display + ) + except: + dep_pars, dep_err, _ = return_nans(self.pdf) + + data["AoE_Corrected"] = data[aoe_param] / self.mean_func.func(data[self.cal_energy_param], *mu_pars) + data["AoE_Classifier"] = (data["AoE_Corrected"] - 1) / self.sigma_func.func( + data[self.cal_energy_param], *sig_pars + ) + log.info("Finished A/E energy successful") + log.info(f"mean pars are {mu_pars.to_dict()}") + log.info(f"sigma pars are {sig_pars.to_dict()}") + except: + log.error("A/E energy correction failed") + mu_pars, mu_errs, mu_cov = return_nans(self.mean_func.func) + csqr_mu, dof_mu, p_val_mu = (np.nan, np.nan, np.nan) + csqr_sig, dof_sig, p_val_sig = (np.nan, np.nan, np.nan) + sig_pars, sig_errs, sig_cov = return_nans(self.sigma_func.func) + dep_pars, dep_err, dep_cov = return_nans(self.pdf) + + self.energy_corr_res_dict["mean_fits"] = {"func": self.mean_func.__name__, + "module": self.mean_func.__module__, + "expression":self.mean_func.string_func("x"), + "pars": mu_pars.to_dict(), + "errs": mu_errs.to_dict(), + "p_val_mu": p_val_mu, + "csqr_mu": (csqr_mu, dof_mu)} + + self.energy_corr_res_dict["sigma_fits"] = {"func": self.sigma_func.__name__, + "module": self.sigma_func.__module__, + "expression":self.sigma_func.string_func("x"), + "pars": sig_pars.to_dict(), + "errs": sig_errs.to_dict(), + "p_val_mu": p_val_sig, + "csqr_mu": (csqr_sig, dof_sig)} + + self.energy_corr_res_dict["dep_fit"]={"func": self.pdf.__name__, + "module": self.pdf.__module__, + "pars": dep_pars.to_dict(), + "errs": dep_err.to_dict()} + + self.update_cal_dicts({ + "AoE_Corrected": { + "expression": f"{aoe_param}/({self.mean_func.string_func(self.cal_energy_param)})", + "parameters": mu_pars.to_dict(), + }, + "AoE_Classifier": { + "expression": f"AoE_Corrected/({self.sigma_func.string_func(self.cal_energy_param)})", + "parameters": sig_pars.to_dict(), + } + }) + + def get_aoe_cut_fit( + self, + data:pd.DataFrame, + aoe_param:str, + peak: float, + ranges: tuple, + dep_acc: float, + display: int = 1 + ): + """ + Determines A/E cut by sweeping through values and for each one fitting the DEP to determine how many events survive. + Then interpolates to get cut value at desired DEP survival fraction (typically 90%) + """ - aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit(aoe[aoe_mask2], pdf=pdf, display=display) + log.info("Starting A/E low cut determination") + self.low_cut_res_dict = {} + self.cut_fits = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) + min_range, max_range = ranges + + try: + select_df = data.query(f"{self.fit_selection}&({self.cal_energy_param} > {peak - min_range}) & ({self.cal_energy_param} < {peak + max_range})") + + # if dep_correct is True: + # peak_aoe = (select_df[aoe_param] / dep_mu(select_df[self.cal_energy_param])) - 1 + # peak_aoe = select_df[aoe_param] / sig_func(select_df[self.cal_energy_param]) + + cut_vals = np.arange(-8, 0, 0.2) + sfs = [] + sf_errs = [] + for cut_val in cut_vals: + sf, err, cut_pars, surv_pars = get_survival_fraction( + select_df[self.cal_energy_param].to_numpy(), + select_df[aoe_param].to_numpy(), + cut_val, + peak, + self.eres_func(peak), + guess_pars_cut=None, + guess_pars_surv=None + ) + self.cut_fits = pd.concat( + [ + self.cut_fits, + pd.DataFrame( + [{"cut_val": cut_val, + "sf":sf, + "sf_err":err, + }] + ), + ]) + self.cut_fits.set_index("cut_val", inplace=True) + valid_fits = self.cut_fits.query(f'sf_err<{(1.5 * np.nanpercentile(self.cut_fits["sf_err"],85))}&sf_err==sf_err') + + c = cost.LeastSquares( + valid_fits.index, valid_fits["sf"], valid_fits["sf_err"], sigmoid_fit.func + ) + c.loss = "soft_l1" + m1 = Minuit(c, *sigmoid_fit.guess(valid_fits.index, valid_fits["sf"], valid_fits["sf_err"])) + m1.simplex().migrad() + xs = np.arange(np.nanmin(valid_fits.index), np.nanmax(valid_fits.index), 0.01) + p = sigmoid_fit.func(xs, *m1.values) + self.cut_fit = {"function": sigmoid_fit.__name__ , "pars": m1.values.to_dict(), "errs": m1.errors.to_dict()} + self.low_cut_val = round(xs[np.argmin(np.abs(p - (100 * self.dep_acc)))], 3) + log.info(f"Cut found at {self.low_cut_val}") + + data["AoE_Low_Cut"] = (data[aoe_param]>self.low_cut_val) + if self.dt_cut_param is not None: + data["AoE_Low_Cut"] = data["AoE_Low_Cut"] &(data[self.dt_cut_param]) + data["AoE_Double_Sided_Cut"] = data["AoE_Low_Cut"] & (data[aoe_param]a) & ({self.dt_cut_param})", + "parameters": {"a": self.low_cut_val}, + }}) + else: + self.update_cal_dicts({ + "AoE_Low_Cut": { + "expression": f"({aoe_param}>a)", + "parameters": {"a": self.low_cut_val}, + }}) + self.update_cal_dicts({"AoE_Double_Sided_Cut": { + "expression": f"(a>{aoe_param}) & (AoE_Low_Cut)", + "parameters": {"a": self.high_cut_val}, + }}) + + def get_results_dict(self): + return { + "cal_energy_param": self.cal_energy_param, + "dt_param": self.dt_param, + "rt_correction": self.dt_corr, + "pdf":self.pdf.__name__, + "1000-1300keV": self.timecorr_df.to_dict("index"), + "correction_fit_results": self.energy_corr_res_dict, + "low_cut": self.low_cut_val, + "high_cut": self.high_cut_val, + "low_side_sfs": self.low_side_sf.to_dict("index"), + "2_side_sfs": self.two_side_sf.to_dict("index"), + } + + def fill_plot_dict(self, data, plot_dict={}): + for key, item in self.plot_options.items(): + if item["options"] is not None: + plot_dict[key] = item["function"](self, data, **item["options"]) + else: + plot_dict[key] = item["function"](self, data) + return plot_dict + + + def calibrate(self, df, initial_aoe_param): + self.aoe_timecorr( + df, initial_aoe_param + ) + log.info("Finished A/E time correction") + + if self.dt_corr == True: + aoe_param = "AoE_DTcorr" + self.drift_time_correction( + df, + "AoE_Timecorr" + ) + else: + aoe_param = "AoE_Timecorr" + + self.AoEcorrection( + df, + aoe_param + ) + + + self.get_aoe_cut_fit( + df, + "AoE_Classifier", + 1592, + (40, 20), + 0.9 + ) + + aoe_param = "AoE_Classifier" + log.info(" Compute low side survival fractions: ") + self.low_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) + peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] + fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)] + self.low_side_peak_dfs={} + + + for i, peak in enumerate(peaks_of_interest): + try: + select_df = df.query(f"{self.selection_string}&{aoe_param}=={aoe_param}") + fwhm = self.eres_func(peak) + if peak == 2039: + emin = 2 * fwhm + emax = 2 * fwhm + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + + cut_df, sf, sf_err = compton_sf_sweep( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + ) + self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + self.low_side_peak_dfs[peak]=cut_df + else: + emin,emax = fit_widths[i] + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + cut_df, sf, sf_err = get_sf_sweep( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + ) + self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + self.low_side_peak_dfs[peak]=cut_df + log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") + except: + self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) + log.error(f"A/E Low side Survival fraction determination failed for {peak} peak") + self.low_side_sf.set_index("peak", inplace=True) + + + self.two_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) + log.info("Calculating 2 sided cut sfs") + for i, peak in enumerate(peaks_of_interest): + fwhm = self.eres_func(peak) + try: + if peak == 2039: + emin = 2 * fwhm + emax = 2 * fwhm + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + + sf_dict = compton_sf(peak_df[aoe_param].to_numpy(), + self.low_cut_val, + self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) + sf = sf_dict["sf"] + sf_err = sf_dict["sf_err"] + self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, + "sf":sf, + "sf_err":sf_err}])]) + else: + emin, emax = fit_widths[i] + peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + sf, sf_err, _, _ = get_survival_fraction( + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + high_cut=self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) + self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") + + except: + self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) + log.error(f"A/E two side Survival fraction determination failed for {peak} peak") + self.two_side_sf.set_index("peak", inplace=True) + +def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], fontsize=12): + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots(1, 1) try: - alpha = (aoe_pars["mu"] - aoe_pars2["mu"]) / ( - (m.values["mu2"] * aoe_pars2["mu"]) - (m.values["mu1"] * aoe_pars["mu"]) + ax.errorbar( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + aoe_class.timecorr_df["mean"], + yerr=aoe_class.timecorr_df["mean_err"], + linestyle=" ", ) - except ZeroDivisionError: - alpha = 0 - aoe_corrected = apply_dtcorr(aoe, dt, alpha) - if display > 0: - aoe_pdf = pdf() - dt_fig = plt.figure() + grouped_means = [cal_dict[time_param]["parameters"]["a"] for tstamp, cal_dict in aoe_class.cal_dicts.items()] + ax.step( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.cal_dicts + ], + grouped_means, + where="post", + ) + ax.fill_between( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.cal_dicts + ], + y1=np.array(grouped_means) + - 0.2 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + + 0.2 * np.array(aoe_class.timecorr_df["res"]), + color="green", + alpha=0.2, + ) + ax.fill_between( + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.cal_dicts + ], + y1=np.array(grouped_means) + - 0.4 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + + 0.4 * np.array(aoe_class.timecorr_df["res"]), + color="yellow", + alpha=0.2, + ) + except:pass + ax.set_xlabel("time") + ax.set_ylabel("A/E mean") + myFmt = mdates.DateFormatter("%b %d") + ax.xaxis.set_major_formatter(myFmt) + plt.close() + return fig + +def plot_aoe_res_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], fontsize=12): + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots(1, 1) + try: + ax.errorbar( + [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + aoe_class.timecorr_df["res"], + yerr=aoe_class.timecorr_df["res_err"], + linestyle=" ", + ) + except:pass + ax.set_xlabel("time") + ax.set_ylabel("A/E res") + myFmt = mdates.DateFormatter("%b %d") + ax.xaxis.set_major_formatter(myFmt) + plt.close() + return fig + +def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_corr="AoE_DTcorr", + figsize=[12, 8], fontsize=12): + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + + dep_events = data.query(f"{aoe_class.fit_selection}&{aoe_class.cal_energy_param}>1582&{aoe_class.cal_energy_param}<1602&{aoe_class.cal_energy_param}=={aoe_class.cal_energy_param}&{aoe_param}=={aoe_param}") + final_df = dep_events.query(aoe_class.dt_res_dict['final_selection']) + + plt.subplot(2, 2, 1) - xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 1000) + aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["pars"] + + xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 100) counts, aoe_bins, bars = plt.hist( - aoe[ - (aoe < aoe_pars["upper_range"]) - & (aoe > aoe_pars["lower_range"]) - & aoe_mask - ], + final_df.query(f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}')[aoe_param], bins=400, histtype="step", - label="Data", + label="data", ) dx = np.diff(aoe_bins) - plt.plot(xs, aoe_pdf.pdf(xs, *aoe_pars) * dx[0], label="Full fit") - sig, bkg = aoe_pdf.pdf(xs, *aoe_pars[:-1], True) - plt.plot(xs, sig * dx[0], label="Peak fit") - plt.plot(xs, bkg * dx[0], label="Bkg fit") + plt.plot(xs, aoe_class.pdf.pdf(xs, *aoe_pars) * dx[0], label="full fit") + sig, bkg = aoe_class.pdf.pdf(xs, *aoe_pars[:-1], True) + plt.plot(xs, sig * dx[0], label="peak fit") + plt.plot(xs, bkg * dx[0], label="bkg fit") plt.legend(loc="upper left") plt.xlabel("A/E") - plt.ylabel("Counts") - + plt.ylabel("counts") + + aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["pars"] plt.subplot(2, 2, 2) - xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 1000) + xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 100) counts, aoe_bins2, bars = plt.hist( - aoe[ - (aoe < aoe_pars2["upper_range"]) - & (aoe > aoe_pars2["lower_range"]) - & aoe_mask2 - ], + final_df.query(f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}')[aoe_param], bins=400, histtype="step", label="Data", ) dx = np.diff(aoe_bins2) - plt.plot(xs, aoe_pdf.pdf(xs, *aoe_pars2) * dx[0], label="Full fit") - sig, bkg = aoe_pdf.pdf(xs, *aoe_pars2[:-1], True) - plt.plot(xs, sig * dx[0], label="Peak fit") - plt.plot(xs, bkg * dx[0], label="Bkg fit") + plt.plot(xs, aoe_class.pdf.pdf(xs, *aoe_pars2) * dx[0], label="full fit") + sig, bkg = aoe_class.pdf.pdf(xs, *aoe_pars2[:-1], True) + plt.plot(xs, sig * dx[0], label="peak fit") + plt.plot(xs, bkg * dx[0], label="bkg fit") plt.legend(loc="upper left") plt.xlabel("A/E") - plt.ylabel("Counts") - + plt.ylabel("counts") + + hist, bins, var = pgh.get_hist( + final_df[aoe_class.dt_param], dx=10, range=(np.nanmin(final_df[aoe_class.dt_param]), + np.nanmax(final_df[aoe_class.dt_param])) + ) + plt.subplot(2, 2, 3) - plt.step(pgh.get_bin_centers(bins), hist, label="Data") + plt.step(pgh.get_bin_centers(bins), hist, label="data") plt.plot( pgh.get_bin_centers(bins), - dt_distrib.pdf(pgh.get_bin_centers(bins), **gpars) * np.diff(bins)[0], + drift_time_distribution.pdf(pgh.get_bin_centers(bins), + **aoe_class.dt_res_dict['dt_guess']) * np.diff(bins)[0], label="Guess", ) plt.plot( pgh.get_bin_centers(bins), - dt_distrib.pdf(pgh.get_bin_centers(bins), *m.values) * np.diff(bins)[0], - label="Fit", + drift_time_distribution.pdf(pgh.get_bin_centers(bins), + *aoe_class.dt_res_dict["dt_fit"]["pars"]) * np.diff(bins)[0], + label="fit", ) - plt.xlabel("Drift Time (ns)") + plt.xlabel("drift time (ns)") plt.ylabel("Counts") plt.legend(loc="upper left") plt.subplot(2, 2, 4) bins = np.linspace( - np.nanpercentile(aoe[idxs], 1), - np.nanpercentile(aoe_corrected[idxs], 99), + np.nanpercentile(final_df[aoe_param], 1), + np.nanpercentile(final_df[aoe_param_corr], 99), 200, ) - plt.hist(aoe[idxs], bins=bins, histtype="step", label="Uncorrected") - plt.hist(aoe_corrected[idxs], bins=bins, histtype="step", label="Corrected") + plt.hist(final_df[aoe_param], bins=bins, histtype="step", label="uncorrected") + plt.hist(final_df[aoe_param_corr], bins=bins, histtype="step", label="corrected") plt.xlabel("A/E") - plt.ylabel("Counts") + plt.ylabel("counts") plt.legend(loc="upper left") plt.tight_layout() plt.xlim( - np.nanpercentile(aoe[idxs], 1), np.nanpercentile(aoe_corrected[idxs], 99) + bins[0], bins[-1] ) - - plot_dict["dt_corr"] = dt_fig - if display > 1: - plt.show() - else: - plt.close() - return alpha, plot_dict - else: - return alpha - - -def cal_aoe( - files: list, - lh5_path, - cal_dict: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_pars: list, - pdf=standard_aoe, - cut_field: str = "is_valid_cal", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict = None, - aoe_high_cut: int = 4, - sigma_func=sigma_fit, - display: int = 0, -) -> tuple(dict, dict): + except:pass + plt.close() + return fig + +def plot_compt_bands_overlayed(aoe_class, + data, + eranges: list[tuple], + aoe_param = "AoE_Timecorr", + aoe_range: list[float] = None, + title= "Compton Bands", + density=True, + n_bins=50, + figsize=[12, 8], fontsize=12 + ) -> None: """ - Main function for running the a/e correction and cut determination. - - dt_cut: dictionary should contain two fields "cut" containing a dictionary of the form required by the hit_config and - hard specifying whether this is a hard cut so these events should be removed (e.g. tail to high A/E) or soft cut - where these events are just not used for the A/E fits and cut determination (e.g. tail to low A/E) + Function to plot various compton bands to check energy dependence and corrections """ - params = [ - current_param, - "tp_0_est", - "tp_99", - "dt_eff", - energy_param, - cal_energy_param, - cut_field, - ] - if dt_cut is not None: - if re.match(r"(\d{8})T(\d{6})Z", list(cal_dict)[0]): - for tstamp in cal_dict: - cal_dict[tstamp].update(dt_cut["cut"]) - else: - cal_dict.update(dt_cut["cut"]) - dt_cut_field = list(dt_cut["cut"])[0] - params.append(dt_cut_field) - else: - dt_cut_field = None - df = load_aoe( - files, - lh5_path, - cal_dict, - params, - energy_param=energy_param, - current_param=current_param, - ) - if dt_cut is not None: - df["dt_cut"] = df[list(dt_cut["cut"])[0]] - else: - df["dt_cut"] = np.full(len(df), True, dtype=bool) + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + + for erange in eranges: + try: + select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_param}=={aoe_param}') + if aoe_range is not None: + select_df = select_df.query(f'{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}') + bins = np.linspace(aoe_range[0], aoe_range[1], n_bins) + else: + bins = np.linspace(0.85, 1.05, n_bins) + plt.hist( + select_df[aoe_param], + bins=bins, + histtype="step", + label=f"{erange[0]}-{erange[1]}", + density=density, + ) + except:pass + plt.ylabel("counts") + plt.xlabel(aoe_param) + plt.title(title) + plt.legend(loc="upper left") + plt.close() + return fig + +def plot_dt_dep(aoe_class, + data, + eranges: list[tuple], + titles:list=None, + aoe_param = "AoE_Timecorr", + bins=[200, 100], + dt_max = 2000, + figsize=[12, 8], fontsize=12 + ) -> None: + """ + Function to produce 2d histograms of A/E against drift time to check dependencies + """ + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + for i,erange in enumerate(eranges): + try: + plt.subplot(3, 2, i+1) + select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_param}=={aoe_param}') - df["is_usable_fits"] = df[cut_field] & df["is_not_pulser"] & df["dt_cut"] + hist, bs, var = pgh.get_hist( + select_df[aoe_param], bins=500 + ) + bin_cs = (bs[1:] + bs[:-1]) / 2 + mu = bin_cs[np.argmax(hist)] + aoe_range = [mu * 0.9, mu * 1.1] + + + final_df = select_df.query(f'{aoe_param}<{aoe_range[1]}&{aoe_param}>{aoe_range[0]}&{aoe_class.dt_param}<{dt_max}') + plt.hist2d(final_df[aoe_param], final_df[aoe_class.dt_param], + bins=bins, norm=LogNorm()) + plt.ylabel("drift time (ns)") + plt.xlabel("A/E") + if titles is None: + plt.title(f'{erange[0]}-{erange[1]}') + else: + plt.title(titles[i]) + except:pass + plt.tight_layout() + plt.close() + return fig + +def plot_mean_fit(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) try: - df, timecorr_dict, res_dict = aoe_timecorr( - df, energy_param, current_param, pdf=pdf + ax1.errorbar( + aoe_class.energy_corr_fits.index, + aoe_class.energy_corr_fits["mean"], + yerr=aoe_class.energy_corr_fits["mean_err"], + xerr=aoe_class.comptBands_width/2, + label="data", + linestyle=" ", ) - log.info("Finished A/E time correction") - except: - log.info("A/E time correction failed") - res_dict = {} - timecorr_dict = { - "AoE_Timecorr": { - "expression": f"({current_param}/{energy_param})/a", - "parameters": {"a": np.nan}, - } - } - if re.match(r"(\d{8})T(\d{6})Z", list(cal_dict)[0]): - for tstamp in cal_dict: - if tstamp in timecorr_dict: - cal_dict[tstamp].update(timecorr_dict[tstamp]) - else: - cal_dict[tstamp].update(timecorr_dict) - else: - cal_dict.update(timecorr_dict) + ax1.plot(aoe_class.energy_corr_fits.index, + aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), label="linear model") + ax1.errorbar( + 1592, + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['mu'], + label="DEP", + color="green", + linestyle=" ", + ) - if dt_corr == True: - aoe_param = "AoE_dtcorr" - try: - if np.isnan(df.query("is_usable_fits")["AoE_timecorr"]).all(): - raise ValueError - alpha = drift_time_correction( - df.query("is_usable_fits")["AoE_timecorr"], - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["dt_eff"], - pdf=pdf, - ) - df["AoE_dtcorr"] = apply_dtcorr(df["AoE_timecorr"], df["dt_eff"], alpha) - log.info(f"dtcorr successful alpha:{alpha}") - except: - log.error("A/E dtcorr failed") - alpha = np.nan - else: - aoe_param = "AoE_timecorr" + ax1.legend(title="A/E mu energy dependence", frameon=False) - try: - log.info("Starting A/E energy correction") - mu_pars, sigma_pars, results_dict, dep_pars = AoEcorrection( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")[aoe_param], - eres_pars, - pdf=pdf, - sigma_func=sigma_func, + ax1.set_ylabel("raw A/E (a.u.)", ha="right", y=1) + ax2.scatter( + aoe_class.energy_corr_fits.index, + 100 * (aoe_class.energy_corr_fits["mean"] - aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), + lw=1, + c="b", ) - dep_mu = dep_pars["mu"] - log.info("Finished A/E energy correction") - df["AoE_corrected"] = df[aoe_param] / pol1(df[cal_energy_param], *mu_pars) - df["AoE_classifier"] = (df["AoE_corrected"] - 1) / sigma_func( - df[cal_energy_param], *sigma_pars + ax2.scatter( + 1592, + 100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'] - aoe_class.mean_func.func(1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), + lw=1, + c="g", ) - except: - log.error("A/E energy correction failed") - args = pol1.__code__.co_varnames[: pol1.__code__.co_argcount][1:] - c = cost.UnbinnedNLL(np.array([0]), pol1) - m = Minuit(c, *[np.nan for arg in args]) - mu_pars = m.values - args = sigma_func.__code__.co_varnames[: sigma_func.__code__.co_argcount][1:] - c = cost.UnbinnedNLL(np.array([0]), sigma_func) - m = Minuit(c, *[np.nan for arg in args]) - sigma_pars = m.values - dep_mu = np.nan - results_dict = {} - + except:pass + ax2.set_ylabel("residuals %", ha="right", y=1) + ax2.set_xlabel("energy (keV)", ha="right", x=1) + plt.tight_layout() + plt.close() + return fig + +def plot_sigma_fit(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) try: - if dep_correct is True: - cut = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_corrected"], - 1592, - (40, 20), - 0.9, - eres_pars, - dep_correct=True, - dep_mu=lambda x: dep_mu / pol1(1592.5, *mu_pars), - sig_func=lambda x: sigma_func(x, *sig_pars), - display=0, - ) + ax1.errorbar( + aoe_class.energy_corr_fits.index, + aoe_class.energy_corr_fits["sigma"], + yerr=aoe_class.energy_corr_fits["sigma_err"], + xerr=aoe_class.comptBands_width/2, + label="data", + linestyle=" ", + ) + sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["pars"] + if aoe_class.sigma_func == sigma_fit: + label = f'sqrt model: \nsqrt({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.1f}/E)^{sig_pars["c"]:1.1f})' + elif aoe_class.sigma_func == sigma_fit_quadratic: + label = f'quad model: \n({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.6f}*E)+\n({sig_pars["c"]:1.6f}*E)^2)' else: - cut = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_classifier"], - 1592, - (40, 20), - 0.9, - eres_pars, - display=0, - ) - - log.info(f"Cut found at {cut}") - except: - log.error("A/E cut determination failed") - cut = np.nan - - aoe_cal_dict = {} - if dt_corr == False: - aoe_uncorr_param = "AoE_Timecorr" - else: - aoe_cal_dict.update( - { - "AoE_DTcorr": { - "expression": f"AoE_Timecorr*(1+a*dt_eff)", - "parameters": {"a": alpha}, - } - } + raise ValueError("unknown sigma function") + ax1.plot( + aoe_class.energy_corr_fits.index, + aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index,**sig_pars), + label=label, ) - aoe_uncorr_param = "AoE_DTcorr" - - aoe_cal_dict.update( - { - "AoE_Corrected": { - "expression": f"((({aoe_uncorr_param})/(a*{cal_energy_param} +b))-1)", - "parameters": mu_pars.to_dict(), - } - } - ) - if sigma_func == sigma_fit: - aoe_cal_dict.update( - { - "AoE_Classifier": { - "expression": f"AoE_Corrected/(sqrt(a+(b/{cal_energy_param})**c))", - "parameters": sigma_pars.to_dict(), - } - } + ax1.errorbar( + 1592, + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['sigma'], + label="DEP", + color="green", + linestyle=" ", ) - else: - raise ValueError("Unknown sigma func") - - if dt_cut is not None: - if dt_cut["hard"] is True: - aoe_cal_dict.update( - { - "AoE_Low_Cut": { - "expression": f"(AoE_Classifier>a) & ({list(dt_cut['cut'])[0]})", - "parameters": {"a": cut}, - }, - "AoE_Double_Sided_Cut": { - "expression": "(a>AoE_Classifier) & (AoE_Low_Cut)", - "parameters": {"a": aoe_high_cut}, - }, - } - ) - else: - aoe_cal_dict.update( - { - "AoE_Low_Cut": { - "expression": "AoE_Classifier>a", - "parameters": {"a": cut}, - }, - "AoE_Double_Sided_Cut": { - "expression": "(a>AoE_Classifier) & (AoE_Low_Cut)", - "parameters": {"a": aoe_high_cut}, - }, - } - ) - else: - aoe_cal_dict.update( - { - "AoE_Low_Cut": { - "expression": "AoE_Classifier>a", - "parameters": {"a": cut}, - }, - "AoE_Double_Sided_Cut": { - "expression": "(a>AoE_Classifier)&(AoE_Low_Cut)", - "parameters": {"a": aoe_high_cut}, - }, - } + ax1.set_ylabel("A/E stdev (a.u.)", ha="right", y=1) + ax1.legend(title="A/E stdev energy dependence", frameon=False) + ax2.scatter( + aoe_class.energy_corr_fits.index, + 100 * (aoe_class.energy_corr_fits["sigma"] - aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars)) / aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars), + lw=1, + c="b", ) - - if re.match(r"(\d{8})T(\d{6})Z", list(cal_dict)[0]): - for tstamp in cal_dict: - cal_dict[tstamp].update(aoe_cal_dict) - else: - cal_dict.update(aoe_cal_dict) - + ax2.scatter( + 1592, + 100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'] - aoe_class.sigma_func.func(1592, **sig_pars)) / aoe_class.sigma_func.func(1592, **sig_pars), + lw=1, + c="g", + ) + except:pass + ax2.set_ylabel("residuals", ha="right", y=1) + ax2.set_xlabel("energy (keV)", ha="right", x=1) + plt.tight_layout() + plt.close() + return fig + +def plot_cut_fit(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: - log.info(" Compute low side survival fractions: ") - - peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] - sf = np.zeros(len(peaks_of_interest)) - sferr = np.zeros(len(peaks_of_interest)) - fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)] - full_sfs = [] - full_sf_errs = [] - full_cut_vals = [] - - for i, peak in enumerate(peaks_of_interest): - if peak == 2039: - cut_vals, sfs, sf_errs, sf[i], sferr[i] = compton_sf( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - cut, - peak, - eres_pars, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - - full_cut_vals.append(cut_vals) - full_sfs.append(sfs) - full_sf_errs.append(sf_errs) - else: - cut_vals, sfs, sf_errs, sf[i], sferr[i] = get_sf( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - peak, - fit_widths[i], - cut, - eres_pars, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - full_cut_vals.append(cut_vals) - full_sfs.append(sfs) - full_sf_errs.append(sf_errs) - - log.info(f"{peak}keV: {sf[i]:2.1f} +/- {sferr[i]:2.1f} %") - - sf_2side = np.zeros(len(peaks_of_interest)) - sferr_2side = np.zeros(len(peaks_of_interest)) - log.info("Calculating 2 sided cut sfs") - for i, peak in enumerate(peaks_of_interest): - if peak == 2039: - sf_2side[i], sferr_2side[i] = compton_sf_no_sweep( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - peak, - eres_pars, - cut, - aoe_high_cut_val=aoe_high_cut, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - else: - sf_2side[i], sferr_2side[i] = get_sf_no_sweep( - df.query(f"{cut_field}& is_not_pulser")[ - cal_energy_param - ].to_numpy(), - df.query(f"{cut_field}& is_not_pulser")[ - "AoE_classifier" - ].to_numpy(), - peak, - fit_widths[i], - eres_pars, - cut, - aoe_high_cut_val=aoe_high_cut, - dt_mask=df.query(f"{cut_field}& is_not_pulser")[ - "dt_cut" - ].to_numpy(), - ) - - log.info(f"{peak}keV: {sf_2side[i]:2.1f} +/- {sferr_2side[i]:2.1f} %") + plt.errorbar( + aoe_class.cut_fits.index, + aoe_class.cut_fits["sf"], + yerr=aoe_class.cut_fits["sf_err"], + linestyle=" ", + ) - def convert_sfs_to_dict(peaks_of_interest, sfs, sf_errs): - out_dict = {} - for i, peak in enumerate(peaks_of_interest): - out_dict[str(peak)] = { - "sf": f"{sfs[i]:2f}", - "sf_err": f"{sf_errs[i]:2f}", - } - return out_dict - - out_dict = { - "correction_fit_results": results_dict, - "A/E_Energy_param": energy_param, - "Cal_energy_param": cal_energy_param, - "dt_param": "dt_eff", - "rt_correction": dt_corr, - "1000-1300keV": res_dict, - "Mean_pars": list(mu_pars), - "Sigma_pars": list(sigma_pars), - "Low_cut": cut, - "High_cut": aoe_high_cut, - "Low_side_sfs": convert_sfs_to_dict(peaks_of_interest, sf, sferr), - "2_side_sfs": convert_sfs_to_dict(peaks_of_interest, sf_2side, sferr_2side), - } - log.info("Done") - log.info(f"Results are {out_dict}") - - except: - log.error("A/E Survival fraction determination failed") - out_dict = { - "correction_fit_results": results_dict, - "A/E_Energy_param": energy_param, - "Cal_energy_param": cal_energy_param, - "dt_param": "dt_eff", - "rt_correction": False, - "1000-1300keV_mean": res_dict, - "Mean_pars": list(mu_pars), - "Sigma_pars": list(sigma_pars), - "Low_cut": cut, - "High_cut": aoe_high_cut, - } - if display <= 0: - return cal_dict, out_dict - else: - plot_dict = {} - try: - plt.rcParams["figure.figsize"] = (12, 8) - plt.rcParams["font.size"] = 16 - - fig1 = plt.figure() - plt.subplot(3, 2, 1) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1582, 1602], - f"Tl DEP", - ) - plt.subplot(3, 2, 2) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1510, 1630], - f"Bi FEP", - ) - plt.subplot(3, 2, 3) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2030, 2050], - "Qbb", - ) - plt.subplot(3, 2, 4) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2080, 2120], - f"Tl SEP", - ) - plt.subplot(3, 2, 5) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_timecorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2584, 2638], - f"Tl FEP", - ) - plt.tight_layout() - plot_dict["dt_deps"] = fig1 - if display > 1: - plt.show() - else: - plt.close() - - if dt_corr == True: - alpha, plot_dict = drift_time_correction( - df.query("is_usable_fits")["AoE_timecorr"], - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["dt_eff"], - display=display, - plot_dict=plot_dict, - ) + plt.plot(aoe_class.cut_fits.index, sigmoid_fit.func(aoe_class.cut_fits.index.to_numpy(), + **aoe_class.cut_fit["pars"])) + plt.hlines((100 * aoe_class.dep_acc), -8.1, aoe_class.low_cut_val, color="red", linestyle="--") + plt.vlines( + aoe_class.low_cut_val, + np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, + (100 * aoe_class.dep_acc), + color="red", + linestyle="--", + ) + plt.xlim([-8.1, 0.1]) + vals, labels = plt.yticks() + plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.ylim([np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, 102]) + except:pass + plt.xlabel("cut value") + plt.ylabel("survival percentage") + plt.close() + return fig + +def plot_survival_fraction_curves(aoe_class, + data, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + plt.vlines(aoe_class.low_cut_val, 0, 100, label=f"cut value: {aoe_class.low_cut_val:1.2f}", color="black") + - fig_dt = plt.figure() - plt.subplot(3, 2, 1) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1582, 1602], - f"Tl DEP", - ) - plt.subplot(3, 2, 2) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [1510, 1630], - f"Bi FEP", - ) - plt.subplot(3, 2, 3) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2030, 2050], - "Qbb", - ) - plt.subplot(3, 2, 4) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2080, 2120], - f"Tl SEP", - ) - plt.subplot(3, 2, 5) - plot_dt_dep( - df.query("is_valid_cal& is_not_pulser")["AoE_dtcorr"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - df.query("is_valid_cal& is_not_pulser")["dt_eff"], - [2584, 2638], - f"Tl FEP", + for peak, survival_df in aoe_class.low_side_peak_dfs.items(): + try: + plt.errorbar( + survival_df.index, + survival_df["sf"], + yerr=survival_df["sf_err"], + label=f'{get_peak_label(peak)} {peak} keV: {aoe_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {aoe_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %' ) - plt.tight_layout() - plot_dict["dt_deps_dtcorr"] = fig_dt - if display > 1: - plt.show() - else: - plt.close() - - fig2 = plt.figure() - plot_compt_bands_overlayed( - df.query("is_valid_cal& is_not_pulser")[aoe_param], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - [950, 1250, 1460, 1660, 1860, 2060, 2270], - ) - plt.ylabel("Counts") - plt.xlabel("Raw A/E") - plt.title(f"Compton Bands before Correction") - plt.legend(loc="upper left") - plot_dict["compt_bands_nocorr"] = fig2 - if display > 1: - plt.show() - else: - plt.close() - - _, _, _, plot_dict = aoe_timecorr( - df, - energy_param, - current_param, - pdf=pdf, - plot_dict=plot_dict, - display=display, - ) - - _, _, _, _, plot_dict = AoEcorrection( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")[aoe_param], - eres_pars, - pdf=pdf, - sigma_func=sigma_func, - plot_dict=plot_dict, - display=display, - ) + except:pass + except:pass + vals, labels = plt.yticks() + plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.legend(loc="upper right") + plt.xlabel("cut value") + plt.ylabel("survival percentage") + plt.ylim([0, 105]) + plt.close() + return fig + +def plot_spectra(aoe_class, + data, + xrange=(900, 3000), + n_bins=2101, + xrange_inset = (1580, 1640), + n_bins_inset = 200, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig, ax = plt.subplots() + try: + bins = np.linspace(xrange[0], xrange[1], n_bins) + ax.hist( + data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="before PSD", + ) + ax.hist( + data.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="low side PSD cut", + ) + ax.hist( + data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="double sided PSD cut", + ) + ax.hist( + data.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + label="rejected by PSD cut", + ) - fig3 = plt.figure() - plot_compt_bands_overlayed( - df.query("is_valid_cal& is_not_pulser")["AoE_classifier"], - df.query("is_valid_cal& is_not_pulser")[cal_energy_param], - [950, 1250, 1460, 1660, 1860, 2060, 2270], - [-5, 5], - ) - plt.ylabel("Counts") - plt.xlabel("Corrected A/E") - plt.title(f"Compton Bands after Correction") - plt.legend(loc="upper left") - plot_dict["compt_bands_corr"] = fig3 - if display > 1: - plt.show() - else: - plt.close() - - if dep_correct is True: - _, plot_dict = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_corrected"], - 1592, - (40, 20), - 0.9, - eres_pars, - dep_correct=True, - dep_mu=lambda x: dep_mu / pol1(1592.5, *mu_pars), - sig_func=lambda x: sigma_func(x, *sig_pars), - display=display, - plot_dict=plot_dict, - ) - else: - _, plot_dict = get_aoe_cut_fit( - df.query("is_usable_fits")[cal_energy_param], - df.query("is_usable_fits")["AoE_classifier"], - 1592, - (40, 20), - 0.9, - eres_pars, - display=display, - plot_dict=plot_dict, + axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3]) + bins = np.linspace(xrange_inset[0], xrange_inset[1], n_bins_inset) + select_df = data.query(f"{aoe_class.cal_energy_param}<{xrange_inset[1]}&{aoe_class.cal_energy_param}>{xrange_inset[0]}") + axins.hist( + select_df.query(aoe_class.selection_string)[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + axins.hist( + select_df.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + axins.hist( + select_df.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + axins.hist( + select_df.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + bins=bins, + histtype="step", + ) + except:pass + ax.set_xlim(xrange) + ax.set_yscale("log") + plt.xlabel("energy (keV)") + plt.ylabel("counts") + plt.legend(loc="upper left") + plt.close() + return fig + +def plot_sf_vs_energy(aoe_class, + data, + xrange = (900, 3000), + n_bins=701, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + bins = np.linspace(xrange[0], xrange[1], n_bins) + counts_pass, bins_pass, _ = pgh.get_hist( + data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + bins=bins, + ) + counts, bins, _ = pgh.get_hist(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins) + survival_fracs = counts_pass / (counts + 10**-99) + + plt.step(pgh.get_bin_centers(bins_pass), 100*survival_fracs) + except:pass + plt.ylim([0, 100]) + vals, labels = plt.yticks() + plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.xlabel("energy (keV)") + plt.ylabel("survival percentage") + plt.close() + return fig + +def plot_classifier(aoe_class, + data, + aoe_param="AoE_Classifier", + xrange = (900, 3000), + yrange=(-50,10), + xn_bins=700, + yn_bins=500, + figsize=[12, 8], fontsize=12 + ) -> plt.figure: + + plt.rcParams["figure.figsize"] = figsize + plt.rcParams["font.size"] = fontsize + + fig = plt.figure() + try: + plt.hist2d(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param] , + data.query(aoe_class.selection_string)[aoe_param], + bins=[np.linspace(xrange[0], xrange[1], xn_bins), + np.linspace(yrange[0], yrange[1], yn_bins)], + norm=LogNorm() ) + except:pass + plt.xlabel("energy (keV)") + plt.ylabel(aoe_param) + plt.xlim(xrange) + plt.ylim(yrange) + plt.close() + return fig + +def aoe_calibration(files, + lh5_path:str, + cal_dicts: dict, + current_param:str, + energy_param:str, + cal_energy_param: str, + eres_func: Callable, + pdf:Callable=standard_aoe, + cut_field:str = "is_valid_cal", + dt_corr: bool = False, + dep_correct: bool = False, + dt_cut: dict = None, + high_cut_val: int = 3, + mean_func:Callable=pol1, + sigma_func:Callable=sigma_fit, + dep_acc:float = 0.9, + dt_param:str = "dt_eff", + comptBands_width:int=20, + plot_options:dict={}, + threshold:int=800 + ): + params = [ + current_param, + "tp_0_est", + "tp_99", + dt_param, + energy_param, + cal_energy_param, + cut_field, + ] - fig4 = plt.figure() - plt.vlines(cut, 0, 100, label=f"Cut Value: {cut:1.2f}", color="black") - - for i, peak in enumerate(peaks_of_interest): - plt.errorbar( - full_cut_vals[i], - full_sfs[i], - yerr=full_sf_errs[i], - label=f"{get_peak_label(peak)} {peak} keV: {sf[i]:2.1f} +/- {sferr[i]:2.1f} %", + aoe = cal_aoe( + cal_dicts, + cal_energy_param, + eres_func, + pdf, + f"{cut_field}&is_not_pulser", + dt_corr, + dep_acc, + dep_correct, + dt_cut, + dt_param, + high_cut_val, + mean_func, + sigma_func, + comptBands_width, + plot_options ) + if dt_cut is not None: + params.append(dt_cut["out_param"]) - handles, labels = plt.gca().get_legend_handles_labels() - # order = [1, 2, 3, 0, 4, 5] - plt.legend( - # [handles[idx] for idx in order], - # [labels[idx] for idx in order], - loc="upper right", - ) - plt.xlabel("Cut Value") - plt.ylabel("Survival Fraction %") - plt.ylim([0, 105]) - plot_dict["surv_fracs"] = fig4 - if display > 1: - plt.show() - else: - plt.close() + data = load_data( + files, + lh5_path, + aoe.cal_dicts, + params, + cal_energy_param, + threshold + ) - fig5, ax = plt.subplots() - bins = np.arange(900, 3000, 1) - ax.hist( - df.query(f"is_valid_cal& is_not_pulser")[cal_energy_param], - bins=bins, - histtype="step", - label="Before PSD", - ) - ax.hist( - df.query(f"is_usable_fits & AoE_classifier > {cut}")[cal_energy_param], - bins=bins, - histtype="step", - label="Low side PSD cut", - ) - ax.hist( - df.query( - f"is_usable_fits & AoE_classifier > {cut} & AoE_classifier < {aoe_high_cut}" - )[cal_energy_param], - bins=bins, - histtype="step", - label="Double sided PSD cut", - ) - ax.hist( - df.query( - f"is_valid_cal& is_not_pulser & (AoE_classifier < {cut} | AoE_classifier > {aoe_high_cut} | (~is_usable_fits))" - )[cal_energy_param], - bins=bins, - histtype="step", - label="Rejected by PSD cut", - ) + data["AoE_Uncorr"] = np.divide(data[current_param], data[energy_param]) - axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3]) - bins = np.linspace(1580, 1640, 200) - axins.hist( - df.query(f"is_valid_cal& is_not_pulser")[cal_energy_param], - bins=bins, - histtype="step", - ) - axins.hist( - df.query(f"is_usable_fits & AoE_classifier > {cut}")[cal_energy_param], - bins=bins, - histtype="step", - ) - axins.hist( - df.query( - f"is_usable_fits & AoE_classifier > {cut} & AoE_classifier < {aoe_high_cut}" - )[cal_energy_param], - bins=bins, - histtype="step", - ) - axins.hist( - df.query( - f"is_valid_cal& is_not_pulser & (AoE_classifier < {cut} | AoE_classifier > {aoe_high_cut}| (~is_usable_fits))" - )[cal_energy_param], - bins=bins, - histtype="step", - ) - ax.set_xlim([900, 3000]) - ax.set_yscale("log") - plt.xlabel("Energy (keV)") - plt.ylabel("Counts") - plt.legend(loc="upper left") - plot_dict["PSD_spectrum"] = fig5 - if display > 1: - plt.show() - else: - plt.close() - - fig6 = plt.figure() - bins = np.arange(900, 3000, 3) - counts_pass, bins_pass, _ = pgh.get_hist( - df.query( - f"is_usable_fits & AoE_classifier > {cut} & AoE_classifier < {aoe_high_cut}" - )[cal_energy_param], - bins=bins, - ) - counts, bins, _ = pgh.get_hist( - df.query(f"is_valid_cal& is_not_pulser")[cal_energy_param], bins=bins - ) - survival_fracs = counts_pass / (counts + 10**-99) - - plt.step(pgh.get_bin_centers(bins_pass), survival_fracs) - plt.xlabel("Energy (keV)") - plt.ylabel("Survival Fraction") - plt.ylim([0, 1]) - plot_dict["psd_sf"] = fig6 - if display > 1: - plt.show() - else: - plt.close() + + aoe.update_cal_dicts({"AoE_Uncorr": + {"expression":f"{current_param}/{energy_param}", + "parameters":{} + }} + ) - return cal_dict, out_dict, plot_dict - except: - return cal_dict, out_dict, plot_dict + aoe.calibrate(data, "AoE_Uncorr") + log.info(f"Calibrated A/E") + return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe \ No newline at end of file From dfca8a05c0c86d77fa7860ac62ae99de3cc0d19b Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Fri, 29 Sep 2023 17:48:02 +0200 Subject: [PATCH 06/22] split loading routine into own file as well as function to handle failed fits --- src/pygama/pargen/utils.py | 127 +++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 src/pygama/pargen/utils.py diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py new file mode 100644 index 000000000..86d1d94ba --- /dev/null +++ b/src/pygama/pargen/utils.py @@ -0,0 +1,127 @@ +from __future__ import annotations + +import numpy as np +import pandas as pd +import logging +from iminuit import Minuit, cost, util +from types import FunctionType + +import lgdo.lh5_store as lh5 +import pygama.pargen.cuts as cts + +log = logging.getLogger(__name__) + +def return_nans(input): + if isinstance(input, FunctionType): + args = input.__code__.co_varnames[: input.__code__.co_argcount][1:] + c = cost.UnbinnedNLL(np.array([0]), input) + m = Minuit(c, *[np.nan for arg in args]) + return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) + else: + args = input.pdf.__code__.co_varnames[: input.pdf.__code__.co_argcount][1:] + c = cost.UnbinnedNLL(np.array([0]), input.pdf) + m = Minuit(c, *[np.nan for arg in args]) + return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) + +def tag_pulser(files, lh5_path): + pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) + pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + e_cut = (pulser_df.trapTmax.values < entry[0] + entry[1]) & ( + pulser_df.trapTmax.values > entry[0] - entry[1] + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = ~(final_mask) + log.debug(f"pulser found: {pulser_props}") + else: + ids = np.ones(len(pulser_df), dtype=bool) + log.debug(f"no pulser found") + return ids + +def get_params(file_params, param_list): + out_params = [] + if isinstance(file_params, dict): + possible_keys = file_params.keys() + elif isinstance(file_params, list): + possible_keys = file_params + for param in param_list: + for key in possible_keys: + if key in param: + out_params.append(key) + return np.unique(out_params).tolist() + + +def load_data( + files: list, + lh5_path: str, + cal_dict: dict, + params = [ + "cuspEmax" + ], + cal_energy_param: str="cuspEmax_ctc_cal", + threshold = None +) -> tuple(np.array, np.array, np.array, np.array): + """ + Loads in the A/E parameters needed and applies calibration constants to energy + """ + + sto = lh5.LH5Store() + + if isinstance(files, dict): + df = [] + all_files = [] + masks=np.array([],dtype=bool) + for tstamp, tfiles in files.items(): + table = sto.read_object(lh5_path, tfiles)[0] + if tstamp in cal_dict: + file_df = table.eval(cal_dict[tstamp]).get_dataframe() + else: + file_df = table.eval(cal_dict).get_dataframe() + file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object) + params.append("timestamp") + if threshold is not None: + mask = file_df[cal_energy_param]threshold + df.drop(np.where(~masks)[0], inplace=True) + else: + masks = np.ones(len(df),dtype=bool) + all_files = files + + if lh5_path[-1] != "/": lh5_path+='/' + keys = lh5.ls(all_files[0], lh5_path) + keys = [key.split("/")[-1] for key in keys] + params = get_params(keys+list(df.keys()), params) + + ids = tag_pulser(all_files, lh5_path) + df["is_not_pulser"] = ids[masks] + params.append("is_not_pulser") + + for col in list(df.keys()): + if col not in params: + df.drop(col, inplace=True, axis=1) + + param_dict = {} + for param in params: + if param not in df: + df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] + log.debug(f"data loaded") + return df \ No newline at end of file From 4c55f013442e9537fe3d092e9e206ea819eace75 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 30 Sep 2023 19:21:09 +0200 Subject: [PATCH 07/22] bugfix on selection when nan values from fit --- src/pygama/pargen/energy_optimisation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 893a0a93f..3bcdb5e0a 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -1045,7 +1045,7 @@ def event_selection( n_bins=(np.nanmax(energy) - np.nanmin(energy)) // 1, uncal_is_int=True ) - if params[0] is None: + if params[0] is None or np.isnan(params[0]).any(): log.debug("Fit failed, using max guess") hist, bins, var = pgh.get_hist( energy, range=(int(e_lower_lim), int(e_upper_lim)), dx=1 From 4dc82721fc159e4a52f3ccf1c4dd81f06b651058 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Sat, 30 Sep 2023 19:21:37 +0200 Subject: [PATCH 08/22] added ability to change tail weighting and changed binning on high stats fitting --- src/pygama/pargen/AoE_cal.py | 1551 +++++++++++++--------- src/pygama/pargen/ecal_th.py | 653 +++++---- src/pygama/pargen/energy_cal.py | 209 +-- src/pygama/pargen/energy_optimisation.py | 13 +- src/pygama/pargen/utils.py | 41 +- 5 files changed, 1508 insertions(+), 959 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index dc9f9ae3c..2125f80c3 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -35,6 +35,7 @@ log = logging.getLogger(__name__) + class PDF: """ @@ -144,9 +145,9 @@ def guess(hist, bins, var, **kwargs): "upper_range": np.nanmax(bins), "components": 0, } - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return standard_aoe._replace_values(guess_dict, **kwargs) @@ -164,7 +165,9 @@ def bounds(guess, **kwargs): return [ bound - for field, bound in standard_aoe._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe._replace_values( + bounds_dict, **kwargs + ).items() ] def fixed(**kwargs): @@ -180,7 +183,10 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in standard_aoe._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in standard_aoe._replace_values( + fixed_dict, **kwargs + ).items() ] def width(pars, errs, cov): @@ -297,9 +303,9 @@ def guess(hist, bins, var, **kwargs): "upper_range": np.nanmax(bins), "components": 0, } - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return standard_aoe_with_high_tail._replace_values(guess_dict, **kwargs) @@ -319,7 +325,9 @@ def bounds(guess, **kwargs): return [ bound - for field, bound in standard_aoe_with_high_tail._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_with_high_tail._replace_values( + bounds_dict, **kwargs + ).items() ] def fixed(**kwargs): @@ -337,7 +345,10 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in standard_aoe_with_high_tail._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in standard_aoe_with_high_tail._replace_values( + fixed_dict, **kwargs + ).items() ] def width(pars, errs, cov): @@ -411,9 +422,9 @@ def guess(hist, bins, var, **kwargs): "lower_range": np.nanmin(bins), "upper_range": np.nanmax(bins), } - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return standard_aoe_bkg._replace_values(guess_dict, **kwargs) @@ -429,7 +440,9 @@ def bounds(guess, **kwargs): return [ bound - for field, bound in standard_aoe_bkg._replace_values(bounds_dict, **kwargs).items() + for field, bound in standard_aoe_bkg._replace_values( + bounds_dict, **kwargs + ).items() ] def fixed(**kwargs): @@ -443,7 +456,10 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in standard_aoe_bkg._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in standard_aoe_bkg._replace_values( + fixed_dict, **kwargs + ).items() ] @@ -465,9 +481,7 @@ def extended_pdf( """ Extended PDF for A/E consists of a gaussian signal with gaussian tail background """ - return n_events, gaussian.pdf( - x, n_events, mu, sigma - ) + return n_events, gaussian.pdf(x, n_events, mu, sigma) def guess(hist, bins, var, **kwargs): bin_centers = (bins[:-1] + bins[1:]) / 2 @@ -484,9 +498,9 @@ def guess(hist, bins, var, **kwargs): ) guess_dict = {"n_events": ls_guess, "mu": mu, "sigma": sigma} - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return gaussian._replace_values(guess_dict, **kwargs) @@ -506,7 +520,8 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in gaussian._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in gaussian._replace_values(fixed_dict, **kwargs).items() ] @@ -630,9 +645,9 @@ def guess(hist: np.array, bins: np.array, var: np.array, **kwargs) -> list: "tau2": 0.1, "components": 0, } - for key, guess in guess_dict.items(): + for key, guess in guess_dict.items(): if np.isnan(guess): - guess_dict[key]=0 + guess_dict[key] = 0 return drift_time_distribution._replace_values(guess_dict, **kwargs) @@ -653,7 +668,9 @@ def bounds(guess, **kwargs): return [ bound - for field, bound in drift_time_distribution._replace_values(bounds_dict, **kwargs).items() + for field, bound in drift_time_distribution._replace_values( + bounds_dict, **kwargs + ).items() ] def fixed(**kwargs): @@ -672,37 +689,39 @@ def fixed(**kwargs): } return [ - fixed for field, fixed in drift_time_distribution._replace_values(fixed_dict, **kwargs).items() + fixed + for field, fixed in drift_time_distribution._replace_values( + fixed_dict, **kwargs + ).items() ] + class pol1: - def func(x, a, b): - return x*a + b - + return x * a + b + def string_func(input_param): return f"{input_param}*a+b" - + def guess(bands, means, mean_errs): return [-1e-06, 5e-01] - + + class sigma_fit: - def func(x, a, b, c): return np.sqrt(a + (b / (x + 10**-99)) ** c) - + def string_func(input_param): - return f"(a+(b/({input_param}+10**-99))**c)**(0.5)" - + return f"(a+(b/({input_param}+10**-99))**c)**(0.5)" + def guess(bands, sigmas, sigma_errs): return [np.nanpercentile(sigmas, 50) ** 2, 2, 2] + class sigmoid_fit: - def func(x, a, b, c, d): return (a + b * x) * nb_erfc(c * x + d) - - + def guess(xs, ys, y_errs): return [np.nanmax(ys) / 2, 0, 1, 1.5] @@ -766,7 +785,6 @@ def unbinned_aoe_fit( m2.simplex().migrad() m2.hesse() - x0 = pdf.guess( hist, bins, @@ -924,10 +942,10 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): fit_range[0], fit_range[1], 0, - ] - for i, guess in enumerate(parguess): + ] + for i, guess in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess elif func_i == pgf.extended_gauss_step_pdf: @@ -946,10 +964,19 @@ def energy_guess(hist, bins, var, func_i, peak, eres, fit_range): if nsig_guess < 0: nsig_guess = 0 - parguess=[nsig_guess, mu, sigma, nbkg_guess, hstep, fit_range[0], fit_range[1], 0] - for i, guess in enumerate(parguess): + parguess = [ + nsig_guess, + mu, + sigma, + nbkg_guess, + hstep, + fit_range[0], + fit_range[1], + 0, + ] + for i, guess in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess @@ -1105,7 +1132,7 @@ def get_survival_fraction( guess_pars_cut=None, guess_pars_surv=None, dt_mask=None, - mode= "greater", + mode="greater", display=0, ): if dt_mask is None: @@ -1121,7 +1148,7 @@ def get_survival_fraction( idxs = (cut_param < cut_val) & dt_mask else: raise ValueError("mode not recognised") - + if guess_pars_cut is None or guess_pars_surv is None: pars, errs = unbinned_energy_fit(energy, peak, eres_pars, simplex=True) guess_pars_cut = pars @@ -1165,10 +1192,10 @@ def get_sf_sweep( final_cut_value: float, peak: float, eres_pars: list, - dt_mask = None, - cut_range = (-5,5), - n_samples = 51, - mode= "greater" + dt_mask=None, + cut_range=(-5, 5), + n_samples=51, + mode="greater", ) -> tuple(pd.DataFrame, float, float): """ Calculates survival fraction for gamma lines using fitting method as in cut determination @@ -1184,10 +1211,9 @@ def get_sf_sweep( sf, err, cut_pars, surv_pars = get_survival_fraction( energy, cut_param, cut_val, peak, eres_pars, dt_mask=dt_mask, mode=mode ) - out_df = pd.concat([out_df, - pd.DataFrame([{"cut_val":cut_val, - "sf":sf, - "sf_err":err}])]) + out_df = pd.concat( + [out_df, pd.DataFrame([{"cut_val": cut_val, "sf": sf, "sf_err": err}])] + ) except: pass out_df.set_index("cut_val", inplace=True) @@ -1195,16 +1221,18 @@ def get_sf_sweep( energy, cut_param, final_cut_value, peak, eres_pars, dt_mask=dt_mask, mode=mode ) return ( - out_df.query(f'sf_err<5*{np.nanpercentile(out_df["sf_err"], 50)}& sf_err==sf_err & sf<=100'), + out_df.query( + f'sf_err<5*{np.nanpercentile(out_df["sf_err"], 50)}& sf_err==sf_err & sf<=100' + ), sf, sf_err, - ) + ) + -def compton_sf(cut_param, low_cut_val, high_cut_val = None, mode="greater", dt_mask=None): - +def compton_sf(cut_param, low_cut_val, high_cut_val=None, mode="greater", dt_mask=None): if dt_mask is None: dt_mask = np.full(len(cut_param), True, dtype=bool) - + if high_cut_val is not None: mask = (cut_param > low_cut_val) & (cut_param < high_cut_val) & dt_mask else: @@ -1214,12 +1242,16 @@ def compton_sf(cut_param, low_cut_val, high_cut_val = None, mode="greater", dt_m mask = (cut_param < low_cut_val) & dt_mask else: raise ValueError("mode not recognised") - + sf = 100 * len(cut_param[mask]) / len(cut_param) - sf_err = sf* np.sqrt( - (1 / len(cut_param)) + 1 / (len(cut_param[mask]) + 10**-99) - ) - return {"low_cut":low_cut_val, "sf":sf, "sf_err":sf_err, "high_cut": high_cut_val} + sf_err = sf * np.sqrt((1 / len(cut_param)) + 1 / (len(cut_param[mask]) + 10**-99)) + return { + "low_cut": low_cut_val, + "sf": sf, + "sf_err": sf_err, + "high_cut": high_cut_val, + } + def compton_sf_sweep( energy: np.array, @@ -1228,9 +1260,9 @@ def compton_sf_sweep( peak: float, eres: list[float, float], dt_mask: np.array = None, - cut_range = (-5,5), - n_samples = 51, - mode= "greater" + cut_range=(-5, 5), + n_samples=51, + mode="greater", ) -> tuple(float, np.array, list): """ Determines survival fraction for compton continuum by basic counting @@ -1238,44 +1270,53 @@ def compton_sf_sweep( cut_vals = np.linspace(cut_range[0], cut_range[1], n_samples) out_df = pd.DataFrame(columns=["cut_val", "sf", "sf_err"]) - + for cut_val in cut_vals: ct_dict = compton_sf(cut_param, cut_val, mode=mode, dt_mask=dt_mask) - df = pd.DataFrame([{"cut_val":ct_dict["low_cut"], "sf":ct_dict["sf"], "sf_err":ct_dict["sf_err"]}]) + df = pd.DataFrame( + [ + { + "cut_val": ct_dict["low_cut"], + "sf": ct_dict["sf"], + "sf_err": ct_dict["sf_err"], + } + ] + ) out_df = pd.concat([out_df, df]) out_df.set_index("cut_val", inplace=True) - + sf_dict = compton_sf(cut_param, final_cut_value, mode=mode, dt_mask=dt_mask) - + return out_df, sf_dict["sf"], sf_dict["sf_err"] + class cal_aoe: - - def __init__(self, - cal_dicts: dict, - cal_energy_param: str, - eres_func: callable, - pdf=standard_aoe, - selection_string: str = "is_valid_cal&is_not_pulser", - dt_corr: bool = False, - dep_acc:float = 0.9, - dep_correct: bool = False, - dt_cut:dict = None, - dt_param:str = "dt_eff", - high_cut_val: int = 3, - mean_func:Callable=pol1, - sigma_func:Callable=sigma_fit, - comptBands_width:int=20, - plot_options:dict={} - ): + def __init__( + self, + cal_dicts: dict, + cal_energy_param: str, + eres_func: callable, + pdf=standard_aoe, + selection_string: str = "is_valid_cal&is_not_pulser", + dt_corr: bool = False, + dep_acc: float = 0.9, + dep_correct: bool = False, + dt_cut: dict = None, + dt_param: str = "dt_eff", + high_cut_val: int = 3, + mean_func: Callable = pol1, + sigma_func: Callable = sigma_fit, + comptBands_width: int = 20, + plot_options: dict = {}, + ): self.cal_dicts = cal_dicts self.cal_energy_param = cal_energy_param self.eres_func = eres_func - self.pdf =pdf + self.pdf = pdf self.selection_string = selection_string self.dt_corr = dt_corr self.dt_param = "dt_eff" - self.dep_correct= dep_correct + self.dep_correct = dep_correct self.dt_cut = dt_cut self.dep_acc = dep_acc if self.dt_cut is not None: @@ -1288,10 +1329,10 @@ def __init__(self, self.dt_cut_hard = False self.fit_selection = self.selection_string self.high_cut_val = high_cut_val - self.mean_func= mean_func - self.sigma_func=sigma_func + self.mean_func = mean_func + self.sigma_func = sigma_func self.comptBands_width = comptBands_width - self.plot_options=plot_options + self.plot_options = plot_options def update_cal_dicts(self, update_dict): if re.match(r"(\d{8})T(\d{6})Z", list(self.cal_dicts)[0]): @@ -1303,15 +1344,11 @@ def update_cal_dicts(self, update_dict): else: self.cal_dicts.update(update_dict) - def aoe_timecorr( - self, - df, - aoe_param, - output_name = "AoE_Timecorr", - display=0 - ): + def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): log.info("Starting A/E time correction") - self.timecorr_df = pd.DataFrame(columns=["timestamp", "mean", "mean_err", "res", "res_err"]) + self.timecorr_df = pd.DataFrame( + columns=["timestamp", "mean", "mean_err", "res", "res_err"] + ) try: if "timestamp" in df: tstamps = sorted(np.unique(df["timestamp"])) @@ -1330,98 +1367,132 @@ def aoe_timecorr( display=display, ) self.timecorr_df = pd.concat( - [ - self.timecorr_df, - pd.DataFrame([ - {"timestamp": tstamp, - "mean":pars["mu"], - "mean_err":errs["mu"], - "res":pars["sigma"] / pars["mu"], - "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} - ]), - ]) + [ + self.timecorr_df, + pd.DataFrame( + [ + { + "timestamp": tstamp, + "mean": pars["mu"], + "mean_err": errs["mu"], + "res": pars["sigma"] / pars["mu"], + "res_err": (pars["sigma"] / pars["mu"]) + * np.sqrt( + errs["sigma"] / pars["sigma"] + + errs["mu"] / pars["mu"] + ), + } + ] + ), + ] + ) except: self.timecorr_df = pd.concat( - [ - self.timecorr_df, - pd.DataFrame([ - {"timestamp": tstamp, - "mean":np.nan, - "mean_err":np.nan, - "res":np.nan, - "res_err":np.nan} - ]), - ]) + [ + self.timecorr_df, + pd.DataFrame( + [ + { + "timestamp": tstamp, + "mean": np.nan, + "mean_err": np.nan, + "res": np.nan, + "res_err": np.nan, + } + ] + ), + ] + ) self.timecorr_df.set_index("timestamp", inplace=True) - time_dict = fit_time_means(np.array(self.timecorr_df.index), - np.array(self.timecorr_df["mean"]), - np.array(self.timecorr_df["res"])) + time_dict = fit_time_means( + np.array(self.timecorr_df.index), + np.array(self.timecorr_df["mean"]), + np.array(self.timecorr_df["res"]), + ) df[output_name] = df[aoe_param] / np.array( [time_dict[tstamp] for tstamp in df["timestamp"]] ) - self.update_cal_dicts({ - tstamp: { - output_name: { - "expression": f"{aoe_param}/a", - "parameters": {"a": t_dict}, + self.update_cal_dicts( + { + tstamp: { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": t_dict}, + } } + for tstamp, t_dict in time_dict.items() } - for tstamp, t_dict in time_dict.items() - }) + ) log.info("A/E time correction finished") else: try: pars, errs, cov = unbinned_aoe_fit( - df.query(f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300")[ - aoe_param - ], + df.query( + f"{self.fit_selection} & {self.cal_energy_param}>1000 & {self.cal_energy_param}<1300" + )[aoe_param], pdf=self.pdf, display=display, ) self.timecorr_df = pd.concat( [ self.timecorr_df, - pd.DataFrame([ - { - "mean":pars["mu"], - "mean_err":errs["mu"], - "res":pars["sigma"] / pars["mu"], - "res_err":(pars["sigma"] / pars["mu"]) * np.sqrt(errs["sigma"] / pars["sigma"] + errs["mu"] / pars["mu"])} - ]), - ]) + pd.DataFrame( + [ + { + "mean": pars["mu"], + "mean_err": errs["mu"], + "res": pars["sigma"] / pars["mu"], + "res_err": (pars["sigma"] / pars["mu"]) + * np.sqrt( + errs["sigma"] / pars["sigma"] + + errs["mu"] / pars["mu"] + ), + } + ] + ), + ] + ) except: self.timecorr_df = pd.concat( [ self.timecorr_df, - pd.DataFrame([ - { - "mean":np.nan, - "mean_err":np.nan, - "res":np.nan, - "res_err":np.nan} - ]), - ]) + pd.DataFrame( + [ + { + "mean": np.nan, + "mean_err": np.nan, + "res": np.nan, + "res_err": np.nan, + } + ] + ), + ] + ) df[output_name] = df[aoe_param] / pars["mu"] - self.update_cal_dicts({ - output_name: { - "expression": f"{aoe_param}/a", - "parameters": {"a": pars["mu"]}, + self.update_cal_dicts( + { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": pars["mu"]}, + } } - }) + ) log.info("A/E time correction finished") except: log.error("A/E time correction failed") - self.update_cal_dicts({ - output_name: { - "expression": f"{aoe_param}/a", - "parameters": {"a": np.nan}, + self.update_cal_dicts( + { + output_name: { + "expression": f"{aoe_param}/a", + "parameters": {"a": np.nan}, + } } - }) - + ) + def drift_time_correction( self, - data:pd.DataFrame, + data: pd.DataFrame, aoe_param, display: int = 0, ): @@ -1431,7 +1502,9 @@ def drift_time_correction( log.info("Starting A/E drift time correction") self.dt_res_dict = {} try: - dep_events = data.query(f"{self.fit_selection}&{self.cal_energy_param}>1582&{self.cal_energy_param}<1602&{self.cal_energy_param}=={self.cal_energy_param}&{aoe_param}=={aoe_param}") + dep_events = data.query( + f"{self.fit_selection}&{self.cal_energy_param}>1582&{self.cal_energy_param}<1602&{self.cal_energy_param}=={self.cal_energy_param}&{aoe_param}=={aoe_param}" + ) hist, bins, var = pgh.get_hist( dep_events[aoe_param], @@ -1441,64 +1514,88 @@ def drift_time_correction( mu = bin_cs[np.argmax(hist)] aoe_range = [mu * 0.9, mu * 1.1] - dt_range = [np.nanpercentile(dep_events[self.dt_param], 1) , np.nanpercentile(dep_events[self.dt_param], 99)] + dt_range = [ + np.nanpercentile(dep_events[self.dt_param], 1), + np.nanpercentile(dep_events[self.dt_param], 99), + ] - self.dt_res_dict['final_selection'] = f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}&{self.dt_param}>{dt_range[0]}&{self.dt_param}<{dt_range[1]}&{self.dt_param}=={self.dt_param}" + self.dt_res_dict[ + "final_selection" + ] = f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}&{self.dt_param}>{dt_range[0]}&{self.dt_param}<{dt_range[1]}&{self.dt_param}=={self.dt_param}" - final_df = dep_events.query(self.dt_res_dict['final_selection']) + final_df = dep_events.query(self.dt_res_dict["final_selection"]) hist, bins, var = pgh.get_hist( - final_df[self.dt_param], dx=10, range=(np.nanmin(final_df[self.dt_param]), np.nanmax(final_df[self.dt_param])) + final_df[self.dt_param], + dx=10, + range=( + np.nanmin(final_df[self.dt_param]), + np.nanmax(final_df[self.dt_param]), + ), + ) + + gpars = self.dt_res_dict["dt_guess"] = drift_time_distribution.guess( + hist, bins, var + ) + cost_func = cost.ExtendedUnbinnedNLL( + final_df[self.dt_param], drift_time_distribution.extended_pdf ) - - gpars = self.dt_res_dict['dt_guess'] =drift_time_distribution.guess(hist, bins, var) - cost_func = cost.ExtendedUnbinnedNLL(final_df[self.dt_param], drift_time_distribution.extended_pdf) m = Minuit(cost_func, **gpars) m.limits = drift_time_distribution.bounds(gpars) m.fixed = drift_time_distribution.fixed() m.simplex().migrad() m.hesse() - - self.dt_res_dict["dt_fit"]={"pars": m.values,"errs":m.errors, "object":m} - aoe_grp1 = self.dt_res_dict["aoe_grp1"] = f'{self.dt_param}>{m.values["mu1"] - 2 * m.values["sigma1"]} & {self.dt_param}<{m.values["mu1"] + 2 * m.values["sigma1"]}' - aoe_grp2 = self.dt_res_dict["aoe_grp2"] = f'{self.dt_param}>{m.values["mu2"] - 2 * m.values["sigma2"]} & {self.dt_param}<{m.values["mu2"] + 2 * m.values["sigma2"]}' - - aoe_pars, aoe_errs, _ = unbinned_aoe_fit(final_df.query(aoe_grp1)[aoe_param], - pdf=self.pdf, display=display) - self.dt_res_dict["aoe_fit1"] = {"pars":aoe_pars, "errs": aoe_errs} + self.dt_res_dict["dt_fit"] = { + "parameters": m.values, + "uncertainties": m.errors, + "object": m, + } + aoe_grp1 = self.dt_res_dict[ + "aoe_grp1" + ] = f'{self.dt_param}>{m.values["mu1"] - 2 * m.values["sigma1"]} & {self.dt_param}<{m.values["mu1"] + 2 * m.values["sigma1"]}' + aoe_grp2 = self.dt_res_dict[ + "aoe_grp2" + ] = f'{self.dt_param}>{m.values["mu2"] - 2 * m.values["sigma2"]} & {self.dt_param}<{m.values["mu2"] + 2 * m.values["sigma2"]}' + + aoe_pars, aoe_errs, _ = unbinned_aoe_fit( + final_df.query(aoe_grp1)[aoe_param], pdf=self.pdf, display=display + ) - aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit(final_df.query(aoe_grp2)[aoe_param], - pdf=self.pdf, display=display) + self.dt_res_dict["aoe_fit1"] = {"pars": aoe_pars, "errs": aoe_errs} - self.dt_res_dict["aoe_fit2"] = {"pars":aoe_pars2, "errs": aoe_errs2} + aoe_pars2, aoe_errs2, _ = unbinned_aoe_fit( + final_df.query(aoe_grp2)[aoe_param], pdf=self.pdf, display=display + ) + + self.dt_res_dict["aoe_fit2"] = {"pars": aoe_pars2, "errs": aoe_errs2} try: self.alpha = (aoe_pars["mu"] - aoe_pars2["mu"]) / ( - (m.values["mu2"] * aoe_pars2["mu"]) - (m.values["mu1"] * aoe_pars["mu"]) + (m.values["mu2"] * aoe_pars2["mu"]) + - (m.values["mu1"] * aoe_pars["mu"]) ) except ZeroDivisionError: self.alpha = 0 - self.dt_res_dict["alpha"] = self.alpha + self.dt_res_dict["alpha"] = self.alpha log.info(f"dtcorr successful alpha:{self.alpha}") - data["AoE_DTcorr"] = data[aoe_param] * (1 + self.alpha * data[self.dt_param]) + data["AoE_DTcorr"] = data[aoe_param] * ( + 1 + self.alpha * data[self.dt_param] + ) except: log.error("Drift time correction failed") - self.alpha=np.nan + self.alpha = np.nan - self.update_cal_dicts({ - "AoE_DTcorr": { - "expression": f"{aoe_param}*(1+a*{self.dt_param})", - "parameters": {"a": self.alpha}, + self.update_cal_dicts( + { + "AoE_DTcorr": { + "expression": f"{aoe_param}*(1+a*{self.dt_param})", + "parameters": {"a": self.alpha}, + } } - }) + ) - def AoEcorrection( - self, - data:pd.DataFrame, - aoe_param:str, - display:int=0 - ): + def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): """ Calculates the corrections needed for the energy dependence of the A/E. Does this by fitting the compton continuum in slices and then applies fits to the centroid and variance. @@ -1508,20 +1605,32 @@ def AoEcorrection( self.energy_corr_res_dict = {} comptBands = np.arange(900, 2350, self.comptBands_width) - peaks = np.array([1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105]) + peaks = np.array( + [1080, 1094, 1459, 1512, 1552, 1592, 1620, 1650, 1670, 1830, 2105] + ) allowed = np.array([], dtype=bool) for i, band in enumerate(comptBands): allow = True for peak in peaks: if (peak - 5) > band and (peak - 5) < (band + self.comptBands_width): allow = False - elif (peak + 5 > band) and (peak + 5) < (band +self.comptBands_width): + elif (peak + 5 > band) and (peak + 5) < (band + self.comptBands_width): allow = False allowed = np.append(allowed, allow) comptBands = comptBands[allowed] - self.energy_corr_fits = pd.DataFrame(columns=["compt_bands", "mean", "mean_err", - "sigma", "sigma_err", "ratio", "ratio_err"], dtype=float) + self.energy_corr_fits = pd.DataFrame( + columns=[ + "compt_bands", + "mean", + "mean_err", + "sigma", + "sigma_err", + "ratio", + "ratio_err", + ], + dtype=float, + ) try: select_df = data.query(f"{self.fit_selection} & {aoe_param}>0") @@ -1529,53 +1638,73 @@ def AoEcorrection( for band in comptBands: try: pars, errs, cov = unbinned_aoe_fit( - select_df.query(f"{self.cal_energy_param}>{band}&{self.cal_energy_param}< {self.comptBands_width+band}")[aoe_param], - pdf=self.pdf, display=display) + select_df.query( + f"{self.cal_energy_param}>{band}&{self.cal_energy_param}< {self.comptBands_width+band}" + )[aoe_param], + pdf=self.pdf, + display=display, + ) mean, mean_err = self.pdf.centroid(pars, errs, cov) sigma, sigma_err = self.pdf.width(pars, errs, cov) self.energy_corr_fits = pd.concat( - [ - self.energy_corr_fits, - pd.DataFrame([ - {"compt_bands": band+self.comptBands_width/2, - "mean":mean, - "mean_err":mean_err, - "sigma":sigma, - "sigma_err":sigma_err, - "ratio":pars["n_sig"] / pars["n_bkg"], - "ratio_err":(pars["n_sig"] / pars["n_bkg"]) *np.sqrt( - (errs["n_sig"] / pars["n_sig"]) ** 2 - + (errs["n_bkg"] / pars["n_bkg"]) ** 2 - ) - }] - ), - ]) + [ + self.energy_corr_fits, + pd.DataFrame( + [ + { + "compt_bands": band + self.comptBands_width / 2, + "mean": mean, + "mean_err": mean_err, + "sigma": sigma, + "sigma_err": sigma_err, + "ratio": pars["n_sig"] / pars["n_bkg"], + "ratio_err": (pars["n_sig"] / pars["n_bkg"]) + * np.sqrt( + (errs["n_sig"] / pars["n_sig"]) ** 2 + + (errs["n_bkg"] / pars["n_bkg"]) ** 2 + ), + } + ] + ), + ] + ) except: self.energy_corr_fits = pd.concat( - [ - self.energy_corr_fits, - pd.DataFrame( - [{"compt_bands": band, - "mean":np.nan, - "mean_err":np.nan, - "sigma":np.nan, - "sigma_err":np.nan, - "ratio":np.nan, - "ratio_err":np.nan - }] - ), - ]) + [ + self.energy_corr_fits, + pd.DataFrame( + [ + { + "compt_bands": band, + "mean": np.nan, + "mean_err": np.nan, + "sigma": np.nan, + "sigma_err": np.nan, + "ratio": np.nan, + "ratio_err": np.nan, + } + ] + ), + ] + ) self.energy_corr_fits.set_index("compt_bands", inplace=True) - valid_fits = self.energy_corr_fits.query("mean_err==mean_err&sigma_err==sigma_err & sigma_err!=0 & mean_err!=0") + valid_fits = self.energy_corr_fits.query( + "mean_err==mean_err&sigma_err==sigma_err & sigma_err!=0 & mean_err!=0" + ) self.energy_corr_res_dict["n_of_valid_fits"] = len(valid_fits) log.info(f"{len(valid_fits)} compton bands fit successfully") # Fit mus against energy - p0_mu = self.mean_func.guess(valid_fits.index, valid_fits["mean"], valid_fits["mean_err"]) + p0_mu = self.mean_func.guess( + valid_fits.index, valid_fits["mean"], valid_fits["mean_err"] + ) c_mu = cost.LeastSquares( - valid_fits.index, valid_fits["mean"], valid_fits["mean_err"], self.mean_func.func + valid_fits.index, + valid_fits["mean"], + valid_fits["mean_err"], + self.mean_func.func, ) c_mu.loss = "soft_l1" m_mu = Minuit(c_mu, *p0_mu) @@ -1587,16 +1716,28 @@ def AoEcorrection( mu_errs = m_mu.errors csqr_mu = np.sum( - ((valid_fits["mean"] - self.mean_func.func(valid_fits.index, *mu_pars)) ** 2) / valid_fits["mean_err"] + ( + ( + valid_fits["mean"] + - self.mean_func.func(valid_fits.index, *mu_pars) + ) + ** 2 + ) + / valid_fits["mean_err"] ) dof_mu = len(valid_fits["mean"]) - len(pars) p_val_mu = chi2.sf(csqr_mu, dof_mu) self.mean_fit_obj = m_mu # Fit sigma against energy - p0_sig = self.sigma_func.guess(valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"]) + p0_sig = self.sigma_func.guess( + valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"] + ) c_sig = cost.LeastSquares( - valid_fits.index, valid_fits["sigma"], valid_fits["sigma_err"], self.sigma_func.func + valid_fits.index, + valid_fits["sigma"], + valid_fits["sigma_err"], + self.sigma_func.func, ) c_sig.loss = "soft_l1" m_sig = Minuit(c_sig, *p0_sig) @@ -1606,14 +1747,20 @@ def AoEcorrection( sig_pars = m_sig.values sig_errs = m_sig.errors - + csqr_sig = np.sum( - ((valid_fits["sigma"] - self.sigma_func.func(valid_fits.index, *sig_pars)) ** 2) + ( + ( + valid_fits["sigma"] + - self.sigma_func.func(valid_fits.index, *sig_pars) + ) + ** 2 + ) / valid_fits["sigma_err"] ) dof_sig = len(valid_fits["sigma"]) - len(sig_pars) p_val_sig = chi2.sf(csqr_sig, dof_sig) - + self.sigma_fit_obj = m_sig # Get DEP fit @@ -1624,17 +1771,21 @@ def AoEcorrection( emax = peak + n_sigma * sigma try: dep_pars, dep_err, _ = unbinned_aoe_fit( - select_df.query(f"{self.cal_energy_param}>{emin}&{self.cal_energy_param}<{emax}")[aoe_param], + select_df.query( + f"{self.cal_energy_param}>{emin}&{self.cal_energy_param}<{emax}" + )[aoe_param], pdf=self.pdf, - display=display + display=display, ) except: dep_pars, dep_err, _ = return_nans(self.pdf) - data["AoE_Corrected"] = data[aoe_param] / self.mean_func.func(data[self.cal_energy_param], *mu_pars) + data["AoE_Corrected"] = data[aoe_param] / self.mean_func.func( + data[self.cal_energy_param], *mu_pars + ) data["AoE_Classifier"] = (data["AoE_Corrected"] - 1) / self.sigma_func.func( - data[self.cal_energy_param], *sig_pars - ) + data[self.cal_energy_param], *sig_pars + ) log.info("Finished A/E energy successful") log.info(f"mean pars are {mu_pars.to_dict()}") log.info(f"sigma pars are {sig_pars.to_dict()}") @@ -1646,46 +1797,54 @@ def AoEcorrection( sig_pars, sig_errs, sig_cov = return_nans(self.sigma_func.func) dep_pars, dep_err, dep_cov = return_nans(self.pdf) - self.energy_corr_res_dict["mean_fits"] = {"func": self.mean_func.__name__, - "module": self.mean_func.__module__, - "expression":self.mean_func.string_func("x"), - "pars": mu_pars.to_dict(), - "errs": mu_errs.to_dict(), - "p_val_mu": p_val_mu, - "csqr_mu": (csqr_mu, dof_mu)} - - self.energy_corr_res_dict["sigma_fits"] = {"func": self.sigma_func.__name__, - "module": self.sigma_func.__module__, - "expression":self.sigma_func.string_func("x"), - "pars": sig_pars.to_dict(), - "errs": sig_errs.to_dict(), - "p_val_mu": p_val_sig, - "csqr_mu": (csqr_sig, dof_sig)} - - self.energy_corr_res_dict["dep_fit"]={"func": self.pdf.__name__, - "module": self.pdf.__module__, - "pars": dep_pars.to_dict(), - "errs": dep_err.to_dict()} - - self.update_cal_dicts({ - "AoE_Corrected": { - "expression": f"{aoe_param}/({self.mean_func.string_func(self.cal_energy_param)})", - "parameters": mu_pars.to_dict(), - }, - "AoE_Classifier": { + self.energy_corr_res_dict["mean_fits"] = { + "func": self.mean_func.__name__, + "module": self.mean_func.__module__, + "expression": self.mean_func.string_func("x"), + "parameters": mu_pars.to_dict(), + "uncertainties": mu_errs.to_dict(), + "p_val_mu": p_val_mu, + "csqr_mu": (csqr_mu, dof_mu), + } + + self.energy_corr_res_dict["sigma_fits"] = { + "func": self.sigma_func.__name__, + "module": self.sigma_func.__module__, + "expression": self.sigma_func.string_func("x"), + "parameters": sig_pars.to_dict(), + "uncertainties": sig_errs.to_dict(), + "p_val_mu": p_val_sig, + "csqr_mu": (csqr_sig, dof_sig), + } + + self.energy_corr_res_dict["dep_fit"] = { + "func": self.pdf.__name__, + "module": self.pdf.__module__, + "parameters": dep_pars.to_dict(), + "uncertainties": dep_err.to_dict(), + } + + self.update_cal_dicts( + { + "AoE_Corrected": { + "expression": f"{aoe_param}/({self.mean_func.string_func(self.cal_energy_param)})", + "parameters": mu_pars.to_dict(), + }, + "AoE_Classifier": { "expression": f"AoE_Corrected/({self.sigma_func.string_func(self.cal_energy_param)})", "parameters": sig_pars.to_dict(), + }, } - }) + ) def get_aoe_cut_fit( self, - data:pd.DataFrame, - aoe_param:str, + data: pd.DataFrame, + aoe_param: str, peak: float, ranges: tuple, dep_acc: float, - display: int = 1 + display: int = 1, ): """ Determines A/E cut by sweeping through values and for each one fitting the DEP to determine how many events survive. @@ -1699,7 +1858,9 @@ def get_aoe_cut_fit( min_range, max_range = ranges try: - select_df = data.query(f"{self.fit_selection}&({self.cal_energy_param} > {peak - min_range}) & ({self.cal_energy_param} < {peak + max_range})") + select_df = data.query( + f"{self.fit_selection}&({self.cal_energy_param} > {peak - min_range}) & ({self.cal_energy_param} < {peak + max_range})" + ) # if dep_correct is True: # peak_aoe = (select_df[aoe_param] / dep_mu(select_df[self.cal_energy_param])) - 1 @@ -1716,70 +1877,102 @@ def get_aoe_cut_fit( peak, self.eres_func(peak), guess_pars_cut=None, - guess_pars_surv=None + guess_pars_surv=None, ) self.cut_fits = pd.concat( [ self.cut_fits, pd.DataFrame( - [{"cut_val": cut_val, - "sf":sf, - "sf_err":err, - }] + [ + { + "cut_val": cut_val, + "sf": sf, + "sf_err": err, + } + ] ), - ]) + ] + ) self.cut_fits.set_index("cut_val", inplace=True) - valid_fits = self.cut_fits.query(f'sf_err<{(1.5 * np.nanpercentile(self.cut_fits["sf_err"],85))}&sf_err==sf_err') + valid_fits = self.cut_fits.query( + f'sf_err<{(1.5 * np.nanpercentile(self.cut_fits["sf_err"],85))}&sf_err==sf_err' + ) c = cost.LeastSquares( - valid_fits.index, valid_fits["sf"], valid_fits["sf_err"], sigmoid_fit.func + valid_fits.index, + valid_fits["sf"], + valid_fits["sf_err"], + sigmoid_fit.func, ) c.loss = "soft_l1" - m1 = Minuit(c, *sigmoid_fit.guess(valid_fits.index, valid_fits["sf"], valid_fits["sf_err"])) + m1 = Minuit( + c, + *sigmoid_fit.guess( + valid_fits.index, valid_fits["sf"], valid_fits["sf_err"] + ), + ) m1.simplex().migrad() - xs = np.arange(np.nanmin(valid_fits.index), np.nanmax(valid_fits.index), 0.01) + xs = np.arange( + np.nanmin(valid_fits.index), np.nanmax(valid_fits.index), 0.01 + ) p = sigmoid_fit.func(xs, *m1.values) - self.cut_fit = {"function": sigmoid_fit.__name__ , "pars": m1.values.to_dict(), "errs": m1.errors.to_dict()} + self.cut_fit = { + "function": sigmoid_fit.__name__, + "parameters": m1.values.to_dict(), + "uncertainties": m1.errors.to_dict(), + } self.low_cut_val = round(xs[np.argmin(np.abs(p - (100 * self.dep_acc)))], 3) log.info(f"Cut found at {self.low_cut_val}") - - data["AoE_Low_Cut"] = (data[aoe_param]>self.low_cut_val) + + data["AoE_Low_Cut"] = data[aoe_param] > self.low_cut_val if self.dt_cut_param is not None: - data["AoE_Low_Cut"] = data["AoE_Low_Cut"] &(data[self.dt_cut_param]) - data["AoE_Double_Sided_Cut"] = data["AoE_Low_Cut"] & (data[aoe_param]a) & ({self.dt_cut_param})", - "parameters": {"a": self.low_cut_val}, - }}) + self.update_cal_dicts( + { + "AoE_Low_Cut": { + "expression": f"({aoe_param}>a) & ({self.dt_cut_param})", + "parameters": {"a": self.low_cut_val}, + } + } + ) else: - self.update_cal_dicts({ - "AoE_Low_Cut": { - "expression": f"({aoe_param}>a)", - "parameters": {"a": self.low_cut_val}, - }}) - self.update_cal_dicts({"AoE_Double_Sided_Cut": { - "expression": f"(a>{aoe_param}) & (AoE_Low_Cut)", - "parameters": {"a": self.high_cut_val}, - }}) + self.update_cal_dicts( + { + "AoE_Low_Cut": { + "expression": f"({aoe_param}>a)", + "parameters": {"a": self.low_cut_val}, + } + } + ) + self.update_cal_dicts( + { + "AoE_Double_Sided_Cut": { + "expression": f"(a>{aoe_param}) & (AoE_Low_Cut)", + "parameters": {"a": self.high_cut_val}, + } + } + ) def get_results_dict(self): return { "cal_energy_param": self.cal_energy_param, "dt_param": self.dt_param, - "rt_correction": self.dt_corr, - "pdf":self.pdf.__name__, + "rt_correction": self.dt_corr, + "pdf": self.pdf.__name__, "1000-1300keV": self.timecorr_df.to_dict("index"), "correction_fit_results": self.energy_corr_res_dict, "low_cut": self.low_cut_val, "high_cut": self.high_cut_val, "low_side_sfs": self.low_side_sf.to_dict("index"), "2_side_sfs": self.two_side_sf.to_dict("index"), - } + } def fill_plot_dict(self, data, plot_dict={}): for key, item in self.plot_options.items(): @@ -1789,52 +1982,39 @@ def fill_plot_dict(self, data, plot_dict={}): plot_dict[key] = item["function"](self, data) return plot_dict - def calibrate(self, df, initial_aoe_param): - self.aoe_timecorr( - df, initial_aoe_param - ) + self.aoe_timecorr(df, initial_aoe_param) log.info("Finished A/E time correction") if self.dt_corr == True: aoe_param = "AoE_DTcorr" - self.drift_time_correction( - df, - "AoE_Timecorr" - ) + self.drift_time_correction(df, "AoE_Timecorr") else: aoe_param = "AoE_Timecorr" - self.AoEcorrection( - df, - aoe_param - ) - - - self.get_aoe_cut_fit( - df, - "AoE_Classifier", - 1592, - (40, 20), - 0.9 - ) - + self.AoEcorrection(df, aoe_param) + + self.get_aoe_cut_fit(df, "AoE_Classifier", 1592, (40, 20), 0.9) + aoe_param = "AoE_Classifier" log.info(" Compute low side survival fractions: ") self.low_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) peaks_of_interest = [1592.5, 1620.5, 2039, 2103.53, 2614.50] fit_widths = [(40, 25), (25, 40), (0, 0), (25, 40), (50, 50)] - self.low_side_peak_dfs={} - - + self.low_side_peak_dfs = {} + for i, peak in enumerate(peaks_of_interest): try: - select_df = df.query(f"{self.selection_string}&{aoe_param}=={aoe_param}") + select_df = df.query( + f"{self.selection_string}&{aoe_param}=={aoe_param}" + ) fwhm = self.eres_func(peak) if peak == 2039: emin = 2 * fwhm emax = 2 * fwhm - peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) cut_df, sf, sf_err = compton_sf_sweep( peak_df[self.cal_energy_param].to_numpy(), @@ -1842,31 +2022,53 @@ def calibrate(self, df, initial_aoe_param): self.low_cut_val, peak, fwhm, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] ) - self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) - self.low_side_peak_dfs[peak]=cut_df + self.low_side_peak_dfs[peak] = cut_df else: - emin,emax = fit_widths[i] - peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + emin, emax = fit_widths[i] + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) cut_df, sf, sf_err = get_sf_sweep( peak_df[self.cal_energy_param].to_numpy(), peak_df[aoe_param].to_numpy(), self.low_cut_val, peak, fwhm, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] ) - self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) - self.low_side_peak_dfs[peak]=cut_df + self.low_side_peak_dfs[peak] = cut_df log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") except: - self.low_side_sf = pd.concat([self.low_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) - log.error(f"A/E Low side Survival fraction determination failed for {peak} peak") + self.low_side_sf = pd.concat( + [ + self.low_side_sf, + pd.DataFrame([{"peak": peak, "sf": np.nan, "sf_err": np.nan}]), + ] + ) + log.error( + f"A/E Low side Survival fraction determination failed for {peak} peak" + ) self.low_side_sf.set_index("peak", inplace=True) - - self.two_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) + self.two_side_sf = pd.DataFrame(columns=["peak", "sf", "sf_err"]) log.info("Calculating 2 sided cut sfs") for i, peak in enumerate(peaks_of_interest): fwhm = self.eres_func(peak) @@ -1874,49 +2076,84 @@ def calibrate(self, df, initial_aoe_param): if peak == 2039: emin = 2 * fwhm emax = 2 * fwhm - peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) - sf_dict = compton_sf(peak_df[aoe_param].to_numpy(), - self.low_cut_val, - self.high_cut_val, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) + sf_dict = compton_sf( + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) sf = sf_dict["sf"] sf_err = sf_dict["sf_err"] - self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, - "sf":sf, - "sf_err":sf_err}])]) + self.two_side_sf = pd.concat( + [ + self.two_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] + ) else: emin, emax = fit_widths[i] - peak_df = select_df.query(f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})") + peak_df = select_df.query( + f"({self.cal_energy_param}>{peak-emin})&({self.cal_energy_param}<{peak+emax})" + ) sf, sf_err, _, _ = get_survival_fraction( - peak_df[self.cal_energy_param].to_numpy(), - peak_df[aoe_param].to_numpy(), - self.low_cut_val, - peak, - fwhm, - high_cut=self.high_cut_val, - dt_mask=peak_df[self.dt_cut_param].to_numpy() if self.dt_cut_param is not None else None) - self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":sf, "sf_err":sf_err}])]) + peak_df[self.cal_energy_param].to_numpy(), + peak_df[aoe_param].to_numpy(), + self.low_cut_val, + peak, + fwhm, + high_cut=self.high_cut_val, + dt_mask=peak_df[self.dt_cut_param].to_numpy() + if self.dt_cut_param is not None + else None, + ) + self.two_side_sf = pd.concat( + [ + self.two_side_sf, + pd.DataFrame([{"peak": peak, "sf": sf, "sf_err": sf_err}]), + ] + ) log.info(f"{peak}keV: {sf:2.1f} +/- {sf_err:2.1f} %") except: - self.two_side_sf = pd.concat([self.two_side_sf, pd.DataFrame([{"peak":peak, "sf":np.nan, "sf_err":np.nan}])]) - log.error(f"A/E two side Survival fraction determination failed for {peak} peak") + self.two_side_sf = pd.concat( + [ + self.two_side_sf, + pd.DataFrame([{"peak": peak, "sf": np.nan, "sf_err": np.nan}]), + ] + ) + log.error( + f"A/E two side Survival fraction determination failed for {peak} peak" + ) self.two_side_sf.set_index("peak", inplace=True) -def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], fontsize=12): + +def plot_aoe_mean_time( + aoe_class, data, time_param="AoE_Timecorr", figsize=[12, 8], fontsize=12 +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, ax = plt.subplots(1, 1) try: ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.timecorr_df.index + ], aoe_class.timecorr_df["mean"], yerr=aoe_class.timecorr_df["mean_err"], linestyle=" ", ) - grouped_means = [cal_dict[time_param]["parameters"]["a"] for tstamp, cal_dict in aoe_class.cal_dicts.items()] + grouped_means = [ + cal_dict[time_param]["parameters"]["a"] + for tstamp, cal_dict in aoe_class.cal_dicts.items() + ] ax.step( [ datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") @@ -1930,10 +2167,8 @@ def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12 datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.cal_dicts ], - y1=np.array(grouped_means) - - 0.2 * np.array(aoe_class.timecorr_df["res"]), - y2=np.array(grouped_means) - + 0.2 * np.array(aoe_class.timecorr_df["res"]), + y1=np.array(grouped_means) - 0.2 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + 0.2 * np.array(aoe_class.timecorr_df["res"]), color="green", alpha=0.2, ) @@ -1942,14 +2177,13 @@ def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12 datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.cal_dicts ], - y1=np.array(grouped_means) - - 0.4 * np.array(aoe_class.timecorr_df["res"]), - y2=np.array(grouped_means) - + 0.4 * np.array(aoe_class.timecorr_df["res"]), + y1=np.array(grouped_means) - 0.4 * np.array(aoe_class.timecorr_df["res"]), + y2=np.array(grouped_means) + 0.4 * np.array(aoe_class.timecorr_df["res"]), color="yellow", alpha=0.2, ) - except:pass + except: + pass ax.set_xlabel("time") ax.set_ylabel("A/E mean") myFmt = mdates.DateFormatter("%b %d") @@ -1957,18 +2191,25 @@ def plot_aoe_mean_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12 plt.close() return fig -def plot_aoe_res_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, 8], fontsize=12): + +def plot_aoe_res_time( + aoe_class, data, time_param="AoE_Timecorr", figsize=[12, 8], fontsize=12 +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, ax = plt.subplots(1, 1) try: ax.errorbar( - [datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") for tstamp in aoe_class.timecorr_df.index], + [ + datetime.strptime(tstamp, "%Y%m%dT%H%M%SZ") + for tstamp in aoe_class.timecorr_df.index + ], aoe_class.timecorr_df["res"], yerr=aoe_class.timecorr_df["res_err"], linestyle=" ", ) - except:pass + except: + pass ax.set_xlabel("time") ax.set_ylabel("A/E res") myFmt = mdates.DateFormatter("%b %d") @@ -1976,26 +2217,34 @@ def plot_aoe_res_time(aoe_class, data, time_param = "AoE_Timecorr", figsize=[12, plt.close() return fig -def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_corr="AoE_DTcorr", - figsize=[12, 8], fontsize=12): - + +def drifttime_corr_plot( + aoe_class, + data, + aoe_param="AoE_Timecorr", + aoe_param_corr="AoE_DTcorr", + figsize=[12, 8], + fontsize=12, +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig = plt.figure() try: - - dep_events = data.query(f"{aoe_class.fit_selection}&{aoe_class.cal_energy_param}>1582&{aoe_class.cal_energy_param}<1602&{aoe_class.cal_energy_param}=={aoe_class.cal_energy_param}&{aoe_param}=={aoe_param}") - final_df = dep_events.query(aoe_class.dt_res_dict['final_selection']) - - + dep_events = data.query( + f"{aoe_class.fit_selection}&{aoe_class.cal_energy_param}>1582&{aoe_class.cal_energy_param}<1602&{aoe_class.cal_energy_param}=={aoe_class.cal_energy_param}&{aoe_param}=={aoe_param}" + ) + final_df = dep_events.query(aoe_class.dt_res_dict["final_selection"]) + plt.subplot(2, 2, 1) - aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["pars"] - + aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["parameters"] + xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 100) counts, aoe_bins, bars = plt.hist( - final_df.query(f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}')[aoe_param], + final_df.query( + f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}' + )[aoe_param], bins=400, histtype="step", label="data", @@ -2008,12 +2257,14 @@ def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_c plt.legend(loc="upper left") plt.xlabel("A/E") plt.ylabel("counts") - - aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["pars"] + + aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["parameters"] plt.subplot(2, 2, 2) xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 100) counts, aoe_bins2, bars = plt.hist( - final_df.query(f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}')[aoe_param], + final_df.query( + f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}' + )[aoe_param], bins=400, histtype="step", label="Data", @@ -2026,24 +2277,33 @@ def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_c plt.legend(loc="upper left") plt.xlabel("A/E") plt.ylabel("counts") - + hist, bins, var = pgh.get_hist( - final_df[aoe_class.dt_param], dx=10, range=(np.nanmin(final_df[aoe_class.dt_param]), - np.nanmax(final_df[aoe_class.dt_param])) + final_df[aoe_class.dt_param], + dx=10, + range=( + np.nanmin(final_df[aoe_class.dt_param]), + np.nanmax(final_df[aoe_class.dt_param]), + ), ) - + plt.subplot(2, 2, 3) plt.step(pgh.get_bin_centers(bins), hist, label="data") plt.plot( pgh.get_bin_centers(bins), - drift_time_distribution.pdf(pgh.get_bin_centers(bins), - **aoe_class.dt_res_dict['dt_guess']) * np.diff(bins)[0], + drift_time_distribution.pdf( + pgh.get_bin_centers(bins), **aoe_class.dt_res_dict["dt_guess"] + ) + * np.diff(bins)[0], label="Guess", ) plt.plot( pgh.get_bin_centers(bins), - drift_time_distribution.pdf(pgh.get_bin_centers(bins), - *aoe_class.dt_res_dict["dt_fit"]["pars"]) * np.diff(bins)[0], + drift_time_distribution.pdf( + pgh.get_bin_centers(bins), + *aoe_class.dt_res_dict["dt_fit"]["parameters"], + ) + * np.diff(bins)[0], label="fit", ) plt.xlabel("drift time (ns)") @@ -2057,41 +2317,49 @@ def drifttime_corr_plot(aoe_class, data, aoe_param = "AoE_Timecorr", aoe_param_c 200, ) plt.hist(final_df[aoe_param], bins=bins, histtype="step", label="uncorrected") - plt.hist(final_df[aoe_param_corr], bins=bins, histtype="step", label="corrected") + plt.hist( + final_df[aoe_param_corr], bins=bins, histtype="step", label="corrected" + ) plt.xlabel("A/E") plt.ylabel("counts") plt.legend(loc="upper left") plt.tight_layout() - plt.xlim( - bins[0], bins[-1] - ) - except:pass + plt.xlim(bins[0], bins[-1]) + except: + pass plt.close() return fig -def plot_compt_bands_overlayed(aoe_class, - data, - eranges: list[tuple], - aoe_param = "AoE_Timecorr", - aoe_range: list[float] = None, - title= "Compton Bands", - density=True, - n_bins=50, - figsize=[12, 8], fontsize=12 - ) -> None: + +def plot_compt_bands_overlayed( + aoe_class, + data, + eranges: list[tuple], + aoe_param="AoE_Timecorr", + aoe_range: list[float] = None, + title="Compton Bands", + density=True, + n_bins=50, + figsize=[12, 8], + fontsize=12, +) -> None: """ Function to plot various compton bands to check energy dependence and corrections """ plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - + fig = plt.figure() - + for erange in eranges: try: - select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_param}=={aoe_param}') + select_df = data.query( + f"{aoe_class.selection_string}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_param}=={aoe_param}" + ) if aoe_range is not None: - select_df = select_df.query(f'{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}') + select_df = select_df.query( + f"{aoe_param}>{aoe_range[0]}&{aoe_param}<{aoe_range[1]}" + ) bins = np.linspace(aoe_range[0], aoe_range[1], n_bins) else: bins = np.linspace(0.85, 1.05, n_bins) @@ -2102,7 +2370,8 @@ def plot_compt_bands_overlayed(aoe_class, label=f"{erange[0]}-{erange[1]}", density=density, ) - except:pass + except: + pass plt.ylabel("counts") plt.xlabel(aoe_param) plt.title(title) @@ -2110,54 +2379,60 @@ def plot_compt_bands_overlayed(aoe_class, plt.close() return fig -def plot_dt_dep(aoe_class, - data, - eranges: list[tuple], - titles:list=None, - aoe_param = "AoE_Timecorr", - bins=[200, 100], - dt_max = 2000, - figsize=[12, 8], fontsize=12 - ) -> None: + +def plot_dt_dep( + aoe_class, + data, + eranges: list[tuple], + titles: list = None, + aoe_param="AoE_Timecorr", + bins=[200, 100], + dt_max=2000, + figsize=[12, 8], + fontsize=12, +) -> None: """ Function to produce 2d histograms of A/E against drift time to check dependencies """ plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize - + fig = plt.figure() - for i,erange in enumerate(eranges): + for i, erange in enumerate(eranges): try: - plt.subplot(3, 2, i+1) - select_df = data.query(f'{aoe_class.selection_string}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_param}=={aoe_param}') - - hist, bs, var = pgh.get_hist( - select_df[aoe_param], bins=500 + plt.subplot(3, 2, i + 1) + select_df = data.query( + f"{aoe_class.selection_string}&{aoe_class.cal_energy_param}<{erange[1]}&{aoe_class.cal_energy_param}>{erange[0]}&{aoe_param}=={aoe_param}" ) + + hist, bs, var = pgh.get_hist(select_df[aoe_param], bins=500) bin_cs = (bs[1:] + bs[:-1]) / 2 mu = bin_cs[np.argmax(hist)] aoe_range = [mu * 0.9, mu * 1.1] - - final_df = select_df.query(f'{aoe_param}<{aoe_range[1]}&{aoe_param}>{aoe_range[0]}&{aoe_class.dt_param}<{dt_max}') - plt.hist2d(final_df[aoe_param], final_df[aoe_class.dt_param], - bins=bins, norm=LogNorm()) + final_df = select_df.query( + f"{aoe_param}<{aoe_range[1]}&{aoe_param}>{aoe_range[0]}&{aoe_class.dt_param}<{dt_max}" + ) + plt.hist2d( + final_df[aoe_param], + final_df[aoe_class.dt_param], + bins=bins, + norm=LogNorm(), + ) plt.ylabel("drift time (ns)") plt.xlabel("A/E") if titles is None: - plt.title(f'{erange[0]}-{erange[1]}') + plt.title(f"{erange[0]}-{erange[1]}") else: plt.title(titles[i]) - except:pass + except: + pass plt.tight_layout() plt.close() return fig -def plot_mean_fit(aoe_class, - data, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: +def plot_mean_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) @@ -2166,18 +2441,23 @@ def plot_mean_fit(aoe_class, aoe_class.energy_corr_fits.index, aoe_class.energy_corr_fits["mean"], yerr=aoe_class.energy_corr_fits["mean_err"], - xerr=aoe_class.comptBands_width/2, + xerr=aoe_class.comptBands_width / 2, label="data", linestyle=" ", ) - ax1.plot(aoe_class.energy_corr_fits.index, - aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), label="linear model") + ax1.plot( + aoe_class.energy_corr_fits.index, + aoe_class.mean_func.func( + aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + ), + label="linear model", + ) ax1.errorbar( 1592, - aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'], - yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['mu'], + aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainties"]["mu"], label="DEP", color="green", linestyle=" ", @@ -2188,42 +2468,60 @@ def plot_mean_fit(aoe_class, ax1.set_ylabel("raw A/E (a.u.)", ha="right", y=1) ax2.scatter( aoe_class.energy_corr_fits.index, - 100 * (aoe_class.energy_corr_fits["mean"] - aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(aoe_class.energy_corr_fits.index, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), + 100 + * ( + aoe_class.energy_corr_fits["mean"] + - aoe_class.mean_func.func( + aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + ) + ) + / aoe_class.mean_func.func( + aoe_class.energy_corr_fits.index, + **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + ), lw=1, c="b", ) ax2.scatter( 1592, - 100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['mu'] - aoe_class.mean_func.func(1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"])) / aoe_class.mean_func.func(1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"]), + 100 + * ( + aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"] + - aoe_class.mean_func.func( + 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"] + ) + ) + / aoe_class.mean_func.func( + 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"] + ), lw=1, c="g", ) - except:pass + except: + pass ax2.set_ylabel("residuals %", ha="right", y=1) ax2.set_xlabel("energy (keV)", ha="right", x=1) plt.tight_layout() plt.close() return fig -def plot_sigma_fit(aoe_class, - data, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: +def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True) try: ax1.errorbar( aoe_class.energy_corr_fits.index, aoe_class.energy_corr_fits["sigma"], yerr=aoe_class.energy_corr_fits["sigma_err"], - xerr=aoe_class.comptBands_width/2, + xerr=aoe_class.comptBands_width / 2, label="data", linestyle=" ", ) - sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["pars"] + sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["parameters"] if aoe_class.sigma_func == sigma_fit: label = f'sqrt model: \nsqrt({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.1f}/E)^{sig_pars["c"]:1.1f})' elif aoe_class.sigma_func == sigma_fit_quadratic: @@ -2232,13 +2530,13 @@ def plot_sigma_fit(aoe_class, raise ValueError("unknown sigma function") ax1.plot( aoe_class.energy_corr_fits.index, - aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index,**sig_pars), + aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars), label=label, ) ax1.errorbar( 1592, - aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'], - yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]['sigma'], + aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainies"]["sigma"], label="DEP", color="green", linestyle=" ", @@ -2247,29 +2545,40 @@ def plot_sigma_fit(aoe_class, ax1.legend(title="A/E stdev energy dependence", frameon=False) ax2.scatter( aoe_class.energy_corr_fits.index, - 100 * (aoe_class.energy_corr_fits["sigma"] - aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars)) / aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars), + 100 + * ( + aoe_class.energy_corr_fits["sigma"] + - aoe_class.sigma_func.func( + aoe_class.energy_corr_fits.index, **sig_pars + ) + ) + / aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars), lw=1, c="b", ) ax2.scatter( 1592, - 100 * (aoe_class.energy_corr_res_dict["dep_fit"]["pars"]['sigma'] - aoe_class.sigma_func.func(1592, **sig_pars)) / aoe_class.sigma_func.func(1592, **sig_pars), + 100 + * ( + aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"] + - aoe_class.sigma_func.func(1592, **sig_pars) + ) + / aoe_class.sigma_func.func(1592, **sig_pars), lw=1, c="g", ) - except:pass + except: + pass ax2.set_ylabel("residuals", ha="right", y=1) ax2.set_xlabel("energy (keV)", ha="right", x=1) plt.tight_layout() plt.close() return fig - -def plot_cut_fit(aoe_class, - data, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: + + +def plot_cut_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize + plt.rcParams["font.size"] = fontsize fig = plt.figure() try: plt.errorbar( @@ -2279,9 +2588,19 @@ def plot_cut_fit(aoe_class, linestyle=" ", ) - plt.plot(aoe_class.cut_fits.index, sigmoid_fit.func(aoe_class.cut_fits.index.to_numpy(), - **aoe_class.cut_fit["pars"])) - plt.hlines((100 * aoe_class.dep_acc), -8.1, aoe_class.low_cut_val, color="red", linestyle="--") + plt.plot( + aoe_class.cut_fits.index, + sigmoid_fit.func( + aoe_class.cut_fits.index.to_numpy(), **aoe_class.cut_fit["parameters"] + ), + ) + plt.hlines( + (100 * aoe_class.dep_acc), + -8.1, + aoe_class.low_cut_val, + color="red", + linestyle="--", + ) plt.vlines( aoe_class.low_cut_val, np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, @@ -2291,26 +2610,31 @@ def plot_cut_fit(aoe_class, ) plt.xlim([-8.1, 0.1]) vals, labels = plt.yticks() - plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) plt.ylim([np.nanmin(aoe_class.cut_fits["sf"]) * 0.9, 102]) - except:pass + except: + pass plt.xlabel("cut value") plt.ylabel("survival percentage") plt.close() return fig -def plot_survival_fraction_curves(aoe_class, - data, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_survival_fraction_curves( + aoe_class, data, figsize=[12, 8], fontsize=12 +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: - plt.vlines(aoe_class.low_cut_val, 0, 100, label=f"cut value: {aoe_class.low_cut_val:1.2f}", color="black") - + plt.vlines( + aoe_class.low_cut_val, + 0, + 100, + label=f"cut value: {aoe_class.low_cut_val:1.2f}", + color="black", + ) for peak, survival_df in aoe_class.low_side_peak_dfs.items(): try: @@ -2318,12 +2642,14 @@ def plot_survival_fraction_curves(aoe_class, survival_df.index, survival_df["sf"], yerr=survival_df["sf_err"], - label=f'{get_peak_label(peak)} {peak} keV: {aoe_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {aoe_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %' + label=f'{get_peak_label(peak)} {peak} keV: {aoe_class.low_side_sf.loc[peak]["sf"]:2.1f} +/- {aoe_class.low_side_sf.loc[peak]["sf_err"]:2.1f} %', ) - except:pass - except:pass + except: + pass + except: + pass vals, labels = plt.yticks() - plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) plt.legend(loc="upper right") plt.xlabel("cut value") plt.ylabel("survival percentage") @@ -2331,18 +2657,20 @@ def plot_survival_fraction_curves(aoe_class, plt.close() return fig -def plot_spectra(aoe_class, - data, - xrange=(900, 3000), - n_bins=2101, - xrange_inset = (1580, 1640), - n_bins_inset = 200, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_spectra( + aoe_class, + data, + xrange=(900, 3000), + n_bins=2101, + xrange_inset=(1580, 1640), + n_bins_inset=200, + figsize=[12, 8], + fontsize=12, +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig, ax = plt.subplots() try: bins = np.linspace(xrange[0], xrange[1], n_bins) @@ -2353,19 +2681,25 @@ def plot_spectra(aoe_class, label="before PSD", ) ax.hist( - data.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + data.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", label="low side PSD cut", ) ax.hist( - data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", label="double sided PSD cut", ) ax.hist( - data.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + data.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", label="rejected by PSD cut", @@ -2373,28 +2707,37 @@ def plot_spectra(aoe_class, axins = ax.inset_axes([0.25, 0.07, 0.4, 0.3]) bins = np.linspace(xrange_inset[0], xrange_inset[1], n_bins_inset) - select_df = data.query(f"{aoe_class.cal_energy_param}<{xrange_inset[1]}&{aoe_class.cal_energy_param}>{xrange_inset[0]}") + select_df = data.query( + f"{aoe_class.cal_energy_param}<{xrange_inset[1]}&{aoe_class.cal_energy_param}>{xrange_inset[0]}" + ) axins.hist( select_df.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins, histtype="step", ) axins.hist( - select_df.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[aoe_class.cal_energy_param], + select_df.query(f"{aoe_class.selection_string}&AoE_Low_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", ) axins.hist( - select_df.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + select_df.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", ) axins.hist( - select_df.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[aoe_class.cal_energy_param], + select_df.query(f"{aoe_class.selection_string} & (~AoE_Double_Sided_Cut)")[ + aoe_class.cal_energy_param + ], bins=bins, histtype="step", ) - except:pass + except: + pass ax.set_xlim(xrange) ax.set_yscale("log") plt.xlabel("energy (keV)") @@ -2403,58 +2746,67 @@ def plot_spectra(aoe_class, plt.close() return fig -def plot_sf_vs_energy(aoe_class, - data, - xrange = (900, 3000), - n_bins=701, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_sf_vs_energy( + aoe_class, data, xrange=(900, 3000), n_bins=701, figsize=[12, 8], fontsize=12 +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: bins = np.linspace(xrange[0], xrange[1], n_bins) counts_pass, bins_pass, _ = pgh.get_hist( - data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[aoe_class.cal_energy_param], + data.query(f"{aoe_class.selection_string}&AoE_Double_Sided_Cut")[ + aoe_class.cal_energy_param + ], + bins=bins, + ) + counts, bins, _ = pgh.get_hist( + data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins, ) - counts, bins, _ = pgh.get_hist(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], bins=bins) survival_fracs = counts_pass / (counts + 10**-99) - plt.step(pgh.get_bin_centers(bins_pass), 100*survival_fracs) - except:pass + plt.step(pgh.get_bin_centers(bins_pass), 100 * survival_fracs) + except: + pass plt.ylim([0, 100]) vals, labels = plt.yticks() - plt.yticks(vals, [f'{x:,.0f} %' for x in vals]) + plt.yticks(vals, [f"{x:,.0f} %" for x in vals]) plt.xlabel("energy (keV)") plt.ylabel("survival percentage") plt.close() return fig -def plot_classifier(aoe_class, - data, - aoe_param="AoE_Classifier", - xrange = (900, 3000), - yrange=(-50,10), - xn_bins=700, - yn_bins=500, - figsize=[12, 8], fontsize=12 - ) -> plt.figure: - + +def plot_classifier( + aoe_class, + data, + aoe_param="AoE_Classifier", + xrange=(900, 3000), + yrange=(-50, 10), + xn_bins=700, + yn_bins=500, + figsize=[12, 8], + fontsize=12, +) -> plt.figure: plt.rcParams["figure.figsize"] = figsize - plt.rcParams["font.size"] = fontsize - + plt.rcParams["font.size"] = fontsize + fig = plt.figure() try: - plt.hist2d(data.query(aoe_class.selection_string)[aoe_class.cal_energy_param] , - data.query(aoe_class.selection_string)[aoe_param], - bins=[np.linspace(xrange[0], xrange[1], xn_bins), - np.linspace(yrange[0], yrange[1], yn_bins)], - norm=LogNorm() - ) - except:pass + plt.hist2d( + data.query(aoe_class.selection_string)[aoe_class.cal_energy_param], + data.query(aoe_class.selection_string)[aoe_param], + bins=[ + np.linspace(xrange[0], xrange[1], xn_bins), + np.linspace(yrange[0], yrange[1], yn_bins), + ], + norm=LogNorm(), + ) + except: + pass plt.xlabel("energy (keV)") plt.ylabel(aoe_param) plt.xlim(xrange) @@ -2462,27 +2814,29 @@ def plot_classifier(aoe_class, plt.close() return fig -def aoe_calibration(files, - lh5_path:str, - cal_dicts: dict, - current_param:str, - energy_param:str, - cal_energy_param: str, - eres_func: Callable, - pdf:Callable=standard_aoe, - cut_field:str = "is_valid_cal", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict = None, - high_cut_val: int = 3, - mean_func:Callable=pol1, - sigma_func:Callable=sigma_fit, - dep_acc:float = 0.9, - dt_param:str = "dt_eff", - comptBands_width:int=20, - plot_options:dict={}, - threshold:int=800 - ): + +def aoe_calibration( + files, + lh5_path: str, + cal_dicts: dict, + current_param: str, + energy_param: str, + cal_energy_param: str, + eres_func: Callable, + pdf: Callable = standard_aoe, + cut_field: str = "is_valid_cal", + dt_corr: bool = False, + dep_correct: bool = False, + dt_cut: dict = None, + high_cut_val: int = 3, + mean_func: Callable = pol1, + sigma_func: Callable = sigma_fit, + dep_acc: float = 0.9, + dt_param: str = "dt_eff", + comptBands_width: int = 20, + plot_options: dict = {}, + threshold: int = 800, +): params = [ current_param, "tp_0_est", @@ -2491,46 +2845,43 @@ def aoe_calibration(files, energy_param, cal_energy_param, cut_field, - ] + ] - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - f"{cut_field}&is_not_pulser", - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options - ) + aoe = cal_aoe( + cal_dicts, + cal_energy_param, + eres_func, + pdf, + f"{cut_field}&is_not_pulser", + dt_corr, + dep_acc, + dep_correct, + dt_cut, + dt_param, + high_cut_val, + mean_func, + sigma_func, + comptBands_width, + plot_options, + ) if dt_cut is not None: params.append(dt_cut["out_param"]) data = load_data( - files, - lh5_path, - aoe.cal_dicts, - params, - cal_energy_param, - threshold - ) + files, lh5_path, aoe.cal_dicts, params, cal_energy_param, threshold + ) data["AoE_Uncorr"] = np.divide(data[current_param], data[energy_param]) - - aoe.update_cal_dicts({"AoE_Uncorr": - {"expression":f"{current_param}/{energy_param}", - "parameters":{} - }} - ) + aoe.update_cal_dicts( + { + "AoE_Uncorr": { + "expression": f"{current_param}/{energy_param}", + "parameters": {}, + } + } + ) aoe.calibrate(data, "AoE_Uncorr") log.info(f"Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe \ No newline at end of file + return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 578d44867..acc8ed77c 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -89,32 +89,34 @@ def gen_pars_dict(pars, deg, energy_param): return out_dict + class fwhm_linear: - def func(x,a,b): + def func(x, a, b): return np.sqrt(a + b * x) - + def string_func(input_param): return f"(a+b*{input_param})**(0.5)" - + def guess(xs, ys, y_errs): - return [np.nanmin(ys), 10**-3] - + return [np.nanmin(ys), 10**-3] + def bounds(): - return [(0,None),(0,None)] - + return [(0, None), (0, None)] + + class fwhm_quadratic: - def func(x, a, b, c): - return np.sqrt(a + b * x + c*x**2) - + return np.sqrt(a + b * x + c * x**2) + def string_func(input_param): return f"(a+b*{input_param}+c*{input_param}**2)**(0.5)" - + def guess(xs, ys, y_errs): - return [np.nanmin(ys), 10**-3, 10**-5] - + return [np.nanmin(ys), 10**-3, 10**-5] + def bounds(): - return [(0,None),(0,None),(0,None)] + return [(0, None), (0, None), (0, None)] + class calibrate_parameter: glines = [ @@ -132,8 +134,8 @@ class calibrate_parameter: (20, 20), (30, 30), (30, 30), - (40, 25), - (25, 40), + (40, 20), + (20, 40), (40, 40), (60, 60), ] # side bands width @@ -161,7 +163,7 @@ class calibrate_parameter: def __init__( self, energy_param, - selection_string = "is_usable", + selection_string="is_usable", plot_options: dict = None, guess_keV: float | None = None, threshold: int = 0, @@ -169,7 +171,7 @@ def __init__( n_events: int = None, simplex: bool = True, deg: int = 1, - cal_energy_param:str = None + cal_energy_param: str = None, ): self.energy_param = energy_param if cal_energy_param is None: @@ -228,7 +230,7 @@ def fit_energy_res(self): try: if 2614.50 not in fwhm_peaks: raise RuntimeError - + c_lin = cost.LeastSquares( fwhm_peaks, fit_fwhms, fit_dfwhms, fwhm_linear.func ) @@ -240,57 +242,61 @@ def fit_energy_res(self): m_lin.hesse() rng = np.random.default_rng(1) - pars_b = rng.multivariate_normal(m_lin.values, - m_lin.covariance, size=1000) + pars_b = rng.multivariate_normal(m_lin.values, m_lin.covariance, size=1000) fits = np.array([fwhm_linear.func(fwhm_peaks, *par_b) for par_b in pars_b]) qbb_vals = np.array([fwhm_linear.func(2039.0, *par_b) for par_b in pars_b]) qbb_err = np.nanstd(qbb_vals) predicted_fwhms = fwhm_linear.func(fwhm_peaks, *m_lin.values) fit_qbb = fwhm_linear.func(2039.0, *m_lin.values) - - p_val = scipy.stats.chi2.sf(m_lin.fval, len(fwhm_peaks)-len(m_lin.values)) - - self.fwhm_fit_linear = {"function":fwhm_linear.__name__, - "module":fwhm_linear.__module__, - "expression":fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)":qbb_err, - "pars":m_lin.values, - "errors":m_lin.errors, - "cov":m_lin.covariance, - "csqr": (m_lin.fval, len(fwhm_peaks)-len(m_lin.values)), - "p_val":p_val} - - + + p_val = scipy.stats.chi2.sf(m_lin.fval, len(fwhm_peaks) - len(m_lin.values)) + + self.fwhm_fit_linear = { + "function": fwhm_linear.__name__, + "module": fwhm_linear.__module__, + "expression": fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)": qbb_err, + "parameters": m_lin.values, + "uncertainties": m_lin.errors, + "cov": m_lin.covariance, + "csqr": (m_lin.fval, len(fwhm_peaks) - len(m_lin.values)), + "p_val": p_val, + } + log.info(f'FWHM linear fit: {self.fwhm_fit_linear["pars"].to_dict()}') log.info(f"FWHM fit values:") log.info(f"\t Energy | FWHM (keV) | Predicted (keV)") for i, (peak, fwhm, fwhme) in enumerate( - zip(fwhm_peaks, fit_fwhms, fit_dfwhms) - ): - log.info( - f"\t{i}".ljust(4) - + str(peak).ljust(9) - + f"| {fwhm:.2f}+-{fwhme:.2f} ".ljust(5) - +f"| {fwhm_linear.func(peak, *self.fwhm_fit_linear['pars']):.2f}".ljust(5) - ) - + zip(fwhm_peaks, fit_fwhms, fit_dfwhms) + ): + log.info( + f"\t{i}".ljust(4) + + str(peak).ljust(9) + + f"| {fwhm:.2f}+-{fwhme:.2f} ".ljust(5) + + f"| {fwhm_linear.func(peak, *self.fwhm_fit_linear['parameters']):.2f}".ljust( + 5 + ) + ) + log.info( f"FWHM energy resolution at Qbb (linear fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" - ) + ) except RuntimeError: log.error(f"FWHM linear fit failed for {self.energy_param}") pars, errs, cov = return_nans(fwhm_linear.func) - self.fwhm_fit_linear = {"function":fwhm_linear.__name__, - "module":fwhm_linear.__module__, - "expression":fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)":np.nan, - "pars":pars, - "errors":errs, - "cov":cov, - "csqr":(np.nan, np.nan), - "p_val":0} + self.fwhm_fit_linear = { + "function": fwhm_linear.__name__, + "module": fwhm_linear.__module__, + "expression": fwhm_linear.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)": np.nan, + "parameters": pars, + "uncertainties": errs, + "cov": cov, + "csqr": (np.nan, np.nan), + "p_val": 0, + } log.error("FWHM linear fit failed to converge") try: if 2614.50 not in fwhm_peaks: @@ -299,51 +305,65 @@ def fit_energy_res(self): fwhm_peaks, fit_fwhms, fit_dfwhms, fwhm_quadratic.func ) c_quad.loss = "soft_l1" - m_quad = Minuit(c_quad, *fwhm_quadratic.guess(fwhm_peaks, fit_fwhms, fit_dfwhms)) + m_quad = Minuit( + c_quad, *fwhm_quadratic.guess(fwhm_peaks, fit_fwhms, fit_dfwhms) + ) m_quad.limits = fwhm_quadratic.bounds() m_quad.simplex() m_quad.migrad() m_quad.hesse() - + rng = np.random.default_rng(1) - pars_b = rng.multivariate_normal(m_quad.values, - m_quad.covariance, size=1000) - fits = np.array([fwhm_quadratic.func(fwhm_peaks, *par_b) for par_b in pars_b]) - qbb_vals = np.array([fwhm_quadratic.func(2039.0, *par_b) for par_b in pars_b]) + pars_b = rng.multivariate_normal( + m_quad.values, m_quad.covariance, size=1000 + ) + fits = np.array( + [fwhm_quadratic.func(fwhm_peaks, *par_b) for par_b in pars_b] + ) + qbb_vals = np.array( + [fwhm_quadratic.func(2039.0, *par_b) for par_b in pars_b] + ) qbb_err = np.nanstd(qbb_vals) predicted_fwhms = fwhm_quadratic.func(fwhm_peaks, *m_quad.values) fit_qbb = fwhm_quadratic.func(2039.0, *m_quad.values) - - p_val = scipy.stats.chi2.sf(m_quad.fval, len(fwhm_peaks)-len(m_quad.values)) - - self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, - "module":fwhm_quadratic.__module__, - "expression":fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)":qbb_err, - "pars":m_quad.values, - "errors":m_quad.errors, - "cov":m_quad.covariance, - "csqr": (m_quad.fval, len(fwhm_peaks)-len(m_quad.values)), - "p_val":p_val - } - log.info(f'FWHM quadratic fit: {self.fwhm_fit_quadratic["pars"].to_dict()}') + + p_val = scipy.stats.chi2.sf( + m_quad.fval, len(fwhm_peaks) - len(m_quad.values) + ) + + self.fwhm_fit_quadratic = { + "function": fwhm_quadratic.__name__, + "module": fwhm_quadratic.__module__, + "expression": fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": fit_qbb, + "Qbb_fwhm_err(keV)": qbb_err, + "parameters": m_quad.values, + "uncertainties": m_quad.errors, + "cov": m_quad.covariance, + "csqr": (m_quad.fval, len(fwhm_peaks) - len(m_quad.values)), + "p_val": p_val, + } + log.info( + f'FWHM quadratic fit: {self.fwhm_fit_quadratic["parameters"].to_dict()}' + ) log.info( f"FWHM energy resolution at Qbb (quadratic fit): {fit_qbb:1.2f} +- {qbb_err:1.2f} keV" - ) + ) except RuntimeError: log.error(f"FWHM quadratic fit failed for {self.energy_param}") pars, errs, cov = return_nans(fwhm_quadratic.func) - self.fwhm_fit_quadratic = {"function":fwhm_quadratic.__name__, - "module":fwhm_quadratic.__module__, - "expression":fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)":np.nan, - "pars":pars, - "errors":errs, - "cov":cov, - "csqr":(np.nan, np.nan), - "p_val":0} + self.fwhm_fit_quadratic = { + "function": fwhm_quadratic.__name__, + "module": fwhm_quadratic.__module__, + "expression": fwhm_quadratic.string_func("x"), + "Qbb_fwhm(keV)": np.nan, + "Qbb_fwhm_err(keV)": np.nan, + "parameters": pars, + "uncertainties": errs, + "cov": cov, + "csqr": (np.nan, np.nan), + "p_val": 0, + } log.error("FWHM quadratic fit failed to converge") def gen_pars_dict(self): @@ -373,63 +393,69 @@ def get_results_dict(self, data): return {} else: fwhm_linear = self.fwhm_fit_linear.copy() - fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() - fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() fwhm_linear["cov"] = fwhm_linear["cov"].tolist() fwhm_quad = self.fwhm_fit_quadratic.copy() - fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() - fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - pk_dict = {Ei:{"function":func_i.__name__, - "module":func_i.__module__, - "pars(uncal)":parsi.to_dict(), - "errs(uncal)":errorsi.to_dict(), - "p_val": pvali, - "fwhm (keV)": list(fwhmi)} - for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( - zip(self.results["fitted_keV"], - self.results["pk_pars"][self.results["pk_validities"]], - self.results["pk_errors"][self.results["pk_validities"]], - self.results["pk_pvals"][self.results["pk_validities"]], - self.results["pk_fwhms"], - self.funcs) - )} + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_ADC": parsi.to_dict(), + "uncertainties_in_ADC": errorsi.to_dict(), + "p_val": pvali, + "fwhm_in_keV": list(fwhmi), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip( + self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs, + ) + ) + } return { - "total_fep": len( - data.query( - f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624" - ) - ), - "total_dep": len( - data.query( - f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597" - ) - ), - "pass_fep": len( - data.query( - f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624&{self.selection_string}" - ) - ), - "pass_dep": len( - data.query( - f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597&{self.selection_string}" - ) - ), - "eres_linear": fwhm_linear, - "eres_quadratic":fwhm_quad, - "fitted_peaks": self.results["fitted_keV"].tolist(), - "pk_fits":pk_dict - } + "total_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624" + ) + ), + "total_dep": len( + data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597" + ) + ), + "pass_fep": len( + data.query( + f"{self.cal_energy_param}>2604&{self.cal_energy_param}<2624&{self.selection_string}" + ) + ), + "pass_dep": len( + data.query( + f"{self.cal_energy_param}>1587&{self.cal_energy_param}<1597&{self.selection_string}" + ) + ), + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, + } def calibrate_parameter(self, data): kev_ranges = self.range_keV.copy() if self.guess_keV is None: self.guess_keV = 2620 / np.nanpercentile( - data.query(f"{self.selection_string} & {self.energy_param}>{self.threshold}")[ - self.energy_param - ], + data.query( + f"{self.selection_string} & {self.energy_param}>{self.threshold}" + )[self.energy_param], 99, ) @@ -457,8 +483,8 @@ def calibrate_parameter(self, data): if self.pars is None: raise ValueError - for i, peak in enumerate(self.results["got_peaks_keV"]): - idx = np.where(peak ==self.glines)[0][0] + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak == self.glines)[0][0] self.funcs[idx] = fitted_funcs[i] if fitted_funcs[i] == pgf.extended_radford_pdf: self.gof_funcs[idx] = pgf.radford_pdf @@ -469,7 +495,10 @@ def calibrate_parameter(self, data): fitted_peaks = np.array([]) fitted_funcs = np.array([]) - if len(fitted_peaks) != len(self.glines) or self.gof_funcs[-1]==pgf.gauss_step_pdf: + if ( + len(fitted_peaks) != len(self.glines) + or self.gof_funcs[-1] == pgf.gauss_step_pdf + ): if self.glines[-1] in fitted_peaks: if fitted_funcs[-1] == pgf.extended_gauss_step_pdf: self.funcs = [pgf.extended_gauss_step_pdf for entry in self.glines] @@ -489,7 +518,10 @@ def calibrate_parameter(self, data): > 0.05 ): index = np.where(self.glines == peak)[0][0] - kev_ranges[i] = (kev_ranges[index][0] - 5, kev_ranges[index][1] - 5) + kev_ranges[i] = ( + kev_ranges[index][0] - 5, + kev_ranges[index][1] - 5, + ) except: pass @@ -513,8 +545,8 @@ def calibrate_parameter(self, data): log.debug("Calibrated found") log.info(f"Calibration pars are {self.pars}") - for i, peak in enumerate(self.results["got_peaks_keV"]): - idx = np.where(peak ==self.glines)[0][0] + for i, peak in enumerate(self.results["got_peaks_keV"]): + idx = np.where(peak == self.glines)[0][0] self.funcs[idx] = fitted_funcs[i] if fitted_funcs[i] == pgf.extended_radford_pdf: self.gof_funcs[idx] = pgf.radford_pdf @@ -522,23 +554,19 @@ def calibrate_parameter(self, data): self.gof_funcs[idx] = pgf.gauss_step_pdf if self.pars is None: raise ValueError - + except: self.pars = np.full(self.deg + 1, np.nan) self.results = None - - log.error( - f"Calibration failed completely for {self.energy_param}" - ) + + log.error(f"Calibration failed completely for {self.energy_param}") else: log.debug("Calibrated found") log.info(f"Calibration pars are {self.pars}") if ~np.isnan(self.pars).all(): self.fit_energy_res() self.hit_dict[self.cal_energy_param] = self.gen_pars_dict() - data[f"{self.energy_param}_cal"] = pgf.poly( - data[self.energy_param], self.pars - ) + data[f"{self.energy_param}_cal"] = pgf.poly(data[self.energy_param], self.pars) def fill_plot_dict(self, data, plot_dict={}): for key, item in self.plot_options.items(): @@ -549,7 +577,6 @@ def fill_plot_dict(self, data, plot_dict={}): return plot_dict - class high_stats_fitting(calibrate_parameter): glines = [ 238.632, @@ -568,11 +595,11 @@ class high_stats_fitting(calibrate_parameter): 2614.50, 3125, 3198, - 3474 - ] # gamma lines used for calibration + 3474, + ] # gamma lines used for calibration range_keV = [ (10, 10), - (30,30), + (30, 30), (30, 30), (30, 30), (30, 15), @@ -588,9 +615,28 @@ class high_stats_fitting(calibrate_parameter): (30, 30), (30, 30), (30, 30), - ] # side bands width + ] # side bands width + binning = [ + 0.02, + 0.02, + 0.02, + 0.02, + 0.2, + 0.2, + 0.02, + 0.2, + 0.2, + 0.2, + 0.1, + 0.1, + 0.1, + 0.02, + 0.2, + 0.2, + 0.2, + ] funcs = [ - pgf.extended_gauss_step_pdf, #probably should be gauss on exp + pgf.extended_gauss_step_pdf, # probably should be gauss on exp pgf.extended_gauss_step_pdf, pgf.extended_radford_pdf, pgf.extended_radford_pdf, @@ -629,8 +675,16 @@ class high_stats_fitting(calibrate_parameter): pgf.gauss_step_pdf, ] - def __init__(self, energy_param, selection_string, threshold, p_val, - plot_options={}, simplex=False): + def __init__( + self, + energy_param, + selection_string, + threshold, + p_val, + plot_options={}, + simplex=False, + tail_weight=20, + ): self.energy_param = energy_param self.cal_energy_param = energy_param self.selection_string = selection_string @@ -640,69 +694,87 @@ def __init__(self, energy_param, selection_string, threshold, p_val, self.simplex = simplex self.results = {} self.plot_dict = {} - self.n_events=None + self.n_events = None self.output_dict = {} - self.pars=[1,0] - + self.pars = [1, 0] + self.tail_weight = tail_weight + def get_results_dict(self, data): if self.results: fwhm_linear = self.fwhm_fit_linear.copy() - fwhm_linear["pars"] = fwhm_linear['pars'].to_dict() - fwhm_linear["errors"] = fwhm_linear['errors'].to_dict() + fwhm_linear["parameters"] = fwhm_linear["parameters"].to_dict() + fwhm_linear["uncertainties"] = fwhm_linear["uncertainties"].to_dict() fwhm_linear["cov"] = fwhm_linear["cov"].tolist() fwhm_quad = self.fwhm_fit_quadratic.copy() - fwhm_quad["pars"] = fwhm_quad['pars'].to_dict() - fwhm_quad["errors"] = fwhm_quad['errors'].to_dict() + fwhm_quad["parameters"] = fwhm_quad["parameters"].to_dict() + fwhm_quad["uncertainties"] = fwhm_quad["uncertainties"].to_dict() fwhm_quad["cov"] = fwhm_quad["cov"].tolist() - - pk_dict = {Ei:{"function":func_i.__name__, - "module":func_i.__module__, - "pars(cal)":parsi.to_dict(), - "errs(cal)":errorsi.to_dict(), + + pk_dict = { + Ei: { + "function": func_i.__name__, + "module": func_i.__module__, + "parameters_in_keV": parsi.to_dict(), + "uncertainties_in_keV": errorsi.to_dict(), "p_val": pvali, - "fwhm (keV)": list(fwhmi)} - for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( - zip(self.results["fitted_keV"], - self.results["pk_pars"][self.results["pk_validities"]], - self.results["pk_errors"][self.results["pk_validities"]], - self.results["pk_pvals"][self.results["pk_validities"]], - self.results["pk_fwhms"], - self.funcs) - )} - + "fwhm_in_keV": list(fwhmi), + } + for i, (Ei, parsi, errorsi, pvali, fwhmi, func_i) in enumerate( + zip( + self.results["fitted_keV"], + self.results["pk_pars"][self.results["pk_validities"]], + self.results["pk_errors"][self.results["pk_validities"]], + self.results["pk_pvals"][self.results["pk_validities"]], + self.results["pk_fwhms"], + self.funcs, + ) + ) + } + return { - "eres_linear": fwhm_linear, - "eres_quadratic":fwhm_quad, - "fitted_peaks": self.results["fitted_keV"].tolist(), - "pk_fits":pk_dict + "eres_linear": fwhm_linear, + "eres_quadratic": fwhm_quad, + "fitted_peaks": self.results["fitted_keV"].tolist(), + "pk_fits": pk_dict, } else: return {} - def fit_peaks(self, data): log.debug(f"Fitting {self.energy_param}") try: - n_bins = [int((self.range_keV[i][1]+self.range_keV[i][0]) /0.2) for i in range(len(self.glines))] - pk_pars, pk_errors, pk_covs, pk_binws, pk_ranges, pk_pvals, valid_pks, pk_funcs = cal.hpge_fit_E_peaks( - data.query(self.selection_string)[self.energy_param], - self.glines, - self.range_keV, - n_bins=n_bins, - funcs=self.funcs, - method="unbinned", - gof_funcs=self.gof_funcs, - n_events=None, - allowed_p_val=self.p_val - ) - for idx, peak in enumerate(self.glines): - #idx = np.where(peak ==self.glines)[0][0] + n_bins = [ + int((self.range_keV[i][1] + self.range_keV[i][0]) / self.binning[i]) + for i in range(len(self.glines)) + ] + ( + pk_pars, + pk_errors, + pk_covs, + pk_binws, + pk_ranges, + pk_pvals, + valid_pks, + pk_funcs, + ) = cal.hpge_fit_E_peaks( + data.query(self.selection_string)[self.energy_param], + self.glines, + self.range_keV, + n_bins=n_bins, + funcs=self.funcs, + method="unbinned", + gof_funcs=self.gof_funcs, + n_events=None, + allowed_p_val=self.p_val, + tail_weight=20, + ) + for idx, peak in enumerate(self.glines): self.funcs[idx] = pk_funcs[idx] if pk_funcs[idx] == pgf.extended_radford_pdf: self.gof_funcs[idx] = pgf.radford_pdf else: - self.gof_funcs[idx] = pgf.gauss_step_pdf - + self.gof_funcs[idx] = pgf.gauss_step_pdf + self.results["got_peaks_keV"] = self.glines self.results["pk_pars"] = pk_pars self.results["pk_errors"] = pk_errors @@ -710,13 +782,15 @@ def fit_peaks(self, data): self.results["pk_binws"] = pk_binws self.results["pk_ranges"] = pk_ranges self.results["pk_pvals"] = pk_pvals - for i, pk in enumerate(self.results["got_peaks_keV"]): try: - if self.results["pk_pars"][i]["n_sig"]<10: + if self.results["pk_pars"][i]["n_sig"] < 10: valid_pks[i] = False - elif 2*self.results["pk_errors"][i]["n_sig"]>self.results["pk_pars"][i]["n_sig"]: + elif ( + 2 * self.results["pk_errors"][i]["n_sig"] + > self.results["pk_pars"][i]["n_sig"] + ): valid_pks[i] = False except: pass @@ -724,16 +798,16 @@ def fit_peaks(self, data): self.results["pk_validities"] = valid_pks # Drop failed fits - fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[valid_pks] + fitted_peaks_keV = self.results["fitted_keV"] = np.asarray(self.glines)[ + valid_pks + ] pk_pars = np.asarray(pk_pars, dtype=object)[valid_pks] # ragged - pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks] + pk_errors = np.asarray(pk_errors, dtype=object)[valid_pks] pk_covs = np.asarray(pk_covs, dtype=object)[valid_pks] pk_binws = np.asarray(pk_binws)[valid_pks] pk_ranges = np.asarray(pk_ranges)[valid_pks] pk_pvals = np.asarray(pk_pvals)[valid_pks] pk_funcs = np.asarray(pk_funcs)[valid_pks] - - log.info(f"{len(np.where(valid_pks)[0])} peaks fitted:") for i, (Ei, parsi, errorsi, covsi, func_i) in enumerate( @@ -778,7 +852,6 @@ def fit_peaks(self, data): except: self.results = {} log.debug(f"high stats fitting failed") - def get_peak_labels( @@ -814,7 +887,9 @@ def get_peak_label(peak: float) -> str: return "" -def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, binning_keV=5): +def plot_fits( + ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, binning_keV=5 +): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -828,7 +903,6 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, if peak in fitted_peaks: fitted_gof_funcs.append(ecal_class.gof_funcs[i]) - mus = [ pgf.get_mu_func(func_i, pars_i) if pars_i is not None else np.nan for func_i, pars_i in zip(fitted_gof_funcs, pk_pars) @@ -841,7 +915,7 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, range_adu = 5 / der[i] plt.subplot(nrows, ncols, i + 1) try: - binning = np.arange(pk_ranges[i][0], pk_ranges[i][1], 0.1/ der[i]) + binning = np.arange(pk_ranges[i][0], pk_ranges[i][1], 0.1 / der[i]) bin_cs = (binning[1:] + binning[:-1]) / 2 energies = data.query( f"{ecal_class.energy_param}>{pk_ranges[i][0]}&{ecal_class.energy_param}<{pk_ranges[i][1]}&{ecal_class.selection_string}" @@ -850,7 +924,9 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, counts, bs, bars = plt.hist(energies, bins=binning, histtype="step") if pk_pars[i] is not None: - fit_vals = fitted_gof_funcs[i](bin_cs, *pk_pars[i][:-1], 0) * np.diff(bs)[0] + fit_vals = ( + fitted_gof_funcs[i](bin_cs, *pk_pars[i][:-1], 0) * np.diff(bs)[0] + ) plt.plot(bin_cs, fit_vals) plt.step( bin_cs, @@ -861,7 +937,9 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, ) plt.annotate( - get_peak_label(fitted_peaks[i]), (0.02, 0.9), xycoords="axes fraction" + get_peak_label(fitted_peaks[i]), + (0.02, 0.9), + xycoords="axes fraction", ) plt.annotate( f"{fitted_peaks[i]:.1f} keV", (0.02, 0.8), xycoords="axes fraction" @@ -885,7 +963,13 @@ def plot_fits(ecal_class, data, figsize=[12, 8], fontsize=12, ncols=3, nrows=3, def plot_2614_timemap( - ecal_class, data, figsize=[12, 8], fontsize=12, erange=[2580, 2630], dx=1, time_dx=180 + ecal_class, + data, + figsize=[12, 8], + fontsize=12, + erange=[2580, 2630], + dx=1, + time_dx=180, ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -945,9 +1029,7 @@ def plot_pulser_timemap( else: mean = np.nanpercentile(selection[ecal_class.cal_energy_param], 50) - spread = mean - np.nanpercentile( - selection[ecal_class.cal_energy_param], 10 - ) + spread = mean - np.nanpercentile(selection[ecal_class.cal_energy_param], 10) plt.hist2d( selection["timestamp"], @@ -1150,31 +1232,59 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz qbb_line_vx = [2039.0, 2039.0] qbb_line_vy = [ - 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), - np.nanmax([ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"],ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]]) + 0.9 + * np.nanmin( + fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]) + ), + np.nanmax( + [ + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ] + ), ] qbb_line_hx = [erange[0], 2039.0] ax1.plot( - fwhm_slope_bins, fwhm_linear.func(fwhm_slope_bins, - *ecal_class.fwhm_fit_linear["pars"]), lw=1, c="g", - label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV' + fwhm_slope_bins, + fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]), + lw=1, + c="g", + label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV', + ) + ax1.plot( + fwhm_slope_bins, + fwhm_quadratic.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_quadratic["parameters"] + ), + lw=1, + c="b", + label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV', ) ax1.plot( - fwhm_slope_bins, fwhm_quadratic.func(fwhm_slope_bins, - *ecal_class.fwhm_fit_quadratic["pars"]), lw=1, c="b", - label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV' + qbb_line_hx, + [ + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ], + lw=1, + c="r", + ls="--", + ) + ax1.plot( + qbb_line_hx, + [ + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ], + lw=1, + c="r", + ls="--", ) - ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]], - lw=1, c="r", ls="--") - ax1.plot(qbb_line_hx, [ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]], - lw=1, c="r", ls="--") ax1.plot(qbb_line_vx, qbb_line_vy, lw=1, c="r", ls="--") ax1.legend(loc="upper left", frameon=False) - if np.isnan(ecal_class.fwhm_fit_linear["pars"]).all(): + if np.isnan(ecal_class.fwhm_fit_linear["parameters"]).all(): [ 0.9 * np.nanmin(fit_fwhms), 1.1 * np.nanmax(fit_fwhms), @@ -1182,27 +1292,47 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz else: ax1.set_ylim( [ - 0.9 * np.nanmin(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), - 1.1 * np.nanmax(fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["pars"])), + 0.9 + * np.nanmin( + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ) + ), + 1.1 + * np.nanmax( + fwhm_linear.func( + fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"] + ) + ), ] ) ax1.set_xlim(erange) ax1.set_ylabel("FWHM energy resolution (keV)") ax2.plot( fwhm_peaks, - (fit_fwhms - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["pars"])) / fit_dfwhms, + ( + fit_fwhms + - fwhm_linear.func(fwhm_peaks, *ecal_class.fwhm_fit_linear["parameters"]) + ) + / fit_dfwhms, lw=0, marker="x", c="g", ) ax2.plot( fwhm_peaks, - (fit_fwhms - fwhm_quadratic.func(fwhm_peaks, *ecal_class.fwhm_fit_quadratic["pars"])) / fit_dfwhms, + ( + fit_fwhms + - fwhm_quadratic.func( + fwhm_peaks, *ecal_class.fwhm_fit_quadratic["parameters"] + ) + ) + / fit_dfwhms, lw=0, marker="x", c="b", ) - ax2.plot(erange,[0,0], color="black",lw=0.5) + ax2.plot(erange, [0, 0], color="black", lw=0.5) ax2.set_xlabel("Energy (keV)") ax2.set_ylabel("Normalised Residuals") plt.tight_layout() @@ -1218,9 +1348,7 @@ def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - data.query("~is_valid_cal&is_not_pulser")[ - ecal_class.cal_energy_param - ], + data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], bins, )[0], "pulser_counts": np.histogram( @@ -1236,9 +1364,7 @@ def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): bins=np.arange(erange[0], erange[1] + dx, dx), ) counts_fail, bins_fail, _ = pgh.get_hist( - data.query("~is_valid_cal&is_not_pulser")[ - ecal_class.cal_energy_param - ], + data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], bins=np.arange(erange[0], erange[1] + dx, dx), ) sf = 100 * (counts_pass + 10 ** (-6)) / (counts_pass + counts_fail + 10 ** (-6)) @@ -1247,7 +1373,7 @@ def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): def energy_cal_th( files: list[str], - energy_params: list[str] , + energy_params: list[str], lh5_path: str = "dsp", hit_dict: dict = {}, cut_parameters: dict[str, int] = {"bl_mean": 4, "bl_std": 4, "pz_std": 4}, @@ -1260,12 +1386,11 @@ def energy_cal_th( guess_keV: float | None = None, deg: int = 1, ) -> tuple(dict, dict, dict, dict): - data = load_data( files, lh5_path, hit_dict, - params = energy_params + list(cut_parameters) + ["timestamp"] + params=energy_params + list(cut_parameters) + ["timestamp"], ) data, hit_dict = apply_cuts(data, hit_dict, cut_parameters, final_cut_field) @@ -1274,7 +1399,7 @@ def energy_cal_th( plot_dict = {} full_object_dict = {} for energy_param in energy_params: - ecal = calibrate_parameter( + full_object_dict[energy_param] = calibrate_parameter( energy_param, f"{final_cut_field}&is_not_pulser", plot_options, @@ -1285,13 +1410,15 @@ def energy_cal_th( simplex, deg, ) - ecal.calibrate_parameter(data) - results_dict[ecal.cal_energy_param] = ecal.get_results_dict(data) - hit_dict.update(ecal.hit_dict) - full_object_dict[ecal.cal_energy_param] = ecal - if ~np.isnan(ecal.pars).all(): - plot_dict[ecal.cal_energy_param] = ecal.fill_plot_dict(data) - + full_object_dict[energy_param].calibrate_parameter(data) + results_dict[ + full_object_dict[energy_param].cal_energy_param + ] = full_object_dict[energy_param].get_results_dict(data) + hit_dict.update(full_object_dict[energy_param].hit_dict) + if ~np.isnan(full_object_dict[energy_param].pars).all(): + plot_dict[full_object_dict[energy_param].cal_energy_param] = ( + full_object_dict[energy_param].fill_plot_dict(data).copy() + ) log.info(f"Finished all calibrations") return hit_dict, results_dict, plot_dict, full_object_dict @@ -1308,32 +1435,36 @@ def partition_energy_cal_th( n_events: int = None, final_cut_field: str = "is_valid_cal", simplex: bool = True, + tail_weight: int = 20, ) -> tuple(dict, dict, dict, dict): - data = load_data( files, lh5_path, hit_dict, - params = energy_params + [final_cut_field] + ["timestamp"] + params=energy_params + [final_cut_field] + ["timestamp"], ) results_dict = {} plot_dict = {} full_object_dict = {} for energy_param in energy_params: - ecal = high_stats_fitting( + full_object_dict[energy_param] = high_stats_fitting( energy_param, f"{final_cut_field}&is_not_pulser", threshold, p_val, plot_options, simplex, + tail_weight, + ) + full_object_dict[energy_param].fit_peaks(data) + results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( + data ) - ecal.fit_peaks(data) - results_dict[energy_param] = ecal.get_results_dict(data) - full_object_dict[energy_param] = ecal if ecal.results: - plot_dict[energy_param] = ecal.fill_plot_dict(data) + plot_dict[energy_param] = ( + full_object_dict[energy_param].fill_plot_dict(data).copy() + ) log.info(f"Finished all calibrations") - return results_dict, plot_dict, full_object_dict \ No newline at end of file + return results_dict, plot_dict, full_object_dict diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 8a54f3bfd..293d8f30d 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -264,7 +264,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): or func == pgf.extended_gauss_step_pdf ): # get mu and height from a gauss fit, also sigma as fallback - pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10) + pars, cov = pgf.gauss_mode_width_max( + hist, bins, var, mode_guess=mode_guess, n_bins=10 + ) bin_centres = pgh.get_bin_centers(bins) if pars is None: log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed") @@ -273,7 +275,7 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): height = hist[i_0] sigma_guess = None else: - mu = pars[0] + mu = mode_guess sigma_guess = pars[1] height = pars[2] @@ -319,9 +321,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): hstep = step / (bg + np.mean(hist[:10])) parguess = [n_sig, mu, sigma / 2, n_bkg, hstep, bins[0], bins[-1], 0] - for i, guess in enumerate(parguess): + for i, guess in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess @@ -331,7 +333,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): or func == pgf.extended_radford_pdf ): # guess mu, height - pars, cov = pgf.gauss_mode_width_max(hist, bins, var, mode_guess=mode_guess, n_bins=10) + pars, cov = pgf.gauss_mode_width_max( + hist, bins, var, mode_guess=mode_guess, n_bins=10 + ) bin_centres = pgh.get_bin_centers(bins) if pars is None: log.info("get_hpge_E_peak_par_guess: gauss_mode_width_max failed") @@ -393,9 +397,9 @@ def get_hpge_E_peak_par_guess(hist, bins, var, func, mode_guess): parguess = [n_sig, mu, sigma, htail, tau, n_bkg, hstep, bins[0], bins[-1], 0] - for i, guess in enumerate(parguess): + for i, guess in enumerate(parguess): if np.isnan(guess): - parguess[i]=0 + parguess[i] = 0 return parguess @@ -472,48 +476,65 @@ def get_hpge_E_bounds(func, parguess): log.error(f"get_hpge_E_bounds not implemented for {func.__name__}") return [] + class tail_prior: """ Generic least-squares cost function with error. """ - verbose=0 + + verbose = 0 errordef = Minuit.LIKELIHOOD # for Minuit to compute errors correctly - def __init__(self, data, model): + def __init__(self, data, model, tail_weight=100): self.model = model # model predicts y for given x - self.data=data - #self.x = np.asarray(x) - + self.data = data + self.tail_weight = tail_weight + def _call(self, *pars): - return self.__call__( *pars[0]) + return self.__call__(*pars[0]) + + def __call__( + self, + n_sig, + mu, + sigma, + htail, + tau, + n_bkg, + hstep, + lower_range, + upper_range, + components, + ): + return self.tail_weight * np.log(htail + 0.1) # len(self.data)/ - def __call__(self, n_sig, mu, sigma, htail, - tau, n_bkg, hstep, - lower_range ,upper_range, components): - return 100 * np.log(htail+0.1) #len(self.data)/ -def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess): +def staged_fit( + energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess, tail_weight=100 +): par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i, mode_guess) bounds = get_hpge_E_bounds(func_i, par_guesses) - fixed, mask = get_hpge_E_fixed(func_i) - + fixed, mask = get_hpge_E_fixed(func_i) + if func_i == pgf.extended_radford_pdf or func_i == pgf.radford_pdf: - cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) +tail_prior(energies, func_i) + cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) + tail_prior( + energies, func_i, tail_weight=tail_weight + ) m = Minuit(cost_func, *par_guesses) m.limits = bounds for fix in fixed: m.fixed[fix] = True - + m.values["htail"] = 0 m.values["tau"] = 0 - m.fixed["htail"] = True - m.fixed["tau"] = True + m.fixed["htail"] = True + m.fixed["tau"] = True if simplex == True: m.simplex().migrad() else: m.migrad() try: - #set htail to guess + # set htail to guess m.values["htail"] = par_guesses[3] m.values["tau"] = par_guesses[4] m.fixed = False @@ -534,16 +555,22 @@ def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_gues except: func_i = pgf.extended_gauss_step_pdf gof_func_i = pgf.gauss_step_pdf - pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, - func_i, gof_func_i, simplex, mode_guess) - - #check htail - if m.values["htail"]<0.01 or m.values["htail"]<2*m.errors["htail"] or np.isnan(m.values).any():# or + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit( + energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess + ) + + # check htail + if ( + m.values["htail"] < 0.01 + or m.values["htail"] < 2 * m.errors["htail"] + or np.isnan(m.values).any() + ): # or func_i = pgf.extended_gauss_step_pdf gof_func_i = pgf.gauss_step_pdf - pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, - func_i, gof_func_i, simplex, mode_guess) - + pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit( + energies, hist, bins, var, func_i, gof_func_i, simplex, mode_guess + ) + else: cost_func = cost.ExtendedUnbinnedNLL(energies, func_i) m = Minuit(cost_func, *par_guesses) @@ -553,8 +580,8 @@ def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_gues if simplex == True: m.simplex().migrad() else: - m.migrad() - + m.migrad() + m.hesse() pars_i = m.values @@ -565,6 +592,7 @@ def staged_fit(energies, hist, bins, var, func_i, gof_func_i, simplex, mode_gues return pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit + def hpge_fit_E_peaks( E_uncal, mode_guesses, @@ -577,6 +605,7 @@ def hpge_fit_E_peaks( allowed_p_val=0.05, uncal_is_int=False, simplex=False, + tail_weight=100, ): """Fit the Energy peaks specified using the function given @@ -615,14 +644,14 @@ def hpge_fit_E_peaks( ranges: list of array a list of [Euc_min, Euc_max] used for each peak fit """ - pars = np.zeros(len(mode_guesses), dtype='object') - errors = np.zeros(len(mode_guesses), dtype='object') - covs = np.zeros(len(mode_guesses), dtype='object') + pars = np.zeros(len(mode_guesses), dtype="object") + errors = np.zeros(len(mode_guesses), dtype="object") + covs = np.zeros(len(mode_guesses), dtype="object") binws = np.zeros(len(mode_guesses)) - ranges = np.zeros(len(mode_guesses), dtype='object') + ranges = np.zeros(len(mode_guesses), dtype="object") p_vals = np.zeros(len(mode_guesses)) - valid_pks = np.zeros(len(mode_guesses),dtype=bool) - out_funcs= np.zeros(len(mode_guesses), dtype='object') + valid_pks = np.zeros(len(mode_guesses), dtype=bool) + out_funcs = np.zeros(len(mode_guesses), dtype="object") for i_peak, mode_guess in enumerate(mode_guesses): # get args for this peak @@ -642,7 +671,7 @@ def hpge_fit_E_peaks( # bin a histogram Euc_min = mode_guesses[i_peak] - wleft_i Euc_max = mode_guesses[i_peak] + wright_i - if uncal_is_int ==True: + if uncal_is_int == True: Euc_min, Euc_max, n_bins_i = pgh.better_int_binning( x_lo=Euc_min, x_hi=Euc_max, n_bins=n_bins_i ) @@ -653,10 +682,28 @@ def hpge_fit_E_peaks( energies, bins=n_bins_i, range=(Euc_min, Euc_max) ) if func_i == pgf.extended_radford_pdf or pgf.extended_gauss_step_pdf: - pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit(energies, hist, bins, var, - func_i, gof_func_i, simplex, mode_guess) + ( + pars_i, + errs_i, + cov_i, + func_i, + gof_func_i, + mask, + valid_fit, + ) = staged_fit( + energies, + hist, + bins, + var, + func_i, + gof_func_i, + simplex, + mode_guess, + tail_weight=tail_weight, + ) + if pars_i["n_sig"] < 20: + valid_fit = False else: - par_guesses = get_hpge_E_peak_par_guess(hist, bins, var, func_i) bounds = get_hpge_E_bounds(func_i, par_guesses) fixed, mask = get_hpge_E_fixed(func_i) @@ -678,7 +725,13 @@ def hpge_fit_E_peaks( valid_fit = m.valid csqr = pgf.goodness_of_fit( - hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=True + hist, + bins, + None, + gof_func_i, + pars_i, + method="Pearson", + scale_bins=True, ) else: @@ -700,25 +753,35 @@ def hpge_fit_E_peaks( simplex=simplex, bounds=bounds, ) - valid_fit=True + valid_fit = True csqr = pgf.goodness_of_fit( - hist, bins, None, gof_func_i, pars_i, method="Pearson", scale_bins=False + hist, + bins, + None, + gof_func_i, + pars_i, + method="Pearson", + scale_bins=False, ) - + if np.isnan(pars_i).any(): log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, par is nan : {pars_i}" ) - raise RuntimeError + raise RuntimeError - p_val = scipy.stats.chi2.sf(csqr[0], csqr[1]+ len(np.where(mask)[0])) + p_val = scipy.stats.chi2.sf(csqr[0], csqr[1] + len(np.where(mask)[0])) total_events = pgf.get_total_events_func(func_i, pars_i, errors=errs_i) if ( - sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == np.inf - or sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask]) == 0 - or np.isnan(sum(sum(c) if c is not None else 0 for c in cov_i[mask,:][:,mask])) + sum(sum(c) if c is not None else 0 for c in cov_i[mask, :][:, mask]) + == np.inf + or sum(sum(c) if c is not None else 0 for c in cov_i[mask, :][:, mask]) + == 0 + or np.isnan( + sum(sum(c) if c is not None else 0 for c in cov_i[mask, :][:, mask]) + ) ): log.debug( f"hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}" @@ -732,8 +795,9 @@ def hpge_fit_E_peaks( ) valid_pks[i_peak] = False - elif ((np.abs(np.array(errs_i)[mask] / np.array(pars_i)[mask]) < 1e-7).any() - or np.isnan(np.array(errs_i)[mask]).any()): + elif ( + np.abs(np.array(errs_i)[mask] / np.array(pars_i)[mask]) < 1e-7 + ).any() or np.isnan(np.array(errs_i)[mask]).any(): log.debug( f"hpge_fit_E_peaks: failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" ) @@ -758,33 +822,24 @@ def hpge_fit_E_peaks( except: log.debug( - f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" - ) + f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" + ) valid_pks[i_peak] = False - pars_i, errs_i, cov_i = return_nans(func_i)#None, None, None, None + pars_i, errs_i, cov_i = return_nans(func_i) # None, None, None, None p_val = 0 # get binning binw_1 = (bins[-1] - bins[0]) / (len(bins) - 1) - pars[i_peak] = pars_i + pars[i_peak] = pars_i errors[i_peak] = errs_i covs[i_peak] = cov_i - binws[i_peak] =binw_1 - ranges[i_peak] =[Euc_min, Euc_max] - p_vals[i_peak] =p_val - out_funcs[i_peak] =func_i - - return ( - pars, - errors, - covs, - binws, - ranges, - p_vals, - valid_pks, - out_funcs - ) + binws[i_peak] = binw_1 + ranges[i_peak] = [Euc_min, Euc_max] + p_vals[i_peak] = p_val + out_funcs[i_peak] = func_i + + return (pars, errors, covs, binws, ranges, p_vals, valid_pks, out_funcs) def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): @@ -1011,7 +1066,7 @@ def hpge_E_calibration( range_keV = [range_keV[i] for i in idx] funcs = [funcs[i] for i in idx] gof_funcs = [gof_funcs[i] for i in idx] - + # Drop peaks to not be fitted tmp = zip( *[ @@ -1071,7 +1126,7 @@ def hpge_E_calibration( pk_ranges, pk_pvals, valid_pks, - pk_funcs + pk_funcs, ) = hpge_fit_E_peaks( E_uncal, got_peaks_locs, diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index 3bcdb5e0a..b82b39afc 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -1038,12 +1038,21 @@ def event_selection( e_upper_lim = peak_loc + (1.5 * kev_width[1]) / rough_adc_to_kev e_ranges = (int(peak_loc - e_lower_lim), int(e_upper_lim - peak_loc)) - params, errors, covs, bins, ranges, p_val, valid_pks, pk_funcs = pgc.hpge_fit_E_peaks( + ( + params, + errors, + covs, + bins, + ranges, + p_val, + valid_pks, + pk_funcs, + ) = pgc.hpge_fit_E_peaks( energy, [peak_loc], [e_ranges], n_bins=(np.nanmax(energy) - np.nanmin(energy)) // 1, - uncal_is_int=True + uncal_is_int=True, ) if params[0] is None or np.isnan(params[0]).any(): log.debug("Fit failed, using max guess") diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index 86d1d94ba..e39dc255e 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -1,16 +1,18 @@ from __future__ import annotations -import numpy as np -import pandas as pd import logging -from iminuit import Minuit, cost, util from types import FunctionType import lgdo.lh5_store as lh5 +import numpy as np +import pandas as pd +from iminuit import Minuit, cost, util + import pygama.pargen.cuts as cts log = logging.getLogger(__name__) + def return_nans(input): if isinstance(input, FunctionType): args = input.__code__.co_varnames[: input.__code__.co_argcount][1:] @@ -23,6 +25,7 @@ def return_nans(input): m = Minuit(c, *[np.nan for arg in args]) return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) + def tag_pulser(files, lh5_path): pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") @@ -43,6 +46,7 @@ def tag_pulser(files, lh5_path): log.debug(f"no pulser found") return ids + def get_params(file_params, param_list): out_params = [] if isinstance(file_params, dict): @@ -54,17 +58,15 @@ def get_params(file_params, param_list): if key in param: out_params.append(key) return np.unique(out_params).tolist() - + def load_data( files: list, lh5_path: str, cal_dict: dict, - params = [ - "cuspEmax" - ], - cal_energy_param: str="cuspEmax_ctc_cal", - threshold = None + params=["cuspEmax"], + cal_energy_param: str = "cuspEmax_ctc_cal", + threshold=None, ) -> tuple(np.array, np.array, np.array, np.array): """ Loads in the A/E parameters needed and applies calibration constants to energy @@ -75,7 +77,7 @@ def load_data( if isinstance(files, dict): df = [] all_files = [] - masks=np.array([],dtype=bool) + masks = np.array([], dtype=bool) for tstamp, tfiles in files.items(): table = sto.read_object(lh5_path, tfiles)[0] if tstamp in cal_dict: @@ -85,12 +87,12 @@ def load_data( file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object) params.append("timestamp") if threshold is not None: - mask = file_df[cal_energy_param]threshold + masks = df[cal_energy_param] > threshold df.drop(np.where(~masks)[0], inplace=True) else: - masks = np.ones(len(df),dtype=bool) + masks = np.ones(len(df), dtype=bool) all_files = files - if lh5_path[-1] != "/": lh5_path+='/' + if lh5_path[-1] != "/": + lh5_path += "/" keys = lh5.ls(all_files[0], lh5_path) keys = [key.split("/")[-1] for key in keys] - params = get_params(keys+list(df.keys()), params) + params = get_params(keys + list(df.keys()), params) ids = tag_pulser(all_files, lh5_path) df["is_not_pulser"] = ids[masks] @@ -124,4 +127,4 @@ def load_data( if param not in df: df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] log.debug(f"data loaded") - return df \ No newline at end of file + return df From 0c6ddbe0d6178efda2fa205b3f9bac1c7943e6ef Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Oct 2023 17:26:58 +0200 Subject: [PATCH 09/22] bugfix for partitiona ecal --- src/pygama/pargen/ecal_th.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index acc8ed77c..9613a054d 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -1461,7 +1461,7 @@ def partition_energy_cal_th( results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( data ) - if ecal.results: + if results_dict[energy_param].results: plot_dict[energy_param] = ( full_object_dict[energy_param].fill_plot_dict(data).copy() ) From 64a45a11ec651d8727e6ecb4e39671b53da1101e Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Oct 2023 17:34:29 +0200 Subject: [PATCH 10/22] fixed pars to params to eres fit --- src/pygama/pargen/ecal_th.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 9613a054d..ffbf37633 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -264,7 +264,7 @@ def fit_energy_res(self): "p_val": p_val, } - log.info(f'FWHM linear fit: {self.fwhm_fit_linear["pars"].to_dict()}') + log.info(f'FWHM linear fit: {self.fwhm_fit_linear["parameters"].to_dict()}') log.info(f"FWHM fit values:") log.info(f"\t Energy | FWHM (keV) | Predicted (keV)") for i, (peak, fwhm, fwhme) in enumerate( From a71479da58176b010d615d4b8c0b5c2d211a6d42 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 3 Oct 2023 17:54:16 +0200 Subject: [PATCH 11/22] partition ecal naming fix --- src/pygama/pargen/ecal_th.py | 26 ++++++++++---------------- src/pygama/pargen/energy_cal.py | 2 ++ 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index ffbf37633..7aa72260c 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -172,6 +172,7 @@ def __init__( simplex: bool = True, deg: int = 1, cal_energy_param: str = None, + tail_weight=100, ): self.energy_param = energy_param if cal_energy_param is None: @@ -186,6 +187,7 @@ def __init__( self.deg = deg self.plot_options = plot_options self.simplex = simplex + self.tail_weight = tail_weight self.output_dict = {} self.hit_dict = {} @@ -474,6 +476,7 @@ def calibrate_parameter(self, data): n_events=self.n_events, allowed_p_val=self.p_val, simplex=self.simplex, + tail_weight=self.tail_weight, verbose=False, ) pk_pars = self.results["pk_pars"] @@ -494,19 +497,7 @@ def calibrate_parameter(self, data): found_peaks = np.array([]) fitted_peaks = np.array([]) fitted_funcs = np.array([]) - - if ( - len(fitted_peaks) != len(self.glines) - or self.gof_funcs[-1] == pgf.gauss_step_pdf - ): - if self.glines[-1] in fitted_peaks: - if fitted_funcs[-1] == pgf.extended_gauss_step_pdf: - self.funcs = [pgf.extended_gauss_step_pdf for entry in self.glines] - self.gof_funcs = [pgf.gauss_step_pdf for entry in self.glines] - - for i, peak in enumerate(self.glines): - if peak not in fitted_peaks: - kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) + if len(fitted_peaks) != len(self.glines): for i, peak in enumerate(self.glines): if peak not in fitted_peaks: kev_ranges[i] = (kev_ranges[i][0] - 5, kev_ranges[i][1] - 5) @@ -518,13 +509,12 @@ def calibrate_parameter(self, data): > 0.05 ): index = np.where(self.glines == peak)[0][0] - kev_ranges[i] = ( + kev_ranges[index] = ( kev_ranges[index][0] - 5, kev_ranges[index][1] - 5, ) except: pass - try: self.pars, self.cov, self.results = cal.hpge_E_calibration( data.query(self.selection_string)[self.energy_param], @@ -537,6 +527,7 @@ def calibrate_parameter(self, data): n_events=self.n_events, allowed_p_val=self.p_val, simplex=self.simplex, + tail_weight=self.tail_weight, verbose=False, ) fitted_peaks = self.results["fitted_keV"] @@ -934,6 +925,7 @@ def plot_fits( (fval - count) / count if count != 0 else (fval - count) for count, fval in zip(counts, fit_vals) ], + where="mid", ) plt.annotate( @@ -1384,6 +1376,7 @@ def energy_cal_th( final_cut_field: str = "is_valid_cal", simplex: bool = True, guess_keV: float | None = None, + tail_weight=100, deg: int = 1, ) -> tuple(dict, dict, dict, dict): data = load_data( @@ -1409,6 +1402,7 @@ def energy_cal_th( n_events, simplex, deg, + tail_weight=tail_weight, ) full_object_dict[energy_param].calibrate_parameter(data) results_dict[ @@ -1461,7 +1455,7 @@ def partition_energy_cal_th( results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( data ) - if results_dict[energy_param].results: + if full_object_dict[energy_param].results: plot_dict[energy_param] = ( full_object_dict[energy_param].fill_plot_dict(data).copy() ) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index 293d8f30d..e7fdb21d8 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -929,6 +929,7 @@ def hpge_E_calibration( n_events=None, simplex=False, allowed_p_val=0.05, + tail_weight=100, verbose=True, ): """Calibrate HPGe data to a set of known peaks @@ -1139,6 +1140,7 @@ def hpge_E_calibration( uncal_is_int=False, simplex=simplex, allowed_p_val=allowed_p_val, + tail_weight=tail_weight, ) results["pk_pars"] = pk_pars results["pk_errors"] = pk_errors From 9cd6853649ceed0852075e38ce6de3bfa3bfa9e2 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Mon, 9 Oct 2023 13:08:57 +0200 Subject: [PATCH 12/22] updated units in fwhm to convention --- src/pygama/pargen/AoE_cal.py | 71 ++++++++++++++++++------------------ src/pygama/pargen/ecal_th.py | 16 ++++---- 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 2125f80c3..55fec4ee3 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -1547,8 +1547,8 @@ def drift_time_correction( m.hesse() self.dt_res_dict["dt_fit"] = { - "parameters": m.values, - "uncertainties": m.errors, + "pars": m.values, + "errs": m.errors, "object": m, } aoe_grp1 = self.dt_res_dict[ @@ -1801,8 +1801,8 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): "func": self.mean_func.__name__, "module": self.mean_func.__module__, "expression": self.mean_func.string_func("x"), - "parameters": mu_pars.to_dict(), - "uncertainties": mu_errs.to_dict(), + "pars": mu_pars.to_dict(), + "errs": mu_errs.to_dict(), "p_val_mu": p_val_mu, "csqr_mu": (csqr_mu, dof_mu), } @@ -1811,8 +1811,8 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): "func": self.sigma_func.__name__, "module": self.sigma_func.__module__, "expression": self.sigma_func.string_func("x"), - "parameters": sig_pars.to_dict(), - "uncertainties": sig_errs.to_dict(), + "pars": sig_pars.to_dict(), + "errs": sig_errs.to_dict(), "p_val_mu": p_val_sig, "csqr_mu": (csqr_sig, dof_sig), } @@ -1820,8 +1820,8 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): self.energy_corr_res_dict["dep_fit"] = { "func": self.pdf.__name__, "module": self.pdf.__module__, - "parameters": dep_pars.to_dict(), - "uncertainties": dep_err.to_dict(), + "pars": dep_pars.to_dict(), + "errs": dep_err.to_dict(), } self.update_cal_dicts( @@ -1831,7 +1831,7 @@ def AoEcorrection(self, data: pd.DataFrame, aoe_param: str, display: int = 0): "parameters": mu_pars.to_dict(), }, "AoE_Classifier": { - "expression": f"AoE_Corrected/({self.sigma_func.string_func(self.cal_energy_param)})", + "expression": f"(AoE_Corrected-1)/({self.sigma_func.string_func(self.cal_energy_param)})", "parameters": sig_pars.to_dict(), }, } @@ -1918,8 +1918,8 @@ def get_aoe_cut_fit( p = sigmoid_fit.func(xs, *m1.values) self.cut_fit = { "function": sigmoid_fit.__name__, - "parameters": m1.values.to_dict(), - "uncertainties": m1.errors.to_dict(), + "pars": m1.values.to_dict(), + "errs": m1.errors.to_dict(), } self.low_cut_val = round(xs[np.argmin(np.abs(p - (100 * self.dep_acc)))], 3) log.info(f"Cut found at {self.low_cut_val}") @@ -2238,12 +2238,12 @@ def drifttime_corr_plot( final_df = dep_events.query(aoe_class.dt_res_dict["final_selection"]) plt.subplot(2, 2, 1) - aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["parameters"] + aoe_pars = aoe_class.dt_res_dict["aoe_fit1"]["pars"] xs = np.linspace(aoe_pars["lower_range"], aoe_pars["upper_range"], 100) counts, aoe_bins, bars = plt.hist( final_df.query( - f'{aoe_class.dt_res_dict["aoe_grp1"]}&{aoe_param}<{aoe_pars["upper_range"]}&{aoe_param}>{aoe_pars["lower_range"]}' + f'{aoe_class.dt_res_dict["aoe_grp1"]}&({aoe_param}<{aoe_pars["upper_range"]})&({aoe_param}>{aoe_pars["lower_range"]})' )[aoe_param], bins=400, histtype="step", @@ -2258,12 +2258,12 @@ def drifttime_corr_plot( plt.xlabel("A/E") plt.ylabel("counts") - aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["parameters"] + aoe_pars2 = aoe_class.dt_res_dict["aoe_fit2"]["pars"] plt.subplot(2, 2, 2) xs = np.linspace(aoe_pars2["lower_range"], aoe_pars2["upper_range"], 100) counts, aoe_bins2, bars = plt.hist( final_df.query( - f'{aoe_class.dt_res_dict["aoe_grp2"]}&{aoe_param}<{aoe_pars2["upper_range"]}&{aoe_param}>{aoe_pars2["lower_range"]}' + f'{aoe_class.dt_res_dict["aoe_grp2"]}&({aoe_param}<{aoe_pars2["upper_range"]})&({aoe_param}>{aoe_pars2["lower_range"]})' )[aoe_param], bins=400, histtype="step", @@ -2300,8 +2300,7 @@ def drifttime_corr_plot( plt.plot( pgh.get_bin_centers(bins), drift_time_distribution.pdf( - pgh.get_bin_centers(bins), - *aoe_class.dt_res_dict["dt_fit"]["parameters"], + pgh.get_bin_centers(bins), *aoe_class.dt_res_dict["dt_fit"]["pars"] ) * np.diff(bins)[0], label="fit", @@ -2447,17 +2446,17 @@ def plot_mean_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: ) ax1.plot( - aoe_class.energy_corr_fits.index, + aoe_class.energy_corr_fits.index.to_numpy(), aoe_class.mean_func.func( - aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + aoe_class.energy_corr_fits.index.to_numpy(), + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"], ), label="linear model", ) ax1.errorbar( 1592, - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"], - yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainties"]["mu"], + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["mu"], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]["mu"], label="DEP", color="green", linestyle=" ", @@ -2473,12 +2472,12 @@ def plot_mean_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: aoe_class.energy_corr_fits["mean"] - aoe_class.mean_func.func( aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"], ) ) / aoe_class.mean_func.func( aoe_class.energy_corr_fits.index, - **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"], + **aoe_class.energy_corr_res_dict["mean_fits"]["pars"], ), lw=1, c="b", @@ -2487,13 +2486,13 @@ def plot_mean_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: 1592, 100 * ( - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["mu"] + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["mu"] - aoe_class.mean_func.func( - 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"] + 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"] ) ) / aoe_class.mean_func.func( - 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["parameters"] + 1592, **aoe_class.energy_corr_res_dict["mean_fits"]["pars"] ), lw=1, c="g", @@ -2521,7 +2520,7 @@ def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: label="data", linestyle=" ", ) - sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["parameters"] + sig_pars = aoe_class.energy_corr_res_dict["sigma_fits"]["pars"] if aoe_class.sigma_func == sigma_fit: label = f'sqrt model: \nsqrt({sig_pars["a"]:1.4f}+({sig_pars["b"]:1.1f}/E)^{sig_pars["c"]:1.1f})' elif aoe_class.sigma_func == sigma_fit_quadratic: @@ -2529,14 +2528,16 @@ def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: else: raise ValueError("unknown sigma function") ax1.plot( - aoe_class.energy_corr_fits.index, - aoe_class.sigma_func.func(aoe_class.energy_corr_fits.index, **sig_pars), + aoe_class.energy_corr_fits.index.to_numpy(), + aoe_class.sigma_func.func( + aoe_class.energy_corr_fits.index.to_numpy(), **sig_pars + ), label=label, ) ax1.errorbar( 1592, - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"], - yerr=aoe_class.energy_corr_res_dict["dep_fit"]["uncertainies"]["sigma"], + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["sigma"], + yerr=aoe_class.energy_corr_res_dict["dep_fit"]["errs"]["sigma"], label="DEP", color="green", linestyle=" ", @@ -2560,7 +2561,7 @@ def plot_sigma_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: 1592, 100 * ( - aoe_class.energy_corr_res_dict["dep_fit"]["parameters"]["sigma"] + aoe_class.energy_corr_res_dict["dep_fit"]["pars"]["sigma"] - aoe_class.sigma_func.func(1592, **sig_pars) ) / aoe_class.sigma_func.func(1592, **sig_pars), @@ -2589,9 +2590,9 @@ def plot_cut_fit(aoe_class, data, figsize=[12, 8], fontsize=12) -> plt.figure: ) plt.plot( - aoe_class.cut_fits.index, + aoe_class.cut_fits.index.to_numpy(), sigmoid_fit.func( - aoe_class.cut_fits.index.to_numpy(), **aoe_class.cut_fit["parameters"] + aoe_class.cut_fits.index.to_numpy(), **aoe_class.cut_fit["pars"] ), ) plt.hlines( diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 7aa72260c..138d9076f 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -257,8 +257,8 @@ def fit_energy_res(self): "function": fwhm_linear.__name__, "module": fwhm_linear.__module__, "expression": fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)": qbb_err, + "Qbb_fwhm_in_keV": fit_qbb, + "Qbb_fwhm_err_in_keV": qbb_err, "parameters": m_lin.values, "uncertainties": m_lin.errors, "cov": m_lin.covariance, @@ -291,8 +291,8 @@ def fit_energy_res(self): "function": fwhm_linear.__name__, "module": fwhm_linear.__module__, "expression": fwhm_linear.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)": np.nan, + "Qbb_fwhm_in_keV": np.nan, + "Qbb_fwhm_err_in_keV": np.nan, "parameters": pars, "uncertainties": errs, "cov": cov, @@ -337,8 +337,8 @@ def fit_energy_res(self): "function": fwhm_quadratic.__name__, "module": fwhm_quadratic.__module__, "expression": fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": fit_qbb, - "Qbb_fwhm_err(keV)": qbb_err, + "Qbb_fwhm_in_keV": fit_qbb, + "Qbb_fwhm_err_in_keV": qbb_err, "parameters": m_quad.values, "uncertainties": m_quad.errors, "cov": m_quad.covariance, @@ -358,8 +358,8 @@ def fit_energy_res(self): "function": fwhm_quadratic.__name__, "module": fwhm_quadratic.__module__, "expression": fwhm_quadratic.string_func("x"), - "Qbb_fwhm(keV)": np.nan, - "Qbb_fwhm_err(keV)": np.nan, + "Qbb_fwhm_in_keV": np.nan, + "Qbb_fwhm_err_in_keV": np.nan, "parameters": pars, "uncertainties": errs, "cov": cov, From 5a623db57dfc6dcaa412bbeb01226436c72a4049 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 12 Oct 2023 22:48:59 +0200 Subject: [PATCH 13/22] corrected units to _in_keV --- src/pygama/pargen/ecal_th.py | 16 +++++++-------- src/pygama/pargen/utils.py | 39 ++++++++++++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 12 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index 138d9076f..e3f71d800 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -1230,8 +1230,8 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz ), np.nanmax( [ - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], ] ), ] @@ -1242,7 +1242,7 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz fwhm_linear.func(fwhm_slope_bins, *ecal_class.fwhm_fit_linear["parameters"]), lw=1, c="g", - label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err(keV)"]:1.2f} keV', + label=f'linear, Qbb fwhm: {ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_linear["Qbb_fwhm_err_in_keV"]:1.2f} keV', ) ax1.plot( fwhm_slope_bins, @@ -1251,13 +1251,13 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz ), lw=1, c="b", - label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err(keV)"]:1.2f} keV', + label=f'quadratic, Qbb fwhm: {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"]:1.2f} +- {ecal_class.fwhm_fit_quadratic["Qbb_fwhm_err_in_keV"]:1.2f} keV', ) ax1.plot( qbb_line_hx, [ - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_linear["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_linear["Qbb_fwhm_in_keV"], ], lw=1, c="r", @@ -1266,8 +1266,8 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz ax1.plot( qbb_line_hx, [ - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], - ecal_class.fwhm_fit_quadratic["Qbb_fwhm(keV)"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], + ecal_class.fwhm_fit_quadratic["Qbb_fwhm_in_keV"], ], lw=1, c="r", diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index e39dc255e..de9994523 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -114,10 +114,6 @@ def load_data( keys = [key.split("/")[-1] for key in keys] params = get_params(keys + list(df.keys()), params) - ids = tag_pulser(all_files, lh5_path) - df["is_not_pulser"] = ids[masks] - params.append("is_not_pulser") - for col in list(df.keys()): if col not in params: df.drop(col, inplace=True, axis=1) @@ -128,3 +124,38 @@ def load_data( df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] log.debug(f"data loaded") return df + + +def get_pulser_ids(tcm_file, channel, multiplicity_threshold): + if isinstance(channel, str): + if channel[:2] == "ch": + channel = int(channel[2:]) + else: + chan = int(channel) + else: + chan = channel + if isinstance(tcm_file, list): + mask = np.array([], dtype=bool) + for file in tcm_file: + _, file_mask = get_pulser_ids(file, chan, multiplicity_threshold) + mask = np.append(mask, file_mask) + ids = np.where(mask)[0] + else: + data = lh5.load_dfs(tcm_file, ["array_id", "array_idx"], "hardware_tcm_1") + cum_length = lh5.load_nda(tcm_file, ["cumulative_length"], "hardware_tcm_1")[ + "cumulative_length" + ] + cum_length = np.append(np.array([0]), cum_length) + n_channels = np.diff(cum_length) + evt_numbers = np.repeat(np.arange(0, len(cum_length) - 1), np.diff(cum_length)) + evt_mult = np.repeat(np.diff(cum_length), np.diff(cum_length)) + data["evt_number"] = evt_numbers + data["evt_mult"] = evt_mult + high_mult_events = np.where(n_channels > multiplicity_threshold)[0] + + ids = data.query(f"array_id=={channel} and evt_number in @high_mult_events")[ + "array_idx" + ].to_numpy() + mask = np.zeros(len(data.query(f"array_id==1104000")), dtype="bool") + mask[ids] = True + return ids, mask From 193fac5a97bfa9a2e3fcb9f6ec54552be0b7edf9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 12:54:32 +0100 Subject: [PATCH 14/22] moved aoe_calibration function to dataflow --- src/pygama/pargen/AoE_cal.py | 74 +----------------------------------- 1 file changed, 1 insertion(+), 73 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 55fec4ee3..0a5f7544b 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -1297,7 +1297,7 @@ def __init__( cal_energy_param: str, eres_func: callable, pdf=standard_aoe, - selection_string: str = "is_valid_cal&is_not_pulser", + selection_string: str = "", dt_corr: bool = False, dep_acc: float = 0.9, dep_correct: bool = False, @@ -2814,75 +2814,3 @@ def plot_classifier( plt.ylim(yrange) plt.close() return fig - - -def aoe_calibration( - files, - lh5_path: str, - cal_dicts: dict, - current_param: str, - energy_param: str, - cal_energy_param: str, - eres_func: Callable, - pdf: Callable = standard_aoe, - cut_field: str = "is_valid_cal", - dt_corr: bool = False, - dep_correct: bool = False, - dt_cut: dict = None, - high_cut_val: int = 3, - mean_func: Callable = pol1, - sigma_func: Callable = sigma_fit, - dep_acc: float = 0.9, - dt_param: str = "dt_eff", - comptBands_width: int = 20, - plot_options: dict = {}, - threshold: int = 800, -): - params = [ - current_param, - "tp_0_est", - "tp_99", - dt_param, - energy_param, - cal_energy_param, - cut_field, - ] - - aoe = cal_aoe( - cal_dicts, - cal_energy_param, - eres_func, - pdf, - f"{cut_field}&is_not_pulser", - dt_corr, - dep_acc, - dep_correct, - dt_cut, - dt_param, - high_cut_val, - mean_func, - sigma_func, - comptBands_width, - plot_options, - ) - if dt_cut is not None: - params.append(dt_cut["out_param"]) - - data = load_data( - files, lh5_path, aoe.cal_dicts, params, cal_energy_param, threshold - ) - - data["AoE_Uncorr"] = np.divide(data[current_param], data[energy_param]) - - aoe.update_cal_dicts( - { - "AoE_Uncorr": { - "expression": f"{current_param}/{energy_param}", - "parameters": {}, - } - } - ) - - aoe.calibrate(data, "AoE_Uncorr") - log.info(f"Calibrated A/E") - return cal_dicts, aoe.get_results_dict(), aoe.fill_plot_dict(data), aoe From a7ceeab864b12d39bc0921f1f5545347c6290338 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 12:58:23 +0100 Subject: [PATCH 15/22] moved top level funcs to dataflow added pulser field to plot arguments --- src/pygama/pargen/ecal_th.py | 147 +++++++++-------------------------- 1 file changed, 35 insertions(+), 112 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index e3f71d800..d048e6b24 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -48,9 +48,10 @@ def apply_cuts( hit_dict, cut_parameters=None, final_cut_field: str = "is_valid_cal", + pulser_field="is_pulser", ): if cut_parameters is not None: - cut_dict = cts.generate_cuts(data.query("is_not_pulser"), cut_parameters) + cut_dict = cts.generate_cuts(data.query(f"(~{pulser_field})"), cut_parameters) hit_dict.update( cts.cut_dict_to_hit_dict(cut_dict, final_cut_field=final_cut_field) ) @@ -61,7 +62,7 @@ def apply_cuts( else: data[final_cut_field] = np.ones(len(data), dtype=bool) - events_pqc = len(data.query(f"{final_cut_field}&is_not_pulser")) + events_pqc = len(data.query(f"{final_cut_field}&(~{pulser_field})")) log.debug(f"{events_pqc} events valid for calibration") return data, hit_dict @@ -1003,7 +1004,14 @@ def plot_2614_timemap( def plot_pulser_timemap( - ecal_class, data, figsize=[12, 8], fontsize=12, dx=0.2, time_dx=180, n_spread=3 + ecal_class, + data, + pulser_field="is_pulser", + figsize=[12, 8], + fontsize=12, + dx=0.2, + time_dx=180, + n_spread=3, ): plt.rcParams["figure.figsize"] = figsize plt.rcParams["font.size"] = fontsize @@ -1014,7 +1022,7 @@ def plot_pulser_timemap( time_dx, ) - selection = data.query(f"~is_not_pulser") + selection = data.query(pulser_field) fig = plt.figure() if len(selection) == 0: pass @@ -1047,8 +1055,8 @@ def plot_pulser_timemap( return fig -def bin_pulser_stability(ecal_class, data, time_slice=180): - selection = data.query(f"~is_not_pulser") +def bin_pulser_stability(ecal_class, data, pulser_field="is_pulser", time_slice=180): + selection = data.query(pulser_field) utime_array = data["timestamp"] select_energies = selection[ecal_class.cal_energy_param].to_numpy() @@ -1332,7 +1340,14 @@ def plot_eres_fit(ecal_class, data, erange=[200, 2700], figsize=[12, 8], fontsiz return fig -def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): +def bin_spectrum( + ecal_class, + data, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=[0, 3000], + dx=2, +): bins = np.arange(erange[0], erange[1] + dx, dx) return { "bins": pgh.get_bin_centers(bins), @@ -1340,125 +1355,33 @@ def bin_spectrum(ecal_class, data, erange=[0, 3000], dx=2): data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], + data.querydata.query(f"(~{cut_field})&(~{pulser_field})")[ + ecal_class.cal_energy_param + ], bins, )[0], "pulser_counts": np.histogram( - data.query("~is_not_pulser")[ecal_class.cal_energy_param], + data.query(pulser_field)[ecal_class.cal_energy_param], bins, )[0], } -def bin_survival_fraction(ecal_class, data, erange=[0, 3000], dx=6): +def bin_survival_fraction( + ecal_class, + data, + cut_field="is_valid_cal", + pulser_field="is_pulser", + erange=[0, 3000], + dx=6, +): counts_pass, bins_pass, _ = pgh.get_hist( data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins=np.arange(erange[0], erange[1] + dx, dx), ) counts_fail, bins_fail, _ = pgh.get_hist( - data.query("~is_valid_cal&is_not_pulser")[ecal_class.cal_energy_param], + data.query(f"(~{cut_field})&(~{pulser_field})")[ecal_class.cal_energy_param], bins=np.arange(erange[0], erange[1] + dx, dx), ) sf = 100 * (counts_pass + 10 ** (-6)) / (counts_pass + counts_fail + 10 ** (-6)) return {"bins": pgh.get_bin_centers(bins_pass), "sf": sf} - - -def energy_cal_th( - files: list[str], - energy_params: list[str], - lh5_path: str = "dsp", - hit_dict: dict = {}, - cut_parameters: dict[str, int] = {"bl_mean": 4, "bl_std": 4, "pz_std": 4}, - plot_options: dict = None, - threshold: int = 0, - p_val: float = 0, - n_events: int = None, - final_cut_field: str = "is_valid_cal", - simplex: bool = True, - guess_keV: float | None = None, - tail_weight=100, - deg: int = 1, -) -> tuple(dict, dict, dict, dict): - data = load_data( - files, - lh5_path, - hit_dict, - params=energy_params + list(cut_parameters) + ["timestamp"], - ) - - data, hit_dict = apply_cuts(data, hit_dict, cut_parameters, final_cut_field) - - results_dict = {} - plot_dict = {} - full_object_dict = {} - for energy_param in energy_params: - full_object_dict[energy_param] = calibrate_parameter( - energy_param, - f"{final_cut_field}&is_not_pulser", - plot_options, - guess_keV, - threshold, - p_val, - n_events, - simplex, - deg, - tail_weight=tail_weight, - ) - full_object_dict[energy_param].calibrate_parameter(data) - results_dict[ - full_object_dict[energy_param].cal_energy_param - ] = full_object_dict[energy_param].get_results_dict(data) - hit_dict.update(full_object_dict[energy_param].hit_dict) - if ~np.isnan(full_object_dict[energy_param].pars).all(): - plot_dict[full_object_dict[energy_param].cal_energy_param] = ( - full_object_dict[energy_param].fill_plot_dict(data).copy() - ) - - log.info(f"Finished all calibrations") - return hit_dict, results_dict, plot_dict, full_object_dict - - -def partition_energy_cal_th( - files: list[str], - energy_params: list[str], - lh5_path: str = "dsp", - hit_dict: dict = {}, - plot_options: dict = None, - threshold: int = 0, - p_val: float = 0, - n_events: int = None, - final_cut_field: str = "is_valid_cal", - simplex: bool = True, - tail_weight: int = 20, -) -> tuple(dict, dict, dict, dict): - data = load_data( - files, - lh5_path, - hit_dict, - params=energy_params + [final_cut_field] + ["timestamp"], - ) - - results_dict = {} - plot_dict = {} - full_object_dict = {} - for energy_param in energy_params: - full_object_dict[energy_param] = high_stats_fitting( - energy_param, - f"{final_cut_field}&is_not_pulser", - threshold, - p_val, - plot_options, - simplex, - tail_weight, - ) - full_object_dict[energy_param].fit_peaks(data) - results_dict[energy_param] = full_object_dict[energy_param].get_results_dict( - data - ) - if full_object_dict[energy_param].results: - plot_dict[energy_param] = ( - full_object_dict[energy_param].fill_plot_dict(data).copy() - ) - - log.info(f"Finished all calibrations") - return results_dict, plot_dict, full_object_dict From 118381736c11441b1e4b40b35ff1e20d9258c940 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 13:05:20 +0100 Subject: [PATCH 16/22] added option to pass pulser mask to event selection if not calculate itself --- src/pygama/pargen/energy_optimisation.py | 36 +++++++++++++----------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/pygama/pargen/energy_optimisation.py b/src/pygama/pargen/energy_optimisation.py index b82b39afc..b5d59278b 100644 --- a/src/pygama/pargen/energy_optimisation.py +++ b/src/pygama/pargen/energy_optimisation.py @@ -915,6 +915,7 @@ def event_selection( peak_idxs, kev_widths, cut_parameters={"bl_mean": 4, "bl_std": 4, "pz_std": 4}, + pulser_mask=None, energy_parameter="trapTmax", wf_field: str = "waveform", n_events=10000, @@ -928,23 +929,26 @@ def event_selection( sto = lh5.LH5Store() df = lh5.load_dfs(raw_files, ["daqenergy", "timestamp"], lh5_path) - pulser_props = cts.find_pulser_properties(df, energy="daqenergy") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( - df.daqenergy.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = final_mask - log.debug(f"pulser found: {pulser_props}") + if pulser_mask is None: + pulser_props = cts.find_pulser_properties(df, energy="daqenergy") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( + df.daqenergy.values > entry[0] - entry[1] + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = final_mask + log.debug(f"pulser found: {pulser_props}") + else: + log.debug("no_pulser") + ids = np.zeros(len(df.daqenergy.values), dtype=bool) + # Get events around peak using raw file values else: - log.debug("no_pulser") - ids = np.zeros(len(df.daqenergy.values), dtype=bool) - # Get events around peak using raw file values + ids = pulser_mask initial_mask = (df.daqenergy.values > threshold) & (~ids) rough_energy = df.daqenergy.values[initial_mask] initial_idxs = np.where(initial_mask)[0] From 91e7b601bc94165140459daa24bba6c988ea73c6 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 13:08:17 +0100 Subject: [PATCH 17/22] added pulser mask to load data, modified to have the data loading external to main function --- src/pygama/pargen/extract_tau.py | 40 +++++++++++++++++--------------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 2c096161e..11f76fa81 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -31,6 +31,7 @@ def load_data( raw_file: list[str], lh5_path: str, + pulser_mask=None, n_events: int = 10000, threshold: int = 5000, wf_field: str = "waveform", @@ -38,24 +39,27 @@ def load_data( sto = lh5.LH5Store() df = lh5.load_dfs(raw_file, ["daqenergy", "timestamp"], lh5_path) - pulser_props = cts.find_pulser_properties(df, energy="daqenergy") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( - df.daqenergy.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") + if pulser_mask is None: + pulser_props = cts.find_pulser_properties(df, energy="daqenergy") + if len(pulser_props) > 0: + final_mask = None + for entry in pulser_props: + e_cut = (df.daqenergy.values < entry[0] + entry[1]) & ( + df.daqenergy.values > entry[0] - entry[1] + ) + if final_mask is None: + final_mask = e_cut + else: + final_mask = final_mask | e_cut + ids = final_mask + log.debug(f"pulser found: {pulser_props}") + else: + log.debug("no_pulser") + ids = np.zeros(len(df.daqenergy.values), dtype=bool) else: - log.debug("no_pulser") - ids = np.ones(len(df.daqenergy.values), dtype=bool) + ids = pulser_mask - cuts = np.where((df.daqenergy.values > threshold) & (ids))[0] + cuts = np.where((df.daqenergy.values > threshold) & (~ids))[0] waveforms = sto.read_object( f"{lh5_path}/{wf_field}", raw_file, idx=cuts, n_rows=n_events @@ -216,9 +220,8 @@ def get_dpz_consts(grid_out, opt_dict): def dsp_preprocess_decay_const( - raw_files: list[str], + tb_data, dsp_config: dict, - lh5_path: str, double_pz: bool = False, display: int = 0, opt_dict: dict = None, @@ -245,7 +248,6 @@ def dsp_preprocess_decay_const( tau_dict : dict """ - tb_data = load_data(raw_files, lh5_path, wf_field=wf_field, threshold=threshold) tb_out = opt.run_one_dsp(tb_data, dsp_config) log.debug("Processed Data") cut_dict = cts.generate_cuts(tb_out, parameters=cut_parameters) From d3753aee6a58f98f92afbf226551d055d7856a1f Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 13:19:26 +0100 Subject: [PATCH 18/22] switched fit escale and ecal to iminuit and add errors as outputs --- src/pygama/pargen/energy_cal.py | 46 +++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index e7fdb21d8..a82d13286 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -564,7 +564,7 @@ def staged_fit( m.values["htail"] < 0.01 or m.values["htail"] < 2 * m.errors["htail"] or np.isnan(m.values).any() - ): # or + ): # switch to stat test func_i = pgf.extended_gauss_step_pdf gof_func_i = pgf.gauss_step_pdf pars_i, errs_i, cov_i, func_i, gof_func_i, mask, valid_fit = staged_fit( @@ -787,7 +787,6 @@ def hpge_fit_E_peaks( f"hpge_fit_E_peaks: cov estimation failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None elif valid_fit == False: log.debug( @@ -802,21 +801,18 @@ def hpge_fit_E_peaks( f"hpge_fit_E_peaks: failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, parameter error too low" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None elif np.abs(total_events[0] - np.sum(hist)) / np.sum(hist) > 0.1: log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak} at loc {mode_guesses[i_peak]:g}, total_events is outside limit" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None elif p_val < allowed_p_val or np.isnan(p_val): log.debug( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, p-value too low: {p_val}" ) valid_pks[i_peak] = False - # pars_i, errs_i, cov_i, p_val = None, None, None, None else: valid_pks[i_peak] = True @@ -825,7 +821,7 @@ def hpge_fit_E_peaks( f"hpge_fit_E_peaks: fit failed for i_peak={i_peak}, unknown error" ) valid_pks[i_peak] = False - pars_i, errs_i, cov_i = return_nans(func_i) # None, None, None, None + pars_i, errs_i, cov_i = return_nans(func_i) p_val = 0 # get binning @@ -869,9 +865,20 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): scale, scale_cov = pgu.fit_simple_scaling(Es_keV, mus, var=mu_vars) pars = np.array([scale, 0]) cov = np.array([[scale_cov, 0], [0, 0]]) + errs = np.diag(np.sqrt(cov)) else: - pars, cov = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars), cov=True) - return pars, cov + poly_pars = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars)) + c = cost.LeastSquares( + Es_keV, mus, np.sqrt(mu_vars), lambda x, *pars: pgf.poly(x, pars) + ) + m = Minuit(c, *poly_pars) + m.simplex() + m.migrad() + m.hesse() + pars = m.values + cov = m.covariance + errs = m.errors + return pars, errs, cov def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): @@ -889,7 +896,8 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): Es_keV : array energies to fit to, in keV E_scale_pars : array - ??? + Parameters from the escale fit (keV to ADC) used for calculating + uncertainties deg : int degree for energy scale fit. deg=0 corresponds to a simple scaling mu = scale * E. Otherwise deg follows the definition in np.polyfit @@ -906,13 +914,24 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): scale, scale_cov = pgu.fit_simple_scaling(mus, Es_keV, var=E_vars) pars = np.array([scale, 0]) cov = np.array([[scale_cov, 0], [0, 0]]) + errs = np.diag(np.sqrt(cov)) else: dmudEs = np.zeros(len(mus)) for n in range(len(E_scale_pars) - 1): dmudEs += E_scale_pars[n] * mus ** (len(E_scale_pars) - 2 - n) E_weights = dmudEs * mu_vars - pars, cov = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights, cov=True) - return pars, cov + poly_pars = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights) + c = cost.LeastSquares( + mus, Es_keV, E_weights, lambda x, *pars: pgf.poly(x, pars) + ) + m = Minuit(c, *poly_pars) + m.simplex() + m.migrad() + m.hesse() + pars = m.values + errs = m.errors + cov = m.covariance + return pars, errs, cov def hpge_E_calibration( @@ -1188,15 +1207,16 @@ def hpge_E_calibration( mu_vars = np.asarray(mu_vars) ** 2 try: - pars, cov = hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=deg) + pars, errs, cov = hpge_fit_E_scale(mus, mu_vars, fitted_peaks_keV, deg=deg) results["pk_cal_pars"] = pars + results["pk_cal_errs"] = errs results["pk_cal_cov"] = cov except ValueError: log.error("Failed to fit enough peaks to get accurate calibration") return None, None, results # Invert the E scale fit to get a calibration function - pars, cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=deg) + pars, errs, cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=deg) # Finally, calculate fwhms in keV uncal_fwhms = [ From f6fea9eddfde684567f0957e39ceff25179c71ab Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 22:11:53 +0100 Subject: [PATCH 19/22] removed tag_pulser and cut import as had circular dependencies, renamed get_pulser_ids to get_tcm_pulser_ids, added ability to return mask for load_data --- src/pygama/pargen/utils.py | 39 ++++++++++---------------------------- 1 file changed, 10 insertions(+), 29 deletions(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index de9994523..a33661fa5 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -8,8 +8,6 @@ import pandas as pd from iminuit import Minuit, cost, util -import pygama.pargen.cuts as cts - log = logging.getLogger(__name__) @@ -26,27 +24,6 @@ def return_nans(input): return m.values, m.errors, np.full((len(m.values), len(m.values)), np.nan) -def tag_pulser(files, lh5_path): - pulser_df = lh5.load_dfs(files, ["timestamp", "trapTmax"], lh5_path) - pulser_props = cts.find_pulser_properties(pulser_df, energy="trapTmax") - if len(pulser_props) > 0: - final_mask = None - for entry in pulser_props: - e_cut = (pulser_df.trapTmax.values < entry[0] + entry[1]) & ( - pulser_df.trapTmax.values > entry[0] - entry[1] - ) - if final_mask is None: - final_mask = e_cut - else: - final_mask = final_mask | e_cut - ids = ~(final_mask) - log.debug(f"pulser found: {pulser_props}") - else: - ids = np.ones(len(pulser_df), dtype=bool) - log.debug(f"no pulser found") - return ids - - def get_params(file_params, param_list): out_params = [] if isinstance(file_params, dict): @@ -67,6 +44,7 @@ def load_data( params=["cuspEmax"], cal_energy_param: str = "cuspEmax_ctc_cal", threshold=None, + return_selection_mask=False, ) -> tuple(np.array, np.array, np.array, np.array): """ Loads in the A/E parameters needed and applies calibration constants to energy @@ -84,8 +62,8 @@ def load_data( file_df = table.eval(cal_dict[tstamp]).get_dataframe() else: file_df = table.eval(cal_dict).get_dataframe() - file_df["timestamp"] = np.full(len(file_df), tstamp, dtype=object) - params.append("timestamp") + file_df["run_timestamp"] = np.full(len(file_df), tstamp, dtype=object) + params.append("run_timestamp") if threshold is not None: mask = file_df[cal_energy_param] < threshold @@ -123,13 +101,16 @@ def load_data( if param not in df: df[param] = lh5.load_nda(all_files, [param], lh5_path)[param][masks] log.debug(f"data loaded") - return df + if return_selection_mask: + return df, masks + else: + return df -def get_pulser_ids(tcm_file, channel, multiplicity_threshold): +def get_tcm_pulser_ids(tcm_file, channel, multiplicity_threshold): if isinstance(channel, str): if channel[:2] == "ch": - channel = int(channel[2:]) + chan = int(channel[2:]) else: chan = int(channel) else: @@ -137,7 +118,7 @@ def get_pulser_ids(tcm_file, channel, multiplicity_threshold): if isinstance(tcm_file, list): mask = np.array([], dtype=bool) for file in tcm_file: - _, file_mask = get_pulser_ids(file, chan, multiplicity_threshold) + _, file_mask = get_tcm_pulser_ids(file, chan, multiplicity_threshold) mask = np.append(mask, file_mask) ids = np.where(mask)[0] else: From 54af5356219125860520ee2427aec7a1a7f1d7d9 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 22:12:50 +0100 Subject: [PATCH 20/22] added default arguments, changed timestamp to run_timestamp to differentiate from normal timestamp --- src/pygama/pargen/AoE_cal.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/pygama/pargen/AoE_cal.py b/src/pygama/pargen/AoE_cal.py index 0a5f7544b..4db0cfbaa 100644 --- a/src/pygama/pargen/AoE_cal.py +++ b/src/pygama/pargen/AoE_cal.py @@ -1293,9 +1293,9 @@ def compton_sf_sweep( class cal_aoe: def __init__( self, - cal_dicts: dict, - cal_energy_param: str, - eres_func: callable, + cal_dicts: dict = {}, + cal_energy_param: str = "cuspEmax_ctc_cal", + eres_func: callable = lambda x: 1, pdf=standard_aoe, selection_string: str = "", dt_corr: bool = False, @@ -1347,17 +1347,17 @@ def update_cal_dicts(self, update_dict): def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): log.info("Starting A/E time correction") self.timecorr_df = pd.DataFrame( - columns=["timestamp", "mean", "mean_err", "res", "res_err"] + columns=["run_timestamp", "mean", "mean_err", "res", "res_err"] ) try: - if "timestamp" in df: - tstamps = sorted(np.unique(df["timestamp"])) + if "run_timestamp" in df: + tstamps = sorted(np.unique(df["run_timestamp"])) means = [] errors = [] reses = [] res_errs = [] final_tstamps = [] - for tstamp, time_df in df.groupby("timestamp", sort=True): + for tstamp, time_df in df.groupby("run_timestamp", sort=True): try: pars, errs, cov = unbinned_aoe_fit( time_df.query( @@ -1372,7 +1372,7 @@ def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): pd.DataFrame( [ { - "timestamp": tstamp, + "run_timestamp": tstamp, "mean": pars["mu"], "mean_err": errs["mu"], "res": pars["sigma"] / pars["mu"], @@ -1393,7 +1393,7 @@ def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): pd.DataFrame( [ { - "timestamp": tstamp, + "run_timestamp": tstamp, "mean": np.nan, "mean_err": np.nan, "res": np.nan, @@ -1403,7 +1403,7 @@ def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): ), ] ) - self.timecorr_df.set_index("timestamp", inplace=True) + self.timecorr_df.set_index("run_timestamp", inplace=True) time_dict = fit_time_means( np.array(self.timecorr_df.index), np.array(self.timecorr_df["mean"]), @@ -1411,7 +1411,7 @@ def aoe_timecorr(self, df, aoe_param, output_name="AoE_Timecorr", display=0): ) df[output_name] = df[aoe_param] / np.array( - [time_dict[tstamp] for tstamp in df["timestamp"]] + [time_dict[tstamp] for tstamp in df["run_timestamp"]] ) self.update_cal_dicts( { From 978399008e24c7dce2301399fbc5da93330f6043 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Tue, 31 Oct 2023 22:14:41 +0100 Subject: [PATCH 21/22] cleaned up imports, removing * imports and removing unnecessary arguments --- src/pygama/pargen/ecal_th.py | 7 ++++--- src/pygama/pargen/energy_cal.py | 16 ++++++++-------- src/pygama/pargen/extract_tau.py | 1 - 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/src/pygama/pargen/ecal_th.py b/src/pygama/pargen/ecal_th.py index d048e6b24..7d21cd91d 100644 --- a/src/pygama/pargen/ecal_th.py +++ b/src/pygama/pargen/ecal_th.py @@ -20,6 +20,7 @@ import numpy as np import pandas as pd import scipy.stats +from iminuit import Minuit, cost from matplotlib.backends.backend_pdf import PdfPages from matplotlib.colors import LogNorm from scipy.optimize import curve_fit @@ -28,7 +29,7 @@ import pygama.math.peak_fitting as pgf import pygama.pargen.cuts as cts import pygama.pargen.energy_cal as cal -from pygama.pargen.utils import * +from pygama.pargen.utils import load_data, return_nans log = logging.getLogger(__name__) @@ -164,7 +165,7 @@ class calibrate_parameter: def __init__( self, energy_param, - selection_string="is_usable", + selection_string="", plot_options: dict = None, guess_keV: float | None = None, threshold: int = 0, @@ -1355,7 +1356,7 @@ def bin_spectrum( data.query(ecal_class.selection_string)[ecal_class.cal_energy_param], bins )[0], "cut_counts": np.histogram( - data.querydata.query(f"(~{cut_field})&(~{pulser_field})")[ + data.query(f"(~{cut_field})&(~{pulser_field})")[ ecal_class.cal_energy_param ], bins, diff --git a/src/pygama/pargen/energy_cal.py b/src/pygama/pargen/energy_cal.py index a82d13286..a55df8a92 100644 --- a/src/pygama/pargen/energy_cal.py +++ b/src/pygama/pargen/energy_cal.py @@ -18,7 +18,7 @@ import pygama.math.histogram as pgh import pygama.math.peak_fitting as pgf import pygama.math.utils as pgu -from pygama.pargen.utils import * +from pygama.pargen.utils import return_nans log = logging.getLogger(__name__) @@ -838,6 +838,10 @@ def hpge_fit_E_peaks( return (pars, errors, covs, binws, ranges, p_vals, valid_pks, out_funcs) +def poly_wrapper(x, *pars): + return pgf.poly(x, pars) + + def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): """Find best fit of poly(E) = mus +/- sqrt(mu_vars) Compare to hpge_fit_E_cal_func which fits for E = poly(mu) @@ -868,9 +872,7 @@ def hpge_fit_E_scale(mus, mu_vars, Es_keV, deg=0): errs = np.diag(np.sqrt(cov)) else: poly_pars = np.polyfit(Es_keV, mus, deg=deg, w=1 / np.sqrt(mu_vars)) - c = cost.LeastSquares( - Es_keV, mus, np.sqrt(mu_vars), lambda x, *pars: pgf.poly(x, pars) - ) + c = cost.LeastSquares(Es_keV, mus, np.sqrt(mu_vars), poly_wrapper) m = Minuit(c, *poly_pars) m.simplex() m.migrad() @@ -921,9 +923,7 @@ def hpge_fit_E_cal_func(mus, mu_vars, Es_keV, E_scale_pars, deg=0): dmudEs += E_scale_pars[n] * mus ** (len(E_scale_pars) - 2 - n) E_weights = dmudEs * mu_vars poly_pars = np.polyfit(mus, Es_keV, deg=deg, w=1 / E_weights) - c = cost.LeastSquares( - mus, Es_keV, E_weights, lambda x, *pars: pgf.poly(x, pars) - ) + c = cost.LeastSquares(mus, Es_keV, E_weights, poly_wrapper) m = Minuit(c, *poly_pars) m.simplex() m.migrad() @@ -1213,7 +1213,7 @@ def hpge_E_calibration( results["pk_cal_cov"] = cov except ValueError: log.error("Failed to fit enough peaks to get accurate calibration") - return None, None, results + return None, None, None, results # Invert the E scale fit to get a calibration function pars, errs, cov = hpge_fit_E_cal_func(mus, mu_vars, fitted_peaks_keV, pars, deg=deg) diff --git a/src/pygama/pargen/extract_tau.py b/src/pygama/pargen/extract_tau.py index 11f76fa81..72e357fd7 100644 --- a/src/pygama/pargen/extract_tau.py +++ b/src/pygama/pargen/extract_tau.py @@ -225,7 +225,6 @@ def dsp_preprocess_decay_const( double_pz: bool = False, display: int = 0, opt_dict: dict = None, - threshold: int = 5000, wf_field: str = "waveform", wf_plot: str = "wf_pz", norm_param: str = "pz_mean", From 894e60bc4e868ce7cab99eb0f6a869ed77a9e0c4 Mon Sep 17 00:00:00 2001 From: ggmarshall Date: Thu, 2 Nov 2023 15:06:42 +0100 Subject: [PATCH 22/22] bugfix for tcm pulser where channel was incorrectly hardcoded --- src/pygama/pargen/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pygama/pargen/utils.py b/src/pygama/pargen/utils.py index a33661fa5..e6c9f3c75 100644 --- a/src/pygama/pargen/utils.py +++ b/src/pygama/pargen/utils.py @@ -137,6 +137,6 @@ def get_tcm_pulser_ids(tcm_file, channel, multiplicity_threshold): ids = data.query(f"array_id=={channel} and evt_number in @high_mult_events")[ "array_idx" ].to_numpy() - mask = np.zeros(len(data.query(f"array_id==1104000")), dtype="bool") + mask = np.zeros(len(data.query(f"array_id=={channel}")), dtype="bool") mask[ids] = True return ids, mask