diff --git a/ReleaseNotes/release_notes.md b/ReleaseNotes/release_notes.md index 85b09d6d..aff26078 100644 --- a/ReleaseNotes/release_notes.md +++ b/ReleaseNotes/release_notes.md @@ -1,5 +1,9 @@ ## New features - + + 1.improve all toys machinery: toys, jackknife, bootstrapping and significance + 1. extend all tests for toys + + ## Backward incompatible ## Bug fixes diff --git a/ostap/fitting/tests/test_fitting_toys.py b/ostap/fitting/tests/test_fitting_toys.py index 6b12c6be..5ae572fd 100755 --- a/ostap/fitting/tests/test_fitting_toys.py +++ b/ostap/fitting/tests/test_fitting_toys.py @@ -16,14 +16,18 @@ __author__ = "Ostap developers" __all__ = () ## nothing to import # ============================================================================= -from ostap.core.pyrouts import hID +from ostap.core.pyrouts import hID, VE import ostap.fitting.models as Models import ostap.fitting.toys as Toys import ostap.histos.histos -from ostap.utils.timing import timing +from ostap.utils.timing import timing +from ostap.utils.memory import memory from ostap.plotting.canvas import use_canvas from ostap.utils.utils import wait from ostap.fitting.toys import pull_var +from ostap.math.base import num_range +from ostap.utils.progress_bar import progress_bar +import ostap.logger.table as T import ROOT, time, random # ============================================================================= # logging @@ -34,16 +38,36 @@ else : logger = getLogger ( __name__ ) # ============================================================================= -nominal_mean = 0.4 -nominal_sigma = 0.1 +nominal_mean = 0.4 +nominal_sigma = 0.1 +nominal_S = 1000 +nominal_B = 100 +nominal_F = float ( nominal_S ) / ( nominal_S + nominal_B ) -mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 ) -gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass ) -fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass ) +mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 ) +gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass ) +fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass ) gen_gauss.mean = nominal_mean gen_gauss.sigma = nominal_sigma -model = Models.Fit1D ( signal = gen_gauss , background = 'flat' ) +model = Models.Fit1D ( signal = gen_gauss , background = 'flat' ) +model_NE = Models.Fit1D ( signal = gen_gauss , background = 'flat' , extended = False ) +model.S = nominal_S +model.B = nominal_B +model_NE.F = nominal_F + +fit_config = { 'silent' : True , 'refit' : 5 } + +def reset_funcs () : + + gen_gauss.mean = nominal_mean + gen_gauss.sigma = nominal_sigma + fit_gauss.mean = nominal_mean + fit_gauss.sigma = nominal_sigma + model.S = nominal_S + model.B = nominal_B + +toy_results = {} # ============================================================================== ## Perform toy-study for possible fit bias and correct uncertainty evaluation @@ -54,7 +78,7 @@ # - fill distributions for fit results # - fill distribution of pulls def test_toys ( ) : - """Perform toys-study for possible fit bias and correct uncertainty evaluation + """ Perform toys-study for possible fit bias and correct uncertainty evaluation - generate `nToys` pseudoexperiments with some PDF `pdf` - fit teach experiment with the same PDF - store fit results @@ -67,14 +91,19 @@ def test_toys ( ) : more_vars = { 'pull:mean_GG' : pull_var ( 'mean_GG' , nominal_mean ) , 'pull:sigma_GG' : pull_var ( 'sigma_GG' , nominal_sigma ) } +\ + ## reset all functions + reset_funcs () + + pdf = model with timing ( 'Toys analysis' , logger = logger ) : results , stats = Toys.make_toys ( - pdf = gen_gauss , + pdf = pdf , nToys = 1000 , data = [ mass ] , - gen_config = { 'nEvents' : 200 , 'sample' : True } , - fit_config = { 'silent' : True , 'refit' : 5 } , + gen_config = { 'nEvents' : 1100 , 'sample' : True } , + fit_config = fit_config , init_pars = { 'mean_GG' : nominal_mean , 'sigma_GG' : nominal_sigma } , more_vars = more_vars , @@ -91,6 +120,8 @@ def test_toys ( ) : for h in ( h_mean , h_sigma ) : with use_canvas ( 'test_toys %s' % h.title , wait = 1 ) : h.draw() + + toy_results [ 'test_toys' ] = results , stats # ============================================================================= ## Perform toy-study for possible fit bias and correct uncertainty evaluation @@ -111,6 +142,9 @@ def test_toys2 ( ) : more_vars = { 'pull:mean_FG' : pull_var ( 'mean_FG' , nominal_mean ) , 'pull:sigma_FG' : pull_var ( 'sigma_FG', nominal_sigma ) } + ## reset all functions + reset_funcs () + with timing ( 'Toys2 analysis' , logger = logger ) : results , stats = Toys.make_toys2 ( gen_pdf = gen_gauss , @@ -118,7 +152,7 @@ def test_toys2 ( ) : nToys = 1000 , data = [ mass ] , gen_config = { 'nEvents' : 200 , 'sample' : True } , - fit_config = { 'silent' : True } , + fit_config = fit_config , gen_pars = { 'mean_GG' : nominal_mean , 'sigma_GG' : nominal_sigma } , fit_pars = { 'mean_FG' : nominal_mean , 'sigma_FG' : nominal_sigma } , more_vars = more_vars , @@ -137,49 +171,197 @@ def test_toys2 ( ) : for h in ( h_mean , h_sigma ) : with use_canvas ( 'test_toys2 %s' % h.title , wait = 1 ) : h.draw() + toy_results [ 'test_toys2' ] = results , stats # ============================================================================== -## Perform toy-study for Jackknife -def test_jackknife ( ) : - """Perform toys-study for Jackknife +## Perform toy-study for Jackknife (non-extended model) +def test_jackknife_NE1 ( ) : + """ Perform toys-study for Jackknife (non-extended model) """ - logger = getLogger ( 'test_parallel_jackknife' ) + logger = getLogger ( 'test_parallel_jackknife_NE1' ) + logger.info ( 'Jackknife analysis for non-extended model' ) + + pdf = model_NE - with timing ( 'Jackknife analysis' , logger = logger ) : - model.S = 1000 - model.B = 100 + with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) : - data = model.generate ( 1100 ) + ## reset all functions + reset_funcs () - Toys.make_jackknife ( pdf = model , - data = data , - fit_config = { 'silent' : True } , - progress = True , - silent = True ) - + data = pdf.generate ( nominal_S ) + with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + ## reset all functions + reset_funcs () + + results, stats = Toys.make_jackknife ( pdf = pdf , + data = data , + fit_config = fit_config , + progress = True , + silent = True ) + + toy_results [ 'test_jackknife_NE1' ] = results , stats + # ============================================================================== -## Perform toy-study for Bootstrap -def test_bootstrap ( ) : - """Perform toys-study for Bootstrap +## Perform toy-study for Jackknife (non-extended model) +def test_jackknife_NE2 ( ) : + """ Perform toys-study for Jackknife (non-extended model) """ - logger = getLogger ( 'test_parallel_bootstrap' ) - - with timing ( 'Bootstrap analysis' , logger = logger ) : + logger = getLogger ( 'test_parallel_jackknife_NE2' ) + logger.info ( 'Jackknife analysis for non-extended model' ) + + pdf = fit_gauss + + with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) : + + ## reset all functions + reset_funcs () + + data = pdf.generate ( nominal_S ) + with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + ## reset all functions + reset_funcs () + + results, stats = Toys.make_jackknife ( pdf = pdf , + data = data , + fit_config = fit_config , + progress = True , + silent = True ) + + toy_results [ 'test_jackknife_NE2' ] = results , stats + +# ============================================================================== +## Perform toy-study for Jackknife (extended model) +def test_jackknife_EXT ( ) : + """ Perform toys-study for Jackknife (extended model) + """ - model.S = 1000 - model.B = 100 + logger = getLogger ( 'test_jackknife_EXT' ) + logger.info ( 'Jackknife analysis for extended model' ) + + pdf = model + + with timing ( 'Jackknife analysis (extended)' , logger = logger ) : - data = model.generate ( 1100 ) + ## reset all functions + reset_funcs () - Toys.make_bootstrap ( pdf = model , - size = 100 , - data = data , - fit_config = { 'silent' : True } , - progress = True , - silent = True ) + data = pdf.generate ( nominal_S ) + with use_canvas ( "Jackknife analysis (extended)" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + ## reset all functions + reset_funcs () + + results, stats = Toys.make_jackknife ( pdf = pdf , + data = data , + fit_config = fit_config , + progress = True , + silent = True ) + + toy_results [ 'test_jackknife_EXT' ] = results , stats + +# ============================================================================== +## Perform toy-study for Bootstrap (non-extended) +def test_bootstrap_NE1 ( ) : + """ Perform toys-study for Bootstrap (non-extended) + """ + + logger = getLogger ( 'test_bootstrap_NE1' ) + logger.info ( 'Bootstrap analysis for non-extended model' ) + + pdf = model_NE + with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) : + + ## reset all functions + reset_funcs () + + data = pdf.generate ( nominal_S + nominal_B ) + with use_canvas ( "Booststrap analysis" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + ## reset all functions + reset_funcs () + results, stats = Toys.make_bootstrap ( pdf = pdf , + size = 400 , + data = data , + fit_config = fit_config , + extended = False , + progress = True , + silent = True ) + + toy_results [ 'test_bootstrap_NE1' ] = results , stats + +# ============================================================================== +## Perform toy-study for Bootstrap (non-extended) +def test_bootstrap_NE2 ( ) : + """ Perform toys-study for Bootstrap (non-extended) + """ + + logger = getLogger ( 'test_parallel_bootstrap_NE2' ) + logger.info ( 'Bootstrap analysis for non-extended model' ) + + pdf = fit_gauss + with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) : + + ## reset all functions + reset_funcs () + + data = pdf.generate ( nominal_S + nominal_B ) + with use_canvas ( "Booststrap analysis" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + ## reset all functions + reset_funcs () + + results, stats = Toys.make_bootstrap ( pdf = pdf , + size = 400 , + data = data , + fit_config = fit_config , + extended = False , + progress = True , + silent = True ) + + toy_results [ 'test_bootstrap_NE2' ] = results , stats + +# ============================================================================== +## Perform toy-study for Bootstrap (extended) +def test_bootstrap_EXT ( ) : + """ Perform toys-study for Bootstrap (extended) + """ + + logger = getLogger ( 'test_parallel_bootstrap_EXT' ) + logger.info ( 'Bootstrap analysis for extended model' ) + + pdf = model + with timing ( 'Bootstrap analysis (extended)' , logger = logger ) : + + ## reset all functions + reset_funcs () + + data = pdf.generate ( nominal_S + nominal_B ) + with use_canvas ( "(Extended) Booststrap analysis" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + ## reset all functions + reset_funcs () + + results, stats = Toys.make_bootstrap ( pdf = pdf , + size = 400 , + data = data , + fit_config = fit_config , + extended = True , + progress = True , + silent = True ) + + toy_results [ 'test_bootstrap_EXT' ] = results , stats + # ============================================================================= ## Perform toy-study for significance of the signal # - generate nToys pseudoexperiments using background-only hypothesis @@ -187,7 +369,7 @@ def test_bootstrap ( ) : # - store fit results # - fill distributions for fit results def test_significance ( ) : - """Perform toy-study for significance of the signal + """ Perform toy-study for significance of the signal - generate `nToys` pseudoexperiments using background-only hypothesis - fit each experiment with signal+background hypothesis - store fit results @@ -196,6 +378,9 @@ def test_significance ( ) : logger = getLogger ( 'test_significance' ) + ## reset all functions + reset_funcs () + with timing ( 'Significance analysis' , logger = logger ) : ## only background hypothesis @@ -209,35 +394,120 @@ def test_significance ( ) : model.background.tau.fix ( 0 ) results , stats = Toys.make_toys2 ( - gen_pdf = bkg_only , - fit_pdf = model , - nToys = 1000 , - data = [ mass ] , - gen_config = { 'nEvents' : 100 , 'sample' : True } , - fit_config = { 'silent' : True } , + gen_pdf = bkg_only , + fit_pdf = model , + nToys = 2000 , + data = [ mass ] , + gen_config = { 'nEvents' : 110 , 'sample' : True } , + fit_config = fit_config , gen_pars = { 'tau_BKG' : 0. } , ## initial values for generation - fit_pars = { 'B' : 100 , - 'S' : 10 , - 'phi0_Bkg_S': 0.0 } , ## initial fit values for parameters + fit_pars = { 'B' : 100 , + 'S' : 10 , + 'phi0_Bkg_S' : 0.0 } , ## initial fit values for parameters silent = True , progress = True ) - - h_S = ROOT.TH1F ( hID() , '#S' , 60 , 0 , 60 ) - - for r in results ['S' ] : h_S .Fill ( r ) + + + # ========================================================================= + ## yields + h_Y = ROOT.TH1F ( hID() , '#S' , 140 , 0 , 70 ) + for r in results [ 'S' ] : h_Y .Fill ( r ) + + ## get p-value and significance histograms: + h_P , h_S = h_Y.significance ( cut_low = True ) + + h_S.red () + h_P.blue () + + minv = 1.e+100 + for i,_,y in h_P.items() : + yv = y.value() + if 0 < yv and yv < minv : minv = yv + minv , _ = num_range ( 0.75 * minv ) + h_P.SetMinimum ( minv ) + h_S.SetMinimum ( 0 ) - for h in ( h_S , ) : - with use_canvas ( 'test_significance %s' % h.title , wait = 1 ) : h.draw() - + with use_canvas ( 'test_significance: yields' , wait = 1 ) : h_Y.draw ( ) + with use_canvas ( 'test_significance: p-value' , wait = 1 ) : h_P.draw ( logy = True ) + with use_canvas ( 'test_significance: significance' , wait = 3 ) : h_S.draw () + + toy_results [ 'test_significance' ] = results , stats +# ============================================================================= +## Save resuls of toys to DBASE +def test_db () : + """ Save resuls of toys to DBASE + """ + logger = getLogger ( 'test_db' ) + logger.info ( 'Saving all toys results into DBASE' ) + import ostap.io.zipshelve as DBASE + from ostap.utils.timing import timing + with timing( 'Save everything to DBASE', logger ), DBASE.tmpdb() as db : + for key in progress_bar ( toy_results ) : + r , s = toy_results [ key ] + key1 = '%s:results' % key + key2 = '%s:stats' % key + db [ key1 ] = r + db [ key2 ] = s + db.ls() + while toy_results : toy_results.popitem() +# ============================================================================= + + # ============================================================================= if '__main__' == __name__ : + + """ + with memory ( 'test_toys' , logger = logger ) as mtt : test_toys () + with memory ( 'test_toys2' , logger = logger ) as mt2 : test_toys () + with memory ( 'test_jackknife_NE1' , logger = logger ) as mj1 : test_jackknife_NE1 () + with memory ( 'test_jackknife_NE2' , logger = logger ) as mj2 : test_jackknife_NE2 () + with memory ( 'test_jackknife_EXT' , logger = logger ) as mje : test_jackknife_EXT () + with memory ( 'test_bootstrap_NE1' , logger = logger ) as mb1 : test_bootstrap_NE1 () + with memory ( 'test_bootstrap_NE2' , logger = logger ) as mb2 : test_bootstrap_NE2 () + with memory ( 'test_bootstrap_EXT' , logger = logger ) as mbe : test_bootstrap_EXT () + """ + + with memory ( 'test_significance' , logger = logger ) as mts : test_significance () + with memory ( 'test_db' , logger = logger ) as mdb : test_db () + + rows = [ ( 'Test' , 'Memory [MB]' ) ] + + """ + row = 'Toys' , '%.1f' % mtt.delta + rows.append ( row ) + + row = 'Toys2' , '%.1f' % mt2.delta + rows.append ( row ) + + row = 'Jackknife_NE1' , '%.1f' % mj1.delta + rows.append ( row ) + + row = 'Jackknife_NE2' , '%.1f' % mj2.delta + rows.append ( row ) + + row = 'Jackknife_EXT' , '%.1f' % mje.delta + rows.append ( row ) + + row = 'Bootstrap_NE1' , '%.1f' % mb1.delta + rows.append ( row ) + + row = 'Bootstrap_NE2' , '%.1f' % mb2.delta + rows.append ( row ) + + row = 'Bootstrap_EXT' , '%.1f' % mbe.delta + rows.append ( row ) + """ + + row = 'Significance' , '%.1f' % mts.delta + rows.append ( row ) + + row = 'test_db' , '%.1f' % mdb.delta + rows.append ( row ) - test_toys ( ) - test_toys2 ( ) - test_jackknife ( ) - test_bootstrap ( ) - test_significance ( ) + title = 'Memory usage for various toys' + table = T.table ( rows , title = title , prefix = '# ' , alignment = 'lc' ) + logger.info ( '%s:\n%s' % ( title , table ) ) # ============================================================================= diff --git a/ostap/fitting/toys.py b/ostap/fitting/toys.py index d0401dec..07c52cd1 100644 --- a/ostap/fitting/toys.py +++ b/ostap/fitting/toys.py @@ -26,6 +26,8 @@ from builtins import range from ostap.core.ostap_types import string_types, integer_types from ostap.core.core import VE, SE +from ostap.logger.pretty import pretty_ve, pretty_float, fmt_pretty_float +from ostap.logger.colorized import attention import ROOT # ============================================================================= # logging @@ -34,11 +36,36 @@ if '__main__' == __name__ : logger = getLogger( 'ostap.fitting.toys' ) else : logger = getLogger( __name__ ) # ============================================================================= -logger.debug ( 'Utilities to run fitting toys, Jackknife, bootstrap, ... ') +logger.debug ( 'Utilities to run toys, Jackknife, bootstrap, ... ') # ============================================================================== +extmodes = { + ROOT.RooAbsPdf.MustBeExtended : 'MustBeExtended' , + ROOT.RooAbsPdf.CanBeExtended : 'CanBeExtended' , + ROOT.RooAbsPdf.CanNotBeExtended : 'CanBeExtended' , +} +# ============================================================================== +## Check the consistency of the PDF and `extended` flag +def check_extended ( pdf , extended ) : + """ Check the consistency of the PDF and `extended` flag + """ + if isinstance ( pdf , ROOT.RooAbsPdf ) : pass + else : pdf = pdf.pdf + ## + extmode = pdf.extendMode() + ## + if pdf.canBeExtended () : return True + elif pdf.mustBeExtended () and extended : return True + elif ROOT.RooAbsPdf.MustBeExtended == extmode and extended : return True + elif ROOT.RooAbsPdf.CanNotBeExtended == extmode and not extended : return True + elif ROOT.RooAbsPdf.CanBeExtended == extmode and extended : return True + ## + logger.warning ( "Mismatch of `ExtendMode`:`%s` and `extended`:%s" % ( extmodes [ extmode ] , extended ) ) + ## + return False +# ============================================================================= ## Technical transformation to the dictionary : { 'name' : float_value } def vars_transform ( vars ) : - """Technical transformation to the dictionary : `{ 'name' : float_value }` + """ Technical transformation to the dictionary : `{ 'name' : float_value }` """ from ostap.core.ostap_types import dictlike_types @@ -76,7 +103,7 @@ def pull_var ( name , value ) : return PoolVar ( name , value ) # statistics = make_stats ( results ) # @endcode def make_stats ( results , fits = {} , covs = {} , accept = SE () ) : - """Prepare statistics from results of toys/jackknifes/boostrap studies + """ Prepare statistics from results of toys/jackknifes/boostrap studies >>> results = ... >>> statistics = make_stats ( results ) """ @@ -98,25 +125,55 @@ def make_stats ( results , fits = {} , covs = {} , accept = SE () ) : for k in covs : stats ['- CovQual %s' % k ] = covs [ k ] - return stats # ============================================================================= -## print statistics of pseudoexperiments +## Print statistics of pseudoexperiments def print_stats ( stats , ntoys = '' , logger = logger ) : - """print statistics of pseudoexperiments + """ Print statistics of pseudoexperiments """ - - table = [ ( 'Parameter' , '#', 'mean' , 'rms' , '%13s / %-13s' % ( 'min' , 'max' ) ) ] + ## 0 1 2 3 4 5 6 7 + table = [ ( 'Parameter' , '#', 'mean', 'x[..]' , 'rms' , 'x[..]', 'min / max' , 'x[..]' ) ] - def make_row ( c ) : - n = "{:^11}".format ( c.nEntries() ) + has_e1 = [] + has_e2 = [] + has_e3 = [] + + def make_row ( c ) : + + n = '%d' % c.nEntries() mean = c.mean () - mean = "%+13.6g +/- %-13.6g" % ( mean.value() , mean.error () ) - rms = "%13.6g" % c.rms () - minmax = "%+13.6g / %-+13.6g" % ( c.min() , c.max () ) - return p , n , mean , rms , minmax + rms = c.rms () + minmax = c.min() , c.max() + + mean , expo1 = pretty_ve ( mean , precision = 6 , width = 8 , parentheses = False ) + rms , expo2 = pretty_float ( rms , precision = 4 , width = 6 ) + fmt , expo3 = fmt_pretty_float ( max ( abs ( c.min() ) , abs ( c.max() ) ) , precision = 3 , width = 5 ) + fmt = '%s / %s ' % ( fmt , fmt ) + + if expo3 : scale = 10**expo3 + else : scale = 1 + minmax = fmt % ( c.min() / scale , c.max() / scale ) + + ## mean = "%+13.6g +/- %-13.6g" % ( mean.value() , mean.error () ) + ## rms = "%13.6g" % c.rms () + ## minmax = "%+13.6g / %-+13.6g" % ( c.min() , c.max () ) + + if expo1 : expo1 = '10^%+d' % expo1 + else : expo1 = '' + if expo2 : expo2 = '10^%+d' % expo2 + else : expo2 = '' + + if expo3 : expo3 = '10^%+d' % expo3 + else : expo3 = '' + + if expo1 : has_e1.append ( '' ) + if expo2 : has_e2.append ( '' ) + if expo3 : has_e3.append ( '' ) + + return p , n , mean, expo1, rms, expo2, minmax, expo3 + for p in sorted ( stats ) : if p.startswith('pull:') : continue c = stats [ p ] @@ -127,16 +184,45 @@ def make_row ( c ) : c = stats [ p ] table.append ( make_row ( c ) ) + ## skip empty column + if not has_e3 : + rows = [] + for row in table : + r = list ( row ) + del r [ 7 ] + rows.append ( r ) + table = rows + + ## skip empty column + if not has_e2 : + rows = [] + for row in table : + r = list ( row ) + del r [ 5 ] + rows.append ( r ) + table = rows + + ## skip empty column + if not has_e1 : + rows = [] + for row in table : + r = list ( row ) + del r [ 3 ] + rows.append ( r ) + table = rows + + if not ntoys : ntoys = 0 for s in stats : ntoys = max ( ntoys , stats[s].nEntries() ) - + import ostap.logger.table as Table table = Table.table ( table , title = "Results of %s toys" % ntoys , - alignment = 'lcccc' , + alignment = 'lccccccc' , prefix = "# " ) + logger.info ( 'Results of %s toys:\n%s' % ( ntoys , table ) ) @@ -148,11 +234,12 @@ def make_row ( c ) : # jackknife , theta_jack = jackknife_esiimator ( statistics , value ) # @endcode def jackknife_statistics ( statistics , theta = None ) : - """Jackknife estimator from jackknife statistic + """ Jackknife estimator from jackknife statistic >>> statistics = .... >>> jacknife = jackknife_estimator ( statistics ) >>> jacknife , theta_corr , bias = jackknife_esiimator ( statistics , value ) """ + assert isinstance ( theta , VE ) or theta is None ,\ "jackknife_statistics: invalid type of 'value': %s" % type ( value ) @@ -167,24 +254,35 @@ def jackknife_statistics ( statistics , theta = None ) : bias = ( N - 1 ) * ( theta_dot.value() - theta.value() ) - ## theta corrected for the bias and with Jackknife variance + ## theta corrected for the bias and with Jackknife variance + theta_jack = VE ( theta.value() - bias , jvar ) ## corrected value return jackknife , theta_jack # ============================================================================= ## print Jackknife statistics -def print_jackknife ( fitresult , +def print_jackknife ( fitresult , ## restl of the fit of th e total datasample stats , morevars = {} , logger = logger , title = '' ) : - """print Jackknife statistics + """ Print Jackknife statistics """ + - header = ( 'Parameter' , 'theta' , 'theta_(.)' , 'theta_jack' , 'bias/sigma [%]' , 'error [%]' ) - table = [] + header = ( 'Parameter' , + 'theta' , 'x[...]' , + 'theta_(.)' , 'x[...]' , + 'bias' , 'x[...]' , + 'bias [%]' , 's/s' ) + + table = [ header ] + has_e1 = False + has_e2 = False + has_e3 = False + N = 0 for name in sorted ( stats ) : @@ -206,20 +304,45 @@ def print_jackknife ( fitresult , N = max ( N , statistics.nEntries() ) + ## jackknife estimates jackknife , theta_jack = jackknife_statistics ( statistics , theta ) - bias = theta_jack.value () - theta .value () + bias = ( N - 1 ) * ( jackknife - theta ).value() + + ## bias = theta_jack.value () - theta .value () scale = theta .error () / theta_jack.error () - row = ( name , - "%+13.6g +/- %-13.6g" % ( theta . value () , theta .error () ) , - "%+13.6g +/- %-13.6g" % ( jackknife . value () , jackknife .error () ) , - "%+13.6g +/- %-13.6g" % ( theta_jack . value () , theta_jack .error () ) , - '%+6.2f' % ( bias / theta.error() * 100 ) , - '%+6.2f' % ( scale * 100 - 100 ) ) + th1 , expo1 = pretty_ve ( theta , precision = 4 , width = 6 , parentheses = False ) + th2 , expo2 = pretty_ve ( jackknife , precision = 4 , width = 6 , parentheses = False ) + ## th3 , expo3 = pretty_ve ( theta_jack , precision = 4 , width = 6 , parentheses = False ) + th3 , expo3 = pretty_float ( bias , precision = 4 , width = 6 ) + + if expo1 : expo1 = '10^%+d' % expo1 + else : expo1 = '' + + if expo2 : expo2 = '10^%+d' % expo2 + else : expo2 = '' + + if expo3 : expo3 = '10^%+d' % expo3 + else : expo3 = '' + + bias = bias / theta.error() * 100 + + fbias = '%+.1f' % bias + errs = '%+.2f' % scale + + if expo1 : has_e1 = True + if expo2 : has_e2 = True + if expo3 : has_e3 = True + + if 50 < abs ( bias ) : fbias = attention ( fbias ) + if 0.5 < abs ( scale - 1 ) : errs = attention ( errs ) + + row = name , th1, expo1 , th2 , expo2 , th3 , expo3 , fbias , errs + table.append ( row ) @@ -230,13 +353,44 @@ def print_jackknife ( fitresult , statistics = stats [ name ] jackknife = jackknife_statistics ( statistics ) + + th2, expo2 = pretty_ve ( jackknife , precision = 4 , width = 6 , parentheses = False ) + + if expo2 : expo2 = '10^%+d' % expo2 + else : expo2 = '' + + if expo2 : has_e2 = True - row = name , '' , "%+13.6g +/- %-13.6g" % ( jackknife . value () , jackknife .error () ) , '' , '' , '' + row = name , '' , '' , th2 , expo2 , '' , '' , '' , '' table.append ( row ) - - table = [ header ] + table - + ## skip empty column + if not has_e3 : + rows = [] + for row in table : + r = list ( row ) + del r [ 6 ] + rows.append ( r ) + table = rows + + ## skip empty column + if not has_e2 : + rows = [] + for row in table : + r = list ( row ) + del r [ 4 ] + rows.append ( r ) + table = rows + + ## skip empty column + if not has_e1 : + rows = [] + for row in table : + r = list ( row ) + del r [ 2 ] + rows.append ( r ) + table = rows + title = title if title else "Jackknife results (N=%d)" % N import ostap.logger.table as Table @@ -254,14 +408,17 @@ def print_bootstrap ( fitresult , morevars = {} , logger = logger , title = '' ) : - """print Bootstrap statistics + """ Print Bootstrap statistics """ - header = ( 'Parameter' , 'theta' , 'theta_boot' , 'bias/sigma [%]' , 'error [%]' ) + header = ( 'Parameter' , 'theta' , 'x[...]' , 'theta_boot' , 'x[...]', 'bias[%]' , 's/s' ) table = [] n = 0 + has_e1 = False + has_e2 = False + for name in sorted ( stats ) : if name in fitresult : @@ -284,15 +441,30 @@ def print_bootstrap ( fitresult , theta_boot = VE ( statistics.mean().value() , statistics.mu2() ) - bias = theta_boot.value () - theta .value () - scale = theta .error () / theta_boot.error () + scale = theta .error () / theta_boot.error () + + th1 , expo1 = pretty_ve ( theta , precision = 4 , width = 6 , parentheses = False ) + th2 , expo2 = pretty_ve ( theta_boot , precision = 4 , width = 6 , parentheses = False ) + + if expo1 : expo1 = '10^%+d' % expo1 + else : expo1 = '' + + if expo2 : expo2 = '10^%+d' % expo2 + else : expo2 = '' + + bias = bias / theta.error() * 100 + + fbias = '%+.1f' % bias + errs = '%+.2f' % scale - row = ( name , - "%+13.6g +/- %-13.6g" % ( theta . value () , theta .error () ) , - "%+13.6g +/- %-13.6g" % ( theta_boot . value () , theta_boot .error () ) , - '%+6.2f' % ( bias / theta.error() * 100 ) , - '%+6.2f' % ( scale * 100 - 100 ) ) + if 50 < abs ( bias ) : fbias = attention ( fbias ) + if 0.5 < abs ( scale - 1 ) : errs = attention ( errs ) + + row = name , th1, expo1 , th2 , expo2 , fbias , errs + + if expo1 : has_e1 = True + if expo2 : has_e2 = True table.append ( row ) @@ -304,19 +476,42 @@ def print_bootstrap ( fitresult , statistics = stats [ name ] theta_boot = VE ( statistics.mean().value() , statistics.mu2() ) - row = name , '', "%+13.6g +/- %-13.6g" % ( theta_boot . value () , theta_boot .error () ) , '' , '' + th2 , expo2 = pretty_ve ( theta_boot , precision = 4 , width = 6 , parentheses = False ) + + if expo2 : expo2 = '10^%+d' % expo2 + else : expo2 = '' + + if expo2 : has_e2 = True + row = name , '','' , th2 , expo2 , '' , '' table.append ( row ) - + table = [ header ] + table - table = [ header ] + table + ## skip empty column + if not has_e2 : + rows = [] + for row in table : + r = list ( row ) + del r [ 4 ] + rows.append ( r ) + table = rows + + ## skip empty column + if not has_e1 : + rows = [] + for row in table : + r = list ( row ) + del r [ 2 ] + rows.append ( r ) + table = rows + title = title if title else "Bootstrapping with #%d samples" % n import ostap.logger.table as Table table = Table.table ( table , title = title , - alignment = 'lcccc' , + alignment = 'lccccccc' , prefix = "# " ) logger.info ( '%s:\n%s' % ( title , table ) ) @@ -325,7 +520,7 @@ def print_bootstrap ( fitresult , ## Default function to generate the data # - simple call for PDF.generate def generate_data ( pdf , varset , **config ) : - """Default function to generate the data + """ Default function to generate the data - simple call for `PDF.generate` """ return pdf.generate ( varset = varset , **config ) @@ -353,7 +548,7 @@ def make_fit ( pdf , dataset , **config ) : # @param dataset pdf # def accept_fit ( result , pdf = None , dataset = None ) : - """Accept the fit result? + """ Accept the fit result? - valid fit result - fit status is 0 (SUCCESS) - covariance matrix quality is either 0 or -1 @@ -431,7 +626,7 @@ def make_toys ( pdf , progress = True , logger = logger , frequency = 500 ) : ## - """Make `nToys` pseudoexperiments + """ Make `nToys` pseudoexperiments - Schematically: >>> for toy in range ( nToys ) : @@ -575,11 +770,13 @@ def make_toys ( pdf , ## 3. fit it! r = fit_fun ( pdf , dataset , **fitcnf ) - ## fit status - fits [ r.status () ] += 1 + ## fit status + st = r.status() + if 0 != st : fits [ st ] += 1 ## covariance matrix quality - covs [ r.covQual () ] += 1 + cq = r.covQual() + if not cq in ( -1 , 3 ) : covs [ cq ] += 1 ## ok ? ok = accept_fun ( r , pdf , dataset ) @@ -839,10 +1036,12 @@ def make_toys2 ( gen_pdf , ## pdf to generate toys r = fit_fun ( fit_pdf , dataset , **fitcnf ) ## fit status - fits [ r.status () ] += 1 + st = r.status() + if 0 != st : fits [ st ] += 1 ## covariance matrix quality - covs [ r.covQual () ] += 1 + cq = r.covQual() + if not cq in ( -1 , 3 ) : covs [ cq ] += 1 ## ok ? ok = accept_fun ( r , fit_pdf , dataset ) @@ -925,14 +1124,14 @@ def make_jackknife ( pdf , fit_config = {} , ## parameters for pdf.fitTo fit_pars = {} , ## fit-parameters to reset/use more_vars = {} , ## additional results to be calculated - fit_fun = None , ## fit function ( pdf , dataset , **fit_config ) - accept_fun = None , ## accept function ( fit-result, pdf, dataset ) + fit_fun = None , ## - fit function ( pdf , dataset , **fit_config ) + accept_fun = None , ## - accept function ( fit-result, pdf, dataset ) event_range = () , ## event range for jackknife silent = True , progress = True , logger = logger , frequency = 500 ) : - """Run Jackknife analysis, useful for evaluaton of fit biased and uncertainty estimates + """ Run Jackknife analysis, useful for evaluaton of fit biased and uncertainty estimates For each i remove event with index i from the dataset, and refit it. >>> dataset = ... >>> model = ... @@ -970,7 +1169,11 @@ def make_jackknife ( pdf , assert 0 <= begin and begin < end and begin < N , 'make_jackknife: invalid event range (%s,%s)/%d' % ( begin , end , N ) ## adjust the end end = min ( end , N ) - + + ## check if pdf and `extended` flag are in agreemrnt, and print warning message otherwise + ok1 = check_extended ( pdf , False ) + if not ok1 and not silent : logger.warning ( "Jackknife: estimates for `yield` parameters could be wrong!") + ## 1. fitting function? if fit_fun is None : if not silent : logger.info ( "make_jackknife: use default 'make_fit' function!") @@ -1024,10 +1227,12 @@ def make_jackknife ( pdf , r = fit_fun ( pdf , ds , **fitcnf ) ## 4. fit status - fits [ r.status () ] += 1 + st = r.status() + if 0 != st : fits [ st ] += 1 ## 5. covariance matrix quality - covs [ r.covQual () ] += 1 + cq = r.covQual() + if not cq in ( -1 , 3 ) : covs [ cq ] += 1 ## ok ? ok = accept_fun ( r , pdf , ds ) @@ -1050,8 +1255,10 @@ def make_jackknife ( pdf , results [ '#' ] .append ( len ( ds ) ) results [ '#sumw' ] .append ( ds.sumVar ( '1' ) ) + ## reset/remove/delete dataset ds.clear() - + del ds + if progress or not silent : if 0 < frequency and 1 <= i and 0 == i % frequency : stats = make_stats ( results , fits , covs , accept ) @@ -1066,19 +1273,27 @@ def make_jackknife ( pdf , r_tot = fit_fun ( pdf , data , **fitcnf ) r_tot = fit_fun ( pdf , data , **fitcnf ) - ## 10. the final table + ## 10. the final table + title = '' + if not ok1 : + logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!") + NN = 0 + for k in stats : NN = max ( NN , stats[k].nEntries() ) + title = "Jackknife results (N=%d).[Estimates for `yield` are likely wrong!]" % NN print_jackknife ( r_tot , stats , morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) , - logger = logger ) - + logger = logger , + title = title ) + + if not ok1 and not silent : logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!") return results , stats # ============================================================================= ## Run Bootstrap analysis, useful for evaluaton of fit biases and uncertainty estimates # -# In total size datasets are sampled (with replacement) from the original dataste +# In total size datasets are sampled (with replacement) from the original dataset # data and each sampled dataset is fit # @code # dataset = ... @@ -1102,6 +1317,7 @@ def make_jackknife ( pdf , # @param fit_config configuration of pdf.FitTo( data , ... ) # @param fit_pars redefine these parameters before each fit # @param more_vars calculate more variables from the fit-results +# @param extended use extended bootstrap? # @param fit_fun specific fitting action (if needed) # @param accept_fun specific accept action (if needed) # @param silent silent processing @@ -1115,14 +1331,15 @@ def make_bootstrap ( pdf , fit_config = {} , ## parameters for pdf.fitTo fit_pars = {} , ## fit-parameters to reset/use more_vars = {} , ## additional results to be calculated - fit_fun = None , ## fit function ( pdf , dataset , **fit_config ) + extended = True , ## use extended/non-extended bootstrtap + fit_fun = None , ## fit function ( pdf , dataset , **fit_config ) accept_fun = None , ## accept function ( fit-result, pdf, dataset ) silent = True , ## silent processing? progress = True , ## shpow progress bar? - logger = logger , ## use this logger + logger = logger , ## use this logger frequency = 500 ) : - """Run Bootstrap analysis, useful for evaluaton of fit biased and uncertainty estimates + """ Run Bootstrap analysis, useful for evaluaton of fit biased and uncertainty estimates In total `size` datasets are sampled (with replacement) from the original dataste `data` and each sampled dataset is fit >>> dataset = ... @@ -1137,6 +1354,7 @@ def make_bootstrap ( pdf , - `fit_config` : configuration of `pdf.FitTo( data , ... )` - `fit_pars` : redefine these parameters before each fit - `more_vars` : calculate more variables from the fit-results + - `extended` : use extended bootstrap? - `fit_fun` : specific fitting acion (if needed) - `accept_fun` : specific accept action (if needed) - `silent` : silent processing? @@ -1152,6 +1370,10 @@ def make_bootstrap ( pdf , assert isinstance ( size , integer_types ) and 1 <= size ,\ 'Invalid "size" argument %s/%s' % ( size , type ( size ) ) + ## check if pdf and `extended` flag are in agreemrnt, and print warning message otherwise + ok1 = check_extended ( pdf , extended ) + if not ok1 and not silent : logger.warning ( "Bootstrap: estimates for `yield` parameters could be wrong!") + ## 1. fitting function? if fit_fun is None : if not silent : logger.info ( "make_bootstrap: use default 'make_fit' function!") @@ -1193,10 +1415,14 @@ def make_bootstrap ( pdf , pdf.load_params ( params = fix_fit_pars , silent = True ) ## silent = silent ) r_tot = fit_fun ( pdf , data , **fitcnf ) + nn = 0 + from ostap.utils.progress_bar import progress_bar - ## run jackknife bootstrapping - for i , ds in progress_bar ( enumerate ( data.bootstrap ( size ) ) , max_value = size , silent = not progress ) : - + ## run bootstrapping + for i , ds in progress_bar ( enumerate ( data.bootstrap ( size , extended = extended ) ) , + max_value = size , + silent = not progress ) : + ## 2. reset parameters of fit_pdf pdf.load_params ( params = fix_fit_init , silent = True ) ## silent = silent ) pdf.load_params ( params = fix_fit_pars , silent = True ) ## silent = silent ) @@ -1205,10 +1431,12 @@ def make_bootstrap ( pdf , r = fit_fun ( pdf , ds , **fitcnf ) ## 4. fit status - fits [ r.status () ] += 1 + st = r.status() + if 0 != st : fits [ st ] += 1 ## 5. covariance matrix quality - covs [ r.covQual () ] += 1 + cq = r.covQual() + if not cq in ( -1 , 3 ) : covs [ cq ] += 1 ## ok ? ok = accept_fun ( r , pdf , ds ) @@ -1230,18 +1458,24 @@ def make_bootstrap ( pdf , results [ '#' ] .append ( len ( ds ) ) results [ '#sumw' ] .append ( ds.sumVar ( '1' ) ) + nn += 1 + ## reset/remove/delete dataset ds.clear() + del ds if progress or not silent : if 0 < frequency and 1 <= i and 0 == i % frequency : stats = make_stats ( results , fits , covs , accept ) - ## print_stats ( stats , i + 1 , logger = logger ) + title = "Bootstrapping with #%d samples" % nn + if extended : title = '(Extended) %s' % title + if not ok1 : title += '[Estimates for `yield` are likely wrong!]' print_bootstrap ( r_tot , stats , morevars = dict ( ( k , more_vars [ k ] ( r_tot , pdf ) ) for k in more_vars ), - logger = logger ) - + logger = logger , + title = title ) + if not ok1 and not silent : logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!") ## 8. make a final statistics stats = make_stats ( results , fits , covs , accept ) @@ -1253,11 +1487,19 @@ def make_bootstrap ( pdf , r_tot = fit_fun ( pdf , data , **fitcnf ) ## 10. the final table + title = "Bootstrapping with #%d samples" % nn + if extended : title = '(Extended) %s' % title + if not ok1 : + title += '[Estimates for `yield` are likely wrong!]' + logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!") print_bootstrap ( r_tot , stats , morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ), - logger = logger ) + logger = logger , + title = title ) + if not ok1 and not silent : logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!") + return results , stats diff --git a/ostap/parallel/parallel_toys.py b/ostap/parallel/parallel_toys.py index f825bbbc..fd506d94 100644 --- a/ostap/parallel/parallel_toys.py +++ b/ostap/parallel/parallel_toys.py @@ -302,7 +302,8 @@ def __init__ ( self , silent = True , progress = True , frequency = 100 ) : - + + TheTask_.__init__ ( self , data = data , fit_config = fit_config , @@ -354,9 +355,38 @@ def process ( self , jobid , event_range ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2023-06-22 class BootstrapTask(JackknifeTask) : - """The simple task object for parallel Jackknife + """ The simple task object for parallel Jackknife - see ostap.fitting.toys.make_jackknife """ + + def __init__ ( self , + pdf , + data , + fit_config = {} , ## parameters for pdf.fitTo + fit_pars = {} , ## fit-parameters to reset/use + more_vars = {} , ## additional results to be calculated + extended = True , ## use extended bootstrap + fit_fun = None , ## fit function ( pdf , dataset , **fit_config ) + accept_fun = None , ## accept function ( fit-result, pdf, dataset ) + silent = True , + progress = True , + frequency = 100 ) : + + JackknifeTask.__init__ ( self , + pdf = pdf , + data = data , + fit_config = fit_config , ## parameters for pdf.fitTo + fit_pars = fit_pars , ## fit-parameters to reset/use + more_vars = more_vars , ## additional results to be calculated + fit_fun = fit_fun , ## fit function ( pdf , dataset , **fit_config ) + accept_fun = accept_fun , ## accept function ( fit-result, pdf, dataset ) + silent = silent , + progress = progress , + frequency = frequency ) + + self.extended = True if extended else False + + ## the actual processing def process ( self , jobid , size ) : @@ -370,16 +400,17 @@ def process ( self , jobid , size ) : import ostap.fitting.variables from ostap.core.ostap_types import integer_types - assert isinstance ( size , integer_types ) and 0 < size ,\ + assert isinstance ( size , integer_types ) and 1 <= size ,\ 'Jobid %s: Invalid "size" argument %s/%s' % ( jobid , size , type ( size ) ) import ostap.fitting.toys as Toys return Toys.make_bootstrap ( pdf = self.pdf , data = self.data , - size = size , ## ATTENTION + size = size , ## ATTENTION !!! fit_config = self.fit_config , fit_pars = self.fit_pars , more_vars = self.more_vars , + extended = self.extended , fit_fun = self.fit_fun , accept_fun = self.accept_fun , silent = self.silent , @@ -867,6 +898,10 @@ def parallel_jackknife ( pdf , import ostap.fitting.toys as Toys if nSplit < 2 : return Toys.make_jackknife ( progress = progress , **config ) + + ## check if pdf and `extended` flag are in agreemrnt, and print warning message otherwise + ok1 = Toys.check_extended ( pdf , False ) + if not ok1 : logger.warning ( "Jackknife: estimates for `yield` parameters could be wrong!") ## create the task task = JackknifeTask ( progress = progress and not silent , **config ) @@ -885,8 +920,6 @@ def parallel_jackknife ( pdf , results , stats = task.results () if progress or not silent : - ## Toys.print_stats ( stats ) - fitcnf = {} fitcnf.update ( fit_config ) if not 'silent' in fitcnf : fitcnf [ 'silent' ] = silent @@ -896,14 +929,21 @@ def parallel_jackknife ( pdf , ## 9. fit total dataset (twice) r_tot = fit_fun ( pdf , data , **fitcnf ) r_tot = fit_fun ( pdf , data , **fitcnf ) - + + title = '' + if not ok1 : + logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!") + NN = 0 + for k in stats : NN = max ( NN , stats[k].nEntries() ) + title = "Jackknife results (N=%d).[Estimates for `yield` are likely wrong!]" % NN ## the final table Toys.print_jackknife ( r_tot , stats , - morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) ) - + morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) , + title = title ) + if not ok1 : logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!") return results, stats @@ -948,6 +988,7 @@ def parallel_bootstrap ( pdf , fit_config = {} , ## parameters for pdf.fitTo fit_pars = {} , ## fit-parameters to reset/use more_vars = {} , ## additional results to be calculated + extended = True , ## use extended bootstrap fit_fun = None , ## fit function ( pdf , dataset , **fit_config ) accept_fun = None , ## accept function ( fit-result, pdf, dataset ) silent = True , ## silent processing? @@ -973,7 +1014,10 @@ def parallel_bootstrap ( pdf , import ostap.fitting.toys as Toys if nSplit < 2 : return Toys.make_boostrap ( size = size , progress = progress , **config ) - + + ## check if pdf and `extended` flag are in agreemrnt, and print warning message otherwise + ok1 = Toys.check_extended ( pdf , extended ) + if not ok1 : logger.warning ( "Bootstrap: estimates for `yield` parameters could be wrong!") ## create teh task task = BootstrapTask ( progress = progress and not silent , **config ) @@ -1002,13 +1046,19 @@ def parallel_bootstrap ( pdf , r_tot = fit_fun ( pdf , data , **fitcnf ) r_tot = fit_fun ( pdf , data , **fitcnf ) - ## the final table + ## the final table + title = '' + if not ok1 : + logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!") + NN = 0 + for k in stats : NN = max ( NN , stats[k].nEntries() ) + title = "Bootstrap results (N=%d).[Estimates for `yield` are likely wrong!]" % NN Toys.print_bootstrap ( r_tot , stats , morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) ) - + if not ok1 : logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!") return results, stats diff --git a/ostap/parallel/tests/test_parallel_toys.py b/ostap/parallel/tests/test_parallel_toys.py index e5ece3ea..a89f9e6c 100755 --- a/ostap/parallel/tests/test_parallel_toys.py +++ b/ostap/parallel/tests/test_parallel_toys.py @@ -18,12 +18,16 @@ from ostap.core.meta_info import root_info import ostap.fitting.roofit import ostap.fitting.models as Models -from ostap.utils.timing import timing +from ostap.utils.timing import timing +from ostap.utils.memory import memory from ostap.core.core import cpp, VE, dsID, hID, rooSilent import ostap.fitting.models as Models from ostap.plotting.canvas import use_canvas from ostap.utils.utils import wait from ostap.fitting.toys import pull_var +from ostap.math.base import num_range +from ostap.utils.progress_bar import progress_bar +import ostap.logger.table as T import ROOT, sys, os, random, time # ============================================================================= # logging @@ -47,16 +51,27 @@ nominal_mean = 0.4 nominal_sigma = 0.1 +nominal_S = 1000 +nominal_B = 100 +nominal_F = float ( nominal_S ) / ( nominal_S + nominal_B ) -mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 ) -gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass ) -fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass ) + +mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 ) +gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass ) +fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass ) gen_gauss.mean = nominal_mean gen_gauss.sigma = nominal_sigma -model = Models.Fit1D ( signal = gen_gauss , - background = 'flat' ) +model = Models.Fit1D ( signal = gen_gauss , background = 'flat' ) +model_NE = Models.Fit1D ( signal = gen_gauss , background = 'flat' , extended = False ) + +model.S = nominal_S +model.B = nominal_B +model_NE.F = nominal_F + +toy_results = {} +fit_config = { 'silent' : True , 'refit' : 5 } # ============================================================================== ## Perform toy-study for possible fit bias and correct uncertainty evaluation @@ -67,7 +82,7 @@ # - fill distributions for fit results # - fill distribution of pulls def test_parallel_toys ( ) : - """Perform toys-study for possible fit bias and correct uncertainty evaluation + """ Perform toys-study for possible fit bias and correct uncertainty evaluation - generate `nToys` pseudoexperiments with some PDF `pdf` - fit teach experiment with the same PDF - store fit results @@ -84,11 +99,11 @@ def test_parallel_toys ( ) : with timing ( 'Toys analysis' , logger = logger ) : results , stats = Toys.parallel_toys ( pdf = gen_gauss , - nToys = 1000 , + nToys = 2000 , nSplit = 20 , data = [ mass ] , gen_config = { 'nEvents' : 200 , 'sample' : True } , - fit_config = { 'silent' : True } , + fit_config = fit_config , init_pars = { 'mean_GG' : nominal_mean , 'sigma_GG' : nominal_sigma } , more_vars = more_vars , silent = True , @@ -105,6 +120,8 @@ def test_parallel_toys ( ) : for h in ( h_mean , h_sigma ) : with use_canvas ( 'test_paralllel_toys %s' % h.title , wait = 1 ) : h.draw() + toy_results['test_parallel_toys'] = results , stats + # ============================================================================= ## Perform toy-study for possible fit bias and correct uncertainty evaluation # - generate nToys pseudoexperiments with some PDF gen_pdf @@ -112,7 +129,7 @@ def test_parallel_toys ( ) : # - store fit results # - fill distributions for fit results def test_parallel_toys2 ( ) : - """Perform toys-study for possible fit bias and correct uncertainty evaluation + """ Perform toys-study for possible fit bias and correct uncertainty evaluation - generate `nToys` pseudoexperiments with some PDF `gen_pdf` - fit teach experiment with the PDF `fit_pdf` - store fit results @@ -128,11 +145,11 @@ def test_parallel_toys2 ( ) : results , stats = Toys.parallel_toys2 ( gen_pdf = gen_gauss , fit_pdf = fit_gauss , - nToys = 1000 , + nToys = 2000 , nSplit = 20 , data = [ mass ] , gen_config = { 'nEvents' : 200 , 'sample' : True } , - fit_config = { 'silent' : True } , + fit_config = fit_config , gen_pars = { 'mean_GG' : nominal_mean , 'sigma_GG' : nominal_sigma } , fit_pars = { 'mean_FG' : nominal_mean , 'sigma_FG' : nominal_sigma } , more_vars = more_vars , @@ -150,58 +167,221 @@ def test_parallel_toys2 ( ) : for h in ( h_mean , h_sigma ) : with use_canvas ( 'test_parallel_toys2 %s' % h.title , wait = 1 ) : h.draw() + toy_results['test_parallel_toys2'] = results , stats -# ============================================================================== -## Perform toy-study for Jackknife -def test_parallel_jackknife ( ) : - """Perform toys-study for Jackknife - """ - logger = getLogger ( 'test_parallel_jackknife' ) - if root_info < (6,18) : - logger.warning ( "(parallel) Jackknife is temporarily disabled for this version of ROOT" ) - return +# ============================================================================== +## Perform toy-study for Jackknife (non-extended model) +def test_parallel_jackknife_NE1 ( ) : + """ Perform toys-study for Jackknife (non-extended model) + """ - with timing ( 'Jackknife analysis' , logger = logger ) : - - model.S = 1000 - model.B = 100 - - dataset = model.generate ( 1100 ) - - Toys.parallel_jackknife ( pdf = model , - nSplit = 6 , - data = dataset , - fit_config = { 'silent' : True } , - progress = True , - silent = True ) - + logger = getLogger ( 'test_parallel_jackknife_NE1' ) + logger.info ( 'Jackknife analysis for non-extended model' ) + + pdf = model_NE + + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.F = nominal_F + + with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) : + data = pdf.generate ( nominal_S ) + with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.F = nominal_F + + results, stats = Toys.parallel_jackknife ( pdf = pdf , + nSplit = 10 , + data = data , + fit_config = fit_config , + progress = True , + silent = True ) + + toy_results [ 'test_parallel_jackknife_NE1' ] = results , stats + +# ============================================================================== +## Perform toy-study for Jackknife (non-extended model) +def test_parallel_jackknife_NE2 ( ) : + """ Perform toys-study for Jackknife (non-extended model) + """ + + logger = getLogger ( 'test_parallel_jackknife_NE2' ) + logger.info ( 'Jackknife analysis for non-extended model' ) + + pdf = fit_gauss + + pdf.mean = nominal_mean + pdf.sigma = nominal_sigma + + with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) : + data = pdf.generate ( nominal_S ) + with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + pdf.mean = nominal_mean + pdf.sigma = nominal_sigma + + results, stats = Toys.parallel_jackknife ( pdf = pdf , + nSplit = 10 , + data = data , + fit_config = fit_config , + progress = True , + silent = True ) + + toy_results [ 'test_parallel_jackknife_NE2' ] = results , stats + # ============================================================================== -## Perform toy-study for Bootstrap -def test_parallel_bootstrap ( ) : - """Perform toys-study for Bootstrap +## Perform toy-study for Jackknife (extended model) +def test_parallel_jackknife_EXT ( ) : + """ Perform toys-study for Jackknife (extended model) """ - logger = getLogger ( 'test_parallel_bootstrap' ) + logger = getLogger ( 'test_parallel_jackknife_EXT' ) + logger.info ( 'Jackknife analysis for extended model' ) + + pdf = model - with timing ( 'Bootstrap analysis' , logger = logger ) : - - model.S = 1000 - model.B = 100 + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.S = nominal_S + pdf.B = nominal_B + + with timing ( 'Jackknife analysis (extended)' , logger = logger ) : - dataset = model.generate ( 1100 ) + data = pdf.generate ( nominal_S ) + with use_canvas ( "Jackknife analysis (extended)" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.S = nominal_S + pdf.B = nominal_B + + results, stats = Toys.parallel_jackknife ( pdf = pdf , + nSplit = 10 , + data = data , + fit_config = fit_config , + progress = True , + silent = True ) + + toy_results [ 'test_parallel_jackknife_EXT' ] = results , stats - Toys.parallel_bootstrap ( pdf = model , - size = 1000 , - nSplit = 6 , - data = dataset , - fit_config = { 'silent' : True } , - progress = True , - silent = True ) +# ============================================================================== +## Perform toy-study for Bootstrap (non-extended) +def test_parallel_bootstrap_NE1 ( ) : + """ Perform toys-study for Bootstrap (non-extended) + """ + + logger = getLogger ( 'test_parallel_bootstrap_NE1' ) + logger.info ( 'Bootstrap analysis for non-extended model' ) + + pdf = model_NE + + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.F = nominal_F + + with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) : + + data = pdf.generate ( nominal_S + nominal_B ) + with use_canvas ( "Booststrap analysis" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.F = nominal_F + + results, stats = Toys.parallel_bootstrap ( pdf = pdf , + nSplit = 10 , + size = 400 , + data = data , + fit_config = fit_config , + extended = False , + progress = True , + silent = True ) + + toy_results [ 'test_parallel_bootstrap_NE1' ] = results , stats + +# ============================================================================== +## Perform toy-study for Bootstrap (non-extended) +def test_parallel_bootstrap_NE2 ( ) : + """ Perform toys-study for Bootstrap (non-extended) + """ + + logger = getLogger ( 'test_parallel_bootstrap_NE2' ) + logger.info ( 'Bootstrap analysis for non-extended model' ) + + pdf = fit_gauss + + pdf.mean = nominal_mean + pdf.sigma = nominal_sigma + + with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) : + + data = pdf.generate ( nominal_S + nominal_B ) + with use_canvas ( "Booststrap analysis" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + pdf.mean = nominal_mean + pdf.sigma = nominal_sigma + + results, stats = Toys.parallel_bootstrap ( pdf = pdf , + nSplit = 10 , + size = 400 , + data = data , + fit_config = fit_config , + extended = False , + progress = True , + silent = True ) + + toy_results [ 'test_parallel_bootstrap_NE2' ] = results , stats + +# ============================================================================== +## Perform toy-study for Bootstrap (extended) +def test_parallel_bootstrap_EXT ( ) : + """ Perform toys-study for Bootstrap (extended) + """ + + logger = getLogger ( 'test_parallel_bootstrap_EXT' ) + logger.info ( 'Bootstrap analysis for extended model' ) + + pdf = model + + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.S = nominal_S + pdf.B = nominal_B + + with timing ( 'Bootstrap analysis (extended)' , logger = logger ) : + + data = pdf.generate ( nominal_S + nominal_B ) + with use_canvas ( "(Extended) Booststrap analysis" , wait = 3 ) : + r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config ) + + pdf.signal.mean = nominal_mean + pdf.signal.sigma = nominal_sigma + pdf.S = nominal_S + pdf.B = nominal_B + + results, stats = Toys.parallel_bootstrap ( pdf = pdf , + nSplit = 10 , + size = 400 , + data = data , + fit_config = fit_config , + extended = True , + progress = True , + silent = True ) + + toy_results [ 'test_parallel_bootstrap_EXT' ] = results , stats + # ============================================================================= ## Perform toy-study for significance of the signal # - generate nToys pseudoexperiments using background-only hypothesis @@ -209,7 +389,7 @@ def test_parallel_bootstrap ( ) : # - store fit results # - fill distributions for fit results def test_parallel_significance ( ) : - """Perform toy-study for significance of the signal + """ Perform toy-study for significance of the signal - generate `nToys` pseudoexperiments using background-only hypothesis - fit each experiment with signal+background hypothesis - store fit results @@ -236,34 +416,117 @@ def test_parallel_significance ( ) : results , stats = Toys.parallel_toys2 ( gen_pdf = bkg_only , fit_pdf = model , - nToys = 1000 , + nToys = 10000 , nSplit = 20 , data = [ mass ] , gen_config = { 'nEvents' : 100 , 'sample' : True } , - fit_config = { 'silent' : True } , + fit_config = fit_config , gen_pars = { 'tau_BKG' : 0. } , ## initial values for generation fit_pars = { 'B' : 100 , 'S' : 10 , 'phi0_Bkg_S': 0.0 } , ## initial fit values for parameters silent = True , progress = True ) - - h_S = ROOT.TH1F ( hID() , '#S' , 60 , 0 , 60 ) - for r in results ['S' ] : h_S .Fill ( r ) + + + # ========================================================================= + ## yields + h_Y = ROOT.TH1F ( hID() , '#S' , 140 , 0 , 70 ) + for r in results [ 'S' ] : h_Y .Fill ( r ) + + ## get p-value and significance histograms: + h_P , h_S = h_Y.significance ( cut_low = True ) + + h_S.red () + h_P.blue () + + minv = 1.e+100 + for i,_,y in h_P.items() : + yv = y.value() + if 0 < yv and yv < minv : minv = yv + minv , _ = num_range ( 0.75 * minv ) + h_P.SetMinimum ( minv ) + h_S.SetMinimum ( 0 ) + + with use_canvas ( 'test_significance: yields' , wait = 1 ) : h_Y.draw ( ) + with use_canvas ( 'test_significance: p-value' , wait = 1 ) : h_P.draw ( logy = True ) + with use_canvas ( 'test_significance: significance' , wait = 3 ) : h_S.draw () - for h in ( h_S , ) : - with use_canvas ( 'test_parallel_significance %s' % h.title , wait = 1 ) : h.draw() + toy_results [ 'test_parallel_significance' ] = results , stats + +# ============================================================================= +## Save resuls of toys to DBASE +def test_db () : + """ Save resuls of toys to DBASE + """ + logger = getLogger ( 'test_db' ) + logger.info ( 'Saving all toys results into DBASE' ) + import ostap.io.zipshelve as DBASE + from ostap.utils.timing import timing + with timing( 'Save everything to DBASE', logger ), DBASE.tmpdb() as db : + for key in progress_bar ( toy_results ) : + r , s = toy_results [ key ] + key1 = '%s:results' % key + key2 = '%s:stats' % key + db [ key1 ] = r + db [ key2 ] = s + db.ls() + while toy_results : toy_results.popitem() +# ============================================================================= + + # ============================================================================= if '__main__' == __name__ : + with memory ( 'test_toys' , logger = logger ) as mtt : test_parallel_toys () + with memory ( 'test_toys2' , logger = logger ) as mt2 : test_parallel_toys2 () + with memory ( 'test_jackknife_NE1' , logger = logger ) as mj1 : test_parallel_jackknife_NE1 () + with memory ( 'test_jackknife_NE2' , logger = logger ) as mj2 : test_parallel_jackknife_NE2 () + with memory ( 'test_jackknife_EXT' , logger = logger ) as mje : test_parallel_jackknife_EXT () + with memory ( 'test_bootstrap_NE1' , logger = logger ) as mb1 : test_parallel_bootstrap_NE1 () + with memory ( 'test_bootstrap_NE2' , logger = logger ) as mb2 : test_parallel_bootstrap_NE2 () + with memory ( 'test_bootstrap_EXT' , logger = logger ) as mbe : test_parallel_bootstrap_EXT () + + with memory ( 'test_significance' , logger = logger ) as mts : test_parallel_significance () + with memory ( 'test_db' , logger = logger ) as mdb : test_db () + + rows = [ ( 'Test' , 'Memory [MB]' ) ] + + row = 'Toys' , '%.1f' % mtt.delta + rows.append ( row ) - test_parallel_toys ( ) - test_parallel_toys2 ( ) - test_parallel_significance ( ) + row = 'Toys2' , '%.1f' % mt2.delta + rows.append ( row ) + + row = 'Jackknife_NE1' , '%.1f' % mj1.delta + rows.append ( row ) + + row = 'Jackknife_NE2' , '%.1f' % mj2.delta + rows.append ( row ) + + row = 'Jackknife_EXT' , '%.1f' % mje.delta + rows.append ( row ) + + row = 'Bootstrap_NE1' , '%.1f' % mb1.delta + rows.append ( row ) + + row = 'Bootstrap_NE2' , '%.1f' % mb2.delta + rows.append ( row ) + + row = 'Bootstrap_EXT' , '%.1f' % mbe.delta + rows.append ( row ) + + row = 'Significance' , '%.1f' % mts.delta + rows.append ( row ) + + row = 'test_db' , '%.1f' % mdb.delta + rows.append ( row ) + + title = 'Memory usage for various toys' + table = T.table ( rows , title = title , prefix = '# ' , alignment = 'lc' ) + logger.info ( '%s:\n%s' % ( title , table ) ) - test_parallel_jackknife ( ) - test_parallel_bootstrap ( ) # =============================================================================