diff --git a/ReleaseNotes/release_notes.md b/ReleaseNotes/release_notes.md
index 85b09d6d..aff26078 100644
--- a/ReleaseNotes/release_notes.md
+++ b/ReleaseNotes/release_notes.md
@@ -1,5 +1,9 @@
## New features
-
+
+ 1. improve all toys machinery: toys, jackknife, bootstrapping and significance
+ 1. extend all tests for toys
+
+
## Backward incompatible
## Bug fixes
diff --git a/ostap/fitting/tests/test_fitting_toys.py b/ostap/fitting/tests/test_fitting_toys.py
index 6b12c6be..5ae572fd 100755
--- a/ostap/fitting/tests/test_fitting_toys.py
+++ b/ostap/fitting/tests/test_fitting_toys.py
@@ -16,14 +16,18 @@
__author__ = "Ostap developers"
__all__ = () ## nothing to import
# =============================================================================
-from ostap.core.pyrouts import hID
+from ostap.core.pyrouts import hID, VE
import ostap.fitting.models as Models
import ostap.fitting.toys as Toys
import ostap.histos.histos
-from ostap.utils.timing import timing
+from ostap.utils.timing import timing
+from ostap.utils.memory import memory
from ostap.plotting.canvas import use_canvas
from ostap.utils.utils import wait
from ostap.fitting.toys import pull_var
+from ostap.math.base import num_range
+from ostap.utils.progress_bar import progress_bar
+import ostap.logger.table as T
import ROOT, time, random
# =============================================================================
# logging
@@ -34,16 +38,36 @@
else :
logger = getLogger ( __name__ )
# =============================================================================
-nominal_mean = 0.4
-nominal_sigma = 0.1
+nominal_mean = 0.4
+nominal_sigma = 0.1
+nominal_S = 1000
+nominal_B = 100
+nominal_F = float ( nominal_S ) / ( nominal_S + nominal_B )
-mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 )
-gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass )
-fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass )
+mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 )
+gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass )
+fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass )
gen_gauss.mean = nominal_mean
gen_gauss.sigma = nominal_sigma
-model = Models.Fit1D ( signal = gen_gauss , background = 'flat' )
+model = Models.Fit1D ( signal = gen_gauss , background = 'flat' )
+model_NE = Models.Fit1D ( signal = gen_gauss , background = 'flat' , extended = False )
+model.S = nominal_S
+model.B = nominal_B
+model_NE.F = nominal_F
+
+fit_config = { 'silent' : True , 'refit' : 5 }
+
+def reset_funcs () :
+
+ gen_gauss.mean = nominal_mean
+ gen_gauss.sigma = nominal_sigma
+ fit_gauss.mean = nominal_mean
+ fit_gauss.sigma = nominal_sigma
+ model.S = nominal_S
+ model.B = nominal_B
+
+toy_results = {}
# ==============================================================================
## Perform toy-study for possible fit bias and correct uncertainty evaluation
@@ -54,7 +78,7 @@
# - fill distributions for fit results
# - fill distribution of pulls
def test_toys ( ) :
- """Perform toys-study for possible fit bias and correct uncertainty evaluation
+ """ Perform toys-study for possible fit bias and correct uncertainty evaluation
- generate `nToys` pseudoexperiments with some PDF `pdf`
- fit teach experiment with the same PDF
- store fit results
@@ -67,14 +91,19 @@ def test_toys ( ) :
more_vars = { 'pull:mean_GG' : pull_var ( 'mean_GG' , nominal_mean ) ,
'pull:sigma_GG' : pull_var ( 'sigma_GG' , nominal_sigma ) }
+
+ ## reset all functions
+ reset_funcs ()
+
+ pdf = model
with timing ( 'Toys analysis' , logger = logger ) :
results , stats = Toys.make_toys (
- pdf = gen_gauss ,
+ pdf = pdf ,
nToys = 1000 ,
data = [ mass ] ,
- gen_config = { 'nEvents' : 200 , 'sample' : True } ,
- fit_config = { 'silent' : True , 'refit' : 5 } ,
+ gen_config = { 'nEvents' : 1100 , 'sample' : True } ,
+ fit_config = fit_config ,
init_pars = { 'mean_GG' : nominal_mean ,
'sigma_GG' : nominal_sigma } ,
more_vars = more_vars ,
@@ -91,6 +120,8 @@ def test_toys ( ) :
for h in ( h_mean , h_sigma ) :
with use_canvas ( 'test_toys %s' % h.title , wait = 1 ) : h.draw()
+
+ toy_results [ 'test_toys' ] = results , stats
# =============================================================================
## Perform toy-study for possible fit bias and correct uncertainty evaluation
@@ -111,6 +142,9 @@ def test_toys2 ( ) :
more_vars = { 'pull:mean_FG' : pull_var ( 'mean_FG' , nominal_mean ) ,
'pull:sigma_FG' : pull_var ( 'sigma_FG', nominal_sigma ) }
+ ## reset all functions
+ reset_funcs ()
+
with timing ( 'Toys2 analysis' , logger = logger ) :
results , stats = Toys.make_toys2 (
gen_pdf = gen_gauss ,
@@ -118,7 +152,7 @@ def test_toys2 ( ) :
nToys = 1000 ,
data = [ mass ] ,
gen_config = { 'nEvents' : 200 , 'sample' : True } ,
- fit_config = { 'silent' : True } ,
+ fit_config = fit_config ,
gen_pars = { 'mean_GG' : nominal_mean , 'sigma_GG' : nominal_sigma } ,
fit_pars = { 'mean_FG' : nominal_mean , 'sigma_FG' : nominal_sigma } ,
more_vars = more_vars ,
@@ -137,49 +171,197 @@ def test_toys2 ( ) :
for h in ( h_mean , h_sigma ) :
with use_canvas ( 'test_toys2 %s' % h.title , wait = 1 ) : h.draw()
+ toy_results [ 'test_toys2' ] = results , stats
# ==============================================================================
-## Perform toy-study for Jackknife
-def test_jackknife ( ) :
- """Perform toys-study for Jackknife
+## Perform toy-study for Jackknife (non-extended model)
+def test_jackknife_NE1 ( ) :
+ """ Perform toys-study for Jackknife (non-extended model)
"""
- logger = getLogger ( 'test_parallel_jackknife' )
+ logger = getLogger ( 'test_parallel_jackknife_NE1' )
+ logger.info ( 'Jackknife analysis for non-extended model' )
+
+ pdf = model_NE
- with timing ( 'Jackknife analysis' , logger = logger ) :
- model.S = 1000
- model.B = 100
+ with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) :
- data = model.generate ( 1100 )
+ ## reset all functions
+ reset_funcs ()
- Toys.make_jackknife ( pdf = model ,
- data = data ,
- fit_config = { 'silent' : True } ,
- progress = True ,
- silent = True )
-
+ data = pdf.generate ( nominal_S )
+ with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ ## reset all functions
+ reset_funcs ()
+
+ results, stats = Toys.make_jackknife ( pdf = pdf ,
+ data = data ,
+ fit_config = fit_config ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_jackknife_NE1' ] = results , stats
+
# ==============================================================================
-## Perform toy-study for Bootstrap
-def test_bootstrap ( ) :
- """Perform toys-study for Bootstrap
+## Perform toy-study for Jackknife (non-extended model)
+def test_jackknife_NE2 ( ) :
+ """ Perform toys-study for Jackknife (non-extended model)
"""
- logger = getLogger ( 'test_parallel_bootstrap' )
-
- with timing ( 'Bootstrap analysis' , logger = logger ) :
+ logger = getLogger ( 'test_parallel_jackknife_NE2' )
+ logger.info ( 'Jackknife analysis for non-extended model' )
+
+ pdf = fit_gauss
+
+ with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) :
+
+ ## reset all functions
+ reset_funcs ()
+
+ data = pdf.generate ( nominal_S )
+ with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ ## reset all functions
+ reset_funcs ()
+
+ results, stats = Toys.make_jackknife ( pdf = pdf ,
+ data = data ,
+ fit_config = fit_config ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_jackknife_NE2' ] = results , stats
+
+# ==============================================================================
+## Perform toy-study for Jackknife (extended model)
+def test_jackknife_EXT ( ) :
+ """ Perform toys-study for Jackknife (extended model)
+ """
- model.S = 1000
- model.B = 100
+ logger = getLogger ( 'test_jackknife_EXT' )
+ logger.info ( 'Jackknife analysis for extended model' )
+
+ pdf = model
+
+ with timing ( 'Jackknife analysis (extended)' , logger = logger ) :
- data = model.generate ( 1100 )
+ ## reset all functions
+ reset_funcs ()
- Toys.make_bootstrap ( pdf = model ,
- size = 100 ,
- data = data ,
- fit_config = { 'silent' : True } ,
- progress = True ,
- silent = True )
+ data = pdf.generate ( nominal_S )
+ with use_canvas ( "Jackknife analysis (extended)" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ ## reset all functions
+ reset_funcs ()
+
+ results, stats = Toys.make_jackknife ( pdf = pdf ,
+ data = data ,
+ fit_config = fit_config ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_jackknife_EXT' ] = results , stats
+
+# ==============================================================================
+## Perform toy-study for Bootstrap (non-extended)
+def test_bootstrap_NE1 ( ) :
+ """ Perform toys-study for Bootstrap (non-extended)
+ """
+
+ logger = getLogger ( 'test_bootstrap_NE1' )
+ logger.info ( 'Bootstrap analysis for non-extended model' )
+
+ pdf = model_NE
+ with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) :
+
+ ## reset all functions
+ reset_funcs ()
+
+ data = pdf.generate ( nominal_S + nominal_B )
+ with use_canvas ( "Bootstrap analysis" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ ## reset all functions
+ reset_funcs ()
+ results, stats = Toys.make_bootstrap ( pdf = pdf ,
+ size = 400 ,
+ data = data ,
+ fit_config = fit_config ,
+ extended = False ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_bootstrap_NE1' ] = results , stats
+
+# ==============================================================================
+## Perform toy-study for Bootstrap (non-extended)
+def test_bootstrap_NE2 ( ) :
+ """ Perform toys-study for Bootstrap (non-extended)
+ """
+
+ logger = getLogger ( 'test_parallel_bootstrap_NE2' )
+ logger.info ( 'Bootstrap analysis for non-extended model' )
+
+ pdf = fit_gauss
+ with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) :
+
+ ## reset all functions
+ reset_funcs ()
+
+ data = pdf.generate ( nominal_S + nominal_B )
+ with use_canvas ( "Bootstrap analysis" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ ## reset all functions
+ reset_funcs ()
+
+ results, stats = Toys.make_bootstrap ( pdf = pdf ,
+ size = 400 ,
+ data = data ,
+ fit_config = fit_config ,
+ extended = False ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_bootstrap_NE2' ] = results , stats
+
+# ==============================================================================
+## Perform toy-study for Bootstrap (extended)
+def test_bootstrap_EXT ( ) :
+ """ Perform toys-study for Bootstrap (extended)
+ """
+
+ logger = getLogger ( 'test_parallel_bootstrap_EXT' )
+ logger.info ( 'Bootstrap analysis for extended model' )
+
+ pdf = model
+ with timing ( 'Bootstrap analysis (extended)' , logger = logger ) :
+
+ ## reset all functions
+ reset_funcs ()
+
+ data = pdf.generate ( nominal_S + nominal_B )
+ with use_canvas ( "(Extended) Bootstrap analysis" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ ## reset all functions
+ reset_funcs ()
+
+ results, stats = Toys.make_bootstrap ( pdf = pdf ,
+ size = 400 ,
+ data = data ,
+ fit_config = fit_config ,
+ extended = True ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_bootstrap_EXT' ] = results , stats
+
# =============================================================================
## Perform toy-study for significance of the signal
# - generate nToys
pseudoexperiments using background-only hypothesis
@@ -187,7 +369,7 @@ def test_bootstrap ( ) :
# - store fit results
# - fill distributions for fit results
def test_significance ( ) :
- """Perform toy-study for significance of the signal
+ """ Perform toy-study for significance of the signal
- generate `nToys` pseudoexperiments using background-only hypothesis
- fit each experiment with signal+background hypothesis
- store fit results
@@ -196,6 +378,9 @@ def test_significance ( ) :
logger = getLogger ( 'test_significance' )
+ ## reset all functions
+ reset_funcs ()
+
with timing ( 'Significance analysis' , logger = logger ) :
## only background hypothesis
@@ -209,35 +394,120 @@ def test_significance ( ) :
model.background.tau.fix ( 0 )
results , stats = Toys.make_toys2 (
- gen_pdf = bkg_only ,
- fit_pdf = model ,
- nToys = 1000 ,
- data = [ mass ] ,
- gen_config = { 'nEvents' : 100 , 'sample' : True } ,
- fit_config = { 'silent' : True } ,
+ gen_pdf = bkg_only ,
+ fit_pdf = model ,
+ nToys = 2000 ,
+ data = [ mass ] ,
+ gen_config = { 'nEvents' : 110 , 'sample' : True } ,
+ fit_config = fit_config ,
gen_pars = { 'tau_BKG' : 0. } , ## initial values for generation
- fit_pars = { 'B' : 100 ,
- 'S' : 10 ,
- 'phi0_Bkg_S': 0.0 } , ## initial fit values for parameters
+ fit_pars = { 'B' : 100 ,
+ 'S' : 10 ,
+ 'phi0_Bkg_S' : 0.0 } , ## initial fit values for parameters
silent = True ,
progress = True )
-
- h_S = ROOT.TH1F ( hID() , '#S' , 60 , 0 , 60 )
-
- for r in results ['S' ] : h_S .Fill ( r )
+
+
+ # =========================================================================
+ ## yields
+ h_Y = ROOT.TH1F ( hID() , '#S' , 140 , 0 , 70 )
+ for r in results [ 'S' ] : h_Y .Fill ( r )
+
+ ## get p-value and significance histograms:
+ h_P , h_S = h_Y.significance ( cut_low = True )
+
+ h_S.red ()
+ h_P.blue ()
+
+ minv = 1.e+100
+ for i,_,y in h_P.items() :
+ yv = y.value()
+ if 0 < yv and yv < minv : minv = yv
+ minv , _ = num_range ( 0.75 * minv )
+ h_P.SetMinimum ( minv )
+ h_S.SetMinimum ( 0 )
- for h in ( h_S , ) :
- with use_canvas ( 'test_significance %s' % h.title , wait = 1 ) : h.draw()
-
+ with use_canvas ( 'test_significance: yields' , wait = 1 ) : h_Y.draw ( )
+ with use_canvas ( 'test_significance: p-value' , wait = 1 ) : h_P.draw ( logy = True )
+ with use_canvas ( 'test_significance: significance' , wait = 3 ) : h_S.draw ()
+
+ toy_results [ 'test_significance' ] = results , stats
+# =============================================================================
+## Save results of toys to DBASE
+def test_db () :
+ """ Save results of toys to DBASE
+ """
+ logger = getLogger ( 'test_db' )
+ logger.info ( 'Saving all toys results into DBASE' )
+ import ostap.io.zipshelve as DBASE
+ from ostap.utils.timing import timing
+ with timing( 'Save everything to DBASE', logger ), DBASE.tmpdb() as db :
+ for key in progress_bar ( toy_results ) :
+ r , s = toy_results [ key ]
+ key1 = '%s:results' % key
+ key2 = '%s:stats' % key
+ db [ key1 ] = r
+ db [ key2 ] = s
+ db.ls()
+ while toy_results : toy_results.popitem()
+# =============================================================================
+
+
# =============================================================================
if '__main__' == __name__ :
+
+ """
+ with memory ( 'test_toys' , logger = logger ) as mtt : test_toys ()
+ with memory ( 'test_toys2' , logger = logger ) as mt2 : test_toys2 ()
+ with memory ( 'test_jackknife_NE1' , logger = logger ) as mj1 : test_jackknife_NE1 ()
+ with memory ( 'test_jackknife_NE2' , logger = logger ) as mj2 : test_jackknife_NE2 ()
+ with memory ( 'test_jackknife_EXT' , logger = logger ) as mje : test_jackknife_EXT ()
+ with memory ( 'test_bootstrap_NE1' , logger = logger ) as mb1 : test_bootstrap_NE1 ()
+ with memory ( 'test_bootstrap_NE2' , logger = logger ) as mb2 : test_bootstrap_NE2 ()
+ with memory ( 'test_bootstrap_EXT' , logger = logger ) as mbe : test_bootstrap_EXT ()
+ """
+
+ with memory ( 'test_significance' , logger = logger ) as mts : test_significance ()
+ with memory ( 'test_db' , logger = logger ) as mdb : test_db ()
+
+ rows = [ ( 'Test' , 'Memory [MB]' ) ]
+
+ """
+ row = 'Toys' , '%.1f' % mtt.delta
+ rows.append ( row )
+
+ row = 'Toys2' , '%.1f' % mt2.delta
+ rows.append ( row )
+
+ row = 'Jackknife_NE1' , '%.1f' % mj1.delta
+ rows.append ( row )
+
+ row = 'Jackknife_NE2' , '%.1f' % mj2.delta
+ rows.append ( row )
+
+ row = 'Jackknife_EXT' , '%.1f' % mje.delta
+ rows.append ( row )
+
+ row = 'Bootstrap_NE1' , '%.1f' % mb1.delta
+ rows.append ( row )
+
+ row = 'Bootstrap_NE2' , '%.1f' % mb2.delta
+ rows.append ( row )
+
+ row = 'Bootstrap_EXT' , '%.1f' % mbe.delta
+ rows.append ( row )
+ """
+
+ row = 'Significance' , '%.1f' % mts.delta
+ rows.append ( row )
+
+ row = 'test_db' , '%.1f' % mdb.delta
+ rows.append ( row )
- test_toys ( )
- test_toys2 ( )
- test_jackknife ( )
- test_bootstrap ( )
- test_significance ( )
+ title = 'Memory usage for various toys'
+ table = T.table ( rows , title = title , prefix = '# ' , alignment = 'lc' )
+ logger.info ( '%s:\n%s' % ( title , table ) )
# =============================================================================
diff --git a/ostap/fitting/toys.py b/ostap/fitting/toys.py
index d0401dec..07c52cd1 100644
--- a/ostap/fitting/toys.py
+++ b/ostap/fitting/toys.py
@@ -26,6 +26,8 @@
from builtins import range
from ostap.core.ostap_types import string_types, integer_types
from ostap.core.core import VE, SE
+from ostap.logger.pretty import pretty_ve, pretty_float, fmt_pretty_float
+from ostap.logger.colorized import attention
import ROOT
# =============================================================================
# logging
@@ -34,11 +36,36 @@
if '__main__' == __name__ : logger = getLogger( 'ostap.fitting.toys' )
else : logger = getLogger( __name__ )
# =============================================================================
-logger.debug ( 'Utilities to run fitting toys, Jackknife, bootstrap, ... ')
+logger.debug ( 'Utilities to run toys, Jackknife, bootstrap, ... ')
# ==============================================================================
+extmodes = {
+ ROOT.RooAbsPdf.MustBeExtended : 'MustBeExtended' ,
+ ROOT.RooAbsPdf.CanBeExtended : 'CanBeExtended' ,
+ ROOT.RooAbsPdf.CanNotBeExtended : 'CanNotBeExtended' ,
+}
+# ==============================================================================
+## Check the consistency of the PDF and `extended` flag
+def check_extended ( pdf , extended ) :
+ """ Check the consistency of the PDF and `extended` flag
+ """
+ if isinstance ( pdf , ROOT.RooAbsPdf ) : pass
+ else : pdf = pdf.pdf
+ ##
+ extmode = pdf.extendMode()
+ ##
+ if pdf.canBeExtended () : return True
+ elif pdf.mustBeExtended () and extended : return True
+ elif ROOT.RooAbsPdf.MustBeExtended == extmode and extended : return True
+ elif ROOT.RooAbsPdf.CanNotBeExtended == extmode and not extended : return True
+ elif ROOT.RooAbsPdf.CanBeExtended == extmode and extended : return True
+ ##
+ logger.warning ( "Mismatch of `ExtendMode`:`%s` and `extended`:%s" % ( extmodes [ extmode ] , extended ) )
+ ##
+ return False
+# =============================================================================
## Technical transformation to the dictionary : { 'name' : float_value }
def vars_transform ( vars ) :
- """Technical transformation to the dictionary : `{ 'name' : float_value }`
+ """ Technical transformation to the dictionary : `{ 'name' : float_value }`
"""
from ostap.core.ostap_types import dictlike_types
@@ -76,7 +103,7 @@ def pull_var ( name , value ) : return PoolVar ( name , value )
# statistics = make_stats ( results )
# @endcode
def make_stats ( results , fits = {} , covs = {} , accept = SE () ) :
- """Prepare statistics from results of toys/jackknifes/boostrap studies
+ """ Prepare statistics from results of toys/jackknife/bootstrap studies
>>> results = ...
>>> statistics = make_stats ( results )
"""
@@ -98,25 +125,55 @@ def make_stats ( results , fits = {} , covs = {} , accept = SE () ) :
for k in covs :
stats ['- CovQual %s' % k ] = covs [ k ]
-
return stats
# =============================================================================
-## print statistics of pseudoexperiments
+## Print statistics of pseudoexperiments
def print_stats ( stats , ntoys = '' , logger = logger ) :
- """print statistics of pseudoexperiments
+ """ Print statistics of pseudoexperiments
"""
-
- table = [ ( 'Parameter' , '#', 'mean' , 'rms' , '%13s / %-13s' % ( 'min' , 'max' ) ) ]
+ ## 0 1 2 3 4 5 6 7
+ table = [ ( 'Parameter' , '#', 'mean', 'x[..]' , 'rms' , 'x[..]', 'min / max' , 'x[..]' ) ]
- def make_row ( c ) :
- n = "{:^11}".format ( c.nEntries() )
+ has_e1 = []
+ has_e2 = []
+ has_e3 = []
+
+ def make_row ( c ) :
+
+ n = '%d' % c.nEntries()
mean = c.mean ()
- mean = "%+13.6g +/- %-13.6g" % ( mean.value() , mean.error () )
- rms = "%13.6g" % c.rms ()
- minmax = "%+13.6g / %-+13.6g" % ( c.min() , c.max () )
- return p , n , mean , rms , minmax
+ rms = c.rms ()
+ minmax = c.min() , c.max()
+
+ mean , expo1 = pretty_ve ( mean , precision = 6 , width = 8 , parentheses = False )
+ rms , expo2 = pretty_float ( rms , precision = 4 , width = 6 )
+ fmt , expo3 = fmt_pretty_float ( max ( abs ( c.min() ) , abs ( c.max() ) ) , precision = 3 , width = 5 )
+ fmt = '%s / %s ' % ( fmt , fmt )
+
+ if expo3 : scale = 10**expo3
+ else : scale = 1
+ minmax = fmt % ( c.min() / scale , c.max() / scale )
+
+ ## mean = "%+13.6g +/- %-13.6g" % ( mean.value() , mean.error () )
+ ## rms = "%13.6g" % c.rms ()
+ ## minmax = "%+13.6g / %-+13.6g" % ( c.min() , c.max () )
+
+ if expo1 : expo1 = '10^%+d' % expo1
+ else : expo1 = ''
+ if expo2 : expo2 = '10^%+d' % expo2
+ else : expo2 = ''
+
+ if expo3 : expo3 = '10^%+d' % expo3
+ else : expo3 = ''
+
+ if expo1 : has_e1.append ( '' )
+ if expo2 : has_e2.append ( '' )
+ if expo3 : has_e3.append ( '' )
+
+ return p , n , mean, expo1, rms, expo2, minmax, expo3
+
for p in sorted ( stats ) :
if p.startswith('pull:') : continue
c = stats [ p ]
@@ -127,16 +184,45 @@ def make_row ( c ) :
c = stats [ p ]
table.append ( make_row ( c ) )
+ ## skip empty column
+ if not has_e3 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 7 ]
+ rows.append ( r )
+ table = rows
+
+ ## skip empty column
+ if not has_e2 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 5 ]
+ rows.append ( r )
+ table = rows
+
+ ## skip empty column
+ if not has_e1 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 3 ]
+ rows.append ( r )
+ table = rows
+
+
if not ntoys :
ntoys = 0
for s in stats :
ntoys = max ( ntoys , stats[s].nEntries() )
-
+
import ostap.logger.table as Table
table = Table.table ( table ,
title = "Results of %s toys" % ntoys ,
- alignment = 'lcccc' ,
+ alignment = 'lccccccc' ,
prefix = "# " )
+
logger.info ( 'Results of %s toys:\n%s' % ( ntoys , table ) )
@@ -148,11 +234,12 @@ def make_row ( c ) :
# jackknife , theta_jack = jackknife_esiimator ( statistics , value )
# @endcode
def jackknife_statistics ( statistics , theta = None ) :
- """Jackknife estimator from jackknife statistic
+ """ Jackknife estimator from jackknife statistic
>>> statistics = ....
>>> jacknife = jackknife_estimator ( statistics )
>>> jacknife , theta_corr , bias = jackknife_esiimator ( statistics , value )
"""
+
assert isinstance ( theta , VE ) or theta is None ,\
"jackknife_statistics: invalid type of 'value': %s" % type ( value )
@@ -167,24 +254,35 @@ def jackknife_statistics ( statistics , theta = None ) :
bias = ( N - 1 ) * ( theta_dot.value() - theta.value() )
- ## theta corrected for the bias and with Jackknife variance
+ ## theta corrected for the bias and with Jackknife variance
+
theta_jack = VE ( theta.value() - bias , jvar ) ## corrected value
return jackknife , theta_jack
# =============================================================================
## print Jackknife statistics
-def print_jackknife ( fitresult ,
+def print_jackknife ( fitresult , ## result of the fit of the total data sample
stats ,
morevars = {} ,
logger = logger ,
title = '' ) :
- """print Jackknife statistics
+ """ Print Jackknife statistics
"""
+
- header = ( 'Parameter' , 'theta' , 'theta_(.)' , 'theta_jack' , 'bias/sigma [%]' , 'error [%]' )
- table = []
+ header = ( 'Parameter' ,
+ 'theta' , 'x[...]' ,
+ 'theta_(.)' , 'x[...]' ,
+ 'bias' , 'x[...]' ,
+ 'bias [%]' , 's/s' )
+
+ table = [ header ]
+ has_e1 = False
+ has_e2 = False
+ has_e3 = False
+
N = 0
for name in sorted ( stats ) :
@@ -206,20 +304,45 @@ def print_jackknife ( fitresult ,
N = max ( N , statistics.nEntries() )
+
## jackknife estimates
jackknife , theta_jack = jackknife_statistics ( statistics , theta )
- bias = theta_jack.value () - theta .value ()
+ bias = ( N - 1 ) * ( jackknife - theta ).value()
+
+ ## bias = theta_jack.value () - theta .value ()
scale = theta .error () / theta_jack.error ()
- row = ( name ,
- "%+13.6g +/- %-13.6g" % ( theta . value () , theta .error () ) ,
- "%+13.6g +/- %-13.6g" % ( jackknife . value () , jackknife .error () ) ,
- "%+13.6g +/- %-13.6g" % ( theta_jack . value () , theta_jack .error () ) ,
- '%+6.2f' % ( bias / theta.error() * 100 ) ,
- '%+6.2f' % ( scale * 100 - 100 ) )
+ th1 , expo1 = pretty_ve ( theta , precision = 4 , width = 6 , parentheses = False )
+ th2 , expo2 = pretty_ve ( jackknife , precision = 4 , width = 6 , parentheses = False )
+ ## th3 , expo3 = pretty_ve ( theta_jack , precision = 4 , width = 6 , parentheses = False )
+ th3 , expo3 = pretty_float ( bias , precision = 4 , width = 6 )
+
+ if expo1 : expo1 = '10^%+d' % expo1
+ else : expo1 = ''
+
+ if expo2 : expo2 = '10^%+d' % expo2
+ else : expo2 = ''
+
+ if expo3 : expo3 = '10^%+d' % expo3
+ else : expo3 = ''
+
+ bias = bias / theta.error() * 100
+
+ fbias = '%+.1f' % bias
+ errs = '%+.2f' % scale
+
+ if expo1 : has_e1 = True
+ if expo2 : has_e2 = True
+ if expo3 : has_e3 = True
+
+ if 50 < abs ( bias ) : fbias = attention ( fbias )
+ if 0.5 < abs ( scale - 1 ) : errs = attention ( errs )
+
+ row = name , th1, expo1 , th2 , expo2 , th3 , expo3 , fbias , errs
+
table.append ( row )
@@ -230,13 +353,44 @@ def print_jackknife ( fitresult ,
statistics = stats [ name ]
jackknife = jackknife_statistics ( statistics )
+
+ th2, expo2 = pretty_ve ( jackknife , precision = 4 , width = 6 , parentheses = False )
+
+ if expo2 : expo2 = '10^%+d' % expo2
+ else : expo2 = ''
+
+ if expo2 : has_e2 = True
- row = name , '' , "%+13.6g +/- %-13.6g" % ( jackknife . value () , jackknife .error () ) , '' , '' , ''
+ row = name , '' , '' , th2 , expo2 , '' , '' , '' , ''
table.append ( row )
-
- table = [ header ] + table
-
+ ## skip empty column
+ if not has_e3 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 6 ]
+ rows.append ( r )
+ table = rows
+
+ ## skip empty column
+ if not has_e2 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 4 ]
+ rows.append ( r )
+ table = rows
+
+ ## skip empty column
+ if not has_e1 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 2 ]
+ rows.append ( r )
+ table = rows
+
title = title if title else "Jackknife results (N=%d)" % N
import ostap.logger.table as Table
@@ -254,14 +408,17 @@ def print_bootstrap ( fitresult ,
morevars = {} ,
logger = logger ,
title = '' ) :
- """print Bootstrap statistics
+ """ Print Bootstrap statistics
"""
- header = ( 'Parameter' , 'theta' , 'theta_boot' , 'bias/sigma [%]' , 'error [%]' )
+ header = ( 'Parameter' , 'theta' , 'x[...]' , 'theta_boot' , 'x[...]', 'bias[%]' , 's/s' )
table = []
n = 0
+ has_e1 = False
+ has_e2 = False
+
for name in sorted ( stats ) :
if name in fitresult :
@@ -284,15 +441,30 @@ def print_bootstrap ( fitresult ,
theta_boot = VE ( statistics.mean().value() , statistics.mu2() )
-
bias = theta_boot.value () - theta .value ()
- scale = theta .error () / theta_boot.error ()
+ scale = theta .error () / theta_boot.error ()
+
+ th1 , expo1 = pretty_ve ( theta , precision = 4 , width = 6 , parentheses = False )
+ th2 , expo2 = pretty_ve ( theta_boot , precision = 4 , width = 6 , parentheses = False )
+
+ if expo1 : expo1 = '10^%+d' % expo1
+ else : expo1 = ''
+
+ if expo2 : expo2 = '10^%+d' % expo2
+ else : expo2 = ''
+
+ bias = bias / theta.error() * 100
+
+ fbias = '%+.1f' % bias
+ errs = '%+.2f' % scale
- row = ( name ,
- "%+13.6g +/- %-13.6g" % ( theta . value () , theta .error () ) ,
- "%+13.6g +/- %-13.6g" % ( theta_boot . value () , theta_boot .error () ) ,
- '%+6.2f' % ( bias / theta.error() * 100 ) ,
- '%+6.2f' % ( scale * 100 - 100 ) )
+ if 50 < abs ( bias ) : fbias = attention ( fbias )
+ if 0.5 < abs ( scale - 1 ) : errs = attention ( errs )
+
+ row = name , th1, expo1 , th2 , expo2 , fbias , errs
+
+ if expo1 : has_e1 = True
+ if expo2 : has_e2 = True
table.append ( row )
@@ -304,19 +476,42 @@ def print_bootstrap ( fitresult ,
statistics = stats [ name ]
theta_boot = VE ( statistics.mean().value() , statistics.mu2() )
- row = name , '', "%+13.6g +/- %-13.6g" % ( theta_boot . value () , theta_boot .error () ) , '' , ''
+ th2 , expo2 = pretty_ve ( theta_boot , precision = 4 , width = 6 , parentheses = False )
+
+ if expo2 : expo2 = '10^%+d' % expo2
+ else : expo2 = ''
+
+ if expo2 : has_e2 = True
+ row = name , '','' , th2 , expo2 , '' , ''
table.append ( row )
-
+ table = [ header ] + table
- table = [ header ] + table
+ ## skip empty column
+ if not has_e2 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 4 ]
+ rows.append ( r )
+ table = rows
+
+ ## skip empty column
+ if not has_e1 :
+ rows = []
+ for row in table :
+ r = list ( row )
+ del r [ 2 ]
+ rows.append ( r )
+ table = rows
+
title = title if title else "Bootstrapping with #%d samples" % n
import ostap.logger.table as Table
table = Table.table ( table ,
title = title ,
- alignment = 'lcccc' ,
+ alignment = 'lccccccc' ,
prefix = "# " )
logger.info ( '%s:\n%s' % ( title , table ) )
@@ -325,7 +520,7 @@ def print_bootstrap ( fitresult ,
## Default function to generate the data
# - simple call for PDF.generate
def generate_data ( pdf , varset , **config ) :
- """Default function to generate the data
+ """ Default function to generate the data
- simple call for `PDF.generate`
"""
return pdf.generate ( varset = varset , **config )
@@ -353,7 +548,7 @@ def make_fit ( pdf , dataset , **config ) :
# @param dataset pdf
#
def accept_fit ( result , pdf = None , dataset = None ) :
- """Accept the fit result?
+ """ Accept the fit result?
- valid fit result
- fit status is 0 (SUCCESS)
- covariance matrix quality is either 0 or -1
@@ -431,7 +626,7 @@ def make_toys ( pdf ,
progress = True ,
logger = logger ,
frequency = 500 ) : ##
- """Make `nToys` pseudoexperiments
+ """ Make `nToys` pseudoexperiments
- Schematically:
>>> for toy in range ( nToys ) :
@@ -575,11 +770,13 @@ def make_toys ( pdf ,
## 3. fit it!
r = fit_fun ( pdf , dataset , **fitcnf )
- ## fit status
- fits [ r.status () ] += 1
+ ## fit status
+ st = r.status()
+ if 0 != st : fits [ st ] += 1
## covariance matrix quality
- covs [ r.covQual () ] += 1
+ cq = r.covQual()
+ if not cq in ( -1 , 3 ) : covs [ cq ] += 1
## ok ?
ok = accept_fun ( r , pdf , dataset )
@@ -839,10 +1036,12 @@ def make_toys2 ( gen_pdf , ## pdf to generate toys
r = fit_fun ( fit_pdf , dataset , **fitcnf )
## fit status
- fits [ r.status () ] += 1
+ st = r.status()
+ if 0 != st : fits [ st ] += 1
## covariance matrix quality
- covs [ r.covQual () ] += 1
+ cq = r.covQual()
+ if not cq in ( -1 , 3 ) : covs [ cq ] += 1
## ok ?
ok = accept_fun ( r , fit_pdf , dataset )
@@ -925,14 +1124,14 @@ def make_jackknife ( pdf ,
fit_config = {} , ## parameters for pdf.fitTo
fit_pars = {} , ## fit-parameters to reset/use
more_vars = {} , ## additional results to be calculated
- fit_fun = None , ## fit function ( pdf , dataset , **fit_config )
- accept_fun = None , ## accept function ( fit-result, pdf, dataset )
+ fit_fun = None , ## - fit function ( pdf , dataset , **fit_config )
+ accept_fun = None , ## - accept function ( fit-result, pdf, dataset )
event_range = () , ## event range for jackknife
silent = True ,
progress = True ,
logger = logger ,
frequency = 500 ) :
- """Run Jackknife analysis, useful for evaluaton of fit biased and uncertainty estimates
+ """ Run Jackknife analysis, useful for evaluation of fit biases and uncertainty estimates
For each i
remove event with index i
from the dataset, and refit it.
>>> dataset = ...
>>> model = ...
@@ -970,7 +1169,11 @@ def make_jackknife ( pdf ,
assert 0 <= begin and begin < end and begin < N , 'make_jackknife: invalid event range (%s,%s)/%d' % ( begin , end , N )
## adjust the end
end = min ( end , N )
-
+
+ ## check if pdf and `extended` flag are in agreement, and print warning message otherwise
+ ok1 = check_extended ( pdf , False )
+ if not ok1 and not silent : logger.warning ( "Jackknife: estimates for `yield` parameters could be wrong!")
+
## 1. fitting function?
if fit_fun is None :
if not silent : logger.info ( "make_jackknife: use default 'make_fit' function!")
@@ -1024,10 +1227,12 @@ def make_jackknife ( pdf ,
r = fit_fun ( pdf , ds , **fitcnf )
## 4. fit status
- fits [ r.status () ] += 1
+ st = r.status()
+ if 0 != st : fits [ st ] += 1
## 5. covariance matrix quality
- covs [ r.covQual () ] += 1
+ cq = r.covQual()
+ if not cq in ( -1 , 3 ) : covs [ cq ] += 1
## ok ?
ok = accept_fun ( r , pdf , ds )
@@ -1050,8 +1255,10 @@ def make_jackknife ( pdf ,
results [ '#' ] .append ( len ( ds ) )
results [ '#sumw' ] .append ( ds.sumVar ( '1' ) )
+ ## reset/remove/delete dataset
ds.clear()
-
+ del ds
+
if progress or not silent :
if 0 < frequency and 1 <= i and 0 == i % frequency :
stats = make_stats ( results , fits , covs , accept )
@@ -1066,19 +1273,27 @@ def make_jackknife ( pdf ,
r_tot = fit_fun ( pdf , data , **fitcnf )
r_tot = fit_fun ( pdf , data , **fitcnf )
- ## 10. the final table
+ ## 10. the final table
+ title = ''
+ if not ok1 :
+ logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!")
+ NN = 0
+ for k in stats : NN = max ( NN , stats[k].nEntries() )
+ title = "Jackknife results (N=%d).[Estimates for `yield` are likely wrong!]" % NN
print_jackknife ( r_tot ,
stats ,
morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) ,
- logger = logger )
-
+ logger = logger ,
+ title = title )
+
+ if not ok1 and not silent : logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!")
return results , stats
# =============================================================================
## Run Bootstrap analysis, useful for evaluaton of fit biases and uncertainty estimates
#
-# In total size
datasets are sampled (with replacement) from the original dataste
+# In total size
datasets are sampled (with replacement) from the original dataset
# data
and each sampled dataset is fit
# @code
# dataset = ...
@@ -1102,6 +1317,7 @@ def make_jackknife ( pdf ,
# @param fit_config configuration of pdf.FitTo( data , ... )
# @param fit_pars redefine these parameters before each fit
# @param more_vars calculate more variables from the fit-results
+# @param extended use extended bootstrap?
# @param fit_fun specific fitting action (if needed)
# @param accept_fun specific accept action (if needed)
# @param silent silent processing
@@ -1115,14 +1331,15 @@ def make_bootstrap ( pdf ,
fit_config = {} , ## parameters for pdf.fitTo
fit_pars = {} , ## fit-parameters to reset/use
more_vars = {} , ## additional results to be calculated
- fit_fun = None , ## fit function ( pdf , dataset , **fit_config )
+ extended = True , ## use extended/non-extended bootstrap
+ fit_fun = None , ## fit function ( pdf , dataset , **fit_config )
accept_fun = None , ## accept function ( fit-result, pdf, dataset )
silent = True , ## silent processing?
progress = True , ## shpow progress bar?
- logger = logger , ## use this logger
+ logger = logger , ## use this logger
frequency = 500 ) :
- """Run Bootstrap analysis, useful for evaluaton of fit biased and uncertainty estimates
+ """ Run Bootstrap analysis, useful for evaluaton of fit biased and uncertainty estimates
In total `size` datasets are sampled (with replacement) from the original dataste
`data` and each sampled dataset is fit
>>> dataset = ...
@@ -1137,6 +1354,7 @@ def make_bootstrap ( pdf ,
- `fit_config` : configuration of `pdf.FitTo( data , ... )`
- `fit_pars` : redefine these parameters before each fit
- `more_vars` : calculate more variables from the fit-results
+ - `extended` : use extended bootstrap?
- `fit_fun` : specific fitting acion (if needed)
- `accept_fun` : specific accept action (if needed)
- `silent` : silent processing?
@@ -1152,6 +1370,10 @@ def make_bootstrap ( pdf ,
assert isinstance ( size , integer_types ) and 1 <= size ,\
'Invalid "size" argument %s/%s' % ( size , type ( size ) )
+ ## check if pdf and `extended` flag are in agreement, and print warning message otherwise
+ ok1 = check_extended ( pdf , extended )
+ if not ok1 and not silent : logger.warning ( "Bootstrap: estimates for `yield` parameters could be wrong!")
+
## 1. fitting function?
if fit_fun is None :
if not silent : logger.info ( "make_bootstrap: use default 'make_fit' function!")
@@ -1193,10 +1415,14 @@ def make_bootstrap ( pdf ,
pdf.load_params ( params = fix_fit_pars , silent = True ) ## silent = silent )
r_tot = fit_fun ( pdf , data , **fitcnf )
+ nn = 0
+
from ostap.utils.progress_bar import progress_bar
- ## run jackknife bootstrapping
- for i , ds in progress_bar ( enumerate ( data.bootstrap ( size ) ) , max_value = size , silent = not progress ) :
-
+ ## run bootstrapping
+ for i , ds in progress_bar ( enumerate ( data.bootstrap ( size , extended = extended ) ) ,
+ max_value = size ,
+ silent = not progress ) :
+
## 2. reset parameters of fit_pdf
pdf.load_params ( params = fix_fit_init , silent = True ) ## silent = silent )
pdf.load_params ( params = fix_fit_pars , silent = True ) ## silent = silent )
@@ -1205,10 +1431,12 @@ def make_bootstrap ( pdf ,
r = fit_fun ( pdf , ds , **fitcnf )
## 4. fit status
- fits [ r.status () ] += 1
+ st = r.status()
+ if 0 != st : fits [ st ] += 1
## 5. covariance matrix quality
- covs [ r.covQual () ] += 1
+ cq = r.covQual()
+ if not cq in ( -1 , 3 ) : covs [ cq ] += 1
## ok ?
ok = accept_fun ( r , pdf , ds )
@@ -1230,18 +1458,24 @@ def make_bootstrap ( pdf ,
results [ '#' ] .append ( len ( ds ) )
results [ '#sumw' ] .append ( ds.sumVar ( '1' ) )
+ nn += 1
+ ## reset/remove/delete dataset
ds.clear()
+ del ds
if progress or not silent :
if 0 < frequency and 1 <= i and 0 == i % frequency :
stats = make_stats ( results , fits , covs , accept )
- ## print_stats ( stats , i + 1 , logger = logger )
+ title = "Bootstrapping with #%d samples" % nn
+ if extended : title = '(Extended) %s' % title
+ if not ok1 : title += '[Estimates for `yield` are likely wrong!]'
print_bootstrap ( r_tot ,
stats ,
morevars = dict ( ( k , more_vars [ k ] ( r_tot , pdf ) ) for k in more_vars ),
- logger = logger )
-
+ logger = logger ,
+ title = title )
+ if not ok1 and not silent : logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!")
## 8. make a final statistics
stats = make_stats ( results , fits , covs , accept )
@@ -1253,11 +1487,19 @@ def make_bootstrap ( pdf ,
r_tot = fit_fun ( pdf , data , **fitcnf )
## 10. the final table
+ title = "Bootstrapping with #%d samples" % nn
+ if extended : title = '(Extended) %s' % title
+ if not ok1 :
+ title += '[Estimates for `yield` are likely wrong!]'
+ logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!")
print_bootstrap ( r_tot ,
stats ,
morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ),
- logger = logger )
+ logger = logger ,
+ title = title )
+ if not ok1 and not silent : logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!")
+
return results , stats
diff --git a/ostap/parallel/parallel_toys.py b/ostap/parallel/parallel_toys.py
index f825bbbc..fd506d94 100644
--- a/ostap/parallel/parallel_toys.py
+++ b/ostap/parallel/parallel_toys.py
@@ -302,7 +302,8 @@ def __init__ ( self ,
silent = True ,
progress = True ,
frequency = 100 ) :
-
+
+
TheTask_.__init__ ( self ,
data = data ,
fit_config = fit_config ,
@@ -354,9 +355,38 @@ def process ( self , jobid , event_range ) :
# @author Vanya BELYAEV Ivan.Belyaev@itep.ru
# @date 2023-06-22
class BootstrapTask(JackknifeTask) :
- """The simple task object for parallel Jackknife
+ """ The simple task object for parallel Jackknife
- see ostap.fitting.toys.make_jackknife
"""
+
+ def __init__ ( self ,
+ pdf ,
+ data ,
+ fit_config = {} , ## parameters for pdf.fitTo
+ fit_pars = {} , ## fit-parameters to reset/use
+ more_vars = {} , ## additional results to be calculated
+ extended = True , ## use extended bootstrap
+ fit_fun = None , ## fit function ( pdf , dataset , **fit_config )
+ accept_fun = None , ## accept function ( fit-result, pdf, dataset )
+ silent = True ,
+ progress = True ,
+ frequency = 100 ) :
+
+ JackknifeTask.__init__ ( self ,
+ pdf = pdf ,
+ data = data ,
+ fit_config = fit_config , ## parameters for pdf.fitTo
+ fit_pars = fit_pars , ## fit-parameters to reset/use
+ more_vars = more_vars , ## additional results to be calculated
+ fit_fun = fit_fun , ## fit function ( pdf , dataset , **fit_config )
+ accept_fun = accept_fun , ## accept function ( fit-result, pdf, dataset )
+ silent = silent ,
+ progress = progress ,
+ frequency = frequency )
+
+ self.extended = True if extended else False
+
+
## the actual processing
def process ( self , jobid , size ) :
@@ -370,16 +400,17 @@ def process ( self , jobid , size ) :
import ostap.fitting.variables
from ostap.core.ostap_types import integer_types
- assert isinstance ( size , integer_types ) and 0 < size ,\
+ assert isinstance ( size , integer_types ) and 1 <= size ,\
'Jobid %s: Invalid "size" argument %s/%s' % ( jobid , size , type ( size ) )
import ostap.fitting.toys as Toys
return Toys.make_bootstrap ( pdf = self.pdf ,
data = self.data ,
- size = size , ## ATTENTION
+ size = size , ## ATTENTION !!!
fit_config = self.fit_config ,
fit_pars = self.fit_pars ,
more_vars = self.more_vars ,
+ extended = self.extended ,
fit_fun = self.fit_fun ,
accept_fun = self.accept_fun ,
silent = self.silent ,
@@ -867,6 +898,10 @@ def parallel_jackknife ( pdf ,
import ostap.fitting.toys as Toys
if nSplit < 2 : return Toys.make_jackknife ( progress = progress , **config )
+
+ ## check if pdf and `extended` flag are in agreement, and print warning message otherwise
+ ok1 = Toys.check_extended ( pdf , False )
+ if not ok1 : logger.warning ( "Jackknife: estimates for `yield` parameters could be wrong!")
## create the task
task = JackknifeTask ( progress = progress and not silent , **config )
@@ -885,8 +920,6 @@ def parallel_jackknife ( pdf ,
results , stats = task.results ()
if progress or not silent :
- ## Toys.print_stats ( stats )
-
fitcnf = {}
fitcnf.update ( fit_config )
if not 'silent' in fitcnf : fitcnf [ 'silent' ] = silent
@@ -896,14 +929,21 @@ def parallel_jackknife ( pdf ,
## 9. fit total dataset (twice)
r_tot = fit_fun ( pdf , data , **fitcnf )
r_tot = fit_fun ( pdf , data , **fitcnf )
-
+
+ title = ''
+ if not ok1 :
+ logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!")
+ NN = 0
+ for k in stats : NN = max ( NN , stats[k].nEntries() )
+ title = "Jackknife results (N=%d).[Estimates for `yield` are likely wrong!]" % NN
## the final table
Toys.print_jackknife (
r_tot ,
stats ,
- morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) )
-
+ morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) ,
+ title = title )
+ if not ok1 : logger.warning ( "Jackknife: estimates for `yield` parameters are likely wrong!")
return results, stats
@@ -948,6 +988,7 @@ def parallel_bootstrap ( pdf ,
fit_config = {} , ## parameters for pdf.fitTo
fit_pars = {} , ## fit-parameters to reset/use
more_vars = {} , ## additional results to be calculated
+ extended = True , ## use extended bootstrap
fit_fun = None , ## fit function ( pdf , dataset , **fit_config )
accept_fun = None , ## accept function ( fit-result, pdf, dataset )
silent = True , ## silent processing?
@@ -973,7 +1014,10 @@ def parallel_bootstrap ( pdf ,
import ostap.fitting.toys as Toys
if nSplit < 2 : return Toys.make_boostrap ( size = size , progress = progress , **config )
-
+
+ ## check if pdf and `extended` flag are in agreement, and print warning message otherwise
+ ok1 = Toys.check_extended ( pdf , extended )
+ if not ok1 : logger.warning ( "Bootstrap: estimates for `yield` parameters could be wrong!")
## create teh task
task = BootstrapTask ( progress = progress and not silent , **config )
@@ -1002,13 +1046,19 @@ def parallel_bootstrap ( pdf ,
r_tot = fit_fun ( pdf , data , **fitcnf )
r_tot = fit_fun ( pdf , data , **fitcnf )
- ## the final table
+ ## the final table
+ title = ''
+ if not ok1 :
+ logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!")
+ NN = 0
+ for k in stats : NN = max ( NN , stats[k].nEntries() )
+ title = "Bootstrap results (N=%d).[Estimates for `yield` are likely wrong!]" % NN
Toys.print_bootstrap (
r_tot ,
stats ,
morevars = dict ( ( k , more_vars [ k ]( r_tot , pdf ) ) for k in more_vars ) )
-
+ if not ok1 : logger.warning ( "Bootstrap: estimates for `yield` parameters are likely wrong!")
return results, stats
diff --git a/ostap/parallel/tests/test_parallel_toys.py b/ostap/parallel/tests/test_parallel_toys.py
index e5ece3ea..a89f9e6c 100755
--- a/ostap/parallel/tests/test_parallel_toys.py
+++ b/ostap/parallel/tests/test_parallel_toys.py
@@ -18,12 +18,16 @@
from ostap.core.meta_info import root_info
import ostap.fitting.roofit
import ostap.fitting.models as Models
-from ostap.utils.timing import timing
+from ostap.utils.timing import timing
+from ostap.utils.memory import memory
from ostap.core.core import cpp, VE, dsID, hID, rooSilent
import ostap.fitting.models as Models
from ostap.plotting.canvas import use_canvas
from ostap.utils.utils import wait
from ostap.fitting.toys import pull_var
+from ostap.math.base import num_range
+from ostap.utils.progress_bar import progress_bar
+import ostap.logger.table as T
import ROOT, sys, os, random, time
# =============================================================================
# logging
@@ -47,16 +51,27 @@
nominal_mean = 0.4
nominal_sigma = 0.1
+nominal_S = 1000
+nominal_B = 100
+nominal_F = float ( nominal_S ) / ( nominal_S + nominal_B )
-mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 )
-gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass )
-fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass )
+
+mass = ROOT.RooRealVar ( 'mass' , '', 0 , 1 )
+gen_gauss = Models.Gauss_pdf ( 'GG' , xvar = mass )
+fit_gauss = Models.Gauss_pdf ( 'FG' , xvar = mass )
gen_gauss.mean = nominal_mean
gen_gauss.sigma = nominal_sigma
-model = Models.Fit1D ( signal = gen_gauss ,
- background = 'flat' )
+model = Models.Fit1D ( signal = gen_gauss , background = 'flat' )
+model_NE = Models.Fit1D ( signal = gen_gauss , background = 'flat' , extended = False )
+
+model.S = nominal_S
+model.B = nominal_B
+model_NE.F = nominal_F
+
+toy_results = {}
+fit_config = { 'silent' : True , 'refit' : 5 }
# ==============================================================================
## Perform toy-study for possible fit bias and correct uncertainty evaluation
@@ -67,7 +82,7 @@
# - fill distributions for fit results
# - fill distribution of pulls
def test_parallel_toys ( ) :
- """Perform toys-study for possible fit bias and correct uncertainty evaluation
+ """ Perform toys-study for possible fit bias and correct uncertainty evaluation
- generate `nToys` pseudoexperiments with some PDF `pdf`
- fit teach experiment with the same PDF
- store fit results
@@ -84,11 +99,11 @@ def test_parallel_toys ( ) :
with timing ( 'Toys analysis' , logger = logger ) :
results , stats = Toys.parallel_toys (
pdf = gen_gauss ,
- nToys = 1000 ,
+ nToys = 2000 ,
nSplit = 20 ,
data = [ mass ] ,
gen_config = { 'nEvents' : 200 , 'sample' : True } ,
- fit_config = { 'silent' : True } ,
+ fit_config = fit_config ,
init_pars = { 'mean_GG' : nominal_mean , 'sigma_GG' : nominal_sigma } ,
more_vars = more_vars ,
silent = True ,
@@ -105,6 +120,8 @@ def test_parallel_toys ( ) :
for h in ( h_mean , h_sigma ) :
with use_canvas ( 'test_paralllel_toys %s' % h.title , wait = 1 ) : h.draw()
+ toy_results['test_parallel_toys'] = results , stats
+
# =============================================================================
## Perform toy-study for possible fit bias and correct uncertainty evaluation
# - generate nToys
pseudoexperiments with some PDF gen_pdf
@@ -112,7 +129,7 @@ def test_parallel_toys ( ) :
# - store fit results
# - fill distributions for fit results
def test_parallel_toys2 ( ) :
- """Perform toys-study for possible fit bias and correct uncertainty evaluation
+ """ Perform toys-study for possible fit bias and correct uncertainty evaluation
- generate `nToys` pseudoexperiments with some PDF `gen_pdf`
- fit teach experiment with the PDF `fit_pdf`
- store fit results
@@ -128,11 +145,11 @@ def test_parallel_toys2 ( ) :
results , stats = Toys.parallel_toys2 (
gen_pdf = gen_gauss ,
fit_pdf = fit_gauss ,
- nToys = 1000 ,
+ nToys = 2000 ,
nSplit = 20 ,
data = [ mass ] ,
gen_config = { 'nEvents' : 200 , 'sample' : True } ,
- fit_config = { 'silent' : True } ,
+ fit_config = fit_config ,
gen_pars = { 'mean_GG' : nominal_mean , 'sigma_GG' : nominal_sigma } ,
fit_pars = { 'mean_FG' : nominal_mean , 'sigma_FG' : nominal_sigma } ,
more_vars = more_vars ,
@@ -150,58 +167,221 @@ def test_parallel_toys2 ( ) :
for h in ( h_mean , h_sigma ) :
with use_canvas ( 'test_parallel_toys2 %s' % h.title , wait = 1 ) : h.draw()
+ toy_results['test_parallel_toys2'] = results , stats
-# ==============================================================================
-## Perform toy-study for Jackknife
-def test_parallel_jackknife ( ) :
- """Perform toys-study for Jackknife
- """
- logger = getLogger ( 'test_parallel_jackknife' )
- if root_info < (6,18) :
- logger.warning ( "(parallel) Jackknife is temporarily disabled for this version of ROOT" )
- return
+# ==============================================================================
+## Perform toy-study for Jackknife (non-extended model)
+def test_parallel_jackknife_NE1 ( ) :
+ """ Perform toys-study for Jackknife (non-extended model)
+ """
- with timing ( 'Jackknife analysis' , logger = logger ) :
-
- model.S = 1000
- model.B = 100
-
- dataset = model.generate ( 1100 )
-
- Toys.parallel_jackknife ( pdf = model ,
- nSplit = 6 ,
- data = dataset ,
- fit_config = { 'silent' : True } ,
- progress = True ,
- silent = True )
-
+ logger = getLogger ( 'test_parallel_jackknife_NE1' )
+ logger.info ( 'Jackknife analysis for non-extended model' )
+
+ pdf = model_NE
+
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.F = nominal_F
+
+ with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) :
+ data = pdf.generate ( nominal_S )
+ with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.F = nominal_F
+
+ results, stats = Toys.parallel_jackknife ( pdf = pdf ,
+ nSplit = 10 ,
+ data = data ,
+ fit_config = fit_config ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_parallel_jackknife_NE1' ] = results , stats
+
+# ==============================================================================
+## Perform toy-study for Jackknife (non-extended model)
+def test_parallel_jackknife_NE2 ( ) :
+ """ Perform toys-study for Jackknife (non-extended model)
+ """
+
+ logger = getLogger ( 'test_parallel_jackknife_NE2' )
+ logger.info ( 'Jackknife analysis for non-extended model' )
+
+ pdf = fit_gauss
+
+ pdf.mean = nominal_mean
+ pdf.sigma = nominal_sigma
+
+ with timing ( 'Jackknife analysis (non-extended)' , logger = logger ) :
+ data = pdf.generate ( nominal_S )
+ with use_canvas ( "Jackknife analysis (non-extended)" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ pdf.mean = nominal_mean
+ pdf.sigma = nominal_sigma
+
+ results, stats = Toys.parallel_jackknife ( pdf = pdf ,
+ nSplit = 10 ,
+ data = data ,
+ fit_config = fit_config ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_parallel_jackknife_NE2' ] = results , stats
+
# ==============================================================================
-## Perform toy-study for Bootstrap
-def test_parallel_bootstrap ( ) :
- """Perform toys-study for Bootstrap
+## Perform toy-study for Jackknife (extended model)
+def test_parallel_jackknife_EXT ( ) :
+ """ Perform toys-study for Jackknife (extended model)
"""
- logger = getLogger ( 'test_parallel_bootstrap' )
+ logger = getLogger ( 'test_parallel_jackknife_EXT' )
+ logger.info ( 'Jackknife analysis for extended model' )
+
+ pdf = model
- with timing ( 'Bootstrap analysis' , logger = logger ) :
-
- model.S = 1000
- model.B = 100
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.S = nominal_S
+ pdf.B = nominal_B
+
+ with timing ( 'Jackknife analysis (extended)' , logger = logger ) :
- dataset = model.generate ( 1100 )
+ data = pdf.generate ( nominal_S )
+ with use_canvas ( "Jackknife analysis (extended)" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.S = nominal_S
+ pdf.B = nominal_B
+
+ results, stats = Toys.parallel_jackknife ( pdf = pdf ,
+ nSplit = 10 ,
+ data = data ,
+ fit_config = fit_config ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_parallel_jackknife_EXT' ] = results , stats
- Toys.parallel_bootstrap ( pdf = model ,
- size = 1000 ,
- nSplit = 6 ,
- data = dataset ,
- fit_config = { 'silent' : True } ,
- progress = True ,
- silent = True )
+# ==============================================================================
+## Perform toy-study for Bootstrap (non-extended)
+def test_parallel_bootstrap_NE1 ( ) :
+ """ Perform toys-study for Bootstrap (non-extended)
+ """
+
+ logger = getLogger ( 'test_parallel_bootstrap_NE1' )
+ logger.info ( 'Bootstrap analysis for non-extended model' )
+
+ pdf = model_NE
+
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.F = nominal_F
+
+ with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) :
+
+ data = pdf.generate ( nominal_S + nominal_B )
+ with use_canvas ( "Bootstrap analysis" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.F = nominal_F
+
+ results, stats = Toys.parallel_bootstrap ( pdf = pdf ,
+ nSplit = 10 ,
+ size = 400 ,
+ data = data ,
+ fit_config = fit_config ,
+ extended = False ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_parallel_bootstrap_NE1' ] = results , stats
+
+# ==============================================================================
+## Perform toy-study for Bootstrap (non-extended)
+def test_parallel_bootstrap_NE2 ( ) :
+ """ Perform toys-study for Bootstrap (non-extended)
+ """
+
+ logger = getLogger ( 'test_parallel_bootstrap_NE2' )
+ logger.info ( 'Bootstrap analysis for non-extended model' )
+
+ pdf = fit_gauss
+
+ pdf.mean = nominal_mean
+ pdf.sigma = nominal_sigma
+
+ with timing ( 'Bootstrap analysis (non-extended)' , logger = logger ) :
+
+ data = pdf.generate ( nominal_S + nominal_B )
+ with use_canvas ( "Bootstrap analysis" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ pdf.mean = nominal_mean
+ pdf.sigma = nominal_sigma
+
+ results, stats = Toys.parallel_bootstrap ( pdf = pdf ,
+ nSplit = 10 ,
+ size = 400 ,
+ data = data ,
+ fit_config = fit_config ,
+ extended = False ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_parallel_bootstrap_NE2' ] = results , stats
+
+# ==============================================================================
+## Perform toy-study for Bootstrap (extended)
+def test_parallel_bootstrap_EXT ( ) :
+ """ Perform toys-study for Bootstrap (extended)
+ """
+
+ logger = getLogger ( 'test_parallel_bootstrap_EXT' )
+ logger.info ( 'Bootstrap analysis for extended model' )
+
+ pdf = model
+
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.S = nominal_S
+ pdf.B = nominal_B
+
+ with timing ( 'Bootstrap analysis (extended)' , logger = logger ) :
+
+ data = pdf.generate ( nominal_S + nominal_B )
+ with use_canvas ( "(Extended) Bootstrap analysis" , wait = 3 ) :
+ r , f = pdf.fitTo ( data , draw = True , nbins = 100 , **fit_config )
+
+ pdf.signal.mean = nominal_mean
+ pdf.signal.sigma = nominal_sigma
+ pdf.S = nominal_S
+ pdf.B = nominal_B
+
+ results, stats = Toys.parallel_bootstrap ( pdf = pdf ,
+ nSplit = 10 ,
+ size = 400 ,
+ data = data ,
+ fit_config = fit_config ,
+ extended = True ,
+ progress = True ,
+ silent = True )
+
+ toy_results [ 'test_parallel_bootstrap_EXT' ] = results , stats
+
# =============================================================================
## Perform toy-study for significance of the signal
# - generate nToys
pseudoexperiments using background-only hypothesis
@@ -209,7 +389,7 @@ def test_parallel_bootstrap ( ) :
# - store fit results
# - fill distributions for fit results
def test_parallel_significance ( ) :
- """Perform toy-study for significance of the signal
+ """ Perform toy-study for significance of the signal
- generate `nToys` pseudoexperiments using background-only hypothesis
- fit each experiment with signal+background hypothesis
- store fit results
@@ -236,34 +416,117 @@ def test_parallel_significance ( ) :
results , stats = Toys.parallel_toys2 (
gen_pdf = bkg_only ,
fit_pdf = model ,
- nToys = 1000 ,
+ nToys = 10000 ,
nSplit = 20 ,
data = [ mass ] ,
gen_config = { 'nEvents' : 100 , 'sample' : True } ,
- fit_config = { 'silent' : True } ,
+ fit_config = fit_config ,
gen_pars = { 'tau_BKG' : 0. } , ## initial values for generation
fit_pars = { 'B' : 100 ,
'S' : 10 ,
'phi0_Bkg_S': 0.0 } , ## initial fit values for parameters
silent = True ,
progress = True )
-
- h_S = ROOT.TH1F ( hID() , '#S' , 60 , 0 , 60 )
- for r in results ['S' ] : h_S .Fill ( r )
+
+
+ # =========================================================================
+ ## yields
+ h_Y = ROOT.TH1F ( hID() , '#S' , 140 , 0 , 70 )
+ for r in results [ 'S' ] : h_Y .Fill ( r )
+
+ ## get p-value and significance histograms:
+ h_P , h_S = h_Y.significance ( cut_low = True )
+
+ h_S.red ()
+ h_P.blue ()
+
+ minv = 1.e+100
+ for i,_,y in h_P.items() :
+ yv = y.value()
+ if 0 < yv and yv < minv : minv = yv
+ minv , _ = num_range ( 0.75 * minv )
+ h_P.SetMinimum ( minv )
+ h_S.SetMinimum ( 0 )
+
+ with use_canvas ( 'test_significance: yields' , wait = 1 ) : h_Y.draw ( )
+ with use_canvas ( 'test_significance: p-value' , wait = 1 ) : h_P.draw ( logy = True )
+ with use_canvas ( 'test_significance: significance' , wait = 3 ) : h_S.draw ()
- for h in ( h_S , ) :
- with use_canvas ( 'test_parallel_significance %s' % h.title , wait = 1 ) : h.draw()
+ toy_results [ 'test_parallel_significance' ] = results , stats
+
+# =============================================================================
+## Save results of toys to DBASE
+def test_db () :
+ """ Save resuls of toys to DBASE
+ """
+ logger = getLogger ( 'test_db' )
+ logger.info ( 'Saving all toys results into DBASE' )
+ import ostap.io.zipshelve as DBASE
+ from ostap.utils.timing import timing
+ with timing( 'Save everything to DBASE', logger ), DBASE.tmpdb() as db :
+ for key in progress_bar ( toy_results ) :
+ r , s = toy_results [ key ]
+ key1 = '%s:results' % key
+ key2 = '%s:stats' % key
+ db [ key1 ] = r
+ db [ key2 ] = s
+ db.ls()
+ while toy_results : toy_results.popitem()
+# =============================================================================
+
+
# =============================================================================
if '__main__' == __name__ :
+ with memory ( 'test_toys' , logger = logger ) as mtt : test_parallel_toys ()
+ with memory ( 'test_toys2' , logger = logger ) as mt2 : test_parallel_toys2 ()
+ with memory ( 'test_jackknife_NE1' , logger = logger ) as mj1 : test_parallel_jackknife_NE1 ()
+ with memory ( 'test_jackknife_NE2' , logger = logger ) as mj2 : test_parallel_jackknife_NE2 ()
+ with memory ( 'test_jackknife_EXT' , logger = logger ) as mje : test_parallel_jackknife_EXT ()
+ with memory ( 'test_bootstrap_NE1' , logger = logger ) as mb1 : test_parallel_bootstrap_NE1 ()
+ with memory ( 'test_bootstrap_NE2' , logger = logger ) as mb2 : test_parallel_bootstrap_NE2 ()
+ with memory ( 'test_bootstrap_EXT' , logger = logger ) as mbe : test_parallel_bootstrap_EXT ()
+
+ with memory ( 'test_significance' , logger = logger ) as mts : test_parallel_significance ()
+ with memory ( 'test_db' , logger = logger ) as mdb : test_db ()
+
+ rows = [ ( 'Test' , 'Memory [MB]' ) ]
+
+ row = 'Toys' , '%.1f' % mtt.delta
+ rows.append ( row )
- test_parallel_toys ( )
- test_parallel_toys2 ( )
- test_parallel_significance ( )
+ row = 'Toys2' , '%.1f' % mt2.delta
+ rows.append ( row )
+
+ row = 'Jackknife_NE1' , '%.1f' % mj1.delta
+ rows.append ( row )
+
+ row = 'Jackknife_NE2' , '%.1f' % mj2.delta
+ rows.append ( row )
+
+ row = 'Jackknife_EXT' , '%.1f' % mje.delta
+ rows.append ( row )
+
+ row = 'Bootstrap_NE1' , '%.1f' % mb1.delta
+ rows.append ( row )
+
+ row = 'Bootstrap_NE2' , '%.1f' % mb2.delta
+ rows.append ( row )
+
+ row = 'Bootstrap_EXT' , '%.1f' % mbe.delta
+ rows.append ( row )
+
+ row = 'Significance' , '%.1f' % mts.delta
+ rows.append ( row )
+
+ row = 'test_db' , '%.1f' % mdb.delta
+ rows.append ( row )
+
+ title = 'Memory usage for various toys'
+ table = T.table ( rows , title = title , prefix = '# ' , alignment = 'lc' )
+ logger.info ( '%s:\n%s' % ( title , table ) )
- test_parallel_jackknife ( )
- test_parallel_bootstrap ( )
# =============================================================================