Skip to content

Commit

Permalink
1. extend gof1d and gofnd tests
Browse files Browse the repository at this point in the history
  • Loading branch information
VanyaBelyaev committed Oct 10, 2024
1 parent 72fcb68 commit e59bf2f
Show file tree
Hide file tree
Showing 6 changed files with 95 additions and 59 deletions.
3 changes: 2 additions & 1 deletion ReleaseNotes/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
1. prepend the default progress-bar for trees/datasets/frames with `Entries:`
1. add a kind of replacement of `ROOT.RooAbsCollection.assign` for old versions of ROOT
1. add meaningful `description` argument to all `progress_bar` instances

1. extend `gof1d` and `gofnd` tests

## Backward incompatible

## Bug fixes
Expand Down
18 changes: 8 additions & 10 deletions ostap/stats/gof_np.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,16 +45,15 @@
if (1,6,0) <= sp_version :
qconf = { 'k' : [ 2 ] , 'workers' : -1 }
def neighbour_distances ( tree , data ) :
dist , _ = tree.query ( data , **qconf )
dist = dist.flatten()
return dist
dist , xx = tree.query ( data , **qconf )
del xx
return dist.flatten()
else :
qconf = { 'k' : 2 }
def neighbour_distances ( tree , data ) :
dist , _ = tree.query ( data , **qconf )
dist = np.delete ( dist , 0 , axis = 1 )
dist = dist.flatten()
return dist
dist , xx = tree.query ( data , **qconf )
del xx
return np.delete ( dist , 0 , axis = 1 ).flatten()

# =========================================================================
except ImportError :
Expand Down Expand Up @@ -406,12 +405,11 @@ def t_value ( self , ds1 , vpdf ) :
sh2 = vpdf.shape
assert 2 == len ( sh1 ) and 1 == len ( sh2 ) and len ( ds1 ) == len ( vpdf ) , \
"Invalid arrays: %s , %s" % ( sh1 , sh2 )
tree = sp.spatial.KDTree ( ds1 )

tree = sp.spatial.KDTree ( ds1 )
## uvalues , _ = tree.query ( ds1 , **qconf )
## uvalues = uvalues.flatten ()
uvalues = neighbour_distances ( tree , ds1 )

del tree

## dimension of the problem (it must be set in __call__)
Expand Down
46 changes: 18 additions & 28 deletions ostap/stats/gof_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,19 +234,6 @@ def normalize ( ds , others = () , weight = () , first = True ) :
normalize.__doc__ = normalize2.__doc__


# =============================================================================
jl = None
# =============================================================================
try : # =======================================================================
# =========================================================================
if ( 3 , 0 ) <= python_info :
with warnings.catch_warnings():
warnings.simplefilter ( "ignore" , category = DeprecationWarning )
import joblib as jl
# =========================================================================
except ImportError : # ========================================================
# =========================================================================
jl = None
# =============================================================================
## @class PERMUTATOR
# Helper class that allows running the permutation test in parallel
Expand Down Expand Up @@ -284,16 +271,17 @@ def __call__ ( self , N , silent = True ) :
with warnings.catch_warnings():
warnings.simplefilter ( "ignore" , category = DeprecationWarning )
import joblib as jl
jl_version = tuple ( int ( i ) for i in jl.__version__.split('.') )
# =====================================================================
## Run NN-permutations in parallel using joblib
def joblib_run ( self , NN , silent = True ) :
""" Run NN-permutations in parallel using joblib """
nj = 2 ## 2 * numcpu () + 3
nj = 2 * numcpu () + 3
lst = splitter ( NN , nj )
##
conf = { 'n_jobs' : -1 , 'verbose' : 0 }
if '1.3.0' <= jl.__version__ < '1.4.0' : conf [ 'return_as' ] = 'generator'
elif '1.4.0' <= jl.__version__ : conf [ 'return_as' ] = 'unordered_generator'
if (1,3,0) <= jl_version < (1,4,0) : conf [ 'return_as' ] = 'generator'
elif (1,4,0) <= jl_version : conf [ 'return_as' ] = 'unordered_generator'
##
input = ( jl.delayed (self)( N ) for N in lst )
counter = EffCounter()
Expand All @@ -307,32 +295,34 @@ def joblib_run ( self , NN , silent = True ) :
# =====================================================================
PERMUTATOR.run = joblib_run
# =====================================================================
logger.debug ( 'Joblib will be used foe parallel permuations')
logger.debug ( 'Joblib will be used foe parallel permutations')
# =====================================================================
except ImportError : # ====================================================
# =====================================================================
jl = None



jl = None
# =============================================================================
if not jl : # =================================================================
# =========================================================================
## Run NN-permutations in parallel using WorkManager
def pp_run ( self , NN , silent = True ) :
""" Run NN-permutations in parallel using WorkManager"""
nj = 2 ## 2 * numcpu () + 3
nj = 2 * numcpu () + 3
lst = splitter ( NN , nj )
##
from ostap.parallel.parallel import WorkManager
manager = WorkManager ( silent = silent )
counter = EffCounter()
##
## use the bare interface
for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) :
counter += result
from ostap.parallel.parallel import WorkManager
with WorkManager ( silent = silent ) as manager :
for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) :
counter += result
#
return counter
# =========================================================================
logger.debug ( 'Parallel will be used for parallel permuations')
logger.debug ( 'Parallel will be used for parallel permutations')
# =====================================================================
PERMUTATOR.run = pp_run
# =========================================================================
Expand Down Expand Up @@ -377,13 +367,13 @@ def run ( self , NN , silent = False ) :
nj = 2 ## 2 * numcpu () + 3
lst = splitter ( NN , nj )
##
from ostap.parallel.parallel import WorkManager
manager = WorkManager ( silent = silent )
counter = EffCounter()
##
## use the bare interface
for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) :
counter += result
from ostap.parallel.parallel import WorkManager
with WorkManager ( silent = silent ) as manager :
for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) :
counter += result
#
return counter

Expand Down
24 changes: 13 additions & 11 deletions ostap/stats/tests/test_stats_gof1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ def run_USTAT ( pdf , data, result , logger ) :

rows = [ ( 't-value' , 'x[..]', 'p-value [%]' , '#sigma' ) ]

ustat = USTAT ( nToys = 1000 , histo = 100 )
ustat = USTAT ( nToys = 1000 , histo = 100 , parallel = True )

pdf.load_params ( result , silent = True )

Expand All @@ -144,7 +144,7 @@ def run_USTAT ( pdf , data, result , logger ) :
logger.info ( '%s:\n%s' % ( title , table ) )

return ustat.histo

# ==============================================================================
def test_good_fit_1 ( ) :
""" Make a test for presumably good fit: fit Gauss to Gauss
Expand All @@ -155,7 +155,7 @@ def test_good_fit_1 ( ) :

with use_canvas ( 'test_good_fit_1: G -> G' , wait = 1 ) :
r , f = gauss.fitTo ( data_g , **fitconf )

with use_canvas ( 'test_good_fit_1: GoF' , wait = 1 ) :

gauss.load_params ( r , silent = True )
Expand All @@ -166,19 +166,24 @@ def test_good_fit_1 ( ) :
got = G1D.GoF1DToys ( gauss , data_g )
logger.info ( 'Goodness-of-fit with %d toys:\n%s' % ( got.nToys , got ) )

del gof
del got

## Try to use multidimensional methods
run_PPD ( gauss , data_g , r , logger )

udist1 = run_DNN ( gauss , data_g , r , logger )
if udist1 :
keep.add ( udist1 )
with use_canvas ( 'test_good_fit_1: DNN' , wait = 5 ) :
udist1.draw()

udist2 = run_USTAT ( gauss , data_g , r , logger )
if udist2 :
keep.add ( udist2 )
with use_canvas ( 'test_good_fit_1: USTAT' , wait = 5 ) :
with use_canvas ( 'test_good_fit_1: USTAT' , wait = 1 ) :
udist2.draw()

# =============================================================================
def test_good_fit_2 ( ) :
""" Make a test for presumably good fit: fit Gauss+Bkg to Gauss
Expand Down Expand Up @@ -301,12 +306,9 @@ def test_bad_fit_1 ( ) :
if '__main__' == __name__ :

test_good_fit_1 () ## fit Gauss to Gauss

"""
test_good_fit_2 () ## fit Gauss+Bkg to Gauss
test_good_fit_3 () ## fit Gauss+Bkg to Gauss+Bkg
test_bad_fit_1 () ## fit Gauss to Gauss+Bkg
"""
test_good_fit_2 () ## fit Gauss+Bkg to Gauss
test_good_fit_3 () ## fit Gauss+Bkg to Gauss+Bkg
test_bad_fit_1 () ## fit Gauss to Gauss+Bkg

# ===============================================================================
## The END
Expand Down
53 changes: 48 additions & 5 deletions ostap/stats/tests/test_stats_gofnd.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@
ygauss = M.Gauss_pdf ( 'GY' , xvar = yvar , mean = ( 5 , 4 , 6 ) , sigma = ( 1.0 , 0.5 , 2.5 ) )
gauss2 = xgauss*ygauss

NG = 125
NG2 = 25
NG = 100
NG2 = 50
data_good = gauss2.generate ( NG + NG2 , sample = False )
data_bad = gauss2.generate ( NG , sample = False )
for i in range ( NG2 ) :
Expand Down Expand Up @@ -132,7 +132,7 @@ def test_DNN () :
## 't/bad' , 'x[..]' ,
rows = [ ( 'p-value/good[%]' , 'p-value/bad[%]' , '#sigma/good' , '#sigma/bad') ]

dnn = GnD.DNN ( nToys = 1000 )
dnn = GnD.DNN ( nToys = 1000 , histo = 50 )

## presumably good fit
with timing ( "Good fit DNN" , logger = logger ) :
Expand Down Expand Up @@ -164,12 +164,55 @@ def test_DNN () :
title= 'Goodness-of-Fit DNN test'
table = T.table ( rows , title = title , prefix = '# ')
logger.info ( '%s:\n%s' % ( title , table ) )

# ===============================================================================
def test_USTAT () :
    """ Run the USTAT goodness-of-fit test for the presumably good and
    the presumably bad fits and log a summary table with
    the p-values and the corresponding significances
    """
    logger = getLogger ("test_USTAT")
    from ostap.stats.ustat import USTAT

    ## table header: one column per reported quantity
    header = ( 'p-value/good[%]' , 'p-value/bad[%]' , '#sigma/good' , '#sigma/bad')
    rows   = [ header ]

    estimator = USTAT ( nToys = 1000 , histo = 50 )

    ## presumably good fit: evaluate the t-value directly and then via pvalue
    with timing ( "Good fit USTAT" , logger = logger ) :
        pdf.load_params ( rgood , silent = True )
        t_good          = estimator        ( pdf , data_good )
        t_good , p_good = estimator.pvalue ( pdf , data_good )

    ## presumably bad fit: the same sequence for the "bad" dataset
    with timing ( "Bad fit USTAT" , logger = logger ) :
        pdf.load_params ( rbad , silent = True )
        t_bad         = estimator        ( pdf , data_bad )
        t_bad , p_bad = estimator.pvalue ( pdf , data_bad )

    ## p-values in percent
    good_percent = p_good * 100
    bad_percent  = p_bad  * 100

    ## the formatted t-values are not used in the table, the calls are kept as is
    _ , _ = pretty_float ( t_good )
    _ , _ = pretty_float ( t_bad  )

    sigma_good = significance ( p_good )
    sigma_bad  = significance ( p_bad  )

    fmt_sigma       = '%.1f +/- %.1f'
    sigma_good_text = fmt_sigma % ( sigma_good.value() , sigma_good.error () )
    sigma_bad_text  = fmt_sigma % ( sigma_bad .value() , sigma_bad .error () )

    fmt_pvalue = '%4.1f +/- %.1f'
    rows.append ( ( fmt_pvalue % ( good_percent.value() , good_percent.error () ) ,
                    fmt_pvalue % ( bad_percent .value() , bad_percent .error () ) ,
                    sigma_good_text ,
                    sigma_bad_text  ) )

    title = 'Goodness-of-Fit USTAT test'
    table = T.table ( rows , title = title , prefix = '# ')
    logger.info ( '%s:\n%s' % ( title , table ) )

# ===============================================================================
if '__main__' == __name__ :

test_PPD ()
test_DNN ()
test_PPD ()
test_DNN ()
test_USTAT ()

# ===============================================================================
## The END
Expand Down
10 changes: 6 additions & 4 deletions ostap/stats/ustat.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,14 +325,16 @@ def pvalue ( self , pdf , data ) :

## prepare toys
toys = TOYS ( self , t_value , pdf = pdf , Ndata = len ( data ) , sample = self.sample )

silent = self.silent
self.__silent = True
if self.parallel :
silent = self.silent
self.__silent = True
counter = toys.run ( self.nToys , silent = silent )
self.__silent = silent
else :
counter = toys ( self.nToys , silent = self.silent )


self.__silent = silent

p_value = 1 - counter.eff
return t_value, p_value

Expand Down

0 comments on commit e59bf2f

Please sign in to comment.