Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
VanyaBelyaev committed Nov 27, 2024
1 parent ab7fdfd commit 890736b
Show file tree
Hide file tree
Showing 7 changed files with 119 additions and 35 deletions.
3 changes: 2 additions & 1 deletion ostap/logger/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,8 @@ def table ( rows ,

rows = [ list ( row ) for row in rows ]

if style is None : style = '%s' % default_style
## if style is None : style = '%s' % default_style
if not style : style = '%s' % default_style

fmt = style.lower()

Expand Down
10 changes: 6 additions & 4 deletions ostap/stats/gof.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
# t_value, p_value = gof.pvalue ( pdf , data )
# @endcode
class AGoF(object) :
""" An abstract base class for family of methods to probe Goodness-of-Git
""" An abstract base class for family of methods to probe Goodness-of-Fit
There are two abstract methods
- `__call__` to evaluate t-value, the value of GoF estimator
- `pvalue` to evaluate (t,p)-values
Expand All @@ -57,7 +57,7 @@ class AGoF(object) :
>>> t_value , p_value = gof.pvalue ( pdf , data )
"""
# =========================================================================
## Calculate T-value for Goodness-of-Git
## Calculate T-value for Goodness-of-Fit test
# @code
# gof = ...
# pdf = ...
Expand All @@ -72,7 +72,8 @@ def __call__ ( self , pdf , data ) :
>>> data = ...
>>> t_value = gof ( pdf , data )
"""
return NotImplemented
return NotImplemented

# =========================================================================
## Calculate the t & p-values
# @code
Expand All @@ -90,7 +91,7 @@ def pvalue ( self , pdf , data ) :
>>> t_value , p_value = gof.pvalue ( pdf , data )
"""
return NotImplemented

# =============================================================================
## @class AGoFnp
# An abstract base class for numpy-related family of methods to probe goodness-of-fit
Expand Down Expand Up @@ -135,6 +136,7 @@ def __call__ ( self , data1 , data2 , normalize = True ) :
>>> t = gof ( data1 , data1 , normalize = True )
"""
return NotImplemented

# =========================================================================
## Calculate the t & p-values
# @code
Expand Down
6 changes: 4 additions & 2 deletions ostap/stats/gof1d.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,15 +338,15 @@ def ZK_estimator ( self ) :
def ZA_estimator ( self ) :
    """ Get ZA statistics
    - returns the entry stored under the key 'ZA' in `self.estimators`
    - returns `None` when there is no such entry
    """
    ## look up 'ZA' (not 'ZK'); a missing key yields None instead of KeyError
    return self.estimators.get ( 'ZA' , None )

# =========================================================================
## Get ZC statistics
@property
def ZC_estimator ( self ) :
    """ Get ZC statistics
    - returns the entry stored under the key 'ZC' in the estimators dictionary
    - returns `None` when there is no such entry
    """
    ## look up 'ZC' (not 'ZK'); a missing key yields None instead of KeyError
    return self.__estimators.get ( 'ZC' , None )

__repr__ = Estimators.table
__str__ = Estimators.table
Expand Down Expand Up @@ -598,6 +598,8 @@ def __iadd__ ( self , other ) :
self.__nToys += other.nToys
return self

plot = Summary.draw

# =============================================================================
if '__main__' == __name__ :

Expand Down
30 changes: 17 additions & 13 deletions ostap/stats/gof_np.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from ostap.utils.utils import split_n_range
from ostap.utils.basic import numcpu
from ostap.stats.gof import AGoFnp
from ostap.utils.memory import memory_enough
from ostap.utils.memory import memory, memory_enough
import os, abc, warnings, ROOT
# =============================================================================
try : # =======================================================================
Expand Down Expand Up @@ -81,15 +81,16 @@ class GoFnp (AGoFnp) :
def __init__ ( self ,
nToys = 0 ,
silent = False ,
parallel = False ) :
parallel = False ,
method = 'GoF' ) :

assert isinstance ( nToys , int ) and 0 <= nToys , \
"Invalid number of permulations/toys:%s" % nToys

self.__nToys = nToys
self.__silent = True if silent else False
self.__parallel = True if parallel else False
self.__rows = []
self.__method = method

if self.__parallel and memory_enough () < numcpu () :
logger.warning ( 'Available/Used memory ratio: %.1f; switch-off parallel processing')
Expand Down Expand Up @@ -153,14 +154,14 @@ def parallel ( self ) :
return self.__parallel
# ========================================================================
@property
def rows ( self ) :
"""`rows` : rows of the table with summary information"""
return self.__rows
def method ( self ) :
"""`method` : the actual GoF method """
return self.__method

# ============================================================================
## define configurtaion for psi-function for PPD method
## define configuration for psi-function for PPD method
# - distance type of <code>cdist</code>
# - transformation funciton for cdisct output
# - transformation function for cdist output
# - increasing function ?
# @code
# distance_type , transform, increasing = psi_conf ( 'linear' )
Expand Down Expand Up @@ -209,7 +210,8 @@ def __init__ ( self ,
GoFnp.__init__ ( self ,
nToys = nToys ,
parallel = parallel ,
silent = silent )
silent = silent ,
method = 'Point-to-Point Dissimilarity' )

self.__mc2mc = True if mc2mc else False
self.__transform = None
Expand Down Expand Up @@ -278,6 +280,7 @@ def t_value ( self , ds1 , ds2 ) :
""" Calculate t-value for (non-structured) 2D arrays
"""
##

sh1 = ds1.shape
sh2 = ds2.shape
assert 2 == len ( sh1 ) and 2 == len ( sh2 ) and sh1[1] == sh2[1] , \
Expand All @@ -286,7 +289,7 @@ def t_value ( self , ds1 , ds2 ) :
n1 = len ( ds1 )
n2 = len ( ds2 )
##

## calculate sums of distances, Eq (3.7)
result = self.sum_distances ( ds1 , ds1 ) / ( n1 * ( n1 - 1 ) )
result -= self.sum_distances ( ds1 , ds2 ) / ( n1 * n2 )
Expand Down Expand Up @@ -327,7 +330,7 @@ def __call__ ( self , data1 , data2 , normalize = True ) :
if 1 == uds2.shape [ 1 ] : uds2 = np.c_[ uds2 , np.zeros ( len ( uds2 ) ) ]

return self.t_value ( uds1 , uds2 )

# =========================================================================
## Calculate the t & p-values
# @code
Expand Down Expand Up @@ -367,7 +370,7 @@ def pvalue ( self , data1 , data2 , normalize = True ) :
p_value = counter.eff

if self.__increasing : p_value = 1 - p_value

return t_value , p_value


Expand All @@ -393,7 +396,8 @@ def __init__ ( self ,
GoFnp.__init__ ( self ,
nToys = nToys ,
parallel = parallel ,
silent = silent )
silent = silent ,
method = 'Distance-to-Nearest-Neighbour' )

self.__histo = None
if isinstance ( histo , ROOT.TH1 ) :
Expand Down
90 changes: 79 additions & 11 deletions ostap/stats/gof_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
'normalize' , ## "normalize" variables in dataset/structured array
'Estimators' , ## helper mixin class to print statistical estimators
'Summary' , ## helper mixin class to print statistical estimators
'GoFSummary' , ## helper class to print summary of GoF methods
)
# =============================================================================
from collections import namedtuple
Expand All @@ -29,8 +30,9 @@
from ostap.stats.counters import EffCounter
from ostap.utils.basic import numcpu, loop_items
from ostap.utils.utils import splitter
from ostap.utils.memory import memory_enough
from ostap.utils.progress_bar import progress_bar
import ROOT, sys, warnings
import ROOT, sys, warnings, math
# =============================================================================
try : # =======================================================================
# =========================================================================
Expand Down Expand Up @@ -238,7 +240,6 @@ def normalize ( ds , others = () , weight = () , first = True ) :
exec ( code2 )
normalize.__doc__ = normalize2.__doc__


# =============================================================================
## @class PERMUTATOR
# Helper class that allows running permutation tests in parallel
Expand All @@ -261,8 +262,7 @@ def __call__ ( self , N , silent = True ) :
for i in progress_bar ( N , silent = silent , description = 'Permutations:') :
np.random.shuffle ( pooled )
tv = self.gof.t_value ( pooled [ : n1 ] , pooled [ n1: ] )
counter += bool ( self.t_value < tv )

counter += bool ( self.t_value < tv )
del pooled
return counter

Expand All @@ -281,8 +281,10 @@ def __call__ ( self , N , silent = True ) :
## Run NN-permutations in parallel using joblib
def joblib_run ( self , NN , silent = True ) :
""" Run NN-permutations in parallel using joblib """
nj = 2 * numcpu () + 3
me = math.ceil ( memory_enough() ) + 1
nj = min ( 2 * numcpu () + 3 , me )
lst = splitter ( NN , nj )
if not silent : logger.info ( 'permutations: #%d parallel subjobs to be used' % nj )
##
conf = { 'n_jobs' : -1 , 'verbose' : 0 }
if (1,3,0) <= jl_version < (1,4,0) : conf [ 'return_as' ] = 'generator'
Expand All @@ -300,7 +302,7 @@ def joblib_run ( self , NN , silent = True ) :
# =====================================================================
PERMUTATOR.run = joblib_run
# =====================================================================
logger.debug ( 'Joblib will be used foe parallel permutations')
logger.debug ( 'Joblib will be used for parallel permutations')
# =====================================================================
except ImportError : # ====================================================
# =====================================================================
Expand All @@ -312,15 +314,17 @@ def joblib_run ( self , NN , silent = True ) :
## Run NN-permutations in parallel using WorkManager
def pp_run ( self , NN , silent = True ) :
    """ Run NN-permutations in parallel using WorkManager
    - `NN`     : the number of permutations, passed to `splitter` together with the number of sub-jobs
    - `silent` : if true, the info message is skipped, the progress display is switched off
                 and `WorkManager` is created with `silent = True`
    Returns the `EffCounter` accumulated over the results of all sub-jobs
    """
    ## the number of sub-jobs: never more than `ceil ( memory_enough() ) + 1`
    ## NOTE(review): `memory_enough` is presumably the available/used memory ratio - confirm
    me  = math.ceil ( memory_enough() ) + 1
    nj  = min ( 2 * numcpu () + 3 , me )
    lst = splitter ( NN , nj )
    ##
    if not silent : logger.info ( 'permutations: #%d parallel subjobs to be used' % nj )
    counter = EffCounter()
    ##
    ## use the bare interface
    from ostap.parallel.parallel import WorkManager
    with WorkManager ( silent = silent ) as manager :
        ## a single pass over the sub-job results; each result is added to the counter
        for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj , description = 'Permutations:') :
            counter += result
    #
    return counter
Expand All @@ -332,7 +336,7 @@ def pp_run ( self , NN , silent = True ) :

# =============================================================================
## @class TOYS
# Helper class to tun toys for Goodness-of-Fit studies
# Helper class to run toys for Goodness-of-Fit studies
class TOYS(object) :
""" Helper class to run toys for Goodness-of-Fit studies in parallel
"""
Expand Down Expand Up @@ -367,15 +371,17 @@ def __call__ ( self , N , silent = True ) :
## Run N-toys in parallel using WorkManager
def run ( self , NN , silent = False ) :
    """ Run NN toys in parallel using WorkManager
    - `NN`     : the number of toys, passed to `splitter` together with the number of sub-jobs
    - `silent` : if true, the info message is skipped, the progress display is switched off
                 and `WorkManager` is created with `silent = True`
    Returns the `EffCounter` accumulated over the results of all sub-jobs
    """
    ## the number of sub-jobs: never more than `ceil ( memory_enough() ) + 1`
    ## NOTE(review): `memory_enough` is presumably the available/used memory ratio - confirm
    me  = math.ceil ( memory_enough() ) + 1
    nj  = min ( 2 * numcpu () + 3 , me )
    lst = splitter ( NN , nj )
    if not silent : logger.info ( 'toys: #%d parallel subjobs to be used' % nj )
    ##
    counter = EffCounter()
    ##
    ## use the bare interface
    from ostap.parallel.parallel import WorkManager
    with WorkManager ( silent = silent ) as manager :
        ## a single pass over the sub-job results; each result is added to the counter
        for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj , description = 'Toys:' ) :
            counter += result
    #
    return counter
Expand Down Expand Up @@ -585,6 +591,68 @@ def draw ( self , what , opts = '' , *args , **kwargs ) :
self._line = line
return result, line

# =============================================================================
## @class GoFSummary
# Helper class to format the summary table
class GoFSummary ( object) :
    """ Helper class to collect results of GoF methods and to format them as a summary table
    >>> summary = GoFSummary ()
    >>> summary.add_row ( method , tvalue , pvalue , nToys )
    >>> table   = summary.table ( title = 'My summary' )
    """
    def __init__ ( self ) :
        ## the third (unnamed) column holds the common exponent of the t-value, e.g. `[10^+3]`
        self.__header = ( 'Method' , 't-value' , '' , '#Toys' , 'p-value [%]' , '#sigma' )
        self.__items  = []

    @property
    def items ( self ) :
        """`items`: get all items, as a tuple of (method,tvalue,pvalue,nToys) tuples"""
        return tuple ( self.__items )

    # =======================================================================
    ## Add a row to the summary
    def add_row ( self , method , tvalue , pvalue , nToys ) :
        """ Add a row to the summary
        - `method` : the label for the first column
        - `tvalue` : t-value, formatted by `pretty_float` in `table`
        - `pvalue` : p-value; `table` calls `value()` and `error()` on `100 * pvalue`
        - `nToys`  : number of toys, formatted with `%d` in `table`
        """
        self.__items.append ( ( method , tvalue , pvalue , nToys ) )

    # =======================================================================
    ## Make a summary table
    def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 , style = '' ) :
        """ Make a summary table
        - `title`     : the table title, 'Goodness of 1D-fit' is used when empty
        - `prefix`    : forwarded to `ostap.logger.table.table`
        - `width`     : forwarded to `pretty_float` for the t-value
        - `precision` : forwarded to `pretty_float` for the t-value
        - `style`     : forwarded to `ostap.logger.table.table`
        The exponent column is dropped when no row needs it
        """
        import ostap.logger.table as T
        from   ostap.logger.pretty import pretty_float
        ##
        rows = [ self.__header ]
        for method , tvalue , pvalue , ntoys in self.__items :
            ## NOTE(review): `significance` is not defined in this class; it is
            ## expected to be available at the module level - confirm
            sv          = significance ( pvalue )
            tv  , texpo = pretty_float ( tvalue , width = width , precision = precision )
            pv          = pvalue * 100
            pval        = '%5.3f +/- %.3f' % ( pv.value() , pv.error() )
            sval        = '%.2f +/- %.2f'  % ( sv.value() , sv.error() )
            expo        = '[10^%+d]' % texpo if texpo else ''
            rows.append ( ( method , tv , expo , '%d' % ntoys , pval , sval ) )

        ## one alignment character per column of the header
        alignment = 'lclccc'

        ## skip the empty exponent column, keeping the alignment in sync with the columns
        if not any ( row [ 2 ] for row in rows ) :
            rows      = [ [ c for i , c in enumerate ( row ) if 2 != i ] for row in rows ]
            alignment = alignment [ : 2 ] + alignment [ 3 : ]

        title = title if title else 'Goodness of 1D-fit'
        return T.table ( rows , title = title , prefix = prefix , alignment = alignment , style = style )







# =============================================================================
if '__main__' == __name__ :

Expand Down
Loading

0 comments on commit 890736b

Please sign in to comment.