From 890736b22b5adaae9bd4783969c91da04c2ad7e9 Mon Sep 17 00:00:00 2001 From: Vanya Belyaev Date: Wed, 27 Nov 2024 14:24:53 +0100 Subject: [PATCH] update --- ostap/logger/table.py | 3 +- ostap/stats/gof.py | 10 +++-- ostap/stats/gof1d.py | 6 ++- ostap/stats/gof_np.py | 30 ++++++++------ ostap/stats/gof_utils.py | 90 +++++++++++++++++++++++++++++++++++----- ostap/stats/gofnd.py | 13 ++++-- ostap/utils/memory.py | 2 +- 7 files changed, 119 insertions(+), 35 deletions(-) diff --git a/ostap/logger/table.py b/ostap/logger/table.py index f66395fd..b5a4ed1c 100644 --- a/ostap/logger/table.py +++ b/ostap/logger/table.py @@ -358,7 +358,8 @@ def table ( rows , rows = [ list ( row ) for row in rows ] - if style is None : style = '%s' % default_style + ## if style is None : style = '%s' % default_style + if not style : style = '%s' % default_style fmt = style.lower() diff --git a/ostap/stats/gof.py b/ostap/stats/gof.py index baa36b70..82fa5afb 100644 --- a/ostap/stats/gof.py +++ b/ostap/stats/gof.py @@ -44,7 +44,7 @@ # t_value, p_value = gof.pvalue ( pdf , data ) # @endcode class AGoF(object) : - """ An abstract base class for family of methods to probe Goodness-of-Git + """ An abstract base class for family of methods to probe Goodness-of-Fit There are two abstract methods - `__call__` to evaluate t-value, the value of GoF estimator - `pvalue` to evaluate (t,p)-vaues @@ -57,7 +57,7 @@ class AGoF(object) : >>> t_value , p_value = gof.pvalue ( pdf , data ) """ # ========================================================================= - ## Calculate T-value for Goodness-of-Git + ## Calculate T-value for Goodness-of-Fit test # @code # gof = ... # pdf = ... @@ -72,7 +72,8 @@ def __call__ ( self , pdf , data ) : >>> data = ... >>> t_value = gof ( pdf , data ) """ - return NotImplemented + return NotImplemented + # ========================================================================= ## Calculate the t & p-values # @code @@ -90,7 +91,7 @@ def pvalue ( self , pdf , data ) : >>> t_value , p_value = gof.pvalue ( pdf , data ) """ return NotImplemented - + # ============================================================================= ## @class AGoFnp # An absract base class for numpy-related family of methods to probe goodness-of fit @@ -135,6 +136,7 @@ def __call__ ( self , data1 , data2 , normalize = True ) : >>> t = gof ( data1 , data1 , normalize = True ) """ return NotImplemented + # ========================================================================= ## Calculate the t & p-values # @code diff --git a/ostap/stats/gof1d.py b/ostap/stats/gof1d.py index 1996d0ef..c9af285d 100644 --- a/ostap/stats/gof1d.py +++ b/ostap/stats/gof1d.py @@ -338,7 +338,7 @@ def ZK_estimator ( self ) : def ZA_estimator ( self ) : """ Get ZA statistics """ - return self.estimators['ZK'] + return self.estimators.get( 'ZA' , None ) # ========================================================================= ## Get ZC statististics @@ -346,7 +346,7 @@ def ZA_estimator ( self ) : def ZC_estimator ( self ) : """ Get ZC statistics """ - return self.__estimators['ZK'] + return self.__estimators.get ( 'ZC' , None ) __repr__ = Estimators.table __str__ = Estimators.table @@ -598,6 +598,8 @@ def __iadd__ ( self , other ) : self.__nToys += other.nToys return self + plot = Summary.draw + # ============================================================================= if '__main__' == __name__ : diff --git a/ostap/stats/gof_np.py b/ostap/stats/gof_np.py index 3f29721c..6a571c27 100644 --- a/ostap/stats/gof_np.py +++ b/ostap/stats/gof_np.py @@ -31,7 +31,7 @@ from ostap.utils.utils import split_n_range from ostap.utils.basic import numcpu from ostap.stats.gof import AGoFnp -from ostap.utils.memory import memory_enough +from ostap.utils.memory import memory, memory_enough import os, abc, warnings, ROOT # ============================================================================= try : # ======================================================================= @@ -81,7 +81,8 @@ class GoFnp (AGoFnp) : def __init__ ( self , nToys = 0 , silent = False , - parallel = False ) : + parallel = False , + method = 'GoF' ) : assert isinstance ( nToys , int ) and 0 <= nToys , \ "Invalid number of permulations/toys:%s" % nToys @@ -89,7 +90,7 @@ def __init__ ( self , self.__nToys = nToys self.__silent = True if silent else False self.__parallel = True if parallel else False - self.__rows = [] + self.__method = method if self.__parallel and memory_enough () < numcpu () : logger.warning ( 'Available/Used memory ratio: %.1f; switch-off parallel processing') @@ -153,14 +154,14 @@ def parallel ( self ) : return self.__parallel # ======================================================================== @property - def rows ( self ) : - """`rows` : rows of the table with summary information""" - return self.__rows + def method ( self ) : + """`method` : the actual GoF method """ + return self.__method # ============================================================================ -## define configurtaion for psi-function for PPD method +## define configuration for psi-function for PPD method # - distance type of cdist -# - transformation funciton for cdisct output +# - transformation function for cdist output # - increasing function ? # @code # distance_type , transform, increasing = psi_conf ( 'linear' ) @@ -209,7 +210,8 @@ def __init__ ( self , GoFnp.__init__ ( self , nToys = nToys , parallel = parallel , - silent = silent ) + silent = silent , + method = 'Point-to-Point Dissimilarity' ) self.__mc2mc = True if mc2mc else False self.__transform = None @@ -278,6 +280,7 @@ def t_value ( self , ds1 , ds2 ) : """ Calculate t-value for (non-structured) 2D arrays """ ## + sh1 = ds1.shape sh2 = ds2.shape assert 2 == len ( sh1 ) and 2 == len ( sh2 ) and sh1[1] == sh2[1] , \ @@ -286,7 +289,7 @@ def t_value ( self , ds1 , ds2 ) : n1 = len ( ds1 ) n2 = len ( ds2 ) ## - + ## calculate sums of distances, Eq (3.7) result = self.sum_distances ( ds1 , ds1 ) / ( n1 * ( n1 - 1 ) ) result -= self.sum_distances ( ds1 , ds2 ) / ( n1 * n2 ) @@ -327,7 +330,7 @@ def __call__ ( self , data1 , data2 , normalize = True ) : if 1 == uds2.shape [ 1 ] : uds2 = np.c_[ uds2 , np.zeros ( len ( uds2 ) ) ] return self.t_value ( uds1 , uds2 ) - + # ========================================================================= ## Calculate the t & p-values # @code @@ -367,7 +370,7 @@ def pvalue ( self , data1 , data2 , normalize = True ) : p_value = counter.eff if self.__increasing : p_value = 1 - p_value - + return t_value , p_value @@ -393,7 +396,8 @@ def __init__ ( self , GoFnp.__init__ ( self , nToys = nToys , parallel = parallel , - silent = silent ) + silent = silent , + method = 'Distance-to-Nearest-Neighbour' ) self.__histo = None if isinstance ( histo , ROOT.TH1 ) : diff --git a/ostap/stats/gof_utils.py b/ostap/stats/gof_utils.py index f22d00b3..a22cf280 100644 --- a/ostap/stats/gof_utils.py +++ b/ostap/stats/gof_utils.py @@ -18,6 +18,7 @@ 'normalize' , ## "normalize" variables in dataset/structured array 'Estimators' , ## helper mixin class to print statistical estimators 'Summary' , ## helper mixin class to print statistical estimators + 'GoFSummary' , ## helper class to print summary of GoF methods ) # ============================================================================= from collections import namedtuple @@ -29,8 +30,9 @@ from ostap.stats.counters import EffCounter from ostap.utils.basic import numcpu, loop_items from ostap.utils.utils import splitter +from ostap.utils.memory import memory_enough from ostap.utils.progress_bar import progress_bar -import ROOT, sys, warnings +import ROOT, sys, warnings, math # ============================================================================= try : # ======================================================================= # ========================================================================= @@ -238,7 +240,6 @@ def normalize ( ds , others = () , weight = () , first = True ) : exec ( code2 ) normalize.__doc__ = normalize2.__doc__ - # ============================================================================= ## @class PERMUTATOR # Helper class that allow to run permutattion test in parallel @@ -261,8 +262,7 @@ def __call__ ( self , N , silent = True ) : for i in progress_bar ( N , silent = silent , description = 'Permutations:') : np.random.shuffle ( pooled ) tv = self.gof.t_value ( pooled [ : n1 ] , pooled [ n1: ] ) - counter += bool ( self.t_value < tv ) - + counter += bool ( self.t_value < tv ) del pooled return counter @@ -281,8 +281,10 @@ def __call__ ( self , N , silent = True ) : ## Run NN-permutations in parallel using joblib def joblib_run ( self , NN , silent = True ) : """ Run NN-permutations in parallel using joblib """ - nj = 2 * numcpu () + 3 + me = math.ceil ( memory_enough() ) + 1 + nj = min ( 2 * numcpu () + 3 , me ) lst = splitter ( NN , nj ) + if not silent : logger.info ( 'permutations: #%d parallel subjobs to be used' % nj ) ## conf = { 'n_jobs' : -1 , 'verbose' : 0 } if (1,3,0) <= jl_version < (1,4,0) : conf [ 'return_as' ] = 'generator' @@ -300,7 +302,7 @@ def joblib_run ( self , NN , silent = True ) : # ===================================================================== PERMUTATOR.run = joblib_run # ===================================================================== - logger.debug ( 'Joblib will be used foe parallel permutations') + logger.debug ( 'Joblib will be used for parallel permutations') # ===================================================================== except ImportError : # ==================================================== # ===================================================================== @@ -312,15 +314,17 @@ def joblib_run ( self , NN , silent = True ) : ## Run NN-permutations in parallel using WorkManager def pp_run ( self , NN , silent = True ) : """ Run NN-permutations in parallel using WorkManager""" - nj = 2 * numcpu () + 3 + me = math.ceil ( memory_enough() ) + 1 + nj = min ( 2 * numcpu () + 3 , me ) lst = splitter ( NN , nj ) ## + if not silent : logger.info ( 'permutations: #%d parallel subjobs to be used' % nj ) counter = EffCounter() ## ## use the bare interface from ostap.parallel.parallel import WorkManager with WorkManager ( silent = silent ) as manager : - for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) : + for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj , description = 'Permutations:') : counter += result # return counter @@ -332,7 +336,7 @@ def pp_run ( self , NN , silent = True ) : # ============================================================================= ## @class TOYS -# Helper class to tun toys for Goodness-of-Fit studies +# Helper class to run toys for Goodness-of-Fit studies class TOYS(object) : """ Helper class that allow to run permutation test in parallel """ @@ -367,15 +371,17 @@ def __call__ ( self , N , silent = True ) : ## Run N-toys in parallel using WorkManager def run ( self , NN , silent = False ) : """ Run NN-permutations in parallel using WorkManager""" - nj = 2 ## 2 * numcpu () + 3 + me = math.ceil ( memory_enough() ) + 1 + nj = min ( 2 * numcpu () + 3 , me ) lst = splitter ( NN , nj ) + if not silent : logger.info ( 'toys: #%d parallel subjobs to be used' % nj ) ## counter = EffCounter() ## ## use the bare interface from ostap.parallel.parallel import WorkManager with WorkManager ( silent = silent ) as manager : - for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) : + for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj , description = 'Toys:' ) : counter += result # return counter @@ -585,6 +591,68 @@ def draw ( self , what , opts = '' , *args , **kwargs ) : self._line = line return result, line +# ============================================================================= +## @class GoFSummary +# Helper class for format summary table +class GoFSummary ( object) : + + def __init__ ( self ) : + self.__header = ( 'Method' , 't-value' , '' , '#Toys' , 'p-value [%]' , '#sigma' ) + self.__items = [] + + @property + def items ( self ) : + """`items`: get all items""" + return tuple ( self.__items ) + + + def add_row ( self , method , tvalue , pvalue , nToys ) : + item = method, tvalue, pvalue, nToys + self.__items.append ( item ) + + # ======================================================================= + ## Make a summary table + def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 , style = '' ) : + """ Make a summary table + """ + import ostap.logger.table as T + from ostap.logger.pretty import pretty_float + ## + rows = [ self.__header ] + for item in self.__items : + m, t, p, n = item + sv = significance ( p ) + tv , texpo = pretty_float ( t , width = width , precision = precision ) + pv = p * 100 + pval = '%5.3f +/- %.3f' % ( pv.value() , pv.error() ) + sval = '%.2f +/- %.2f' % ( sv.value() , sv.error() ) + row = m , tv , '[10^%+d]' % texpo if texpo else '' , '%d' % n , pval , sval + rows.append ( row ) + + ## skip empty column + has_expo = False + for row in rows : + if row [ 2 ] : + has_expo = True + break + + if not has_expo : + new_rows = [] + for row in rows : + r = list ( row ) + del r [ 2 ] + new_rows.append ( r ) + rows = new_rows + + title = title if title else 'Goodness of 1D-fit' + return T.table ( rows , title = title , prefix = prefix , alignment = 'lclcc', style = style ) + + + + + + + # ============================================================================= if '__main__' == __name__ : diff --git a/ostap/stats/gofnd.py b/ostap/stats/gofnd.py index 4f1791a7..0471e7b9 100644 --- a/ostap/stats/gofnd.py +++ b/ostap/stats/gofnd.py @@ -51,7 +51,7 @@ class GoF(AGoF) : """ A base class for numpy-related family of methods to probe goodness-of fit """ def __init__ ( self , - gof , ## actual GoF-evaluator + gof , ## actual GoF-evaluator mcFactor = 10 , sample = False ) : @@ -78,7 +78,7 @@ def nToys ( self ) : @property def sample ( self ) : - """`sample` : sample numbef of events for generation step?""" + """`sample` : sample number of events for generation step?""" return self.__sample @property @@ -91,6 +91,12 @@ def parallel ( self ) : """`parallel` : parallel processing where/when/if possible?""" return self.gof.parallel + @property + def method ( self ) : + """`method` : the actual GoF-method + """ + return self.gof.method + # ======================================================================= ## Generate MC dataset from PDF according to model data def generate ( self , pdf , data ) : @@ -196,7 +202,8 @@ def __init__ ( self , sigma = sigma , parallel = parallel , silent = silent ) , - mcFactor = mcFactor ) + mcFactor = mcFactor ) + @property def ppd ( self ) : """`ppd` : Point-To-Point Dissimilarity calculator for two datasets """ diff --git a/ostap/utils/memory.py b/ostap/utils/memory.py index 86f8c4c9..a3f56d37 100755 --- a/ostap/utils/memory.py +++ b/ostap/utils/memory.py @@ -122,7 +122,7 @@ def memory_enough () : # @author Vanya Belyaev Ivan.Belyaev@itep.ru # @date 2013-02-10 class Memory(object): - """Simple class to evaluate the change in virtual memory + """ Simple class to evaluate the change in virtual memory to be used as context manager: >>> with Memory('here...') :