update

OstapHEP · Nov 27, 2024 · 890736b · 890736b
1 parent ab7fdfd
commit 890736b
Show file tree

Hide file tree

Showing 7 changed files with 119 additions and 35 deletions.
diff --git a/ostap/logger/table.py b/ostap/logger/table.py
@@ -358,7 +358,8 @@ def table ( rows                          ,
 
     rows = [ list ( row ) for row in rows ]
 
-    if style is None : style = '%s' % default_style
+    ## if style is None : style = '%s' % default_style
+    if not style : style = '%s' % default_style
 
     fmt = style.lower()
 

diff --git a/ostap/stats/gof.py b/ostap/stats/gof.py
@@ -44,7 +44,7 @@
 #  t_value, p_value  = gof.pvalue ( pdf , data )
 #  @endcode
 class AGoF(object) :
-    """ An abstract base class for family of methods to probe Goodness-of-Git
+    """ An abstract base class for family of methods to probe Goodness-of-Fit
     There are two abstract methods
     - `__call__` to evaluate t-value, the value of GoF estimator 
     - `pvalue` to evaluate (t,p)-values
@@ -57,7 +57,7 @@ class AGoF(object) :
     >>> t_value , p_value  = gof.pvalue ( pdf , data )
     """
     # =========================================================================
-    ## Calculate T-value for Goodness-of-Git 
+    ## Calculate T-value for Goodness-of-Fit test
     #  @code
     #  gof   = ...
     #  pdf   = ...  
@@ -72,7 +72,8 @@ def __call__ ( self , pdf , data ) :
         >>> data  = ... 
         >>> t_value = gof ( pdf , data ) 
         """
-        return NotImplemented 
+        return NotImplemented
+
     # =========================================================================
     ## Calculate the t & p-values
     #  @code
@@ -90,7 +91,7 @@ def pvalue ( self , pdf , data ) :
         >>> t_value , p_value = gof.pvalue ( pdf , data ) 
         """
         return NotImplemented
-
+    
 # =============================================================================
 ## @class AGoFnp
 #  An abstract base class for the numpy-related family of methods to probe goodness-of-fit
@@ -135,6 +136,7 @@ def __call__ ( self , data1 , data2 , normalize = True ) :
         >>> t = gof ( data1 , data2 , normalize = True  ) 
         """
         return NotImplemented 
+
     # =========================================================================
     ## Calculate the t & p-values
     #  @code

diff --git a/ostap/stats/gof1d.py b/ostap/stats/gof1d.py
@@ -338,15 +338,15 @@ def ZK_estimator  ( self ) :
     def ZA_estimator  ( self ) :
         """ Get ZA statistics
         """        
-        return self.estimators['ZK'] 
+        return self.estimators.get( 'ZA' , None ) 
 
     # =========================================================================
     ## Get ZC statistics 
     @property 
     def ZC_estimator ( self ) :
         """ Get ZC statistics
         """        
-        return self.__estimators['ZK'] 
+        return self.__estimators.get ( 'ZC' , None ) 
 
     __repr__ = Estimators.table
     __str__  = Estimators.table
@@ -598,6 +598,8 @@ def __iadd__ ( self , other ) :
         self.__nToys += other.nToys 
         return self 
 
+    plot = Summary.draw 
+
 # =============================================================================
 if '__main__' == __name__ :
 

diff --git a/ostap/stats/gof_np.py b/ostap/stats/gof_np.py
@@ -31,7 +31,7 @@
 from   ostap.utils.utils        import split_n_range
 from   ostap.utils.basic        import numcpu 
 from   ostap.stats.gof          import AGoFnp
-from   ostap.utils.memory       import memory_enough 
+from   ostap.utils.memory       import memory, memory_enough 
 import os, abc, warnings, ROOT   
 # =============================================================================
 try : # =======================================================================
@@ -81,15 +81,16 @@ class GoFnp (AGoFnp) :
     def __init__ ( self              ,
                    nToys    = 0      ,
                    silent   = False  , 
-                   parallel = False  ) : 
+                   parallel = False  ,
+                   method   = 'GoF'  ) : 
 
         assert isinstance ( nToys , int ) and 0 <= nToys  , \
             "Invalid number of permulations/toys:%s" % nToys
 
         self.__nToys    = nToys
         self.__silent   = True if silent   else False
         self.__parallel = True if parallel else False
-        self.__rows     = []
+        self.__method   = method
 
         if self.__parallel and memory_enough () < numcpu () : 
             logger.warning ( 'Available/Used memory ratio: %.1f; switch-off parallel processing')
@@ -153,14 +154,14 @@ def parallel ( self ) :
         return self.__parallel
     # ========================================================================
     @property
-    def rows     ( self ) :
-        """`rows` : rows of the table with summary information"""
-        return self.__rows
+    def method ( self ) :
+        """`method` : the actual GoF method """
+        return self.__method
 
 # ============================================================================
-## define configurtaion for psi-function for PPD method
+## define configuration for psi-function for PPD method
 #   - distance type of <code>cdist</code>
-#   - transformation funciton for cdisct output
+#   - transformation function for cdist output
 #   - increasing function ?
 #   @code
 #   distance_type , transform, increasing = psi_conf ( 'linear' )
@@ -209,7 +210,8 @@ def __init__ ( self                   ,
         GoFnp.__init__ ( self                ,
                          nToys    = nToys    ,
                          parallel = parallel , 
-                         silent   = silent   )
+                         silent   = silent   ,
+                         method   = 'Point-to-Point Dissimilarity' )
 
         self.__mc2mc     = True if mc2mc else False
         self.__transform = None
@@ -278,6 +280,7 @@ def t_value ( self , ds1 , ds2 ) :
         """ Calculate t-value for (non-structured) 2D arrays
         """
         ##
+
         sh1 = ds1.shape
         sh2 = ds2.shape
         assert 2 == len ( sh1 ) and 2 == len ( sh2 ) and sh1[1] == sh2[1] , \
@@ -286,7 +289,7 @@ def t_value ( self , ds1 , ds2 ) :
         n1 = len ( ds1 ) 
         n2 = len ( ds2 ) 
         ##
-
+        
         ## calculate sums of distances, Eq (3.7) 
         result  = self.sum_distances ( ds1 , ds1 ) / ( n1 * ( n1 - 1 ) )
         result -= self.sum_distances ( ds1 , ds2 ) / ( n1 * n2 )
@@ -327,7 +330,7 @@ def __call__ ( self , data1 , data2 , normalize = True ) :
         if 1 == uds2.shape [ 1 ] : uds2 = np.c_[ uds2 , np.zeros ( len ( uds2 ) ) ] 
 
         return self.t_value ( uds1 , uds2 )
-
+    
     # =========================================================================
     ## Calculate the t & p-values
     #  @code
@@ -367,7 +370,7 @@ def pvalue ( self , data1 , data2 , normalize = True ) :
         p_value = counter.eff
 
         if self.__increasing : p_value = 1 - p_value
-        
+
         return t_value , p_value 
 
 
@@ -393,7 +396,8 @@ def __init__ ( self              ,
         GoFnp.__init__ ( self                ,
                          nToys    = nToys    ,
                          parallel = parallel , 
-                         silent   = silent   )
+                         silent   = silent   , 
+                         method   = 'Distance-to-Nearest-Neighbour' )
 
         self.__histo = None 
         if   isinstance ( histo , ROOT.TH1 ) :

diff --git a/ostap/stats/gof_utils.py b/ostap/stats/gof_utils.py
@@ -18,6 +18,7 @@
     'normalize'  , ## "normalize" variables in dataset/structured array
     'Estimators' , ## helper mixin class to print statistical estimators 
     'Summary'    , ## helper mixin class to print statistical estimators 
+    'GoFSummary' , ## helper class to print summary of GoF methods 
 )
 # =============================================================================
 from   collections              import namedtuple
@@ -29,8 +30,9 @@
 from   ostap.stats.counters     import EffCounter
 from   ostap.utils.basic        import numcpu, loop_items 
 from   ostap.utils.utils        import splitter
+from   ostap.utils.memory       import memory_enough 
 from   ostap.utils.progress_bar import progress_bar
-import ROOT, sys, warnings  
+import ROOT, sys, warnings, math  
 # =============================================================================
 try : # =======================================================================
     # =========================================================================
@@ -238,7 +240,6 @@ def normalize ( ds , others = () , weight = () , first = True ) :
     exec ( code2 )
 normalize.__doc__ = normalize2.__doc__ 
 
-
 # =============================================================================
 ## @class PERMUTATOR
 #  Helper class that allows running the permutation test in parallel 
@@ -261,8 +262,7 @@ def __call__ ( self , N , silent = True ) :
         for i in progress_bar ( N , silent = silent , description = 'Permutations:') : 
             np.random.shuffle ( pooled )            
             tv       = self.gof.t_value ( pooled [ : n1 ] , pooled [ n1: ] )
-            counter += bool ( self.t_value < tv  )
-
+            counter += bool ( self.t_value < tv  )            
         del pooled
         return counter
 
@@ -281,8 +281,10 @@ def __call__ ( self , N , silent = True ) :
         ## Run NN-permutations in parallel using joblib 
         def joblib_run ( self , NN , silent = True ) :
             """ Run NN-permutations in parallel using joblib """
-            nj    = 2 * numcpu () + 3
+            me    = math.ceil ( memory_enough() ) + 1 
+            nj    = min ( 2 * numcpu () + 3 , me ) 
             lst   = splitter ( NN , nj )
+            if not silent : logger.info ( 'permutations: #%d parallel subjobs to be used' % nj ) 
             ## 
             conf  = { 'n_jobs' : -1 , 'verbose' : 0 }
             if    (1,3,0) <= jl_version < (1,4,0) : conf [ 'return_as' ] = 'generator'           
@@ -300,7 +302,7 @@ def joblib_run ( self , NN , silent = True ) :
         # =====================================================================
         PERMUTATOR.run = joblib_run        
         # =====================================================================
-        logger.debug ( 'Joblib will be  used foe parallel permutations')
+        logger.debug ( 'Joblib will be  used for parallel permutations')
         # =====================================================================        
     except ImportError : # ====================================================
         # =====================================================================
@@ -312,15 +314,17 @@ def joblib_run ( self , NN , silent = True ) :
     ## Run NN-permutations in parallel using WorkManager
     def pp_run ( self , NN , silent = True ) :
         """ Run NN-permutations in parallel using WorkManager"""
-        nj    = 2 * numcpu () + 3
+        me    = math.ceil ( memory_enough() ) + 1 
+        nj    = min ( 2 * numcpu () + 3 , me ) 
         lst   = splitter ( NN , nj )
         ##
+        if not silent : logger.info ( 'permutations: #%d parallel subjobs to be used' % nj ) 
         counter = EffCounter()
         ## 
         ## use the bare interface 
         from ostap.parallel.parallel import WorkManager
         with WorkManager ( silent = silent ) as manager : 
-            for result in manager.iexecute ( self , lst , progress = not silent  , njobs = nj ) :
+            for result in manager.iexecute ( self , lst , progress = not silent  , njobs = nj , description = 'Permutations:') :
                 counter += result 
         # 
         return counter
@@ -332,7 +336,7 @@ def pp_run ( self , NN , silent = True ) :
 
 # =============================================================================
 ## @class TOYS
-#  Helper class to tun toys for Goodness-of-Fit studies 
+#  Helper class to run toys for Goodness-of-Fit studies 
 class TOYS(object) :
     """ Helper class that allow to run permutation test in parallel 
     """
@@ -367,15 +371,17 @@ def __call__ ( self , N , silent = True ) :
     ## Run N-toys in parallel using WorkManager
     def run ( self , NN , silent = False ) :
         """ Run NN-permutations in parallel using WorkManager"""
-        nj    = 2 ## 2 * numcpu () + 3
+        me    = math.ceil ( memory_enough() ) + 1 
+        nj    = min ( 2 * numcpu () + 3 , me ) 
         lst   = splitter ( NN , nj )
+        if not silent : logger.info ( 'toys: #%d parallel subjobs to be used' % nj ) 
         ##
         counter = EffCounter()
         ## 
         ## use the bare interface 
         from ostap.parallel.parallel import WorkManager
         with WorkManager ( silent = silent ) as manager : 
-            for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) :
+            for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj , description = 'Toys:' ) :
                 counter += result 
         # 
         return counter
@@ -585,6 +591,68 @@ def draw  ( self , what , opts = '' , *args , **kwargs ) :
         self._line = line 
         return result, line  
 
+# =============================================================================
+## @class GoFSummary
+#  Helper class to format the summary table of GoF methods 
+class GoFSummary ( object) :
+
+    def __init__ ( self ) :
+        self.__header = ( 'Method' , 't-value' , '' , '#Toys' , 'p-value [%]' , '#sigma' ) 
+        self.__items  = []
+
+    @property 
+    def items  ( self ) :
+        """`items`: get all items"""
+        return tuple ( self.__items )
+
+
+    def add_row ( self , method , tvalue , pvalue , nToys ) :
+        item = method, tvalue, pvalue, nToys 
+        self.__items.append ( item ) 
+
+    # =======================================================================
+    ## Make a summary table 
+    def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 , style = '' ) :
+        """ Make a summary table 
+        """
+        import ostap.logger.table  as     T 
+        from   ostap.logger.pretty import pretty_float
+        ## 
+        rows = [ self.__header ]
+        for item in self.__items :
+            m, t, p, n = item
+            sv = significance ( p ) 
+            tv , texpo = pretty_float ( t , width = width , precision = precision )
+            pv   = p * 100
+            pval = '%5.3f +/- %.3f' %  ( pv.value() , pv.error() )
+            sval = '%.2f +/- %.2f'  %  ( sv.value() , sv.error() )
+            row  = m , tv , '[10^%+d]' % texpo if texpo else '' , '%d' % n , pval , sval 
+            rows.append ( row )
+
+        ## skip empty column 
+        has_expo = False 
+        for row in rows :
+            if row [ 2 ] :
+                has_expo = True
+                break
+
+        if not has_expo :
+            new_rows = []
+            for row in rows :
+                r = list ( row )
+                del r [ 2 ]
+                new_rows.append ( r ) 
+            rows = new_rows 
+
+        title = title if title else 'Goodness of 1D-fit' 
+        return T.table ( rows , title = title , prefix = prefix , alignment = 'lclcc', style = style  )
+
+
+
+
+
+
+
 # =============================================================================
 if '__main__' == __name__ :