1. extend gof1d and gofnd tests

OstapHEP · Oct 10, 2024 · e59bf2f
1 parent 72fcb68
commit e59bf2f
Show file tree

Hide file tree

Showing 6 changed files with 95 additions and 59 deletions.
diff --git a/ReleaseNotes/release_notes.md b/ReleaseNotes/release_notes.md
@@ -19,7 +19,8 @@
   1. prepend the default progress-bar for trees/datasets/frames with `Entries:`
   1. add a kind of replacement of `ROOT.RooAbsCollection.assign` for old versions of ROOT 
   1. add meaningful `description` argument to all `progress_bar` instance
-
+  1. extend `gof1d` and `gofnd` tests 
+
 ## Backward incompatible
 
 ## Bug fixes

diff --git a/ostap/stats/gof_np.py b/ostap/stats/gof_np.py
@@ -45,16 +45,15 @@
         if (1,6,0) <= sp_version :
             qconf = { 'k' : [ 2 ] , 'workers' : -1 }
             def neighbour_distances ( tree , data ) :
-                dist , _ = tree.query ( data , **qconf )
-                dist     = dist.flatten() 
-                return dist 
+                dist , xx = tree.query ( data , **qconf )
+                del xx 
+                return dist.flatten() 
         else :
             qconf = { 'k' :   2                    }
             def neighbour_distances ( tree , data ) :
-                dist , _ = tree.query ( data , **qconf )
-                dist = np.delete ( dist , 0 , axis = 1 )
-                dist = dist.flatten()
-                return dist 
+                dist , xx = tree.query ( data , **qconf )
+                del xx 
+                return np.delete ( dist , 0 , axis = 1 ).flatten() 
 
     # =========================================================================
 except ImportError :
@@ -406,12 +405,11 @@ def t_value ( self , ds1 , vpdf ) :
         sh2 = vpdf.shape
         assert 2 == len ( sh1 ) and 1 == len ( sh2 ) and len ( ds1 ) == len ( vpdf ) , \
             "Invalid arrays: %s , %s" % ( sh1 , sh2 )
-        
-        tree        = sp.spatial.KDTree ( ds1 )
+
+        tree = sp.spatial.KDTree ( ds1 )
         ## uvalues , _ = tree.query ( ds1 , **qconf )
         ## uvalues     = uvalues.flatten ()
         uvalues = neighbour_distances ( tree , ds1 ) 
-
         del tree
 
         ## dimension of the problem (it must be set in __call__)

diff --git a/ostap/stats/gof_utils.py b/ostap/stats/gof_utils.py
@@ -234,19 +234,6 @@ def normalize ( ds , others = () , weight = () , first = True ) :
 normalize.__doc__ = normalize2.__doc__ 
 
 
-# =============================================================================
-jl = None 
-# =============================================================================
-try : # =======================================================================
-    # =========================================================================
-    if ( 3 , 0 ) <= python_info :
-        with warnings.catch_warnings(): 
-            warnings.simplefilter ( "ignore" , category = DeprecationWarning  )
-            import joblib as jl
-    # =========================================================================
-except ImportError : # ========================================================
-    # =========================================================================
-    jl = None
 # =============================================================================
 ## @class PERMUTATOR
 #  Helper class that allows running the permutation test in parallel 
@@ -284,16 +271,17 @@ def __call__ ( self , N , silent = True ) :
         with warnings.catch_warnings(): 
             warnings.simplefilter ( "ignore" , category = DeprecationWarning  )
             import joblib as jl
+            jl_version = tuple ( int ( i ) for i in jl.__version__.split('.') )
         # =====================================================================
         ## Run NN-permutations in parallel using joblib 
         def joblib_run ( self , NN , silent = True ) :
             """ Run NN-permutations in parallel using joblib """
-            nj    = 2 ## 2 * numcpu () + 3
+            nj    = 2 * numcpu () + 3
             lst   = splitter ( NN , nj )
             ## 
             conf  = { 'n_jobs' : -1 , 'verbose' : 0 }
-            if    '1.3.0' <= jl.__version__ < '1.4.0' : conf [ 'return_as' ] = 'generator'           
-            elif  '1.4.0' <= jl.__version__           : conf [ 'return_as' ] = 'unordered_generator'
+            if    (1,3,0) <= jl_version < (1,4,0) : conf [ 'return_as' ] = 'generator'           
+            elif  (1,4,0) <= jl_version           : conf [ 'return_as' ] = 'unordered_generator'
             ##
             input   = ( jl.delayed (self)( N ) for N in lst )
             counter = EffCounter()
@@ -307,32 +295,34 @@ def joblib_run ( self , NN , silent = True ) :
         # =====================================================================
         PERMUTATOR.run = joblib_run        
         # =====================================================================
-        logger.debug ( 'Joblib will be  used foe parallel permuations')
+        logger.debug ( 'Joblib will be  used foe parallel permutations')
         # =====================================================================        
     except ImportError : # ====================================================
         # =====================================================================
         jl = None
-
+
+
+jl = None 
 # =============================================================================
 if not jl : # =================================================================
     # =========================================================================
     ## Run NN-permutations in parallel using WorkManager
     def pp_run ( self , NN , silent = True ) :
         """ Run NN-permutations in parallel using WorkManager"""
-        nj    = 2 ## 2 * numcpu () + 3
+        nj    = 2 * numcpu () + 3
         lst   = splitter ( NN , nj )
         ##
-        from ostap.parallel.parallel import WorkManager
-        manager = WorkManager ( silent = silent )
         counter = EffCounter()
         ## 
         ## use the bare interface 
-        for result in manager.iexecute ( self , lst , progress = not silent  , njobs = nj ) :
-            counter += result 
+        from ostap.parallel.parallel import WorkManager
+        with WorkManager ( silent = silent ) as manager : 
+            for result in manager.iexecute ( self , lst , progress = not silent  , njobs = nj ) :
+                counter += result 
         # 
         return counter
     # =========================================================================
-    logger.debug ( 'Parallel will be  used for parallel permuations')
+    logger.debug ( 'Parallel will be  used for parallel permutations')
     # =====================================================================        
     PERMUTATOR.run = pp_run
     # =========================================================================
@@ -377,13 +367,13 @@ def run ( self , NN , silent = False ) :
         nj    = 2 ## 2 * numcpu () + 3
         lst   = splitter ( NN , nj )
         ##
-        from ostap.parallel.parallel import WorkManager
-        manager = WorkManager ( silent = silent )
         counter = EffCounter()
         ## 
         ## use the bare interface 
-        for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) :
-            counter += result 
+        from ostap.parallel.parallel import WorkManager
+        with WorkManager ( silent = silent ) as manager : 
+            for result in manager.iexecute ( self , lst , progress = not silent , njobs = nj ) :
+                counter += result 
         # 
         return counter
 

diff --git a/ostap/stats/tests/test_stats_gof1d.py b/ostap/stats/tests/test_stats_gof1d.py
@@ -124,7 +124,7 @@ def run_USTAT  ( pdf , data, result , logger ) :
 
     rows  =  [ ( 't-value'  , 'x[..]', 'p-value [%]' , '#sigma' ) ]
 
-    ustat = USTAT ( nToys = 1000 , histo = 100 )
+    ustat = USTAT ( nToys = 1000 , histo = 100 , parallel = True )
 
     pdf.load_params ( result , silent = True )
 
@@ -144,7 +144,7 @@ def run_USTAT  ( pdf , data, result , logger ) :
     logger.info ( '%s:\n%s' % ( title , table ) )
 
     return ustat.histo
-    
+
 # ==============================================================================
 def test_good_fit_1 ( ) :
     """ Make a test for presumably good fit: fit Gauss to Gauss
@@ -155,7 +155,7 @@ def test_good_fit_1 ( ) :
 
     with use_canvas ( 'test_good_fit_1: G -> G' ,      wait = 1 ) :
         r , f = gauss.fitTo ( data_g , **fitconf ) 
-        
+
     with use_canvas ( 'test_good_fit_1: GoF' , wait = 1 ) :
 
         gauss.load_params ( r , silent = True ) 
@@ -166,19 +166,24 @@ def test_good_fit_1 ( ) :
         got = G1D.GoF1DToys ( gauss , data_g )
         logger.info ( 'Goodness-of-fit with %d toys:\n%s' % ( got.nToys , got ) ) 
 
+        del gof
+        del got
+
     ## Try to use multidimensional methods
     run_PPD ( gauss , data_g , r , logger )
+
     udist1 = run_DNN ( gauss , data_g , r , logger )
     if udist1 :
         keep.add ( udist1 ) 
         with use_canvas ( 'test_good_fit_1: DNN' , wait = 5 ) :
             udist1.draw()
+
     udist2 = run_USTAT ( gauss , data_g , r , logger )
     if udist2 :
         keep.add ( udist2 ) 
-        with use_canvas ( 'test_good_fit_1: USTAT' , wait = 5 ) :
+        with use_canvas ( 'test_good_fit_1: USTAT' , wait = 1 ) :
             udist2.draw()
-            
+
 # =============================================================================
 def test_good_fit_2 ( ) :
     """ Make a test for presumably good fit: fit Gauss+Bkg to Gauss
@@ -301,12 +306,9 @@ def test_bad_fit_1 ( ) :
 if '__main__' == __name__ :
 
     test_good_fit_1 ()  ## fit Gauss       to Gauss 
-
-"""
-test_good_fit_2 ()  ## fit Gauss+Bkg   to Gauss 
-test_good_fit_3 ()  ## fit Gauss+Bkg   to Gauss+Bkg
-test_bad_fit_1  ()  ## fit Gauss       to Gauss+Bkg
-"""
+    test_good_fit_2 ()  ## fit Gauss+Bkg   to Gauss 
+    test_good_fit_3 ()  ## fit Gauss+Bkg   to Gauss+Bkg
+    test_bad_fit_1  ()  ## fit Gauss       to Gauss+Bkg
 
 # ===============================================================================
 ##                                                                        The END 

diff --git a/ostap/stats/tests/test_stats_gofnd.py b/ostap/stats/tests/test_stats_gofnd.py
@@ -30,8 +30,8 @@
 ygauss = M.Gauss_pdf     ( 'GY' , xvar = yvar , mean = ( 5 , 4 , 6 ) , sigma = ( 1.0 , 0.5 , 2.5 ) )
 gauss2 = xgauss*ygauss
 
-NG        = 125
-NG2       =  25
+NG        = 100
+NG2       =  50
 data_good = gauss2.generate ( NG + NG2 , sample = False )
 data_bad  = gauss2.generate ( NG       , sample = False )
 for i in range ( NG2 ) :
@@ -132,7 +132,7 @@ def test_DNN () :
     ## 't/bad' , 'x[..]' ,
     rows  = [ ( 'p-value/good[%]' , 'p-value/bad[%]' , '#sigma/good' , '#sigma/bad') ]
 
-    dnn = GnD.DNN ( nToys = 1000  )
+    dnn = GnD.DNN ( nToys = 1000 , histo = 50  )
 
     ## presumably good fit
     with timing ( "Good fit DNN" , logger = logger ) :
@@ -164,12 +164,55 @@ def test_DNN () :
     title= 'Goodness-of-Fit DNN test'
     table = T.table ( rows , title = title , prefix = '# ')
     logger.info ( '%s:\n%s' % ( title , table ) )
+
+# ===============================================================================
+def test_USTAT () :
+
+    logger = getLogger ("test_USTAT")
+    from ostap.stats.ustat import USTAT 
+
+
+    rows  = [ ( 'p-value/good[%]' , 'p-value/bad[%]' , '#sigma/good' , '#sigma/bad') ]
+
+    ust = USTAT ( nToys = 1000  , histo = 50 )
+
+    ## presumably good fit
+    with timing ( "Good fit USTAT" , logger = logger ) :
+        pdf.load_params ( rgood , silent = True ) 
+        tgood        = ust        ( pdf , data_good )
+        tgood, pgood = ust.pvalue ( pdf , data_good )
+
+    ## presumably bad fit 
+    with timing ( "Bad  fit USTAT" , logger = logger ) : 
+        pdf.load_params ( rbad  , silent = True ) 
+        tbad        = ust        ( pdf , data_bad )
+        tbad, pbad  = ust.pvalue ( pdf , data_bad )
+
+    gp = pgood * 100 
+    bp = pbad  * 100
+
+    gt , ge = pretty_float ( tgood )
+    bt , be = pretty_float ( tbad  )
+
+    nsg    = significance ( pgood )
+    nsb    = significance ( pbad  )
+    nsg    = '%.1f +/- %.1f' % ( nsg.value() , nsg.error () )
+    nsb    = '%.1f +/- %.1f' % ( nsb.value() , nsb.error () )
+
+    row = '%4.1f +/- %.1f' % ( gp.value() , gp.error () ) , \
+        '%4.1f +/- %.1f' % ( bp.value() , bp.error () ) , nsg , nsb 
+    rows.append ( row )
+
+    title= 'Goodness-of-Fit USTAT test'
+    table = T.table ( rows , title = title , prefix = '# ')
+    logger.info ( '%s:\n%s' % ( title , table ) )
 
 # ===============================================================================
 if '__main__' == __name__ :
 
-    test_PPD  ()
-    test_DNN  ()
+    test_PPD   ()
+    test_DNN   ()
+    test_USTAT ()
 
 # ===============================================================================
 ##                                                                        The END 

diff --git a/ostap/stats/ustat.py b/ostap/stats/ustat.py
@@ -325,14 +325,16 @@ def pvalue ( self , pdf , data ) :
 
         ## prepare toys
         toys = TOYS ( self , t_value , pdf = pdf , Ndata = len ( data ) , sample = self.sample )
+
+        silent = self.silent
+        self.__silent = True 
         if self.parallel :
-            silent = self.silent
-            self.__silent = True 
             counter = toys.run ( self.nToys , silent = silent )
-            self.__silent = silent 
         else :
             counter = toys     ( self.nToys , silent = self.silent )            
-
+
+        self.__silent = silent 
+
         p_value = 1 - counter.eff
         return t_value, p_value