From f9ac417f8e1751606170721073f32505d4cb0392 Mon Sep 17 00:00:00 2001 From: Vanya Belyaev Date: Sun, 22 Sep 2024 10:54:27 +0200 Subject: [PATCH] 1. Add Kuiper's Goodness-of-Fit estimator to `ostap.stats.gof1d` --- ReleaseNotes/release_notes.md | 4 +- ostap/core/core.py | 4 +- ostap/io/files.py | 52 +++++------ ostap/stats/gof1d.py | 123 +++++++++++++++++++++----- ostap/stats/moment.py | 73 +++++++-------- ostap/stats/moments.py | 37 ++++---- ostap/stats/tests/test_stats_gof1d.py | 14 +-- 7 files changed, 186 insertions(+), 121 deletions(-) diff --git a/ReleaseNotes/release_notes.md b/ReleaseNotes/release_notes.md index f10ea261..6774966b 100644 --- a/ReleaseNotes/release_notes.md +++ b/ReleaseNotes/release_notes.md @@ -14,7 +14,8 @@ 1. add weight to `tree.withCut` and other tree-looping methods 1. Tiny tweak for treatment of drawing ranges for `f1_draw/f2_draw/f3_draw` fnuctions: form now the explicit setting has a precedence. 1. `ostap.stats.gof1d` : improve drawing methods - + 1. Add Kuiper's Goodness-of-Fit estimator to `ostap.stats.gof1d` + ## Backward incompatible 1. add `weight` to `tree.withCut` and other tree-looping methods @@ -23,7 +24,6 @@ 1. fix typo in `ostap.math.bnase.axis_range` - # v1.13.3.0 ## New features diff --git a/ostap/core/core.py b/ostap/core/core.py index dc06475e..eef67268 100644 --- a/ostap/core/core.py +++ b/ostap/core/core.py @@ -110,9 +110,9 @@ wilsonEff = Ostap.Math.wilsonEff agrestiCoullEff = Ostap.Math.agrestiCoullEff # ============================================================================= -## helper function for case-insensitive dictionary with ignorance of underscores and blanks +## helper function for case-insensitive dictionary with ignorance of underscores and blanks and dashes from ostap.utils.cidict import case_transform -cidict_fun = lambda k : case_transform ( k ) . replace('_','') . replace ( ' ', '') +cidict_fun = lambda k : case_transform ( k ) . replace('_','') . replace ( ' ', '').replace('-','') # ============================================================================= ## @class ROOTCWD # context manager to preserve current directory (rather confusing stuff in ROOT) diff --git a/ostap/io/files.py b/ostap/io/files.py index 0c448e9b..5d2029e3 100644 --- a/ostap/io/files.py +++ b/ostap/io/files.py @@ -35,7 +35,7 @@ # Helper class for parallel processing of long list of files # It defiend the basic teatemtn of the, calling `Files.treatFile` method class TreatFileTask(Task) : - """Helper class for parallel processing of long list of files + """ Helper class for parallel processing of long list of files It defiens the basic teatemtn of the, calling `Files.treatFile` method """ def __init__ ( self , data ) : @@ -44,7 +44,7 @@ def initialize_local ( self ) : pass def initialize_remote ( self , jobid = -1 ) : pass ## actgual processing def process ( self , jobid , files ) : - """Actual processing""" + """ Actual processing""" for f in files : self.__data.treatFile ( f ) return self.__data ## get the results @@ -56,7 +56,7 @@ def merge_results ( self , results , jobid = -1 ) : # =========================================================================== ## get the size with units for the file-size units def fsize_unit ( size ) : - """Get the size with units for the file-size units""" + """ Get the size with units for the file-size units""" assert isinstance ( size , integer_types ) , "unit: invalid 'size' type" if ( 1024 ** 5 ) <= size : return size // ( 1024 ** 5 ) , 'PB' elif ( 1024 ** 4 ) <= size : return size // ( 1024 ** 4 ) , 'TB' @@ -76,7 +76,7 @@ def fsize_unit ( size ) : # @author Alexander BARANOV a.baranov@cern.ch # @date 2014-06-08 class Files(object): - """Simple utility to pickup the list of files + """ Simple utility to pickup the list of files - collect file the file - keeps and store them @@ -203,7 +203,7 @@ def bad_files ( self ) : # ========================================================================= ## get the disk size of files, only existing files are counted def getsize ( self ) : - """Get the disk size of files, only existing/valid files are counted""" + """ Get the disk size of files, only existing/valid files are counted""" s = 0 for f in self.__files : s += max ( 0 , self.get_file_size ( f ) ) @@ -214,7 +214,7 @@ def getsize ( self ) : # @param fname input file name # @return ROOT file sizeor -1 for non-existingt/invalid files def get_file_size ( self , fname ) : - """Get the size of the file + """ Get the size of the file - fname : input file name - return file size or -1 for non-existing/invalid files """ @@ -229,7 +229,7 @@ def get_file_size ( self , fname ) : # if the_file in files : .... # @endcode def __contains__ ( self , the_file ) : - """Check if the file is a part of collection + """ Check if the file is a part of collection >>> files = ... >>> if the_file in files : .... """ @@ -238,7 +238,7 @@ def __contains__ ( self , the_file ) : # ========================================================================= ## Get the list of files from the patterns def the_files ( self ) : - """Get the list of files from the patterns""" + """ Get the list of files from the patterns""" _files = set () @@ -346,7 +346,7 @@ def __or__ ( self , other ) : ## get an intersection of two datasets def __and__ ( self , other ) : - """ get intersection of two sets + """ Get intersection of two sets >>> ds1 = ... >>> ds2 = ... >>> ds = ds1 & ds2 ## get intersection @@ -363,7 +363,7 @@ def __and__ ( self , other ) : ## get an exclusive OR for two datasets def __xor__ ( self , other ) : - """ get an exclusive OR for two sets + """ Get an exclusive OR for two sets >>> ds1 = ... >>> ds2 = ... >>> ds = ds1 ^ ds2 ## get an exclusive OR @@ -405,27 +405,27 @@ def __ior__ ( self , other ) : ## get union of two datasets def union ( self , other ) : - """Union of two datasets""" + """ Union of two datasets""" return self | other ## get intersection of two datasets def intersection ( self , other ) : - """Intersection of two datasets""" + """ Intersection of two datasets""" return self & other ## get difference of two datasets def difference ( self , other ) : - """Differfence of two datasets""" + """ Differfence of two datasets""" return self - other ## get symmetric difference of two datasets def symmetric_difference ( self , other ) : - """Symmetric Differfence of two datasets""" + """ Symmetric Differfence of two datasets""" return self ^ other ## subtraction for datasets def __sub__ ( self , other ) : - """ get subtraction of two sets + """ Get subtraction of two sets >>> ds1 = ... >>> ds2 = ... >>> ds = ds1 - ds2 ## get subtraction @@ -439,7 +439,7 @@ def __sub__ ( self , other ) : ## remove the files from another dataset def __isub__ ( self , other ) : - """ Remove file forom anothere dataset + """ Remove file from another dataset >>> ds1 = ... >>> ds2 = ... >>> ds1 -= ds2 ## get subtraction @@ -457,7 +457,7 @@ def __isub__ ( self , other ) : # ========================================================================= ## get a sample of at most n-elements (if n is integer and >=1 ) or n-fraction def sample_files ( self , n , sort ) : - """get a sample of at most n-elements (if n is integer and >=1 ) or n-fraction + """ Get a sample of at most n-elements (if n is integer and >=1 ) or n-fraction """ if isinstance ( n , int ) and 1 <= n <= len ( self.files ) : files = random.sample ( self.files , n ) @@ -477,7 +477,7 @@ def sample_files ( self , n , sort ) : # f2 = files.samlpe ( 0.1 ) ## 10% of files # @endcode def sample ( self , n , sort = True , **kwargs ) : - """Get a sub-sample + """ Get a sub-sample >>> files = ... >>> f1 = files.sample ( 5 ) ## 5 files >>> f2 = files.sample ( 0.1 ) ## 10% of files @@ -495,7 +495,7 @@ def sample ( self , n , sort = True , **kwargs ) : # f2 = files[4:10] # @endcode def __getitem__ ( self , item ) : - """Get a sub-sample + """ Get a sub-sample >>> files = ... >>> f1 = files[5] >>> f2 = files[4:10] @@ -511,7 +511,7 @@ def __getitem__ ( self , item ) : ## printout def __str__(self): - """The specific printout + """ The specific printout """ return "<#files: %4d>" % len ( self.files ) @@ -527,7 +527,7 @@ def __len__ ( self ) : return len ( self.files ) # print ( files.table() ) # @endcode def table ( self , title = 'Files' , prefix = '' ) : - """Print collection of files as table + """ Print collection of files as table """ rows = [ ( '#' , 'size' , 'name' ) ] files = self.files @@ -616,7 +616,7 @@ def commonpath ( self ) : # - new directory will be created (if needed) # - common path (prefix) for all files will be replaced by new directory def copy_files ( self , new_dir = None , parallel = False , also_bad = False ) : - """copy all the files to new directory + """ Copy all the files to new directory - new directory will be created (if needed) - common path (prefix) for all files will be replaced by new directory """ @@ -644,7 +644,7 @@ def copy_files ( self , new_dir = None , parallel = False , also_bad = False ) : # - new directory will be created (if needed) # - common path (prefix) for all files will be replaced by new directory def sync_files ( self , new_dir = None , parallel = False , also_bad = False ) : - """Sync/copy all the files to new directory + """ Sync/copy all the files to new directory - new directory will be created (if needed) - common path (prefix) for all files will be replaced by new directory """ @@ -685,7 +685,7 @@ def copy_files ( files_to_copy , copier = None , copy_cmd = '' , progress = False ) : - """Copy set of files into new directory. + """ Copy set of files into new directory. Files are copied in a way to preserve they name uniqueness: @@ -799,7 +799,7 @@ def sync_files ( files_to_copy , copy_cmd = '' , progress = False ) : - """Sync/copy set of files into new directory. + """ Sync/copy set of files into new directory. Files are copied in a way to preserve they name uniqueness: @@ -859,7 +859,7 @@ def sync_dirs ( source_dir , copier = None , copy_cmd = '' , progress = False ) : - """Sync/copy two directories + """ Sync/copy two directories Files are copied in a way to preserve they name uniqueness: diff --git a/ostap/stats/gof1d.py b/ostap/stats/gof1d.py index 169deefd..3d07bfac 100644 --- a/ostap/stats/gof1d.py +++ b/ostap/stats/gof1d.py @@ -15,13 +15,22 @@ __author__ = "Vanya BELYAEV Ivan.Belyaev@cern.ch" __date__ = "2024-09-16" __all__ = ( + 'komogorov_smirnov' , ## Kolmogorov-Sminov GoF estimator + 'anderson_darling' , ## Anderson-Darling GoF estimator + 'cramer_von_mises' , ## Cramer-von Mises GoF estimator + 'kuiper' , ## Kuiper GoF estimator + 'ZK' , ## ZK GoF estimator + 'ZA' , ## ZA GoF estimator + 'ZC' , ## ZC GoF estimator + 'GoF1D' , ## helper utility for GoF estimate + 'GoF1DToys' , ## helper utility for GoF estimate with toys ) # ============================================================================= from collections import defaultdict, namedtuple from ostap.core.meta_info import root_info from ostap.fitting.funbasic import AFUN1 from ostap.fitting.pdfbasic import PDF1 -from ostap.core.core import SE, VE, Ostap +from ostap.core.core import SE, VE, Ostap, cidict_fun from ostap.math.base import doubles, axis_range from ostap.math.models import f1_draw from ostap.math.math_ve import significance @@ -45,13 +54,13 @@ if ( 6 , 32 ) <= root_info : data2vct = lambda s : s else : data2vct = lambda s : doubles ( s ) # ============================================================================= -## Get Kolmogorov-Smirnov statistis KS +## Get Kolmogorov-Smirnov statistics KS # @code # cdf_data =... # ks2 = kolmogorov_smirnov ( cdf_data ) # @endcode # @param cdf_data sorted array of F0(X_i) - values of CDF at X data points -# @return Kolmogorov-Smirnov statistisc KS +# @return Kolmogorov-Smirnov statistics KS def kolmogorov_smirnov ( cdf_data ) : """ Get Kolmogorov-Smirnov statistis KS - `cdf_data` : sorted array of F0(X_i) - values of CDF at (sorted) X data points @@ -60,7 +69,7 @@ def kolmogorov_smirnov ( cdf_data ) : """ n = len ( cdf_data ) result = max ( max ( ( i + 1.0 ) / n - Fi , Fi - float ( i ) / n ) for ( i, Fi ) in enumerate ( cdf_data ) ) - return math.sqrt ( result ) + return math.sqrt ( result ) # ============================================================================= ## Get Anderson-Darling statistiscs AD^2 # @code @@ -68,7 +77,7 @@ def kolmogorov_smirnov ( cdf_data ) : # ad2 = anderson_darling ( cdf_data ) # @endcode # @param cdf_data sorted array of F0(X_i) - values of CDF at (sorted) X data points -# @return Anderson-Darling statistisc AD^2 +# @return Anderson-Darling statistics AD^2 def anderson_darling ( cdf_data ) : """ Get Anderson-Darling statistiscs AD^2 - `cdf_data` : sorted array of F0(X_i) - values of CDF at X data points @@ -88,7 +97,7 @@ def anderson_darling ( cdf_data ) : # cm2 = cramer_von_mises ( cdf_data ) # @endcode # @param cdf_data sorted array of F0(X_i) - values of CDF at X data points -# @return Cramer-von Mises statistisc CM^2 +# @return Cramer-von Mises statistics CM^2 def cramer_von_mises ( cdf_data ) : """ Get Cramer-von Mises statistics CM^2 - `cdf_data` : sorted array of F0(X_i) - values of CDF at (sorted) X data points @@ -98,15 +107,35 @@ def cramer_von_mises ( cdf_data ) : n = len ( cdf_data ) result = sum ( ( Fi - ( i + 0.5 ) / n ) ** 2 for ( i, Fi ) in enumerate ( cdf_data ) ) result += 12.0 / n - return result + return result# +# ============================================================================= +## Get Kuiper's statistis K +# @code +# cdf_data =... +# k = kuiper ( cdf_data ) +# @endcode +# @param cdf_data sorted array of F0(X_i) - values of CDF at X data points +# @return Kolmogorov-Smirnov statistics K +def kuiper ( cdf_data ) : + """ Get Kuiper statistis KS + - `cdf_data` : sorted array of F0(X_i) - values of CDF at (sorted) X data points + >>> cdf_data =... + >>> ks2 = kolmogorov_smirnov ( cdf_data ) + """ + n = len ( cdf_data ) + d_plus = max ( ( i + 1.0 ) / n - Fi for ( i, Fi ) in enumerate ( cdf_data ) ) + d_minus = max ( Fi - ( i + 1.0 ) / n for ( i, Fi ) in enumerate ( cdf_data ) ) + result = d_plus + d_minus + return result # ============================================================================= ## Get ZK statististics # @code # cdf_data = ... # zk = ZK ( cdf_data ) # @endcode +# @see https://doi.org/10.1111/1467-9868.00337 # @param cdf_data sorted array of F0(X_i) - values of CDF at X data points -# @return ZK statistisc ZK +# @return ZK statistics ZK def ZK ( cdf_data ) : """ Get ZK statististics - `cdf_data` : sorted array of F0(X_i) - values of CDF at X data points @@ -124,8 +153,9 @@ def ZK ( cdf_data ) : # cdf_data = ... # za = ZA ( cdf_data ) # @endcode +# @see https://doi.org/10.1111/1467-9868.00337 # @param cdf_data sorted array of F0(X_i) - values of CDF at X data points -# @return ZA statistisc ZA +# @return ZA statistics ZA def ZA ( cdf_data ) : """ Get ZA statististics - `cdf_data` : sorted array of F0(X_i) - values of CDF at (sorted) X data points @@ -143,8 +173,9 @@ def ZA ( cdf_data ) : # cdf_data = ... # zc = ZC ( cdf_data ) # @endcode +# @see https://doi.org/10.1111/1467-9868.00337 # @param cdf_data sorted array of F0(X_i) - values of CDF at X data points -# @return ZC statistisc ZC +# @return ZC statistics ZC def ZC ( cdf_data ) : """ Get ZC statististics - `cdf_data` : sorted array of F0(X_i) - values of CDF at (sorted) X data points @@ -155,6 +186,7 @@ def ZC ( cdf_data ) : flog = math.log result = sum ( ( flog ( ( 1.0 / Fi - 1 ) / ( ( n - 0.5 ) / ( i + 0.25 ) - 1 ) ) ) ** 2 for ( i , Fi ) in enumerate ( cdf_data ) ) return result +# ============================================================================== # ============================================================================= ## @class GoF1D @@ -176,10 +208,16 @@ def __init__ ( self , assert pdf.xvar in dataset , 'GoF1D: `xvar`:%s is not in dataset!' % ( self.xvar.name ) cdf = pdf.cdf () + if hasattr ( pdf.pdf , 'setPars' ) : pdf.pdf.setPars() if hasattr ( pdf.pdf , 'function' ) : + if hasattr ( pdf.pdf , 'setPars' ) : pdf.pdf.setPars() fun = pdf.pdf.function() if hasattr ( fun , 'cdf' ) : - cdf = fun.cdf() + try : + a = fun.cdf ( 0.0 ) + self.__store = pdf, fun + def cdf ( x ) : return fun.cdf ( x ) + except TypeError : pass self.__cdf = cdf self.__xmnmx = pdf.xminmax() @@ -206,6 +244,7 @@ def __init__ ( self , self.__ZK_val = ZK ( self.__cdf_data ) self.__ZA_val = ZA ( self.__cdf_data ) self.__ZC_val = ZC ( self.__cdf_data ) + self.__K_val = kuiper ( self.__cdf_data ) # ========================================================================= ## size of dataset @@ -290,7 +329,15 @@ def ZA_estimator ( self ) : def ZC_estimator ( self ) : """ Get ZC statistics """ - return self.__ZC_val + return self.__ZC_val + + # ========================================================================= + ## Get Kuiper statististics + @property + def kuiper_estimator ( self ) : + """ Get Kuiperstatistics + """ + return self.__K_val # ========================================================================== ## Print the summary as Table @@ -315,6 +362,11 @@ def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) : else : row = 'Cramer-von Mises' , cm rows.append ( row ) + k , expo = pretty_float ( self.__K_val , width = width , precision = precision ) + if expo : row = 'Kuiper' , k , '10^%+d' % expo + else : row = 'Kuiper' , k + rows.append ( row ) + zk , expo = pretty_float ( self.__ZK_val , width = width , precision = precision ) if expo : row = 'ZK' , zk, '10^%+d' % expo else : row = 'ZK' , zk @@ -330,6 +382,7 @@ def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) : else : row = 'ZC' , zc rows.append ( row ) + title = title if title else 'Goodness of 1D-fit' return T.table ( rows , title = title , prefix = prefix , alignment = 'lcl' ) @@ -368,6 +421,14 @@ def draw ( self , opts = '' , *args , **kwargs ) : return ecdf.draw ( '%s %s' % ( 'same' , opts ) , color = 2 , linewidth = 3 , xmin = xmin , xmax = xmax , **kwargs ) +# ============================================================================= +KS_keys = 'ks' , 'kolmogorov' , 'kolmogorovsmirnov' +AD_keys = 'ad' , 'andersen' , 'andersendarling' +CM_keys = 'cm' , 'cramer' , 'cramervonmises' +ZK_keys = 'zk' , +ZA_keys = 'za' , +ZC_keys = 'zc' , +K_keys = 'k' , 'kuiper' # ============================================================================= ## @class GoF1DToys # Check Goodness of 1D-fits using toys @@ -396,6 +457,7 @@ def __init__ ( self , self.__ZK_cnt = SE () self.__ZA_cnt = SE () self.__ZC_cnt = SE () + self.__K_cnt = SE () self.__ecdfs = {} @@ -428,6 +490,7 @@ def run ( self , nToys = 1000 , silent = False ) : zk = ZK ( cdf_data ) za = ZA ( cdf_data ) zc = ZC ( cdf_data ) + k = kuiper ( cdf_data ) self.__KS_cnt += ks self.__AD_cnt += ad @@ -435,6 +498,7 @@ def run ( self , nToys = 1000 , silent = False ) : self.__ZK_cnt += zk self.__ZA_cnt += za self.__ZC_cnt += zc + self.__K_cnt += k results [ 'KS' ].append ( ks ) results [ 'AD' ].append ( ad ) @@ -442,6 +506,7 @@ def run ( self , nToys = 1000 , silent = False ) : results [ 'ZK' ].append ( zk ) results [ 'ZA' ].append ( za ) results [ 'ZC' ].append ( zc ) + results [ 'K' ].append ( k ) ## delete data if isinstance ( dset , ROOT.RooDataSet ) : @@ -541,7 +606,15 @@ def ZC ( self ) : """ Get ZC statistics """ return self.result ( self.ZC_estimator , self.__ZC_cnt , 'ZC' ) - + + # ========================================================================= + ## Get Kuiper statististics + @property + def kuiper ( self ) : + """ Get Kuiper statistics + """ + return self.result ( self.kuiper_estimator , self.__K_cnt , 'K' ) + # ========================================================================= ## format a row in the table def _row ( self , what , result , width = 5 , precision = 3 ) : @@ -592,6 +665,7 @@ def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) : rows.append ( self._row ( 'Kolmogorov-Smirnov' , self.kolmogorov_smirnov , width = width , precision = precision ) ) rows.append ( self._row ( 'Anderson-Darling' , self.anderson_darling , width = width , precision = precision ) ) rows.append ( self._row ( 'Cramer-von Mises' , self.cramer_von_mises , width = width , precision = precision ) ) + rows.append ( self._row ( 'Kuiper' , self.kuiper , width = width , precision = precision ) ) rows.append ( self._row ( 'ZK' , self.ZK , width = width , precision = precision ) ) rows.append ( self._row ( 'ZA' , self.ZA , width = width , precision = precision ) ) rows.append ( self._row ( 'ZC' , self.ZC , width = width , precision = precision ) ) @@ -628,36 +702,37 @@ def table ( self , title = '' , prefix = '' , width = 5 , precision = 3 ) : def draw ( self , what , opts = '' , *args , **kwargs ) : """ Draw fit CDF & empirical CDF """ - if not what or not what in self.__ecdfs : - return GoF1D.draw ( self , opts , *args , **kwargs ) - - if 'KS' == what : + key = cidict_fun ( what ) + if key in KS_keys : result = self.kolmogorov_smirnov ecdf = self.__ecdfs [ 'KS' ] logger.info ( 'Toy resuls for Kolmogorov-Smirnov estimate' ) - elif 'AD' == what : + elif key in AD_keys : result = self.anderson_darling ecdf = self.__ecdfs [ 'AD' ] logger.info ( 'Toy resuls for Anderson-Darling estimate' ) - elif 'CM' == what : + elif key in CM_keys : result = self.cramer_von_mises ecdf = self.__ecdfs [ 'CM' ] logger.info ( 'Toy resuls for Cramer-von Mises estimate' ) - elif 'ZK' == what : + elif key in ZK_keys : result = self.ZK ecdf = self.__ecdfs [ 'ZK' ] logger.info ( 'Toy resuls for ZK estimate' ) - elif 'ZA' == what : + elif key in ZA_keys : result = self.ZA ecdf = self.__ecdfs [ 'ZA' ] logger.info ( 'Toy resuls for ZK estimate' ) - elif 'ZC' == what : + elif key in ZC_keys : result = self.ZC ecdf = self.__ecdfs [ 'ZC' ] logger.info ( 'Toy resuls for ZK estimate' ) + elif key in K_keys : + result = self.kuiper + ecdf = self.__ecdfs [ 'K' ] + logger.info ( 'Toy resuls for Kuiper estimate' ) else : - logger.error ( "draw: Invalid `what`:%s" % what ) - return GoF1D.draw ( self , opts , *args , **kwargs ) + raise KeyError ( "draw: Invalid `what`:%s" % what ) xmin,xmax = ecdf.xmin () , ecdf.xmax () value = result.statistics diff --git a/ostap/stats/moment.py b/ostap/stats/moment.py index f8067efb..53a5036d 100644 --- a/ostap/stats/moment.py +++ b/ostap/stats/moment.py @@ -33,7 +33,7 @@ __all__ = () # ============================================================================= from ostap.core.ostap_types import integer_types, num_types -from ostap.math.base import isfinite, isequal +from ostap.math.base import isfinite, isequal, pos_infinity, neg_infinity from ostap.core.core import Ostap, VE from ostap.core.meta_info import root_version_int, root_info from ostap.logger.pretty import pretty_float, pretty_ve @@ -54,7 +54,7 @@ # @endcode # If order of the moment-counter exceeds 1, the uncertainty is also evaluated def _om_mean ( obj ) : - """Get a mean for the moment-counter + """ Get a mean for the moment-counter >>> m = ... >>> v = m.mean() - If order of the moment-counter exceeds 1, the uncertainty is also evaluated @@ -73,7 +73,7 @@ def _om_mean ( obj ) : # @endcode # If order of the moment-counter exceeds 3, the uncertainty is also evaluated def _om_variance ( obj ) : - """Get a variance for the moment-counter + """ Get a variance for the moment-counter >>> m = ... >>> v = m.variance() - If order of the moment-counter exceeds 3, the uncertainty is also evaluated @@ -91,7 +91,7 @@ def _om_variance ( obj ) : # v = m.skewness () # @endcode def _om_skewness ( obj ) : - """Get a skewness for the moment-counter + """ Get a skewness for the moment-counter >>> m = ... >>> v = m.skewness() """ @@ -109,7 +109,7 @@ def _om_skewness ( obj ) : # v = m.kurtosis () # @endcode def _om_kurtosis ( obj ) : - """Get an excess kurtosis for the moment-counter + """ Get an excess kurtosis for the moment-counter >>> m = ... >>> v = m.kurtosis () """ @@ -127,7 +127,7 @@ def _om_kurtosis ( obj ) : # v = m.cumulant1() # @endcode def _om_cumulant_1st( obj ) : - """Get 1st unbiased cumulant, well it is equal to the mean + """ Get 1st unbiased cumulant, well it is equal to the mean >>> m = ... >>> v = m.cumulant_1st() @@ -141,7 +141,7 @@ def _om_cumulant_1st( obj ) : # v = m.cumulant2() # @endcode def _om_cumulant_2nd ( obj ) : - """Get 2nd unbiased cumulant, well it is equal to the variance + """ Get 2nd unbiased cumulant, well it is equal to the variance >>> m = ... >>> v = m.cumulant2() @@ -161,7 +161,7 @@ def _om_cumulant_2nd ( obj ) : # v = m.cumulant_3rd() # @endcode def _om_cumulant_3rd ( obj ) : - """Get 3rd unbiased cumulant, well it is equal to the 3rd central moment + """ Get 3rd unbiased cumulant, well it is equal to the 3rd central moment >>> m = ... >>> v = m.cumulant_3rd() @@ -189,7 +189,7 @@ def _om_cumulant_3rd ( obj ) : # v = m.cumulant_4th() # @endcode def _om_cumulant_4th ( obj ) : - """Get 4th unbiased cumulant, well it is equal to the 3rd central moment + """ Get 4th unbiased cumulant, well it is equal to the 3rd central moment >>> m = ... >>> v = m.cumulant_4th() @@ -215,7 +215,7 @@ def _om_cumulant_4th ( obj ) : # v = m.unbiased_2nd() # @endcode def _om_u2nd ( obj ) : - """Get an unbiased 2nd order moment from the moment-counter + """ Get an unbiased 2nd order moment from the moment-counter >>> m = ... >>> v = m.unbiased_3nd() """ @@ -231,7 +231,7 @@ def _om_u2nd ( obj ) : # v = m.unbiased_3rd() # @endcode def _om_u3rd ( obj ) : - """Get an unbiased 3rd order moment fro the moment-counter + """ Get an unbiased 3rd order moment fro the moment-counter >>> m = ... >>> v = m.unbiased_3rd() """ @@ -247,7 +247,7 @@ def _om_u3rd ( obj ) : # v = m.unbiased_4th() # @endcode def _om_u4th ( obj ) : - """Get an unbiased 4th order moment from the moment-counter + """ Get an unbiased 4th order moment from the moment-counter >>> m = ... >>> v = m.unbiased_4th() """ @@ -263,7 +263,7 @@ def _om_u4th ( obj ) : # v = m.unbiased_5th() # @endcode def _om_u5th ( obj ) : - """Get an unbiased 5th order moment from the moment-counter + """ Get an unbiased 5th order moment from the moment-counter >>> m = ... >>> v = m.unbiased_5th() """ @@ -272,12 +272,6 @@ def _om_u5th ( obj ) : return obj.moment ( 5 ) return Ostap.Math.Moments.unbiased_5th ( obj ) - - -pos_infinity = float('+Inf') -neg_infinity = float('-Inf') - - # ============================================================================= ## get central moment # @code @@ -346,7 +340,7 @@ def _om_cm3 ( obj , order ) : # v = m.cumulant ( 4 ) # @endcode def _om_cumulant_ ( obj , order ) : - """ Get th ecumulant + """ Get the cumulant >>> m = ... >>> v = m.cumulant ( 3 ) ## ditto """ @@ -368,7 +362,7 @@ def _om_cumulant_ ( obj , order ) : # v = m.rms () # @endcode def _om_rms ( obj ) : - """Get a RMS value for the moment-counter + """ Get a RMS value for the moment-counter >>> m = ... >>> v = m.rms () """ @@ -387,7 +381,7 @@ def _om_rms ( obj ) : # t = m.table() # @endcode def _om_table ( obj , title = '' , prefix = '' , standard = False ) : - """Print object as a table + """ Print object as a table >>> m = ... >>> t = m.table() """ @@ -663,7 +657,7 @@ def _om_table ( obj , title = '' , prefix = '' , standard = False ) : # ============================================================ ## equality for two counters def _mom0_eq_ ( cnt , another ) : - """Equality for two Ostap.Math.Momemnt_<0> counters + """ Equality for two Ostap.Math.Momemnt_<0> counters """ ## The same base type if not isinstance ( another , Ostap.Math.Moment ) : return NotImplemented @@ -676,7 +670,7 @@ def _mom0_eq_ ( cnt , another ) : # ============================================================ ## equality for two counters def _mom1_eq_ ( cnt , another ) : - """Equality for two Ostap.Math.Momemnt_<1> counters + """ Equality for two Ostap.Math.Momemnt_<1> counters """ ## The same base type if not isinstance ( another , Ostap.Math.Moment ) : return NotImplemented @@ -688,7 +682,7 @@ def _mom1_eq_ ( cnt , another ) : # ============================================================ ## equality for two counters def _momN_eq_ ( cnt , another ) : - """Equality for two Ostap.Math.Moment_ counters + """ Equality for two Ostap.Math.Moment_ counters """ ## The same base type if not isinstance ( another , Ostap.Math.Moment ) : return NotImplemented @@ -706,7 +700,7 @@ def _momN_eq_ ( cnt , another ) : # ============================================================ ## equality for two counters def _wmom0_eq_ ( cnt , another ) : - """Equality for two Ostap.Math.Momemnt_<0> counters + """ Equality for two Ostap.Math.Momemnt_<0> counters """ ## The same base type if not isinstance ( another , Ostap.Math.WMoment ) : return NotImplemented @@ -724,7 +718,7 @@ def _wmom0_eq_ ( cnt , another ) : # ============================================================ ## equality for two counters def _wmom1_eq_ ( cnt , another ) : - """Equality for two Ostap.Math.WMomemnt_<1> counters + """ Equality for two Ostap.Math.WMomemnt_<1> counters """ ## The same base type if not isinstance ( another , Ostap.Math.WMoment ) : return NotImplemented @@ -736,7 +730,7 @@ def _wmom1_eq_ ( cnt , another ) : # ============================================================ ## equality for two counters def _wmomN_eq_ ( cnt , another ) : - """Equality for two Ostap.Math.WMoment_ counters + """ Equality for two Ostap.Math.WMoment_ counters """ ## The same base type if not isinstance ( another , Ostap.Math.WMoment ) : return NotImplemented @@ -759,19 +753,19 @@ def _wmomN_eq_ ( cnt , another ) : # ========================================================== ## Redude Ostap::Math::Moment_<0> def _mom0_reduce_ ( cnt ) : - """Redude Ostap::Math::Moment_<0> + """ Reduce Ostap::Math::Moment_<0> """ return root_factory , ( type ( cnt ), cnt.size () ) # ========================================================= ## Redude Ostap::Math::Moment_<1> def _mom1_reduce_ ( cnt ) : - """Redude Ostap::Math::Moment_<1> + """ Reduce Ostap::Math::Moment_<1> """ return root_factory , ( type ( cnt ), cnt.mu () , cnt.previous () ) # ======================================================== ## Redude Ostap::Math::Moment_ def _momN_reduce_ ( cnt ) : - """Redude Ostap::Math::Moment_ + """ Reduce Ostap::Math::Moment_ """ return root_factory , ( type ( cnt ), cnt.moment () , cnt.previous () ) @@ -782,19 +776,19 @@ def _momN_reduce_ ( cnt ) : # ========================================================== ## Redude Ostap::Math::WMoment_<0> def _wmom0_reduce_ ( cnt ) : - """Redude Ostap::Math::WMoment_<0> + """ Reduce Ostap::Math::WMoment_<0> """ return root_factory , ( type ( cnt ), cnt.size () , cnt.w() , cnt.w2 () ) # ========================================================= ## Redude Ostap::Math::WMoment_<1> def _wmom1_reduce_ ( cnt ) : - """Redude Ostap::Math::WMoment_<1> + """ Reduce Ostap::Math::WMoment_<1> """ return root_factory , ( type ( cnt ), cnt.mu () , cnt.previous () ) # ======================================================== ## Redude Ostap::Math::WMoment_ def _wmomN_reduce_ ( cnt ) : - """Redude Ostap::Math::WMoment_ + """ Reduce Ostap::Math::WMoment_ """ return root_factory , ( type ( cnt ), cnt.moment () , cnt.previous () ) @@ -811,7 +805,7 @@ def _wmomN_reduce_ ( cnt ) : # @see Ostap::Math::WGeometricMean # @see Ostap::Math::WHarmonicMean def _mn_reduce_ ( cnt ) : - """Serialization of Harmonic&Geometric means + """ Serialization of Harmonic&Geometric means - see Ostap::Math::GeometricMean - see Ostap::Math::HarmonicMean - see Ostap::Math::WGeometricMean @@ -832,7 +826,7 @@ def _mn_reduce_ ( cnt ) : # @see Ostap::Math::PoweMean # @see Ostap::Math::WPoweMean def _pm_reduce_ ( cnt ) : - """Serialization of Power means + """ Serialization of Power means - see Ostap::Math::PowerMean - see Ostap::Math::WPowerMean """ @@ -846,7 +840,7 @@ def _pm_reduce_ ( cnt ) : # @see Ostap::Math::LehmerMean # @see Ostap::Math::WLehmerMean def _lm_reduce_ ( cnt ) : - """Serialization of Lehmermeans + """ Serialization of Lehmer means - see Ostap::Math::LehmerMean - see Ostap::Math::WLehmerMean """ @@ -860,7 +854,7 @@ def _lm_reduce_ ( cnt ) : # @see Ostap::Math::MinMaxValue # @see Ostap::Math::WMinMaxValue def _nx_reduce_ ( cnt ) : - """Serialization of MinMax values + """ Serialization of MinMax values - see Ostap::Math::MinMaxValue - see Ostap::Math::WMinMaxValue """ @@ -878,7 +872,7 @@ def _nx_reduce_ ( cnt ) : # @see Ostap::Math::WHarmonicMean # @see Ostap::Math::WArithmeticMean def _mn_eq_ ( cnt , another ) : - """Equality Harmonic&Geometric means + """ Equality Harmonic&Geometric means - see Ostap::Math::GeometricMean - see Ostap::Math::HarmonicMean - see Ostap::Math::ArithmeticMean @@ -897,7 +891,6 @@ def _mn_eq_ ( cnt , another ) : Ostap.Math.WHarmonicMean . __eq__ = _mn_eq_ Ostap.Math.WArithmeticMean . __eq__ = _mn_eq_ - _decorated_classes = ( Ostap.Math.Moment , Ostap.Math.WMoment , diff --git a/ostap/stats/moments.py b/ostap/stats/moments.py index 053aaf79..82fd224e 100644 --- a/ostap/stats/moments.py +++ b/ostap/stats/moments.py @@ -109,7 +109,8 @@ ## ) # ============================================================================= -from ostap.core.ostap_types import integer_types, num_types +from ostap.core.ostap_types import integer_types, num_types +from ostap.math.base import pos_infinity, neg_infinity from ostap.core.core import Ostap # ============================================================================= # logging @@ -1099,30 +1100,24 @@ def prob ( self ) : "``prop'' - confidence level" return self.__prob - - -pos_inf = float ( '+Inf' ) -neg_inf = float ( '-Inf' ) - - # ============================================================================= ## calculate some statistical quantities of variable, # considering function to be PDF # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def sp_action ( func , actor , xmin = None , xmax = None , *args , **kwargs ) : - """Calculate some statistical quantities of variable, considering function to be PDF + """ Calculate some statistical quantities of variable, considering function to be PDF """ ## if isinstance ( xmin , num_types ) : xmn = float ( xmin ) elif hasattr ( func ,'GetXmin' ) : xmn = float ( func.GetXmin () ) elif hasattr ( func ,'xmin' ) : xmn = float ( func.xmin () ) - else : xmn = neg_inf + else : xmn = neg_infinity ## if isinstance ( xmax , num_types ) : xmx = float ( xmax ) elif hasattr ( func ,'GetXmax' ) : xmx = float ( func.GetXmax () ) elif hasattr ( func ,'xmax' ) : xmx = float ( func.xmax () ) - else : xmx = pos_inf + else : xmx = pos_infinity ## xmn = float ( xmn ) if isinstance ( xmn , integer_types ) else xmn xmx = float ( xmx ) if isinstance ( xmx , integer_types ) else xmx @@ -1158,7 +1153,7 @@ def moment ( func , N , xmin = None , xmax = None , err = False , x0 = 0 ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def central_moment ( func , N , xmin = None , xmax = None , err = False ) : - """Get the central moment for the distribution using + """ Get the central moment for the distribution using >>> fun = ... >>> mom5 = central_moment ( fun , 5 , xmin = 10 , xmax = 50 ) """ @@ -1177,7 +1172,7 @@ def central_moment ( func , N , xmin = None , xmax = None , err = False ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2022-07-13 def std_moment ( func , N , xmin = None , xmax = None , err = False ) : - """Get the standartized moment for the distribution using + """ Get the standartized moment for the distribution using >>> fun = ... >>> mom5 = std_moment ( fun , 5 , xmin = 10 , xmax = 50 ) """ @@ -1197,7 +1192,7 @@ def std_moment ( func , N , xmin = None , xmax = None , err = False ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def mean ( func , xmin = None , xmax = None , err = False ) : - """Get the mean-value for the distribution + """ Get the mean-value for the distribution >>> fun = ... >>> m = mean( fun , xmin = 10 , xmax = 50 ) """ @@ -1215,7 +1210,7 @@ def mean ( func , xmin = None , xmax = None , err = False ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def variance ( func , xmin = None , xmax = None , err = False ) : - """Get the variance for the distribution using + """ Get the variance for the distribution using >>> fun = ... >>> v = variance( fun , xmin = 10 , xmax = 50 ) """ @@ -1234,7 +1229,7 @@ def variance ( func , xmin = None , xmax = None , err = False ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def rms ( func , xmin = None , xmax = None , err = False ) : - """Get RMS for the distribution using + """ Get RMS for the distribution using >>> fun = ... >>> v = rms( fun , xmin = 10 , xmax = 50 ) """ @@ -1253,7 +1248,7 @@ def rms ( func , xmin = None , xmax = None , err = False ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def skewness ( func , xmin = None , xmax = None , err = False ) : - """Get the skewness for the distribution using + """ Get the skewness for the distribution using >>> fun = ... >>> v = skewness ( fun , xmin = -10 , xmax = 10 ) """ @@ -1274,7 +1269,7 @@ def skewness ( func , xmin = None , xmax = None , err = False ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def kurtosis ( func , xmin = None , xmax = None , err = False ) : - """Get the (exessive) kurtosis for the distribution + """ Get the (exessive) kurtosis for the distribution >>> fun = ... >>> kurt = kurtosis ( fun , xmin = 10 , xmax = 50 ) """ @@ -1293,7 +1288,7 @@ def kurtosis ( func , xmin = None , xmax = None , err = False ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def median ( func , xmin = None , xmax = None ) : - """Get the median for the distribution using + """ Get the median for the distribution using >>> fun = ... >>> v = median( fun , xmin = 10 , xmax = 50 ) """ @@ -1311,7 +1306,7 @@ def median ( func , xmin = None , xmax = None ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def quantile ( func , Q , xmin = None , xmax = None , err = False , x0 = 0 ) : - """Get quantile for the distribution + """ Get quantile for the distribution >>> fun = ... >>> quan = quantile ( fun , 0.1 , xmin = 10 , xmax = 50 ) """ @@ -1328,7 +1323,7 @@ def quantile ( func , Q , xmin = None , xmax = None , err = False , x0 = 0 ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-07-11 def mode ( func , xmin = None , xmax = None ) : - """Get the mode for the distribution + """ Get the mode for the distribution >>> fun = ... >>> v = mode( fun , xmin = 10 , xmax = 50 ) """ @@ -1410,7 +1405,7 @@ def cl_symm ( func , prob , xmin = None , xmax = None , x0 = None ) : # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2015-08-03 def cl_asymm ( func , prob , xmin = None , xmax = None ) : - """Asymmetric confidence interval around x0 + """ Asymmetric confidence interval around x0 >>> fun = lambda x : exp( - 0.5 * x * x ) >>> x_1,x_2 = cl_asymm ( fun , 0.68 , -10 , 10 ) >>> print x_1,x_2 diff --git a/ostap/stats/tests/test_stats_gof1d.py b/ostap/stats/tests/test_stats_gof1d.py index 6f975c0c..5c902b45 100644 --- a/ostap/stats/tests/test_stats_gof1d.py +++ b/ostap/stats/tests/test_stats_gof1d.py @@ -94,12 +94,14 @@ def test_bad_fit_1 ( ) : got.run ( 10000 ) logger.info ( 'Goodness-of-fit with toys:\n%s' % got ) - with use_canvas ( 'test_bad_fit_1: GoF/KS' , wait = 1 ) : - dks = got.draw('KS') - with use_canvas ( 'test_bad_fit_1: GoF/AD' , wait = 1 ) : - dad = got.draw('AD') - with use_canvas ( 'test_bad_fit_1: GoF/CM' , wait = 1 ) : - dcm = got.draw('CM') + with use_canvas ( 'test_bad_fit_1: GoF/Kolmogorov-Smirnov' , wait = 1 ) : + dks = got.draw('Kolmogorov-Smirnov') + with use_canvas ( 'test_bad_fit_1: GoF/Andersen-Darling' , wait = 1 ) : + dad = got.draw('Andersen-Darling') + with use_canvas ( 'test_bad_fit_1: GoF/Cramer-von Mises' , wait = 1 ) : + dcm = got.draw('Cramer-von Mises') + with use_canvas ( 'test_bad_fit_1: GoF/Kuiper' , wait = 1 ) : + dcm = got.draw('Kuiper') with use_canvas ( 'test_bad_fit_1: GoF/ZK' , wait = 1 ) : dzk = got.draw('ZK') with use_canvas ( 'test_bad_fit_1: GoF/ZA' , wait = 1 ) :