diff --git a/ostap/fitting/dataset.py b/ostap/fitting/dataset.py index 91f408c3..00d6f527 100644 --- a/ostap/fitting/dataset.py +++ b/ostap/fitting/dataset.py @@ -763,17 +763,9 @@ def _rds_make_unique_ ( dataset , ROOT.RooAbsData . sample = _rad_sample_ ROOT.RooAbsData . shuffle = _rad_shuffle_ -from ostap.trees.trees import ( _stat_var_ , _stat_vars_ , - _stat_cov_ , _stat_covs_ , _stat_nEff_ , - _sum_var_ , _sum_var_old_ , _stat_vct_ ) -ROOT.RooAbsData . sumVar = _sum_var_ -ROOT.RooAbsData . sumVar_ = _sum_var_old_ -ROOT.RooAbsData . statVar = _stat_var_ -ROOT.RooAbsData . statVars = _stat_vars_ -ROOT.RooAbsData . statCov = _stat_cov_ +from ostap.trees.trees import _stat_covs_ + ROOT.RooAbsData . statCovs = _stat_covs_ -ROOT.RooAbsData . statVct = _stat_vct_ -ROOT.RooAbsData . nEff = _stat_nEff_ from ostap.stats.statvars import data_the_moment ROOT.RooAbsData. the_moment = data_the_moment @@ -810,13 +802,7 @@ def _rds_make_unique_ ( dataset , ROOT.RooAbsData . sample , ROOT.RooAbsData . shuffle , # - ROOT.RooAbsData . statVar , - ROOT.RooAbsData . sumVar , - ROOT.RooAbsData . sumVar_ , - # - ROOT.RooAbsData . statCov , ROOT.RooAbsData . statCovs , - ROOT.RooAbsData . 
statVct , ] @@ -3078,7 +3064,7 @@ def _rad_rows_ ( dataset , variables = [] , cuts = '' , cutrange = '' , first = # ============================================================================ from ostap.stats.statvars import data_decorate as _dd -_dd ( ROOT.RooAbsData ) +_new_methods_ += list ( _dd ( ROOT.RooAbsData ) ) _decorated_classes_ = ( ROOT.RooAbsData , diff --git a/ostap/stats/statvars.py b/ostap/stats/statvars.py index 51502054..f795cca4 100644 --- a/ostap/stats/statvars.py +++ b/ostap/stats/statvars.py @@ -12,6 +12,8 @@ - data_get_stat - get the momentt-based statistics - data_central_moment - get the central moment (with uncertainty) - data_mean - get the mean (with uncertainty) +- data_nEff - get the effective number of entries +- data_sum - get the (weighted) sum - data_variance - get the variance (with uncertainty) - data_dispersion - get the dispersion (with uncertainty) - data_rms - get the RMS (with uncertainty) @@ -46,6 +48,8 @@ 'data_moment' , ## get the moment (with uncertainty) 'data_get_stat' , ## get the momentt-based statistics 'data_central_moment' , ## get the central moment (with uncertainty) + 'data_nEff' , ## get the effective number of entries + 'data_sum' , ## get the (weighted) sum 'data_mean' , ## get the mean (with uncertainty) 'data_variance' , ## get the variance (with uncertainty) 'data_dispersion' , ## get the dispersion (with uncertainty) @@ -77,9 +81,11 @@ # ============================================================================= from builtins import range from ostap.math.base import isequal, iszero -from ostap.core.core import Ostap, rootException, strings, WSE -from ostap.core.ostap_types import string_types, integer_types, num_types -from ostap.trees.cuts import expression_types, vars_and_cuts +from ostap.core.core import Ostap, rootException, WSE +from ostap.core.ostap_types import ( string_types , integer_types , + num_types , dictlike_types ) +from ostap.trees.cuts import expression_types, vars_and_cuts +from 
ostap.utils.basic import loop_items import ostap.stats.moment import ostap.logger.table as T import ROOT @@ -110,8 +116,8 @@ def data_get_moment ( data , order , center , expression , cuts = '' , *args ) : >>> print data.get_moment ( 3 , 0.0 , 'mass' , 'pt>1' ) ## ditto - see Ostap::StatVar::get_moment """ - assert isinstance ( order , integer_types ) and 0<= order , 'Invalid order %s' % order - assert isinstance ( center , num_types ) , 'Invalid center!' + assert isinstance ( order , integer_types ) and 0 <= order , 'Invalid order %s' % order + assert isinstance ( center , num_types ) , 'Invalid center!' assert isinstance ( expression , expression_types ) , 'Invalid type of expression!' assert isinstance ( cuts , expression_types ) , 'Invalid type of cuts/weight!' @@ -189,19 +195,19 @@ def data_central_moment ( data , order , expression , cuts = '' , *args ) : cuts , *args ) # ============================================================================== -## Get the statistics from data +## Get the (W)Statistic-based statistics/counters from data # @code # statobj = Ostap.Math.MinValue() # data = ... # result = data.get_stat( statobj , 'x+y' , 'pt>1' ) # @encode -# @see Ostap::Math::Moment -# @see Ostap::Math::WMoment +# @see Ostap::Math::Statistic +# @see Ostap::Math::WStatistic # @see Ostap::statVar::the_moment def data_get_stat ( data , statobj , expression , cuts = '' , *args ) : - """Get the (w)moments -based statistics + """Get the (W)Statistic-based statistics/counters from data >>> data = ... - >>> stat = Ostap.Math.MinValue() + >>> stat = Ostap.Math.HarmonicMean() >>> result = data.get_stat ( stat , 'x/y+z' , '01' ) +# result = data_statistics ( data , 'x+y' , 'pt>1' ) +# results = data_statistics ( data , 'x;y;z' , 'pt>1' ) ## result is dictionary # @encode # @see Ostap::StatEntity # @see Ostap::WStatEntity # @see Ostap::StatVar::statVar def data_statistics ( data , expressions , cuts = '' , *args ) : """Get statistics from data - >>> data = ... 
- >>> result = data_statistics ( data , 'x/y+z' , '0>> data = ... + >>> result = data_statistics ( data , 'x/y+z' , '0>> results = data_statistics ( data , 'x/y;z' , '01' ) +# results = data_sum ( data , 'x;y;z' , 'pt>1' ) ## result is dictionary +# @encode +# @see Ostap::StatVar::statVar +def data_sum ( data , expressions , cuts = '' , *args ) : + """Get (weighted) sum over the variables + >>> data = ... + >>> result = data_sum ( data , 'x/y+z' , '0>> results = data_sum ( data , 'x/y;z' , '0>> data = ... + >>> result = data_nEff ( data , 'x/y+z' ) + - see Ostap.StatVar.nEff + """ + + assert isinstance ( expression , expression_types ) , 'Invalid type of expression!' + expression = str ( expression ).strip() + + return StatVar.nEff ( expression ) # ============================================================================= ## Get harmonic mean over the data @@ -925,6 +978,7 @@ def data_decorate ( klass ) : if hasattr ( klass , 'get_moment' ) : klass.orig_get_moment = klass.get_moment if hasattr ( klass , 'moment' ) : klass.orig_moment = klass.moment if hasattr ( klass , 'central_moment' ) : klass.orig_central_moment = klass.central_moment + if hasattr ( klass , 'nEff' ) : klass.orig_nEff = klass.nEff if hasattr ( klass , 'mean' ) : klass.orig_mean = klass.mean if hasattr ( klass , 'variance' ) : klass.orig_variance = klass.variance if hasattr ( klass , 'dispersion' ) : klass.orig_dispersion = klass.dispersion @@ -944,7 +998,9 @@ def data_decorate ( klass ) : klass.get_moment = data_get_moment klass.moment = data_moment klass.central_moment = data_central_moment + klass.mean = data_mean + klass.nEff = data_nEff klass.variance = data_variance klass.dispersion = data_dispersion klass.rms = data_rms @@ -961,7 +1017,19 @@ def data_decorate ( klass ) : klass.deciles = data_deciles if hasattr ( klass , 'get_stats' ) : klass.orig_get_stats = klass.get_stats - + + if hasattr ( klass , 'statVar' ) : klass.orig_statVar = klass.statVar + if hasattr ( klass , 'statVars' ) : 
klass.orig_statVars = klass.statVars + if hasattr ( klass , 'sumVar' ) : klass.orig_sumVar = klass.sumVar + if hasattr ( klass , 'sumVars' ) : klass.orig_sumVars = klass.sumVars + if hasattr ( klass , 'statCov' ) : klass.orig_statCov = klass.statCov + + klass.statVar = data_statistics + klass.statVars = data_statistics + klass.sumVar = data_sum + klass.sumVars = data_sum + klass.statCov = data_covariance + if hasattr ( klass , 'the_moment' ) : klass.orig_the_moment = klass.the_moment if hasattr ( klass , 'the_mean' ) : klass.orig_the_mean = klass.the_mean if hasattr ( klass , 'the_rms' ) : klass.orig_the_rms = klass.the_rms @@ -993,6 +1061,7 @@ def data_decorate ( klass ) : klass.moment , klass.central_moment , klass.mean , + klass.nEff , klass.variance , klass.dispersion , klass.rms , @@ -1007,6 +1076,11 @@ def data_decorate ( klass ) : klass.quintiles , klass.deciles , klass.get_stats , + klass.statVar , + klass.statVars , + klass.sumVar , + klass.sumVars , + klass.statCov , klass.the_moment , klass.the_mean , klass.the_rms , diff --git a/ostap/tools/tests/test_tools_reweight2.py b/ostap/tools/tests/test_tools_reweight2.py index da490305..b6b5c402 100755 --- a/ostap/tools/tests/test_tools_reweight2.py +++ b/ostap/tools/tests/test_tools_reweight2.py @@ -407,8 +407,8 @@ def prepare_data ( ) : ## 4e) 2D-statistics mcstat = mcds.statCov('x','y','weight') - logger.info ( tag + ': x/y covariance DATA (unbinned):\n# %s' % ( str ( datastat [2] ).replace ( '\n' , '\n# ' ) ) ) - logger.info ( tag + ': x/y covariance MC (unbinned):\n# %s' % ( str ( mcstat [2] ).replace ( '\n' , '\n# ' ) ) ) + logger.info ( tag + ': x/y correlation DATA (unbinned): %+.2f' % datastat.correlation () ) + logger.info ( tag + ': x/y correlation MC (unbinned): %+.2f' % mcstat.correlation () ) if not active and 3 < iter : logger.info ( allright ( 'No more iterations, converged after #%d' % iter ) ) diff --git a/ostap/trees/trees.py b/ostap/trees/trees.py index e27aaba8..dbf73be2 100755 --- 
a/ostap/trees/trees.py +++ b/ostap/trees/trees.py @@ -54,11 +54,6 @@ import ostap.trees.cuts # ============================================================================= _large = ROOT.TVirtualTreePlayer.kMaxEntries -# ============================================================================= - - - - # ============================================================================= ## check validity/emptiness of TTree/TChain # require non-zero poniter and non-empty Tree/Chain @@ -721,87 +716,6 @@ def _rt_contains_ ( tree , obj ) : ROOT.TTree .__contains__ = _rt_contains_ ROOT.TChain.__contains__ = _rt_contains_ -# ============================================================================= -## get the statistic for certain expression(s) in Tree/Dataset -# @code -# tree = ... -# stat1 = tree.statVar ( 'S_sw/effic' ) -# stat2 = tree.statVar ( 'S_sw/effic' , 'pt>1000' ) -# @endcode -# @author Vanya BELYAEV Ivan.Belyaev@itep.ru -# @date 2013-09-15 -def _stat_var_ ( tree , expression , *cuts ) : - """Get a statistic for the expression in Tree/Dataset - - >>> tree = ... - >>> stat1 = tree.statVar ( 'S_sw/effic' ) - >>> stat2 = tree.statVar ( 'S_sw/effic' ,'pt>1000') - - """ - - if isinstance ( expression , string_types ) : - - explist = split_string ( expression , var_separators , strip = True , respect_groups = True ) - if 1 != len ( explist ) : - return _stat_vars_ ( tree , explist , *cuts ) ## RETURRN - - else : - - return _stat_vars_ ( tree , expression , *cuts ) ## RETURN - - with rootException() : - return Ostap.StatVar.statVar ( tree , expression , *cuts ) - -ROOT.TTree . statVar = _stat_var_ -ROOT.TChain . statVar = _stat_var_ - - -# ============================================================================= -## get the statistic for certain expressions in Tree/Dataset -# @code -# tree = ... 
-# stat1 = tree.statVars( [ 'S_sw/effic', 'pt1' , 'pt2' ] ) -# stat2 = tree.statVars( [ 'S_sw/effic', 'pt1' , 'pt2' ] , 'mass>10') -# @endcode -# It is more efficient than getting statistics individually for each expression -# @see Ostap::Math::StatVar -# @see Ostap::Math::StatVar::statVars -# @author Vanya BELYAEV Ivan.Belyaev@itep.ru -# @date 2018-11-03 -def _stat_vars_ ( tree , expressions , *cuts ) : - """Get the statistic for certain expressions in Tree/Dataset - >>> tree = ... - >>> stat1 = tree.statVars( [ 'S_sw/effic', 'pt1' , 'pt2' ] ) - >>> stat2 = tree.statVars( [ 'S_sw/effic', 'pt1' , 'pt2' ] , 'mass>10') - - It is more efficient than getting statistics individually for each expression - - see Ostap::Math::StatVar - - see Ostap::Math::StatVar::statVars - """ - - if isinstance ( expressions , string_types ) : - return _stat_var_ ( tree , expressions , *cuts ) - - if not expressions : return {} - - vct = strings ( *expressions ) - res = std.vector(WSE)() - - with rootException() : - ll = Ostap.StatVar.statVars ( tree , res , vct , *cuts ) - - assert res.size() == vct.size(), 'stat_vars: Invalid size of structures!' - - N = res.size() - results = {} - - for i in range ( N ) : - results[ vct [ i ] ] = WSE ( res[i] ) - - return results - -ROOT.TTree . statVars = _stat_vars_ -ROOT.TChain . statVars = _stat_vars_ - # ============================================================================= ## get the statistic for pair of expressions in Tree/Dataset # @code @@ -929,36 +843,6 @@ def _stat_covs_ ( tree , ROOT.TChain . statCovs = _stat_covs_ -# ============================================================================ -## get the effectove vector of mean-values with covarinaces for the dataset -# @code -# ds =... -# vct = ds.statVct('a,b,c') -# @endcode -def _stat_vct_ ( ds , - variables , - cuts = '' ) : - """Get the effective vector of mean-values with covariances for the dataset - >>> ds =... 
- >>> vct = ds.statVct() - """ - - if isinstance ( variables , string_types ) : - variables = split_string ( variables , strip = True , respect_groups = True ) - - stats, cov2, length = ds.statCovs ( variables , cuts ) - - N = len ( stats ) - v = Ostap.Vector ( N ) () - for i in range ( N ) : v[i] = stats[i].mean() - - return Ostap.VectorE ( N ) ( v , cov2 ) - - -ROOT.TTree . statVct = _stat_vct_ -ROOT.TChain . statVct = _stat_vct_ - - # ============================================================================= from ostap.stats.statvars import data_the_moment @@ -999,75 +883,6 @@ def _tc_minmax_ ( tree , var , cuts = '' , delta = 0.0 ) : ROOT.TTree . vminmax = _tc_minmax_ ROOT.TChain . vminmax = _tc_minmax_ -# ============================================================================= -## @var _h_one_ -# special helper histogram for summation -_h_one_ = ROOT.TH1D( hID() , '' , 3 , -1 , 2 ) ; _h_one_.Sumw2() -# ============================================================================= -## make a sum over expression in Tree/Dataset -# -# @code -# -# >>> dataset = ... -# ## get corrected number of events -# >>> n_corr = dataset.sumVar ( "S_sw/effic" ) -# -# @endcode -# -# @author Vanya BELYAEV Ivan.Belyaev@itep.ru -# @date 2013-09-15 -def _sum_var_old_ ( tree , expression ) : - """Make a sum over expression in Tree/Dataset - - >>> dataset = ... - ## get corrected number of signal events - >>> n_corr = dataset.sumVar_( 'S_sw/effic' ) - - """ - _h_one_.Reset() - tree.project ( _h_one_ , '1' , expression ) - return _h_one_.accumulate() - - -ROOT.TTree . sumVar_ = _sum_var_old_ -ROOT.TChain . sumVar_ = _sum_var_old_ - -# ============================================================================= -## make a sum over expression in Tree/Dataset -# -# @code -# -# >>> dataset = ... 
-# -# ## get corrected number of events -# >>> n_corr = dataset.sumVar ( "S_sw/effic" ) -# -# ## get corrected number of events -# >>> n_corr_pt = dataset.sumVar ( "S_sw/effic" , 'pt>1') -# -# @endcode -# -# @author Vanya BELYAEV Ivan.Belyaev@itep.ru -# @date 2013-09-15 -def _sum_var_ ( tree , expression , *cuts ) : - """Make a sum over expression in Tree/Dataset - - >>> dataset = ... - ## get corrected number of signal events - >>> n_corr = dataset.sumVar ( 'S_sw/effic' ) - - ## get corrected number of signal events - >>> n_corr_pt = dataset.sumVar ( 'S_sw/effic' , 'pt>1') - - """ - ## if hasattr ( tree , 'pStatVar' ) : w = tree.pStatVar ( expression , *cuts ) - ## else : w = tree. statVar ( expression , *cuts ) - w = tree. statVar ( expression , *cuts ) - ## - return VE ( w.sum() , w.sum2() ) - -ROOT.TTree . sumVar = _sum_var_ -ROOT.TChain . sumVar = _sum_var_ # ============================================================================= ## get the leaves for the given tree/chain @@ -2668,27 +2483,6 @@ def add_new_branch ( tree , ROOT.TTree.add_new_branch = add_new_branch -# ============================================================================= -## Get the effective entries in data set -# @code -# data = ... -# neff = data.nEff('b1*b1') -# @endcode -def _stat_nEff_ ( self , cuts = '' , *args ) : - """Get the effective entries in data set - >>> data = ... 
- >>> neff = data.nEff('b1*b1') - """ - if isinstance ( cuts , ROOT.TCut ) : cuts = str ( cuts ) - with rootException() : - return Ostap.StatVar.nEff ( self , cuts , *args ) - -ROOT.TTree.nEff = _stat_nEff_ -# ============================================================================= - -from ostap.stats.statvars import data_decorate as _dd -_dd ( ROOT.TTree ) - # ============================================================================= ## get all variables needed to evaluate the expressions for the given tree # @code @@ -3557,12 +3351,9 @@ def use_aliases ( tree , **aliases ) : return UseAliases ( tree , **aliases ) # ============================================================================= -_decorated_classes_ = ( - ROOT.TTree , - ROOT.TChain , - ROOT.TLeaf - ) -_new_methods_ = ( +from ostap.stats.statvars import data_decorate as _dd +_new_methods_ = _dd ( ROOT.TTree ) +_new_methods_ += ( # ROOT.TTree .withCuts , ROOT.TChain.withCuts , @@ -3576,24 +3367,12 @@ def use_aliases ( tree , **aliases ) : ROOT.TTree .project , ROOT.TChain.project , # - ROOT.TTree .statVar , - ROOT.TChain.statVar , - ROOT.TTree .statCov , - ROOT.TChain.statCov , ROOT.TTree .statCovs , ROOT.TChain.statCovs , - ROOT.TTree .statVct , - ROOT.TChain.statVct , # ROOT.TTree .vminmax , ROOT.TChain.vminmax , # - ROOT.TTree .sumVar_ , - ROOT.TChain.sumVar_ , - # - ROOT.TTree .sumVar , - ROOT.TChain.sumVar , - # ROOT.TTree .branches , ROOT.TTree .__repr__ , ROOT.TTree .__str__ , @@ -3610,29 +3389,20 @@ def use_aliases ( tree , **aliases ) : ROOT.TTree.valid_formula , ROOT.TTree.valid_expression , # - ROOT.TTree.nEff , - ROOT.TTree.get_moment , - ROOT.TTree.central_moment , - ROOT.TTree.mean , - ROOT.TTree.rms , - ROOT.TTree.skewness , - ROOT.TTree.kurtosis , - ROOT.TTree.quantile , - ROOT.TTree.median , - ROOT.TTree.quantiles , - ROOT.TTree.interval , - ROOT.TTree.terciles , - ROOT.TTree.quartiles , - ROOT.TTree.quintiles , - ROOT.TTree.deciles , - # ROOT.TTree.the_variables , 
ROOT.TTree.add_new_branch , ## ROOT.TLeaf.get_type , ROOT.TLeaf.get_type_short , ROOT.TLeaf.get_short_type , - ) +) + + +_decorated_classes_ = ( + ROOT.TTree , + ROOT.TChain , + ROOT.TLeaf +) # ============================================================================= if '__main__' == __name__ :