diff --git a/ostap/core/ostap_types.py b/ostap/core/ostap_types.py index 33215c68..ca17cc95 100644 --- a/ostap/core/ostap_types.py +++ b/ostap/core/ostap_types.py @@ -26,7 +26,8 @@ 'sequence_types' , ## sequence types 'iterable_types' , ## iterable 'sized_types' , ## sized types - 'path_types' , ## path-like types + 'path_types' , ## path-like types + 'ordered_dict' , ## normal/ordered dict ## 'is_integer' , ## is a value of int-like type? 'is_number' , ## is a value of numeric type? @@ -76,11 +77,9 @@ # ============================================================================= logger.debug ( 'Core objects/classes/functions for Ostap') # ============================================================================= - iterable_types = Iterable, num_types = integer_types + ( float , ) str_types = str, - list_types = list , tuple import array listlike_types = list_types + ( set , C.Sequence , array.array ) @@ -99,7 +98,13 @@ if (3,6) <= python_version : path_types = string_types + ( os.PathLike , ) # ============================================================================= - +## sometimewe we need to ensure that dictionary is ordered +ordered_dict = dict +if python_version < ( 3 , 7 ) : + from collections import OrderedDict as ordered_dict + dictlike_types += ( ordered_dict, ) + dict_types += ( ordered_dict, ) + # ============================================================================= ## Is this number of a proper integer? def is_integer ( v ) : diff --git a/ostap/fitting/dataset.py b/ostap/fitting/dataset.py index 00d6f527..84d8abf4 100644 --- a/ostap/fitting/dataset.py +++ b/ostap/fitting/dataset.py @@ -763,10 +763,6 @@ def _rds_make_unique_ ( dataset , ROOT.RooAbsData . sample = _rad_sample_ ROOT.RooAbsData . shuffle = _rad_shuffle_ -from ostap.trees.trees import _stat_covs_ - -ROOT.RooAbsData . statCovs = _stat_covs_ - from ostap.stats.statvars import data_the_moment ROOT.RooAbsData. the_moment = data_the_moment @@ -802,7 +798,6 @@ def _rds_make_unique_ ( dataset , ROOT.RooAbsData . sample , ROOT.RooAbsData . shuffle , # - ROOT.RooAbsData . statCovs , ] diff --git a/ostap/frames/frames.py b/ostap/frames/frames.py index 399fadc2..8a5f71f8 100644 --- a/ostap/frames/frames.py +++ b/ostap/frames/frames.py @@ -51,10 +51,11 @@ # ============================================================================= from ostap.core.core import ( cpp , Ostap , strings , split_string , var_separators ) -from ostap.math.base import isequal, iszero +from ostap.math.base import isequal, iszero from ostap.core.meta_info import root_info from ostap.core.ostap_types import ( integer_types , num_types , - string_types , sequence_types ) + string_types , sequence_types , + ordered_dict ) from ostap.logger.utils import multicolumn from ostap.utils.progress_conf import progress_conf from ostap.utils.basic import isatty, loop_items @@ -268,8 +269,7 @@ def frame_progress2 ( frame , length = -1 ) : return frame __all__ = __all__ + ( 'frame_progress2', ) - - + # ============================================================================= ## Get the effective entries in data frame # @code @@ -359,8 +359,10 @@ def _fr_helper_ ( frame , expressions , cuts = '' ) : exprs, cuts, input_string = SV.vars_and_cuts ( expressions , cuts ) ## Frame/Tree ? - if isinstance ( frame , ROOT.TTree ) : frame = DataFrame ( frame ) - node = as_rnode ( frame ) + if isinstance ( frame , ROOT.TTree ) : node = DataFrame ( frame ) + elif isinstance ( frame , DataFrame ) : node = frame + elif isinstance ( frame , FrameNode ) : node = frame + else : node = as_rnode ( frame ) ## get the list of currently known names vars = frame_columns ( node ) @@ -368,17 +370,22 @@ def _fr_helper_ ( frame , expressions , cuts = '' ) : current = node - vnames = {} - for i,expr in enumerate ( exprs ) : + vnames = ordered_dict() + for i , expr in enumerate ( exprs , start = 1 ) : vname = expr if not expr in all_vars : used = tuple ( all_vars | set ( frame_columns ( current ) ) ) - vn = var_name ( 'var_' , used , expr , *vars ) + vn = var_name ( 'var%d_' % i , used , expr , *vars ) all_vars.add ( vn ) current = current.Define ( vn , expr ) vname = vn vnames [ expr ] = vname - + + ## Filter frame, if needed! + ## if cuts : + ## ## add named filter + ## current = current.Filter ( '(bool) (%s)' % cuts, 'FILTER: %s' % cuts ) + cname = cuts if cuts and not cuts in all_vars : used = tuple ( all_vars | set ( frame_columns ( current ) ) ) @@ -387,8 +394,8 @@ def _fr_helper_ ( frame , expressions , cuts = '' ) : ncuts = '1.0*(%s)' % cuts current = current.Define ( cn , ncuts ) cname = cn - - return current, vnames , cname, input_string + + return current, vnames, cname, input_string # ================================================================================== ## the second helper method to implement "statistics"-related actions @@ -414,7 +421,6 @@ def _fr_helper2_ ( frame , creator , expressions , cuts = '' , lazy = True ) : return results - # ============================================================================ _types_1D = Ostap.Math.LegendreSum , Ostap.Math.Bernstein , Ostap.Math.ChebyshevSum , _types_2D = Ostap.Math.LegendreSum2 , Ostap.Math.Bernstein2D , @@ -422,7 +428,7 @@ def _fr_helper2_ ( frame , creator , expressions , cuts = '' , lazy = True ) : _types_4D = Ostap.Math.LegendreSum4 , _types_nD = _types_1D + _types_2D + _types_3D + _types_4D # ============================================================================= -## "project/parameterise" frame into polynomial structures +## "project/parameterise" frame into polynomial structures (lazy action) # @code # frame = ... # @@ -439,111 +445,87 @@ def _fr_helper2_ ( frame , creator , expressions , cuts = '' , lazy = True ) : # res = frame_param ( frame , ls2 , 'y' , 'x' , 'z>0' ) # # bs2 = Ostap.Math.Bernstein2D ( ... ) -# res = frame_param ( frame , bs2 , 'y' , 'x' , 'z>0' ) +# res = frame_param ( frame , bs2 , ( 'y' , 'x' ) , 'z>0' ) # # ls3 = Ostap.Math.LegendreSum3 ( ... ) -# res = frame_param ( frame , ls3 , 'z' , 'y' , 'x' , 'z>0' ) +# res = frame_param ( frame , ls3 , ( 'z' , 'y' , 'x' ) , 'z>0' ) # # bs2 = Ostap.Math.Bernstein2D ( ... ) -# res = frame_param ( frame , bs2 , 'y' , 'x' , 'z>0' ) -# +# res = frame_param ( frame , bs2 , ( 'y' , 'x' ) , 'z>0' ) # @endcode -def _fr_param_ ( frame , poly , *expressions ) : +def _fr_param_ ( frame , poly , expressions , cuts = '' , lazy = True ) : """ `project/parameterise` frame into polynomial structures + >>> frame = ... >>> ls = Ostap.Math.LegendreSum ( ... ) - >>> res = frame_param ( frame , ls , 'x' , 'y>0' ) + >>> res = frame_param_lazy ( frame , ls , 'x' , 'y>0' ) >>> bs = Ostap.Math.Bernstein ( ... ) - >>> res = frame_param ( frame , bs , 'x' , 'y>0' ) + >>> res = frame_param_lazy ( frame , bs , 'x' , 'y>0' ) >>> cs = Ostap.Math.ChebyshevSum ( ... ) - >>> res = frame_param ( frame , cs , 'x' , 'y>0' ) + >>> res = frame_param_lazy( frame , cs , 'x' , 'y>0' ) >>> ls2 = Ostap.Math.LegendreSum2 ( ... ) - >>> res = frame_param ( frame , ls2 , 'y' , 'x' , 'z>0' ) + >>> res = frame_param_lazy ( frame , ls2 , 'y,x' , 'z>0' ) >>> bs2 = Ostap.Math.Bernstein2D ( ... ) - >>> res = frame_param ( frame , bs2 , 'y' , 'x' , 'z>0' ) + >>> res = frame_param_lazy ( frame , bs2 , 'y,x' , 'z>0' ) >>> ls3 = Ostap.Math.LegendreSum3 ( ... ) - >>> res = frame_param ( frame , ls3 , 'z' , 'y' , 'x' , 'z>0' ) + >>> res = frame_param_lazy ( frame , ls3 , 'z,y;z' , 'z>0' ) >>> bs2 = Ostap.Math.Bernstein2D ( ... ) - >>> res = frame_param ( frame , bs2 , 'y' , 'x' , 'z>0' ) + >>> res = frame_param_lazy ( frame , bs2 , 'y,x' , 'z>0' ) """ - - if isinstance ( frame , ROOT.TTree ) : frame = DataFrame ( frame ) - - node = as_rnode ( frame ) - - items = [] - for e in expressions : - items += split_string ( e , ',:;' , strip = True , respect_groups = True ) - - ## get the list of currently known names - vars = frame_columns ( node ) - all_vars = set ( vars ) + ## the histogram ? + if isinstance ( poly , ROOT.TH1 ) : + return _fr_project_ ( frame , poly , expressions , cuts = cuts , lazy = lazy ) - assert ( isinstance ( poly , _types_1D ) and 1 <= len ( items ) <= 2 ) or \ - ( isinstance ( poly , _types_2D ) and 2 <= len ( items ) <= 3 ) or \ - ( isinstance ( poly , _types_3D ) and 3 <= len ( items ) <= 4 ) or \ - ( isinstance ( poly , _types_4D ) and 4 <= len ( items ) <= 5 ) , \ - "Invalid structure of polynomial and variables/cuts!" + ## + current , items, cname , _ = _fr_helper_ ( frame , expressions , cuts ) - current = node + nvars = len ( items ) - ## actual variables - uvars = [] - for e in items : - if e in all_vars : - uvars.append ( e ) - continue - else : - used = tuple ( all_vars | set ( frame_columns ( current ) ) ) - vn = var_name ( 'var_' , used , e , *vars ) - all_vars.add ( vn ) - current = current.Define ( vn , e ) - uvars.append ( vn ) - - if ( isinstance ( poly , _types_1D ) and 2 == len ( uvars ) ) or \ - ( isinstance ( poly , _types_2D ) and 3 == len ( uvars ) ) or \ - ( isinstance ( poly , _types_3D ) and 4 == len ( uvars ) ) or \ - ( isinstance ( poly , _types_4D ) and 5 == len ( uvars ) ) : - - cuts = uvars [ -1] - current = current.Filter ( cuts ) - - wn = var_name ( 'weight_' , used , cuts , *vars ) - all_vars.add ( wn ) - current = current.Define ( wn , '1.0*(%s)' % cuts ) - - uvars = [ u for u in reversed ( uvars [:-1] ) ] + [ wn ] ## ATTENTION !! RESERSED HERE! - - else : - uvars = [ u for u in reversed ( uvars ) ] ## ATTENTION !! RESERSED HERE! + assert \ + ( 1 == nvars and isinstance ( poly , _types_1D ) ) or \ + ( 2 == nvars and isinstance ( poly , _types_2D ) ) or \ + ( 3 == nvars and isinstance ( poly , _types_3D ) ) or \ + ( 4 == nvars and isinstance ( poly , _types_4D ) ) , \ + "Invalid structure of polynomial and variables/cuts!" + ## variables + uvars = [ k for k in items.values() ] + if cuts : uvars.append ( cname ) + uvars = CNT ( uvars ) ## finally book the actions! if isinstance ( poly , Ostap.Math.LegendreSum ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly ( poly ) ) , uvars ) elif isinstance ( poly , Ostap.Math.LegendreSum2 ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly2 ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly2 ( poly ) ) , uvars ) elif isinstance ( poly , Ostap.Math.LegendreSum3 ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly3 ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly3 ( poly ) ) , uvars ) elif isinstance ( poly , Ostap.Math.LegendreSum4 ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly4 ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.LegendrePoly4 ( poly ) ) , uvars ) elif isinstance ( poly , Ostap.Math.ChebyshevSum ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.ChebyshevPoly ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.ChebyshevPoly ( poly ) ) , uvars ) elif isinstance ( poly , Ostap.Math.Bernstein ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.BernsteinPoly ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.BernsteinPoly ( poly ) ) , uvars ) elif isinstance ( poly , Ostap.Math.Bernstein2D ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.BernsteinPoly2 ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.BernsteinPoly2 ( poly ) ) , uvars ) elif isinstance ( poly , Ostap.Math.Bernstein3D ) : - result = current.Book ( ROOT.std.move ( Ostap.Actions.BernsteinPoly3 ( poly ) ) , CNT ( uvars ) ) + result = current.Book ( ROOT.std.move ( Ostap.Actions.BernsteinPoly3 ( poly ) ) , uvars ) + + print ( 'I AM HERE/1' ) + if not lazy : + result = result.GetValue() + poly = result + print ( 'I AM HERE/2' ) + print ( 'I AM HERE/3', result ) return result @@ -617,8 +599,8 @@ def col_type ( var ) : vcols = tuple ( [ v for v in vcols if cpat.match ( v ) ] ) svars = vcols + tuple ( more_vars ) - stats = frame_statVars ( frame , svars , cuts = cuts , lazy = False ) - + stats = _fr_statVar_ ( frame , svars , cuts = cuts , lazy = False ) + if len ( vcols ) < 10 : ifmt = '%1d.' elif len ( vcols ) < 100 : ifmt = '%2d.' elif len ( vcols ) < 1000 : ifmt = '%3d.' @@ -852,15 +834,15 @@ def _p2_model_ ( histo ) : # ============================================================================== -## 'Lazy' project of the frame +## Project of the frame # @code # frame = ... # h1_model = ... # h1 = frame_project ( frame , h1_model , 'pt' ) # h2_model = ... -# h2 = frame_project ( frame , h2_model , 'pt' , 'x' ) +# h2 = frame_project ( frame , h2_model , ( 'pt' , 'x' ) ) # h3_model = ... -# h3 = frame_project ( frame , h3_model , 'pt' , 'x' , 'y' ) +# h3 = frame_project ( frame , h3_model , ( 'pt' , 'x' , 'y' ) ) # ... # @endcode # Cuts/weigth are also can be specified @@ -869,9 +851,9 @@ def _p2_model_ ( histo ) : # h1_model = ... # h1 = frame_project ( frame , h1_model , 'pt' , 'w') # h2_model = ... -# h2 = frame_project ( frame , h2_model , 'pt' , 'x' , 'w') +# h2 = frame_project ( frame , h2_model , 'pt,x' , 'w') # h3_model = ... -# h3 = frame_project ( frame , h3_model , 'pt' , 'x' , 'y' , 'w' ) +# h3 = frame_project ( frame , h3_model , 'pt,x:y' , 'w' ) # ... # @endcode # @@ -883,16 +865,19 @@ def _p2_model_ ( histo ) : # - ROOT::RDF::TH3DModel # - anything that can be converted to ROOT::RDF::TH1DModel, # ROOT::RDF::TH2DModel or ROOT::RDF::TH3DModel objects -# -def frame_project ( frame , model , *what ) : - """ 'Lazy' project of the frame +# @attention: variables should be listed in reverse order! +def frame_project ( frame , model , expressions , cuts = '' , lazy = True ) : + """ Project of the frame into histigram + + - attention: variables should be listed in reverse order! + >>> frame = ... >>> h1_model = ... >>> h1 = frame_project ( frame , h1_model , 'pt' ) >>> h2_model = ... - >>> h2 = frame_project ( frame , h2_model , 'pt' , 'x' ) + >>> h2 = frame_project ( frame , h2_model , 'pt,x' ) >>> h3_model = ... - >>> h3 = frame_project ( frame , h3_model , 'pt' , 'x' , 'y' ) + >>> h3 = frame_project ( frame , h3_model , ( 'pt' , 'x' , 'y' ) ) >>> ... - Cuts/weigth are also can be specified @@ -901,107 +886,59 @@ def frame_project ( frame , model , *what ) : >>> h1_model = ... >>> h1 = frame_project ( frame , h1_model , 'pt' , 'w') >>> h2_model = ... - >>> h2 = frame_project ( frame , h2_model , 'pt' , 'x' , 'w') + >>> h2 = frame_project ( frame , h2_model , ( 'pt' , 'x' ) , 'w') >>> h3_model = ... - >>> h3 = frame_project ( frame , h3_model , 'pt' , 'x' , 'y' , 'w' ) + >>> h3 = frame_project ( frame , h3_model , 'pt' , 'x,y,z' , 'w' ) >>>... - If model is a real 1D/2D/3D-hstogram, action is *not* lazy, otherwise action *is* lazy - + Model can be a real histogram or - - `ROOT.ROOT.RDF.TH1DModel` - - `ROOT.ROOT.RDF.TH2DModel` - - `ROOT.ROOT.RDF.TH3DModel` - - anything that can be converted to `ROOT.ROOT.RDF.TH1DModel`, - `ROOT.ROOT.RDF.TH2DModel` or `ROOT.ROOT.RDF.TH3DModel` objects + - `ROOT.ROOT.RDF.TH1DModel` + - `ROOT.ROOT.RDF.TH2DModel` + - `ROOT.ROOT.RDF.TH3DModel` + - anything that can be converted to : + -- `ROOT.ROOT.RDF.TH1DModel`, + -- `ROOT.ROOT.RDF.TH2DModel` or + -- `ROOT.ROOT.RDF.TH3DModel` objects + """ - - if isinstance ( frame , ROOT.TTree ) : frame = DataFrame ( frame ) - frame = as_rnode ( frame ) + if isinstance ( model , _types_nD ) : + return _fr_param_ ( frame , model , expression, cuts = cuts , laszy = lazy ) - if ( 6 , 16 ) <= root_info and isinstance ( model , _types_nD ) : - return _fr_param_ ( frame , model , *what ) - - if 1 <= len ( what ) <= 2 : - if isinstance ( what [ 0 ] , string_types ) : - ww = split_string ( what [ 0 ] , var_separators , strip = True, respect_groups = True ) - if 1 < len ( ww ) : ww.reverse() ## ATTENTION HERE: REVERSE!! - what = tuple ( ww ) + what [1:] - elif isinstance ( what [ 0 ] , sequence_types ) : - what = tuple ( w for w in what [ 0 ] ) + what [1:] - - ## strip blanks - what = tuple ( w.strip() for w in what ) + current , items, cname , _ = _fr_helper_ ( frame , expressions , cuts ) - ## cuts are empty - if 1 < len ( what ) and not what [ -1 ] : what = what [ :-1 ] + ## convert histogram-like objects into 'models' histo = None - - # - ## convert histogram-like objects into 'models' - # - - if isinstance ( model , ROOT.TProfile2D ) : - histo = model - model = model.model () - elif isinstance ( model , ROOT.TProfile ) : - histo = model - model = model.model () - elif isinstance ( model , ROOT.TH3 ) and 3 == model.dim () : - histo = model - model = model.model () - elif isinstance ( model , ROOT.TH2 ) and 2 == model.dim () : - histo = model - model = model.model () - elif isinstance ( model , ROOT.TH1 ) and 1 == model.dim () : - histo = model - model = model.model () + if isinstance ( model , ROOT.TProfile2D ) : histo, model = model, model.model () + elif isinstance ( model , ROOT.TProfile ) : histo, model = model, model.model () + elif isinstance ( model , ROOT.TH3 ) and 3 == model.dim () : histo, model = model, model.model () + elif isinstance ( model , ROOT.TH2 ) and 2 == model.dim () : histo, model = model, model.model () + elif isinstance ( model , ROOT.TH1 ) and 1 == model.dim () : histo, model = model, model.model () if histo : histo.Reset() - ## get the list of currently known names - vars = frame_columns ( frame ) + nvars = len ( items ) + pvars = [ v for v in items.values() ] + if cname : pvars.append ( cname ) - pvars = [] - current = frame - all_vars = set ( vars ) - for ww in what : - - w = ww - if isinstance ( ww , ROOT.TCut ) : w = str ( ww ) - - if not w : continue - - if w in vars : pvars.append ( w ) - elif w in pvars : pvars.append ( w ) - else : - used = tuple ( all_vars | set ( frame_columns ( current ) ) ) - vname = var_name ( 'var_' , used , *what ) - current = current.Define ( vname , w ) - all_vars.add ( vname ) - pvars.append ( vname ) - - - numvars = len ( pvars ) - - if isinstance ( model , DF_P2Model ) and 3 <= numvars <= 4 : action = current.Profile2D ( model , *pvars ) - elif isinstance ( model , DF_P1Model ) and 2 <= numvars <= 3 : action = current.Profiel1D ( model , *pvars ) - elif isinstance ( model , DF_H3Model ) and 3 <= numvars <= 4 : action = current.Histo3D ( model , *pvars ) - elif isinstance ( model , DF_H2Model ) and 2 <= numvars <= 3 : action = current.Histo2D ( model , *pvars ) - elif isinstance ( model , DF_H1Model ) and 1 <= numvars <= 2 : action = current.Histo1D ( model , *pvars ) + if 3 == nvars and isinstance ( model , DF_P2Model ) : action = current.Profile2D ( model , *pvars ) + elif 2 == nvars and isinstance ( model , DF_P1Model ) : action = current.Profiel1D ( model , *pvars ) + elif 3 == nvars and isinstance ( model , DF_H3Model ) : action = current.Histo3D ( model , *pvars ) + elif 2 == nvars and isinstance ( model , DF_H2Model ) : action = current.Histo2D ( model , *pvars ) + elif 1 == nvars and isinstance ( model , DF_H1Model ) : action = current.Histo1D ( model , *pvars ) else : - raise TypeError ('Invalid model/what objects %s %s ' % ( type ( model ) , what ) ) - - ## ATTENTION! lazy action! - if not histo : - return action + raise TypeError ('Invalid model/what objects %s %s ' % ( type ( model ) , str ( pvars ) ) ) - ## make a loop! - histo += action.GetValue() - return histo + ## if true histo is specified, the aciton is NOT lazy! + if histo : + histo += action.GetValue() + return histo + + return action if lazy else action.GetValue() # ============================================================================== ## Prescale the frame @@ -1245,8 +1182,6 @@ def frame_deciles ( frame , expression , cuts = '' , exact = True ) : """ return frame_quantiles ( frame , quantiles = 10 , expression = expression , cuts = cuts , exact = exact ) - - # ============================================================================== ## Get the mean for the frame object # @code @@ -1329,7 +1264,6 @@ def frame_kurtosis ( frame , expression , cuts = '' ) : node = as_rnode ( frame ) return SV.data_kurtosis ( node , expression = expression , cuts = cuts ) - from ostap.fitting.dataset import ds_draw as _ds_draw # ============================================================================== ## draw for frame @@ -1340,6 +1274,10 @@ def frame_draw ( frame , *args , **kwargs ) : return _ds_draw ( node , *args , **kwargs ) +# ================================================================================== +## Action-based methods +# ================================================================================== + # ================================================================================== ## get statistics of variable(s) # @code @@ -1347,7 +1285,7 @@ def frame_draw ( frame , *args , **kwargs ) : # stat = frame.statVar ( 'pt' , lazy = True ) # stat = frame.statVar ( 'pt' , 'eta>0' , lazy = True ) # @endcode -def _fr_statVar_new_ ( frame , expressions , cuts = '' , lazy = False ) : +def _fr_statVar_ ( frame , expressions , cuts = '' , lazy = True) : """Get statistics of variable(s) >>> frame = .... >>> stat = frame.statVar ( 'pt' , lazy = True ) @@ -1371,7 +1309,7 @@ def _fr_nEff_ ( frame , expressions , cuts = '' , lazy = False ) : >>> frame = ... >>> nEff = data_get_nEff ( 'x*x' , 'y>0' ) """ - return _fr_statVar_new_ ( frame , expressions , cuts = cuts , lazy = lazy ) + return _fr_statVar_ ( frame , expressions , cuts = cuts , lazy = lazy ) # ================================================================================== ## get statistics of variable(s) @@ -1412,7 +1350,7 @@ def mcreator ( node , var_name , cut_name ) : # stat = frame.the_mean ( 'x'x' , 'y<0' ) # mean = stat.mean() # @ndcode -def _fr_the_mean_ ( frame , expressions , cuts , errors = True , lazy = True ) : +def _fr_the_mean_ ( frame , expressions , cuts = '' , errors = True , lazy = True ) : """ Get a mean via moment >>> frame = ... >>> stat = frame.the_mean ( 'x'x' , 'y<0' ) @@ -1427,7 +1365,7 @@ def _fr_the_mean_ ( frame , expressions , cuts , errors = True , lazy = True ) : # stat = frame.the_rms ( 'x'x' , 'y<0' ) # value = stat.rms() # @ndcode -def _fr_the_rms_ ( frame , expressions , cuts , errors = True , lazy = True ) : +def _fr_the_rms_ ( frame , expressions , cuts = '' , errors = True , lazy = True ) : """Get a rms via moment >>> frame = ... >>> stat = frame.the_rms ( 'x'x' , 'y<0' ) @@ -1442,7 +1380,7 @@ def _fr_the_rms_ ( frame , expressions , cuts , errors = True , lazy = True ) : # stat = frame.the_variance ( 'x'x' , 'y<0' ) # value = stat.variance () # @ndcode -def _fr_the_variance_ ( frame , expressions , cuts , errors = True , lazy = True ) : +def _fr_the_variance_ ( frame , expressions , cuts = '' , errors = True , lazy = True ) : """Get a variance via moment >>> frame = ... >>> stat = frame.the_variance( 'x'x' , 'y<0' ) @@ -1457,7 +1395,7 @@ def _fr_the_variance_ ( frame , expressions , cuts , errors = True , lazy = True # stat = frame.the_skewness ( 'x'x' , 'y<0' ) # mean = stat.skewness () # @ndcode -def _fr_the_skewness_ ( frame , expressions , cuts , errors = True , lazy = True ) : +def _fr_the_skewness_ ( frame , expressions , cuts = '' , errors = True , lazy = True ) : """Get a variance via moment >>> frame = ... >>> stat = frame.the_skewness ( 'x'x' , 'y<0' ) @@ -1472,7 +1410,7 @@ def _fr_the_skewness_ ( frame , expressions , cuts , errors = True , lazy = True # stat = frame.the_kurtosis ( 'x'x' , 'y<0' ) # value = stat.kurtosis () # @ndcode -def _fr_the_kurtosis_ ( frame , expressions , cuts , errors = True , lazy = True ) : +def _fr_the_kurtosis_ ( frame , expressions , cuts = '' , errors = True , lazy = True ) : """Get a kurtosis via moment >>> frame = ... >>> stat = frame.the_kurtosis ( 'x'x' , 'y<0' ) @@ -1480,7 +1418,6 @@ def _fr_the_kurtosis_ ( frame , expressions , cuts , errors = True , lazy = True """ return _fr_the_moment_ ( frame , 8 if errors else 4 , expressions , cuts = cuts , lazy = lazy ) - # ============================================================================ ## Get an arithmetic mean # @see Ostap::Math::ArithmeticMean @@ -1695,14 +1632,18 @@ def frame_rms ( frame , expression , cuts = '' ) : # ================================================================================ if Frames_OK : - frame_statVar = _fr_statVar_new_ - frame_statVars = _fr_statVar_new_ + frame_table = _fr_table_ + + frame_the_statVar = _fr_statVar_ + frame_the_statVars = _fr_statVar_ frame_the_moment = _fr_the_moment_ - + + frame_the_nEff = _fr_nEff_ frame_the_mean = _fr_the_mean_ frame_the_rms = _fr_the_rms_ frame_the_variance = _fr_the_variance_ + frame_the_dispersion = _fr_the_variance_ frame_the_skewness = _fr_the_skewness_ frame_the_kurtosis = _fr_the_kurtosis_ @@ -1711,8 +1652,9 @@ def frame_rms ( frame , expression , cuts = '' ) : frame_harmonic_mean = _fr_harmonic_mean_ frame_power_mean = _fr_power_mean_ frame_lehmer_mean = _fr_lehmer_mean_ - for f in frames : - f.statVars = frame_statVars + + for f in frames : + f.statVars = frame_the_statVars _new_methods_ .append ( f.statVars ) f.table = frame_table _new_methods_ .append ( f.table ) @@ -1739,11 +1681,11 @@ def frame_rms ( frame , expression , cuts = '' ) : f.lehmer_mean = frame_lehmer_mean _new_methods_ .append ( f.lehmer_mean ) - _new_methods_ .append ( frame_statVars ) - _new_methods_ .append ( frame_table ) - _new_methods_ .append ( frame_the_moment ) + _new_methods_ .append ( frame_table ) + _new_methods_ .append ( frame_the_statVar ) + _new_methods_ .append ( frame_the_moment ) _new_methods_ .append ( frame_arithmetic_mean ) - __all__ = __all__ + ( 'frame_statVars' , + __all__ = __all__ + ( 'frame_the_statVar' , 'frame_table' , 'frame_the_moment' , 'frame_the_mean' , @@ -1757,8 +1699,8 @@ def frame_rms ( frame , expression , cuts = '' ) : 'frame_power_mean' , 'frame_lehmer_mean' ) - ROOT.TTree. fstatVars = frame_statVars - ROOT.TTree. fthe_moment = frame_statVars + ROOT.TTree. fstatVars = frame_the_statVar + ROOT.TTree. fthe_moment = frame_the_statVar _new_methods_ .append ( ROOT.TTree. fstatVars ) _new_methods_ .append ( ROOT.TTree. fthe_moment ) diff --git a/ostap/frames/tests/test_frames_frames.py b/ostap/frames/tests/test_frames_frames.py index c4581cfe..fe1a0a3b 100644 --- a/ostap/frames/tests/test_frames_frames.py +++ b/ostap/frames/tests/test_frames_frames.py @@ -462,7 +462,7 @@ def test_frame7 () : poll = rl.GetValue() polb = rb.GetValue() - with use_canvas ( 'test_frame5: b4,b5.b6 as polynomials' , wait = 2 ) : + with use_canvas ( 'test_frame7: b4,b5,b6 as polynomials' , wait = 2 ) : poll.draw ( linecolor = 2 ) polc.draw ( 'same' , linecolor = 4 ) polb.draw ( 'same' , linecolor = 8 ) diff --git a/ostap/parallel/parallel_statvar.py b/ostap/parallel/parallel_statvar.py index 61be02a3..2e07ae56 100644 --- a/ostap/parallel/parallel_statvar.py +++ b/ostap/parallel/parallel_statvar.py @@ -77,26 +77,25 @@ def process ( self , jobid , item ) : with logWarning() : import ostap.core.pyrouts import ostap.trees.trees - chain = item.chain first = item.first last = min ( n_large , first + item.nevents if 0 < item.nevents else n_large ) - from ostap.trees.trees import _stat_vars_ - - self.__output = _stat_vars_ ( chain , self.what , self.cuts , first , last ) + from ostap.stats.starvrs import data_statistics + self.__output = data_statistics ( chain , self.what , self.cuts , first , last ) return self.__output ## merge results def merge_results ( self , result , jobid = -1 ) : - from ostap.stats.counters import WSE + from ostap.stats.counters import WSE + from ostap.core.ostap_types import dictlike_types if not self.__output : self.__output = result else : assert type( self.__output ) == type ( result ) , 'Invalid types for merging!' - if isinstance ( self.__output , dict ) : + if isinstance ( self.__output , dictlike_types ) : for key in result : if key in self.output : self.__output [ key ] += result [ key ] else : self.__output [ key ] = result [ key ] @@ -127,7 +126,7 @@ def pStatVar ( chain , """ ## few special/trivial cases - from ostap.trees.trees import _stat_vars_ + import ostap.trees.trees last = min ( n_large , first + nevents if 0 < nevents else n_large ) if 0 <= first and 0 < nevents < chunk_size : diff --git a/ostap/stats/statvars.py b/ostap/stats/statvars.py index f795cca4..c0500e1f 100644 --- a/ostap/stats/statvars.py +++ b/ostap/stats/statvars.py @@ -50,6 +50,10 @@ 'data_central_moment' , ## get the central moment (with uncertainty) 'data_nEff' , ## get the effective number of entries 'data_sum' , ## get the (weigted) sum + 'data_statistics' , ## get the statistics + 'data_covariance' , ## get the covariance + 'data_covariances' , ## get the covariances + 'data_statvector' , ## get the stat-vector 'data_mean' , ## get the mean (with uncertainty) 'data_variance' , ## get the variance (with uncertainty) 'data_dispersion' , ## get the dispersion (with uncertainty) @@ -81,7 +85,7 @@ # ============================================================================= from builtins import range from ostap.math.base import isequal, iszero -from ostap.core.core import Ostap, rootException, WSE +from ostap.core.core import Ostap, rootException, WSE, VE, std from ostap.core.ostap_types import ( string_types , integer_types , num_types , dictlike_types ) from ostap.trees.cuts import expression_types, vars_and_cuts @@ -299,7 +303,7 @@ def data_covariance ( data , expression1 , expression2 , cuts = '' , *args ) : expression1 = str ( expression1 ) expression2 = str ( expression2 ) cuts = str ( cuts ).strip() - + with rootException() : if isinstance ( data , ROOT.TTree ) and not cuts : return StatVar.statCov ( data , expression1 , expression2 , *args ) @@ -307,6 +311,86 @@ def data_covariance ( data , expression1 , expression2 , cuts = '' , *args ) : return StatVar.statCov ( data , expression1 , expression2 , cuts , *args ) +# ============================================================================= +## get the covariances for many varibales +# @code +# tree = ... +# stats , cov2 , len = data_covarinces ( ['x' , 'y'] ) +# # apply some cuts +# stats , cov2 , len = data_covariances ( [ 'x' , 'y' , 'z'] , 'z>0' ) +# @endcode +# @author Vanya BELYAEV Ivan.Belyaev@itep.ru +# @date 2017-02-19 +def data_covariances ( tree , + expressions , + cuts = '' , *args ) : + """Get the statistic for the list of expressions + >>> tree = ... + >>> stats , cov2 , len = data_covariances ( data , ['x' , 'y'] ) + Apply some cuts + >>> stats , cov2 , len = data_covariances ( data , [ 'x' , 'y' , 'z'] , 'z>0' ) + Use only subset of events + >>> stats , cov2 , len = data_covariances ( data , [ 'x' , 'y' , 'z' ], 'z>0' , 100 , 10000 ) + """ + + ## decode expressions & cuts + var_lst, cuts, input_string = vars_and_cuts ( expressions , cuts ) + + assert 2 <= len ( var_lst ) , 'Invalid number of varibales!' + + from ostap.core.core import strings + vars = strings ( var_lst ) + + WSE = Ostap.WStatEntity + WV = std.vector( WSE ) + stats = WV () + + import ostap.math.linalg + import ostap.math.linalgt + + N = len ( var_lst ) + cov2 = Ostap.TMatrixSymD ( N ) + + with rootException() : + length = Ostap.StatVar.statCov ( tree , + vars , + cuts , + stats , + cov2 , *args ) + + assert N == len ( stats ) , "Invalid size 'stats' from 'Ostap.StatVar.statCov'" + assert cov2.IsValid() , "Invalid 'cov2' from 'Ostap.StatVar.statCov'" + assert N == cov2.GetNrows() , "Invalid size of 'cov2' from 'Ostap.StatVar.statCov'" + assert N == cov2.GetNcols() , "Invalid size of 'cov2' from 'Ostap.StatVar.statCov'" + + ## covert to tuple + stats = tuple ( WSE ( s ) for s in stats ) + + ## convert to S-matrix + cov2 = cov2.smatrix() + + return stats, cov2 , length + +# ============================================================================ +## get the effective vector of mean-values with covarinaces for the dataset +# @code +# vct = data_statvct ( data , 'a,b,c' ) +# @endcode +def data_statvector ( data , + expressions , + cuts = '' ) : + """Get the effective vector of mean-values with covariances for the dataset + >>> vct = data_statvct ( data , 'a,y,z' ,'w' ) + """ + + stats , cov2, _ = data_covariances ( data , expressions , cuts = cuts ) + + N = len ( stats ) + v = Ostap.Vector ( N ) () + for i in range ( N ) : v[i] = stats[i].mean() + + return Ostap.VectorE ( N ) ( v , cov2 ) + # ============================================================================== ## Get the (weighted) sum over the variable(s) # @code @@ -325,7 +409,7 @@ def data_sum ( data , expressions , cuts = '' , *args ) : result = data_statistics ( data , expressions , cuts , *args ) - if isinstance ( result , dictlike_type ) : + if isinstance ( result , dictlike_types ) : for k , r in loop_items ( result ) : result [ key ] = VE ( r.sum() , r.sum2() ) else : @@ -334,7 +418,8 @@ def data_sum ( data , expressions , cuts = '' , *args ) : return result # ============================================================================== -## Get the effewctoevnumebr of etries in data +## Get the effective number of entries in data +# \f$ \frac{ (\sum w)^2}{\sum w^2} \f$ # @code # data = ... # result = data_nEff ( data , 'x+y' ) @@ -350,7 +435,7 @@ def data_nEff ( data , expression = '' , *args ) : assert isinstance ( expression , expression_types ) , 'Invalid type of expression!' expression = str ( expression ).strip() - return StatVar.nEff ( expression ) + return StatVar.nEff ( data , expression ) # ============================================================================= ## Get harmonic mean over the data @@ -1023,12 +1108,16 @@ def data_decorate ( klass ) : if hasattr ( klass , 'sumVar' ) : klass.orig_sumVar = klass.sumVar if hasattr ( klass , 'sumVars' ) : klass.orig_sumVars = klass.sumVars if hasattr ( klass , 'statCov' ) : klass.orig_statCov = klass.statCov + if hasattr ( klass , 'statCovs' ) : klass.orig_statCovs = klass.statCovs + if hasattr ( klass , 'statVct' ) : klass.orig_statVct = klass.statVct klass.statVar = data_statistics klass.statVars = data_statistics klass.sumVar = data_sum klass.sumVars = data_sum klass.statCov = data_covariance + klass.statCovs = data_covariances + klass.statVct = data_statvector if hasattr ( klass , 'the_moment' ) : klass.orig_the_moment = klass.the_moment if hasattr ( klass , 'the_mean' ) : klass.orig_the_mean = klass.the_mean @@ -1081,6 +1170,7 @@ def data_decorate ( klass ) : klass.sumVar , klass.sumVars , klass.statCov , + klass.statCovs , klass.the_moment , klass.the_mean , klass.the_rms , diff --git a/ostap/tools/tests/test_tools_reweight2.py b/ostap/tools/tests/test_tools_reweight2.py index b6b5c402..adeff98a 100755 --- a/ostap/tools/tests/test_tools_reweight2.py +++ b/ostap/tools/tests/test_tools_reweight2.py @@ -235,7 +235,7 @@ def prepare_data ( ) : datatree = ROOT.TChain ( 'DATA_tree' ) ; datatree.Add ( testdata ) mctree = ROOT.TChain ( tag_mc ) ; mctree.Add ( testdata ) -datastat = datatree.statCov('x','y') +datastat = datatree.statCov ( 'x' , 'y' ) # ============================================================================= ## prebook MC histograms # ============================================================================= @@ -405,8 +405,8 @@ def prepare_data ( ) : title = tag + ': DATA(x,y) vs MC(x,y) difference' logger.info ( '%s:\n%s' % ( title , hdata .cmp_diff_prnt ( hmc , density = True , title = title , prefix = '# ' ) ) ) - ## 4e) 2D-statistics - mcstat = mcds.statCov('x','y','weight') + ## 4e) 2D-statistics + mcstat = mcds .statCov ( 'x' , 'y' , 'weight' ) logger.info ( tag + ': x/y correlation DATA (unbinned): %+.2f' % datastat.correlation () ) logger.info ( tag + ': x/y correlation MC (unbinned): %+.2f' % mcstat.correlation () ) diff --git a/ostap/trees/trees.py b/ostap/trees/trees.py index dbf73be2..facb7a3b 100755 --- a/ostap/trees/trees.py +++ b/ostap/trees/trees.py @@ -716,133 +716,6 @@ def _rt_contains_ ( tree , obj ) : ROOT.TTree .__contains__ = _rt_contains_ ROOT.TChain.__contains__ = _rt_contains_ -# ============================================================================= -## get the statistic for pair of expressions in Tree/Dataset -# @code -# tree = ... -# stat1 , stat2 , cov2 , len = tree.statCov ( 'x' , 'y' ) -# # apply some cuts -# stat1 , stat2 , cov2 , len = tree.statCov ( 'x' , 'y' , 'z>0' ) -# # use only subset of events -# stat1 , stat2 , cov2 , len = tree.statCov ( 'x' , 'y' , 'z>0' , 100 , 10000 ) -# @endcode -# @author Vanya BELYAEV Ivan.Belyaev@itep.ru -# @date 2013-09-15 -def _stat_cov_ ( tree , - expression1 , - expression2 , - cuts = '' , *args ) : - """Get the statistic for pair of expressions in Tree/Dataset - - >>> tree = ... - >>> stat1 , stat2 , cov2 , len = tree.statCov( 'x' , 'y' ) - - Apply some cuts: - >>> stat1 , stat2 , cov2 , len = tree.statCov( 'x' , 'y' , 'z>0' ) - - Use only subset of events - >>> stat1 , stat2 , cov2 , len = tree.statCov( 'x' , 'y' , 'z>0' , 100 , 10000 ) - """ - - - """ - import ostap.math.linalg - stat1 = Ostap.WStatEntity () - stat2 = Ostap.WStatEntity () - cov2 = Ostap.Math.SymMatrix(2) () - - with rootException() : - if cuts : length = Ostap.StatVar.statCov ( tree , - expression1 , - expression2 , - cuts , - stat1 , - stat2 , - cov2 , - *args ) - else : length = Ostap.StatVar.statCov ( tree , - expression1 , - expression2 , - stat1 , - stat2 , - cov2 , - *args ) - - return stat1 , stat2 , cov2, length - """ - - - -ROOT.TTree . statCov = _stat_cov_ -ROOT.TChain . statCov = _stat_cov_ - -# ============================================================================= -## get the statistic for the list of expressions -# @code -# tree = ... -# stats , cov2 , len = tree.statCovs( ['x' , 'y'] ) -# # apply some cuts -# stats , cov2 , len = tree.statCovs( [ 'x' , 'y' , 'z'] , 'z>0' ) -# # use only subset of events -# stats , cov2 , len = tree.statCovs( [ 'x' , 'y' , 'z' ], 'z>0' , 100 , 10000 ) -# @endcode -# @author Vanya BELYAEV Ivan.Belyaev@itep.ru -# @date 2017-02-19 -def _stat_covs_ ( tree , - expressions , - cuts = '' , *args ) : - """Get the statistic for the list of expressions - >>> tree = ... - >>> stats , cov2 , len = tree.statCovs( ['x' , 'y'] ) - Apply some cuts - >>> stats , cov2 , len = tree.statCovs( [ 'x' , 'y' , 'z'] , 'z>0' ) - Use only subset of events - >>> stats , cov2 , len = tree.statCovs( [ 'x' , 'y' , 'z' ], 'z>0' , 100 , 10000 ) - """ - ## - if isinstance ( expressions , string_types ) : - expressions = split_string ( expressions , strip = True , respect_groups = True ) - ## - - vars = strings ( expressions ) - N = len ( vars ) - - assert N , "Invalid 'expressions' %s" % str ( expressions ) - - - WSE = Ostap.WStatEntity - WV = std.vector( WSE ) - stats = WV () - - import ostap.math.linalg - import ostap.math.linalgt - - cov2 = Ostap.TMatrixSymD ( N ) - - with rootException() : - length = Ostap.StatVar.statCov ( tree , - vars , - cuts , - stats , - cov2 , *args ) - - assert N == len ( stats ) , "Invalid size 'stats' from 'Ostap.StatVar.statCov'" - assert cov2.IsValid() , "Invalid 'cov2' from 'Ostap.StatVar.statCov'" - assert N == cov2.GetNrows() , "Invalid size of 'cov2' from 'Ostap.StatVar.statCov'" - assert N == cov2.GetNcols() , "Invalid size of 'cov2' from 'Ostap.StatVar.statCov'" - - ## covert to tuple - stats = tuple ( WSE ( s ) for s in stats ) - - ## convert to S-matrix - cov2 = cov2.smatrix() - - return stats, cov2 , length - -ROOT.TTree . statCovs = _stat_covs_ -ROOT.TChain . statCovs = _stat_covs_ - - # ============================================================================= from ostap.stats.statvars import data_the_moment diff --git a/source/src/StatVar.cpp b/source/src/StatVar.cpp index ba7eca2b..2f2626f8 100644 --- a/source/src/StatVar.cpp +++ b/source/src/StatVar.cpp @@ -1570,6 +1570,11 @@ Ostap::StatVar::statVar const unsigned long first , const unsigned long last ) { + // ========================================================================== + Ostap::Assert ( nullptr != data , + "Invalid RotAbsData" , + "Ostap::StatVar::statVar" ) ; + // ========================================================================== Statistic result ; if ( 0 == data || last <= first ) { return result ; } // RETURN // @@ -1686,6 +1691,72 @@ Ostap::StatVar::statVars return result.empty() ? 0 : result[0].nEntries() ; } // ============================================================================ +/* calculate the covariance of two expressions + * @param tree (INPUT) the input tree + * @param exp1 (INPUT) the first expresiion + * @param exp2 (INPUT) the second expresiion + * @param cuts (INPUT) selection/weight expression + * @return number of processed events + * @author Vanya BELYAEV Ivan.Belyaev@itep.ru + * @date 2014-03-27 + */ +// ============================================================================ +Ostap::StatVar::WCovariance +Ostap::StatVar::statCov +( const RooAbsData* data , + const std::string& exp1 , + const std::string& exp2 , + const std::string& cuts , + const std::string& cut_range , + const unsigned long first , + const unsigned long last ) +{ + // =========================================================================== + Ostap::Assert ( nullptr != data , + "Invalid RotAbsData" , + "Ostap::StatVar::statCov" ) ; + // + // prepare the result + Ostap::StatVar::WCovariance result {} ; + // + if ( last <= first ) { return result ; } // RETURN + // + const std::unique_ptr formula1 { make_formula ( exp1 , *data ) } ; + const std::unique_ptr formula2 { make_formula ( exp2 , *data ) } ; + const std::unique_ptr selection { make_formula ( cuts , *data , true ) } ; + // + const bool weighted = data->isWeighted() ; + const char* cutrange = cut_range.empty() ? nullptr : cut_range.c_str() ; + // + const unsigned long the_last = std::min ( last , (unsigned long) data->numEntries() ) ; + // + // start the loop + for ( unsigned long entry = first ; entry < the_last ; ++entry ) + { + // + const RooArgSet* vars = data->get( entry ) ; + if ( nullptr == vars ) { break ; } // RETURN + if ( cutrange && !vars->allInRange ( cutrange ) ) { continue ; } // CONTINUE + // + // apply cuts: + const long double wc = selection ? selection -> getVal() : 1.0L ; + if ( !wc ) { continue ; } // CONTINUE + // apply weight: + const long double wd = weighted ? data->weight() : 1.0L ; + if ( !wd ) { continue ; } // CONTINUE + // cuts & weight: + const long double w = wd * wc ; + if ( !w ) { continue ; } // CONTINUE + // + const double v1 = formula1->getVal () ; + const double v2 = formula2->getVal () ; + // + result.add ( v1 , v2 , w ) ; + } + // + return result ; +} +// ============================================================================ /* calculate the covariance of several expressions * @param tree (INPUT) the inpout tree * @param vars (INPUT) expressions