diff --git a/.aux/test_with_lcg b/.aux/test_with_lcg index 70432bed..1fc4ab91 100755 --- a/.aux/test_with_lcg +++ b/.aux/test_with_lcg @@ -4,5 +4,5 @@ CMTCONFIG=$2 source /cvmfs/sft.cern.ch/lcg/views/${LCG}/${CMTCONFIG}/setup.sh source build/INSTALL/thisostap.sh cd build -ctest -N && cmake .. -DCMAKE_INSTALL_PREFIX=./INSTALL/ && ctest -R frame -j8 --output-on-failure +ctest -N && cmake .. -DCMAKE_INSTALL_PREFIX=./INSTALL/ && ctest -R '(frame|stat)' -j8 --output-on-failure diff --git a/ostap/frames/frames.py b/ostap/frames/frames.py index 845846a3..7a2db49d 100644 --- a/ostap/frames/frames.py +++ b/ostap/frames/frames.py @@ -51,11 +51,13 @@ # ============================================================================= from ostap.core.core import ( cpp , Ostap , strings , split_string , var_separators ) +from ostap.math.base import isequal, iszero from ostap.core.meta_info import root_info -from ostap.core.ostap_types import integer_types, string_types, sequence_types +from ostap.core.ostap_types import ( integer_types , num_types , + string_types , sequence_types ) from ostap.logger.utils import multicolumn from ostap.utils.progress_conf import progress_conf -from ostap.utils.basic import isatty +from ostap.utils.basic import isatty, loop_items import ostap.core.config as OCC import ostap.stats.statvars as SV import ostap.logger.table as T @@ -75,9 +77,23 @@ ## @see Ostap/DataFrame.h FrameNode = Ostap.FrameNode +# =============================================================================== +Frames_OK = False +std_move = None +has_std_move = False +try : + std_move = ROOT.std.move + has_std_move = True + Frames_OK = has_std_move and (6,25)<= root_info ## ATTENTION! +except AttributeError : + std_move = None + has_std_move = False + Frames_OK = False + + ## try : ## DataFrame = ROOT.ROOT.RDataFrame -## except AttributeError : +## except AttributeError :_new ## DataFrame = ROOT.ROOT.Experimental.TDataFrame ## # ============================================================================= ## try : @@ -89,6 +105,15 @@ if not hasattr ( Ostap ,'DataFrame' ) : Ostap.DataFrame = DataFrame if not hasattr ( Ostap ,'FrameNode' ) : Ostap.FrameNode = FrameNode +has_std_move = False +try : + std_move = ROOT.std.move + has_std_move = True and (6,25)<= root_info ## ATTENTION! +except AttributeError : + has_std_move = False + + +## type for column names CNT = DataFrame.ColumnNames_t # ============================================================================= @@ -316,141 +341,111 @@ def frame_statCov ( frame , return stat1 , stat2 , cov2 , length -# ================================================================================== -## get statistics of variable(s) +# ============================================================================== +## possible types of expressions +expression_types = string_types + ( ROOT.TCut , ) +# ============================================================================== +## Prepare the arguments: variable names and cuts # @code -# frame = .... -# stat = frame.statVar ( 'pt' , lazy = True ) -# stat = frame.statVar ( 'pt' , 'eta>0' , lazy = True ) +# vars_lst, cuts, input_string = vars_and_cuts ( 'x', 'y<0' ) +# vars_lst, cuts, input_string = vars_and_cuts ( ('x','y','z') , 'y<0' ) # @endcode -def _fr_statVar_new_ ( frame , expressions , cuts = '' , lazy = False ) : - """Get statistics of variable(s) - >>> frame = .... - >>> stat = frame.statVar ( 'pt' , lazy = True ) - >>> stat = frame.statVar ( 'pt' , 'eta>0' , lazy = True ) +def vars_and_cuts ( expressions , cuts ) : + """Prepare the arguments: variable names and cuts + >>> vars_lst, cuts, input_string = vars_and_cuts ( 'x', 'y<0' ) + >>> vars_lst, cuts, input_string = vars_and_cuts ( ('x','y','z') , 'y<0' ) """ - if isinstance ( frame , ROOT.TTree ) : frame = DataFrame ( frame ) - - node = as_rnode ( frame ) - + ## single string as input input_string = False - if isinstance ( expressions , string_types ) : + if isinstance ( expressions , expression_types ) : input_string = True - expressions = [ expressions ] + expressions = [ expressions ] + + assert expressions and all ( s and isinstance ( s , expression_types ) for s in expressions ) , \ + "Invalid expression(s) : %s" % str ( expressions ) + + exprs = tuple ( str(s).strip() for s in expressions ) + assert exprs and all ( exprs ) , "Invalid expression(s): %s" % str ( exprs ) + + assert isinstance ( cuts , expression_types ) or not cuts , \ + 'Invaild type of cuts: %s' % str ( cuts ) + + cuts = str ( cuts ) if cuts else '' + + return exprs, cuts, input_string + +# ================================================================================== +## helper functionm that defines expression and cuts +# @code +# frame = ... +# currnt , vexpr , cexpr, input_string = _fr_helper_ ( frame , 'x*x' , 'z<0' ) +# #endcode +def _fr_helper_ ( frame , expressions , cuts = '' ) : + """Helper function that defin4es expressions and cuts + >>> frame = ... + >>> current , vexpr , cexpr = _fr_helper_ ( frame , 'x*x' , 'z<0' ) + """ + + exprs, cuts, input_string = vars_and_cuts ( expressions , cuts ) + + ## Frame/Tree ? + if isinstance ( frame , ROOT.TTree ) : frame = DataFrame ( frame ) + node = as_rnode ( frame ) ## get the list of currently known names vars = frame_columns ( node ) all_vars = set ( vars ) - names = {} - current = node - for e in expressions : + current = node - e = str ( e ) - if e in vars : - names [ e ] = e - continue - + vnames = {} + for i,expr in enumerate ( exprs ) : + vname = expr + if not expr in all_vars : + used = tuple ( all_vars | set ( frame_columns ( current ) ) ) + vn = var_name ( 'var_' , used , expr , *vars ) + all_vars.add ( vn ) + current = current.Define ( vn , expr ) + vname = expression + vnames [ expr ] = vname + + cname = cuts + if cuts and not cuts in all_vars : used = tuple ( all_vars | set ( frame_columns ( current ) ) ) - vn = var_name ( 'var_' , used , e , *vars ) - all_vars.add ( vn ) - current = current.Define ( vn , e ) - names [ e ] = vn - print ( 'DEFINE:', e , vn ) + cn = var_name ( 'cut_' , used , cuts , *vars ) + all_vars.add ( cn ) + ncuts = '1.0*(%s)' % cuts + current = current.Define ( cn , ncuts ) + cname = cn - - cuts = str ( cuts ) - cname = cuts - if cuts and not cuts in vars : - used = tuple ( all_vars | set ( frame_columns ( current ) ) ) - vn = var_name ( 'cut_' , used , cuts , *vars ) - all_vars.add ( vn ) - current = current.Define ( vn , cuts ) - cname = vn - print ( 'DEFINE CUT:', cuts , vn ) + return current, vnames , cname, input_string - print ( 'CNAME:' , cname ) - print ( 'NAMES:' , names ) +# ================================================================================== +## the second helper method to implement "statistics"-related actions +def _fr_helper2_ ( frame , creator , expressions , cuts = '' , lazy = True ) : + """The seocnd helper method to implement statitic related actions + """ - results = {} - for e in names : + current, var_names, cut_name, input_string = _fr_helper_ ( frame , expressions , cuts ) - if cname : - print ( 'book WSTATVAR' , CNT ( [ names [ e ] , cname ] ) ) - results [ e ] = current.Book( ROOT.std.move ( Ostap.Actions.WStatVar() ) , CNT ( [ names [ e ] , cname ] ) ) - else : - print ( 'book STATVAR' , CNT ( 1 , names[e] ) ) - results [ e ] = current.Book( ROOT.std.move ( Ostap.Actions. StatVar() ) , CNT ( 1 , names[e] ) ) + results = {} + for expr, var_name in loop_items ( var_names ) : + results [ expr ] = creator ( current , var_name , cut_name ) + lasy = False if not lazy : - for e in results : - r = results [ e ] - results [ e ] = r.GetValue() + for expr, res in loop_items ( results ) : + results [ expr ] = res.GetValue() + rr = results [ expr ] if input_string and 1 == len ( results ) : - e , r = results.popitem() + _ , r = results.popitem() return r return results -# ================================================================================== -## get statistics of variable(s) -# @code -# frame = .... -# stat = frame.the_moment ( 5 , 'pt' ) -# stat = frame.the_moment ( 5 , 'pt' , 'eta>0' ) -# @endcode -def _fr_the_moment_ ( frame , N , expression , cuts = '' ) : - """Get statistics of variable(s) - >>> frame = .... - >>> stat = frame.the_moment ( 5 , 'pt' ) - >>> stat = frame.the_moment ( 5 , 'pt' , 'eta>0' ) - """ - - assert isinstance ( N , int ) and 0 <= N , 'Invalid order!' - assert isinstance ( expression , str ) , 'Invalid expression type!' - - if isinstance ( frame , ROOT.TTree ) : frame = DataFrame ( frame ) - - node = as_rnode ( frame ) - - ## get the list of currently known names - vars = frame_columns ( node ) - all_vars = set ( vars ) - - current = node - - vname = expression - if not expression in all_vars : - - used = tuple ( all_vars | set ( frame_columns ( current ) ) ) - vn = var_name ( 'var_' , used , expression , *vars ) - all_vars.add ( vn ) - current = current.Define ( vn , expression ) - vname = expression - - cuts = str ( cuts ) - cname = cuts - if cuts and not cuts in vars : - used = tuple ( all_vars | set ( frame_columns ( current ) ) ) - vn = var_name ( 'cut_' , used , cuts , *vars ) - all_vars.add ( vn ) - current = current.Define ( vn , cuts ) - cname = vn - - if cname : - ## TT = Ostap.Actions.WMoment_(N) - TT = ROOT.Detail.RDF.WStatAction ( Ostap.Math.WMoment_(N) ) - return current.Book ( ROOT.std.move ( TT () ) , CNT ( [ vname , cname ] ) ) - else : - ## TT = Ostap.Actions.Moment_(N) - TT = ROOT.Detail.RDF.StatAction ( Ostap.Math.Moment_(N) ) - return current.Book ( ROOT.std.move ( TT () ) , CNT ( 1 , vname ) ) - - return results - # ============================================================================ _types_1D = Ostap.Math.LegendreSum , Ostap.Math.Bernstein , Ostap.Math.ChebyshevSum , _types_2D = Ostap.Math.LegendreSum2 , Ostap.Math.Bernstein2D , @@ -1077,8 +1072,7 @@ def frame_prescale ( frame , prescale , name = '' ) : return node raise TypeError ( "Invalid type/value for 'prescale' %s/%s" %( prescale , type ( prescale ) ) ) - - + # ============================================================================== ## Get the moment for the frame object # @code @@ -1127,38 +1121,6 @@ def frame_central_moment ( frame , order , expression , cuts = '' ) : node = as_rnode ( frame ) return SV.data_central_moment ( node , order = order , expression = expression , cuts = cuts ) -# ============================================================================== -## Get the skewness for the frame object -# @code -# frame = ... -# value = frame_skewness ( 'b1*b2' , 'b3>0' ) -# @endcode -# @see Ostap::StatVar::skewness -def frame_skewness ( frame , expression , cuts = '' ) : - """ Get the skewness for the frame object - >>> frame = ... - >>> value = frame_skeness ( 'b1*b2' , 'b3>0' ) - - see `Ostap.StatVar.skewness` - """ - node = as_rnode ( frame ) - return SV.data_skewness ( node , expression = expression , cuts = cuts ) - -# ============================================================================== -## Get the kurtosis for the frame object -# @code -# frame = ... -# value = frame_kurtosis ( 'b1*b2' , 'b3>0' ) -# @endcode -# @see Ostap::StatVar::kurtosis -def frame_kurtosis ( frame , expression , cuts = '' ) : - """ Get the skewness for the frame object - >>> frame = ... - >>> value = frame_kurtosis ( 'b1*b2' , 'b3>0' ) - - see `Ostap.StatVar.skewness` - """ - node = as_rnode ( frame ) - return SV.data_kurtosis ( node , expression = expression , cuts = cuts ) - # ============================================================================== ## Get the quantile for the frame object @@ -1314,6 +1276,8 @@ def frame_deciles ( frame , expression , cuts = '' , exact = True ) : """ return frame_quantiles ( frame , quantiles = 10 , expression = expression , cuts = cuts , exact = exact ) + + # ============================================================================== ## Get the mean for the frame object # @code @@ -1358,21 +1322,370 @@ def frame_variance ( frame , expression , cuts = '' ) : def frame_rms ( frame , expression , cuts = '' ) : """ Get the RMS for the frame object >>> frame = ... - >>> value = frame_mean ( 'b1*b2' , 'b3>0' , exact = True ) - >>> value = frame_mean ( 'b1*b2' , 'b3>0' , exact = False ) ## use P2 algorithm + >>> value = frame_rms ( 'b1*b2' , 'b3>0' , exact = True ) + >>> value = frame_rms ( 'b1*b2' , 'b3>0' , exact = False ) ## use P2 algorithm - see `Ostap.StatVar.moment` """ - return frame_central_moment ( frame , order = 2 , expression = expression , cuts = cuts ) + return frame_variance ( frame , expression = expression , cuts = cuts ) ** 0.5 + +# ============================================================================== +## Get the skewness for the frame object +# @code +# frame = ... +# value = frame_skewness ( 'b1*b2' , 'b3>0' ) +# @endcode +# @see Ostap::StatVar::skewness +def frame_skewness ( frame , expression , cuts = '' ) : + """ Get the skewness for the frame object + >>> frame = ... + >>> value = frame_skeness ( 'b1*b2' , 'b3>0' ) + - see `Ostap.StatVar.skewness` + """ + node = as_rnode ( frame ) + return SV.data_skewness ( node , expression = expression , cuts = cuts ) + +# ============================================================================== +## Get the kurtosis for the frame object +# @code +# frame = ... +# value = frame_kurtosis ( 'b1*b2' , 'b3>0' ) +# @endcode +# @see Ostap::StatVar::kurtosis +def frame_kurtosis ( frame , expression , cuts = '' ) : + """ Get the kurtosis for the frame object + >>> frame = ... + >>> value = frame_kurtosis ( 'b1*b2' , 'b3>0' ) + - see `Ostap.StatVar.kurtosis` + """ + node = as_rnode ( frame ) + return SV.data_kurtosis ( node , expression = expression , cuts = cuts ) + from ostap.fitting.dataset import ds_draw as _ds_draw # ============================================================================== ## draw for frame def frame_draw ( frame , *args , **kwargs ) : - """Draw function for tehe frames + """Draw function for the frames """ node = as_rnode ( frame ) return _ds_draw ( node , *args , **kwargs ) + +# ================================================================================== +## get statistics of variable(s) +# @code +# frame = .... +# stat = frame.statVar ( 'pt' , lazy = True ) +# stat = frame.statVar ( 'pt' , 'eta>0' , lazy = True ) +# @endcode +def _fr_statVar_new_ ( frame , expressions , cuts = '' , lazy = False ) : + """Get statistics of variable(s) + >>> frame = .... + >>> stat = frame.statVar ( 'pt' , lazy = True ) + >>> stat = frame.statVar ( 'pt' , 'eta>0' , lazy = True ) + """ + + def screator ( node , var_name , cut_name ) : + if cut_name: return node.Book( ROOT.std.move ( Ostap.Actions.WStatVar() ) , CNT ( [ var_name , cut_name ] ) ) + else : return node.Book( ROOT.std.move ( Ostap.Actions. StatVar() ) , CNT ( 1 , var_name ) ) + + return _fr_helper2_ ( frame , screator , expressions , cuts , lazy = lazy ) + +# ================================================================================== +## get nEff through action +# @code +# frame = ... +# bEff = data_the_nEff ( 'x*x' , 'y>0' ) +# @endcode +def _fr_nEff_ ( frame , expressions , cuts = '' , lazy = False ) : + """Get nEff through action + >>> frame = ... + >>> nEff = data_get_nEff ( 'x*x' , 'y>0' ) + """ + return _fr_statVar_new_ ( frame , expressions , cuts = cuts , lazy = lazy ) + +# ================================================================================== +## get statistics of variable(s) +# @code +# frame = .... +# stat = frame.the_moment ( 5 , 'pt' ) +# stat = frame.the_moment ( 5 , 'pt' , 'eta>0' ) +# @endcode +# @see Ostap::Math::Moment_ +# @see Ostap::Math::WMoment_ +def _fr_the_moment_ ( frame , N , expressions , cuts = '' , lazy = True ) : + """Get statistics of variable(s) + >>> frame = .... + >>> stat = frame.the_moment ( 5 , 'pt' ) + >>> stat = frame.the_moment ( 5 , 'pt' , 'eta>0' ) + """ + + assert isinstance ( N , integer_types ) and 0 <= N , 'Invalid order!' + + current, vname , cname , input_string = _fr_helper_ ( frame , expressions , cuts ) + + def mcreator ( node , var_name , cut_name ) : + if cname : + ## TT = Ostap.Actions.WMoment_(N) + TT = ROOT.Detail.RDF.WStatAction ( Ostap.Math.WMoment_(N) ) + return current.Book ( ROOT.std.move ( TT () ) , CNT ( [ var_name , cut_name ] ) ) + else : + ## TT = Ostap.Actions.Moment_(N) + TT = ROOT.Detail.RDF.StatAction ( Ostap.Math.Moment_(N) ) + return current.Book ( ROOT.std.move ( TT () ) , CNT ( 1 , var_name ) ) + + return _fr_helper2_ ( frame , mcreator , expressions , cuts , lazy = lazy ) + +# ============================================================================ +## Get a mean via moment +# @code +# frame = ... +# stat = frame.the_mean ( 'x'x' , 'y<0' ) +# mean = stat.mean() +# @ndcode +def _fr_the_mean_ ( frame , expressions , cuts , errors = True , lazy = True ) : + """ Get a mean via moment + >>> frame = ... + >>> stat = frame.the_mean ( 'x'x' , 'y<0' ) + >>> mean = stat.mean() + """ + return _fr_the_moment_ ( frame , 2 if errors else 1 , expressions , cuts = cuts , lazy = lazy ) + +# ============================================================================ +## Get a rms via moment +# @code +# frame = ... +# stat = frame.the_rms ( 'x'x' , 'y<0' ) +# value = stat.rms() +# @ndcode +def _fr_the_rms_ ( frame , expressions , cuts , errors = True , lazy = True ) : + """Get a rms via moment + >>> frame = ... + >>> stat = frame.the_rms ( 'x'x' , 'y<0' ) + >>> value = stat.rms () + """ + return _fr_the_moment_ ( frame , 4 if errors else 2 , expressions , cuts = cuts , lazy = lazy ) + +# ============================================================================ +## Get a variance via moment +# @code +# frame = ... +# stat = frame.the_variance ( 'x'x' , 'y<0' ) +# value = stat.variance () +# @ndcode +def _fr_the_variance_ ( frame , expressions , cuts , errors = True , lazy = True ) : + """Get a variance via moment + >>> frame = ... + >>> stat = frame.the_variance( 'x'x' , 'y<0' ) + >>> value = stat.variance () + """ + return _fr_the_moment_ ( frame , 4 if errors else 2 , expressions , cuts = cuts , lazy = lazy ) + +# ============================================================================ +## Get a skewness via moment +# @code +# frame = ... +# stat = frame.the_skewness ( 'x'x' , 'y<0' ) +# mean = stat.skewness () +# @ndcode +def _fr_the_skewness_ ( frame , expressions , cuts , errors = True , lazy = True ) : + """Get a variance via moment + >>> frame = ... + >>> stat = frame.the_skewness ( 'x'x' , 'y<0' ) + >>> mean = stat.skreness () + """ + return _fr_the_moment_ ( frame , 6 if errors else 3 , expressions , cuts = cuts , lazy = lazy ) + +# ============================================================================ +## Get a kurtosis via moment +# @code +# frame = ... +# stat = frame.the_kurtosis ( 'x'x' , 'y<0' ) +# value = stat.kurtosis () +# @ndcode +def _fr_the_kurtosis_ ( frame , expressions , cuts , errors = True , lazy = True ) : + """Get a kurtosis via moment + >>> frame = ... + >>> stat = frame.the_kurtosis ( 'x'x' , 'y<0' ) + >>> value = stat.kurtosis () + """ + return _fr_the_moment_ ( frame , 8 if errors else 4 , expressions , cuts = cuts , lazy = lazy ) + + +# ============================================================================ +## Get an arithmetic mean +# @see Ostap::Math::ArithmeticMean +# @code +# frame = ... +# mean = frame_arithmetic_mean ( frame , 'x*x' , '0>> frame = ... + >>> mean = frame_arithmetic_mean ( frame , 'x*x' , '0>> frame = ... + >>> mean = frame_arithmetic_mean ( frame , 'x*x' , '0>> frame = ... + >>> mean = frame_harmonic_mean ( frame , 'x*x' , '0>> frame = ... + >>> mean = frame_power_mean ( frame , 5 , 'x*x' , '0>> frame = ... + >>> mean = frame_lehmer_mean ( frame , 5 , 'x*x' , '0 1 : + + # ========================================================================== + ## get the mean using moment + # @see frame_the_mean + def frame_mean ( frame , expression , cuts = '' ) : + """Get mean using moment + - see frame_the_mean + """ + return frame_the_mean ( frame , expression , cuts = cuts , errors = True ).mean() + # ========================================================================== + ## get variance using moment + # @see frame_the_variance + def frame_variance ( frame , expression , cuts = '' ) : + """Get variance using moment + - see frame_the_variance + """ + return frame_the_variance ( frame , expression , cuts = cuts , errors = True ) .variance () + # ========================================================================== + ## get RMS using moment + # @see frame_the_rms + def frame_rms ( frame , expression , cuts = '' ) : + """Get frame using moment + - see frame_the_rms + """ + return frame_the_rms ( frame , expression , cuts = cuts , errors = True ) .rms () + + + # ============================================================================== # decorate # ============================================================================== @@ -1410,38 +1723,71 @@ def frame_draw ( frame , *args , **kwargs ) : if FrameNode is DataFrame : frames = DataFrame , else : frames = DataFrame , FrameNode -# =============================================================================== -has_std_move = False -try : - mv = ROOT.std.move - has_std_move = True and (6,25)<= root_info ## ATTENTION! -except AttributeError : - has_std_move = False - # ================================================================================ -## if ( 6 , 25 ) <= root_info : -## if ( 6 , 16 ) <= root_info and has_std_move : -if ( 6 , 25 ) <= root_info and has_std_move : - frame_statVar = _fr_statVar_new_ - frame_statVars = _fr_statVar_new_ - frame_table = _fr_table_ - frame_the_moment = _fr_the_moment_ +if Frames_OK : + frame_statVar = _fr_statVar_new_ + frame_statVars = _fr_statVar_new_ + frame_table = _fr_table_ + frame_the_moment = _fr_the_moment_ + + frame_the_mean = _fr_the_mean_ + frame_the_rms = _fr_the_rms_ + frame_the_variance = _fr_the_variance_ + frame_the_skewness = _fr_the_skewness_ + frame_the_kurtosis = _fr_the_kurtosis_ + + frame_arithmetic_mean = _fr_arithmetic_mean_ + frame_geometric_mean = _fr_geometric_mean_ + frame_harmonic_mean = _fr_harmonic_mean_ + frame_power_mean = _fr_power_mean_ + frame_lehmer_mean = _fr_lehmer_mean_ for f in frames : - f.statVars = frame_statVars - _new_methods_ .append ( f.statVars ) - f.table = frame_table - _new_methods_ .append ( f.table ) - f.the_moment = frame_the_moment - _new_methods_ .append ( f.the_moment ) + f.statVars = frame_statVars + _new_methods_ .append ( f.statVars ) + f.table = frame_table + _new_methods_ .append ( f.table ) + f.the_moment = frame_the_moment + _new_methods_ .append ( f.the_moment ) + f.the_mean = frame_the_mean + _new_methods_ .append ( f.the_mean ) + f.the_rms = frame_the_rms + _new_methods_ .append ( f.the_rms ) + f.the_variance = frame_the_variance + _new_methods_ .append ( f.the_variance ) + f.the_skewness = frame_the_skewness + _new_methods_ .append ( f.the_skewness ) + f.the_kurtosis = frame_the_kurtosis + _new_methods_ .append ( f.the_kurtosis ) + f.arithmetic_mean = frame_arithmetic_mean + _new_methods_ .append ( f.arithmetic_mean ) + f.geometric_mean = frame_geometric_mean + _new_methods_ .append ( f.geometric_mean ) + f.harmonic_mean = frame_harmonic_mean + _new_methods_ .append ( f.harmonic_mean ) + f.power_mean = frame_power_mean + _new_methods_ .append ( f.power_mean ) + f.lehmer_mean = frame_lehmer_mean + _new_methods_ .append ( f.lehmer_mean ) _new_methods_ .append ( frame_statVars ) _new_methods_ .append ( frame_table ) _new_methods_ .append ( frame_the_moment ) - __all__ = __all__ + ( 'frame_statVars' , - 'frame_table' , - 'frame_the_moment' ) - + _new_methods_ .append ( frame_arithmetic_mean ) + __all__ = __all__ + ( 'frame_statVars' , + 'frame_table' , + 'frame_the_moment' , + 'frame_the_mean' , + 'frame_the_rms' , + 'frame_the_variance' , + 'frame_the_skewness' , + 'frame_the_kurtosis' , + 'frame_arithmetic_mean' , + 'frame_geometric_mean' , + 'frame_harmonic_mean' , + 'frame_power_mean' , + 'frame_lehmer_mean' ) + ROOT.TTree. fstatVars = frame_statVars ROOT.TTree. fthe_moment = frame_statVars @@ -1563,8 +1909,7 @@ def _rt_fproject_ ( tree , histo , *args ) : # ============================================================================= -## if ( 6 , 16 ) <= root_info and has_std_move : -if ( 6 , 25 ) <= root_info and has_std_move : +if Frames_OK : # ========================================================================= frame_param = _fr_param_ __all__ = __all__ + ( 'frame_param' , ) diff --git a/ostap/frames/tests/test_frames_frames.py b/ostap/frames/tests/test_frames_frames.py index e029f45d..c4581cfe 100644 --- a/ostap/frames/tests/test_frames_frames.py +++ b/ostap/frames/tests/test_frames_frames.py @@ -16,6 +16,7 @@ from ostap.core.meta_info import root_info from ostap.utils.utils import implicitMT from ostap.utils.timing import timing +import ostap.logger.table as T import ROOT, os, sys # ============================================================================= # logging @@ -24,16 +25,15 @@ if '__main__' == __name__ : logger = getLogger ( 'test_frames_frames' ) else : logger = getLogger ( __name__ ) # ============================================================================= +from ostap.frames.frames import Frames_OK - -if root_info < (6,16) : +if not Frames_OK : logger.warning ( "Tests are disabled for this version of ROOT %s" % str ( root_info ) ) else : - from ostap.frames.frames import DataFrame, frame_project, \ - frame_statVar, frame_statCov, frame_progress, has_std_move + from ostap.frames.frames import * # A simple helper function to fill a test tree def fill_tree ( tname , fname ) : @@ -41,12 +41,12 @@ def fill_tree ( tname , fname ) : tdf = DataFrame ( 10000 ) a = tdf.ProgressBar ( 10000 ) tdf.Define ("one", "1.0" )\ - .Define ("b1" , "(double) tdfentry_ + 1 ") \ + .Define ("b1" , "tdfentry_ + 1000.0") \ .Define ("b2" , "(1.0+b1)*(1.0+b1)" ) \ .Define ("b3" , "(1.0+b1)*(1.0+b1)" ) \ .Define ("b4" , "gRandom->Gaus()" ) \ .Define ("b5" , "gRandom->Gaus()" ) \ - .Define ("b6" , "gRandom->Gaus()" ) \ + .Define ("b6" , "10+0.5*gRandom->Gaus()" ) \ .Snapshot ( tname, fname ) # We prepare an input tree to run on @@ -58,59 +58,131 @@ def fill_tree ( tname , fname ) : # ============================================================================= -def test_frame0 () : - +def test_frame0 () : + logger = getLogger ( 'test_frame0' ) - if root_info < (6,16) : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain - logger.info ( 80*'*' ) logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) - + logger.info ( 'Tree :\n%s' % tree ) logger.info ( 'Frame :\n%s' % frame ) + + rows = [ ( 'Parameter' , 'Tree' , 'Frame' ) ] + + row = 'Lengh' , '%d' % len ( tree ) , '%d' % len ( frame ) + rows.append ( row ) + + row = 'Branches' , str ( tree.branches() ) , str ( frame.branches() ) + rows.append ( row ) - logger.info ( 'Len: %30s vs %-30s ' % ( len ( frame ) , len ( tree ) ) ) - logger.info ( 'Branches: %30s vs %-30s ' % ( frame.branches() , tree.branches() ) ) - logger.info ( 'nEff: %30s vs %-30s ' % ( frame.nEff () , tree.nEff () ) ) - logger.info ( 'nEff(b1): %30s vs %-30s ' % ( frame.nEff ('b1') , tree.nEff ('b1') ) ) - logger.info ( "m(5,50,'b1','b1/(b2+1'): %30s vs %-30s " % ( frame.get_moment ( 5 , 50 , 'b1' , 'b1/(b2+1)' ) , - tree .get_moment ( 5 , 50 , 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "m(5,'b1','b1/(b2+1'): %30s vs %-30s " % ( frame.moment ( 5 , 'b1' , 'b1/(b2+1)' ) , - tree .moment ( 5 , 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "cm(5,'b1','b1/(b2+1'): %30s vs %-30s " % ( frame.central_moment ( 2 , 'b1' , 'b1/(b2+1)' ) , - tree .central_moment ( 2 , 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "mean('b1','b1/(b2+1'): %30s vs %-30s " % ( frame.mean ( 'b1' , 'b1/(b2+1)' ) , - tree .mean ( 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "rms ('b1','b1/(b2+1'): %30s vs %-30s " % ( frame.rms ( 'b1' , 'b1/(b2+1)' ) , - tree .rms ( 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "skew('b1','b1/(b2+1'): %30s vs %-30s " % ( frame.skewness ( 'b1' , 'b1/(b2+1)' ) , - tree .skewness ( 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "kurt('b1','b1/(b2+1'): %30s vs %-30s " % ( frame.kurtosis ( 'b1' , 'b1/(b2+1)' ) , - tree .kurtosis ( 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "quantile(0.3, 'b1','b1<500'): %30s vs %-30s " % ( frame.quantile ( 0.3 , 'b1' , 'b1/(b2+1)' ) , - tree .quantile ( 0.3 , 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "median(0.3, 'b1','b1<500'): %30s vs %-30s " % ( frame.median ( 'b1' , 'b1/(b2+1)' ) , - tree .median ( 'b1' , 'b1/(b2+1)' ) ) ) - logger.info ( "terciles('b1','b1<500'): %30s vs %-30s " % ( frame.terciles ( 'b1' , 'b1/(b2+1)' ) , - tree .terciles ( 'b1' , 'b1/(b2+1)' ) ) ) + row = 'nEff' , '%.2f' % tree.nEff() , '%.2f' % frame.nEff() + rows.append ( row ) + + row = 'nEff(b6)' , '%.2f' % tree.nEff('b6') , '%.2f' % frame.nEff('b6') + rows.append ( row ) + + row = 'mean (b6)' , \ + tree .mean ( 'b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.mean ( 'b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'rms (b6)' , \ + tree .rms ( 'b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.rms ( 'b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'variance (b6)' , \ + tree .variance ( 'b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.variance ( 'b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'mean (b6,1)' , \ + tree .mean ( 'b6', '1' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.mean ( 'b6', '1' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'rms (b6,1)' , \ + tree .rms ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.rms ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'variance (b6,1)' , \ + tree .variance ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.variance ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + row = 'skewness (b6,1)' , \ + tree .skewness ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.skewness ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'kurtosis (b6,1)' , \ + tree .kurtosis ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.kurtosis ( 'b6' , '1' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'mean (b6,1/b6)' , \ + tree .mean ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.mean ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'rms (b6,1/b6)' , \ + tree .rms ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.rms ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'variance (b6,1/b6)' , \ + tree .variance ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.variance ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + row = 'skewness (b6,1/b6)' , \ + tree .skewness ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.skewness ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'kurtosis (b6,1/b6)' , \ + tree .kurtosis ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) , \ + frame.kurtosis ( 'b6' , '1/b6' ).toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + row = 'quantile (0.3,b6,1/b6)' , \ + '%s' % tree .quantile ( 0.3 , 'b6' , '1/b6' ) ,\ + '%s' % frame.quantile ( 0.3 , 'b6' , '1/b6' ) + rows.append ( row ) + + row = 'terciles (b6,1/b6)' , \ + '%s' % tree .terciles ( 'b6' , '1/b6' ) ,\ + '%s' % frame.terciles ( 'b6' , '1/b6' ) + rows.append ( row ) + + row = 'median (b6,1/b6)' , \ + '%s' % tree .median ( 'b6' , '1/b6' ) ,\ + '%s' % frame.median ( 'b6' , '1/b6' ) + rows.append ( row ) + + title = 'Frame/Tree statistics' + logger.info ( '%s\n%s' % ( title , T.table ( rows , title = title , prefix = '# ' , alignment = 'lrl' ) ) ) # ============================================================================= def test_frame1 ( ) : logger = getLogger ( 'test_frame1' ) - if root_info < ( 6 , 16 ) : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) + frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain @@ -133,37 +205,143 @@ def test_frame1 ( ) : def test_frame2 ( ) : logger = getLogger ( 'test_frame2' ) - if root_info < ( 6 , 16 ) : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return + + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain - s1 = tree.statVar ( 'b1' ) ## , '1/b1' ) - s2 = frame_statVar ( frame , 'b1' ) ## , '1/b1' ) + rows = [ ( 'Statistics' , 'Tree' , 'Frame' ) ] + + s1 = tree.statVar ( 'b1' ) + s2 = frame_statVar ( frame , 'b1' ) + + row = 'StatVar mean (b1)' , s1.mean().toString ( '%+.2f +/- %-.2f' ) , s2.mean().toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) + + s1 = tree.statVar ( 'b1' , '1/b1' ) + s2 = frame_statVar ( frame , 'b1' , '1/b1' ) - print ( 'FIRST s1/s2', type ( s1 ) , type ( s2 ) ) + row = 'StatVar mean (b1,1/b1)' , s1.mean().toString ( '%+.2f +/- %-.2f' ) , s2.mean().toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) - logger.info ('StatTree :%s' % s1 ) - logger.info ('StatFrame :%s' % s2 ) + s1 = tree.statVar ( 'b1' , 'b1>0' ) + s2 = frame_statVar ( frame , 'b1' , 'b1>0' ) - s1 = tree.statVar ( 'b1' , '1/b1' ) - s2 = frame_statVar ( frame , 'b1' , '1/b1' ) + row = 'StatVar mean (b1,b1>0)' , s1.mean().toString ( '%+.2f +/- %-.2f' ) , s2.mean().toString ( '%+.2f +/- %-.2f' ) + rows.append ( row ) - print ( 'SECONF s1/s2', type ( s1 ) , type ( s2 ) ) + s1 = tree.arithmetic_mean ( 'b1' ) + s2 = frame_arithmetic_mean ( frame , 'b1' ) + + row = 'Arithmetic mean (b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.arithmetic_mean ( 'b1' , '1/b1') + s2 = frame_arithmetic_mean ( frame , 'b1' , '1/b1') + + row = 'Arithmetic mean (b1,1/b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.arithmetic_mean ( 'b1' , 'b1>0') + s2 = frame_arithmetic_mean ( frame , 'b1' , 'b1>0') + row = 'Arithmetic mean (b1,b1>0)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.geometric_mean ( 'b1' ) + s2 = frame_geometric_mean ( frame , 'b1' ) + + row = 'Geometric mean (b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) - logger.info ('StatTree :%s' % s1 ) - logger.info ('StatFrame :%s' % s2 ) + s1 = tree.geometric_mean ( 'b1' , '1/b1') + s2 = frame_geometric_mean ( frame , 'b1' , '1/b1') + + row = 'Geometric mean (b1,1/b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.geometric_mean ( 'b1' , 'b1>0') + s2 = frame_geometric_mean ( frame , 'b1' , 'b1>0') + + row = 'Geometric mean (b1,b1>0)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.harmonic_mean ( 'b1' ) + s2 = frame_harmonic_mean ( frame , 'b1' ) + row = 'Harmonic mean (b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.harmonic_mean ( 'b1' , '1/b1') + s2 = frame_harmonic_mean ( frame , 'b1' , '1/b1') + + row = 'Harmonic mean (b1,1/b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.harmonic_mean ( 'b1' , 'b1>0') + s2 = frame_harmonic_mean ( frame , 'b1' , 'b1>0') + + row = 'Harmonic mean (b1,b1>0)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.power_mean ( 2 , 'b1' ) + s2 = frame_power_mean ( frame , 2 , 'b1' ) + + row = 'Power mean (2,b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.power_mean ( 2 , 'b1' , '1/b1') + s2 = frame_power_mean ( frame , 2 , 'b1' , '1/b1') + + row = 'Power mean (2,b1,1/b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.power_mean ( 2 , 'b1' , 'b1>0') + s2 = frame_power_mean ( frame , 2 , 'b1' , 'b1>0') + + row = 'Power mean (2,b1,b1>0)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.lehmer_mean ( 3 , 'b1' ) + s2 = frame_lehmer_mean ( frame , 3 , 'b1' ) + + row = 'Lehmer mean (3,b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.lehmer_mean ( 3 , 'b1' , '1/b1') + s2 = frame_lehmer_mean ( frame , 3 , 'b1' , '1/b1') + + row = 'Lehmer mean (3,b1,1/b1)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + s1 = tree.lehmer_mean ( 3 , 'b1' , 'b1>0') + s2 = frame_lehmer_mean ( frame , 3 , 'b1' , 'b1>0') + + row = 'Lehmer mean (3,b1,b1>0)' , '%+.2f' % s1.mean() , '%+.2f' % s2.GetValue().value() + rows.append ( row ) + + title = 'Frame/Tree statistics' + logger.info ( '%s\n%s' % ( title , T.table ( rows , title = title , prefix = '# ' , alignment = 'lcc' ) ) ) + + # ============================================================================= def test_frame3 () : logger = getLogger ( 'test_frame3' ) - if root_info < ( 6 , 16 ) : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return + + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) + + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) tree = Tree ( name = tname , file = fname ).chain @@ -172,28 +350,26 @@ def test_frame3 () : frame = DataFrame ( tname , fname ) pb = frame_progress ( frame , len ( tree ) ) message = 'Value: %d %s' % ( i , pb.GetValue() ) - ## sys.stdout.write('\n') logger.info ( message ) # ============================================================================= def test_frame4 () : logger = getLogger ( 'test_frame4' ) - logger = getLogger ( 'test_frame0' ) - if root_info < (6,16) : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) + frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain pb = frame_progress ( frame , len ( tree ) ) - frame = frame.Filter ( 'b1>100' , '#1' ) - frame = frame.Filter ( 'b1>200' , '#2' ) - frame = frame.Filter ( 'b1>300' , '#3' ) - frame = frame.Filter ( 'b1>400' , '#4' ) - frame = frame.Filter ( 'b1>500' , '#5' ) - cnt = frame.Count () + for i,c in enumerate ( range ( 0 , 10000 , 1000 ) , start = 1 ) : + frame = frame.Filter ( 'b1>%s' % c , '#cut%d' % i ) + cnt = frame.Count () report = frame.Report() from ostap.frames.frames import report_print, report_as_table @@ -204,14 +380,13 @@ def test_frame4 () : def test_frame5 () : logger = getLogger ( 'test_frame5' ) - if root_info < (6,16) : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return - - if not has_std_move : - logger.warning ( "Test is disabled (no std::move)" ) - return + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) + frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain @@ -225,10 +400,13 @@ def test_frame5 () : def test_frame6 () : logger = getLogger ( 'test_frame6' ) - if root_info < (6,16) : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return - + + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) + frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain @@ -258,19 +436,17 @@ def test_frame6 () : def test_frame7 () : logger = getLogger ( 'test_frame7' ) - if root_info < (6,16) or not has_std_move : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return - - if not has_std_move : - logger.warning ( "Test is disabled (no std::move)" ) - return - + + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) + frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain pb = frame_progress ( frame , len ( tree ) ) - bs = Ostap.Math.Bernstein ( 10 , -3 , 3 ) ls = Ostap.Math.LegendreSum ( 10 , -3 , 3 ) @@ -278,7 +454,6 @@ def test_frame7 () : from ostap.frames.frames import frame_param - rb = frame_param ( frame , bs , 'b4' , '(b4>-1)*1.01' ) rl = frame_param ( frame , ls , 'b5' ) rc = frame_param ( frame , cs , 'b6' ) @@ -297,14 +472,13 @@ def test_frame7 () : def test_frame8 () : logger = getLogger ( 'test_frame8' ) - if root_info < (6,16) or not has_std_move : + if not Frames_OK : logger.warning ( "Test is disabled for this version of ROOT %s" % str ( root_info ) ) return - - if not has_std_move : - logger.warning ( "Test is disabled (no std::move)" ) - return - + + logger.info ( 80*'*' ) + logger.info ( 'MT enabled? %s' % ROOT.ROOT.IsImplicitMTEnabled() ) + frame = DataFrame ( tname , fname ) tree = Tree ( name = tname , file = fname ).chain @@ -330,10 +504,10 @@ def test_frame8 () : # ============================================================================= if '__main__' == __name__ : - if (6,16) <= root_info : + if Frames_OK : - ## test_frame0 () - ## test_frame1 () + test_frame0 () + test_frame1 () test_frame2 () test_frame3 () test_frame4 () diff --git a/ostap/stats/statvars.py b/ostap/stats/statvars.py index e3d68c47..eebfbf82 100644 --- a/ostap/stats/statvars.py +++ b/ostap/stats/statvars.py @@ -10,7 +10,6 @@ - data_get_moment - calculate the moment - data_moment - get the moment (with uncertainty) - data_get_stat - get the momentt-based statistics -- data_the_moment - get the (central) moment - data_central_moment - get the central moment (with uncertainty) - data_mean - get the mean (with uncertainty) - data_variance - get the variance (with uncertainty) @@ -26,11 +25,17 @@ - data_quartiles - get three quartiles - data_quintiles - get four quintiles - data_deciles - get nine deciles +- data_the_moment - get the (central) moment +- data_the_mean - get mean via moment +- data_the_rms - get rms via moment +- data_the_variance - get variance via moment +- data_the_skewness - get skewness via moment +- data_the_kurtosis - get kurtosis via moment - data_harmonic_mean - get the (weighted) harmonic mean -- data_geometric_mean - get the geometric mean -- data_arithmetic_mean - get the geometric mean (just for completeness) -- data_power_mean - get the power mean -- data_lehmer_mean - get Lehmer mean +- data_geometric_mean - get the (weighte) geometric mean +- data_arithmetic_mean - get the (weighted) geometric mean +- data_power_mean - get the (weighted) power mean +- data_lehmer_mean - get the (weighted) Lehmer mean """ # ============================================================================= __version__ = "$Revision$" @@ -39,7 +44,6 @@ __all__ = ( 'data_get_moment' , ## calculate the moment 'data_moment' , ## get the moment (with uncertainty) - 'data_the_moment' , ## get the cental moment 'data_get_stat' , ## get the momentt-based statistics 'data_central_moment' , ## get the central moment (with uncertainty) 'data_mean' , ## get the mean (with uncertainty) @@ -56,16 +60,23 @@ 'data_quartiles' , ## get three quartiles 'data_quintiles' , ## get four quintiles 'data_deciles' , ## get nine deciles - 'data_harmonic_mean' , ## get the (weighted) harmonic mean - 'data_geometric_mean' , ## get the geometric mean - 'data_arithmetic_mean' , ## get the geometric mean (just for completeness) - 'data_power_mean' , ## get the power mean - 'data_lehmer_mean' , ## get Lehmer mean + 'data_the_moment' , ## get the cental moment + 'data_the_mean' , ## get mean via moment + 'data_the_rms' , ## get rms via moment + 'data_the_variance' , ## get variance via moment + 'data_the_skewness' , ## get skewness via moment + 'data_the_kurtosis' , ## get kurtosis via moment + 'data_harmonic_mean' , ## get the (weighted) harmonic mean + 'data_geometric_mean' , ## get the (weighted) geometric mean + 'data_arithmetic_mean' , ## get the (weighted) arithmetic mean + 'data_power_mean' , ## get the (weighted) power mean + 'data_lehmer_mean' , ## get the (weighted) Lehmer mean 'data_decorate' , ## technical function to decorate the class ) # ============================================================================= -from builtins import range -from ostap.core.core import Ostap, rootException +from builtins import range +from ostap.core.core import Ostap, rootException +from ostap.core.ostap_types import string_types, num_types import ostap.stats.moment import ostap.logger.table as T # ============================================================================= @@ -154,13 +165,12 @@ def data_central_moment ( data , order , expression , cuts = '' , *args ) : expression , cuts , *args ) - -# ============================================================================= -## Get the (w)moments-base statsitocs from data +# ============================================================================== +## Get the statsitocs from data # @code # statobj = Ostap.Math.MinValue() # data = ... -# result = data.get_moment ( statobj , 'x+y' , 'pt>1' ) +# result = data.get_stat( statobj , 'x+y' , 'pt>1' ) # @encode # @see Ostap::Math::Moment # @see Ostap::Math::WMoment @@ -169,20 +179,20 @@ def data_get_stat ( data , statobj , expression , cuts = '' , *args ) : """Get the (w)moments -based statistics >>> data = ... >>> stat = Ostap.Math.MinValue() - >>> result = data.get_thestat ( stat , 'x/y+z' , '0>> result = data.get_stat ( stat , 'x/y+z' , '0>> data = ... >>> result = data_arithmetic_mean ( data , 5 , 'pt' , 'eta>0' ) @@ -347,22 +354,17 @@ def data_lehmer_mean ( data , p , expression , cuts = '' , *args ) : assert isinstance ( p , num_types ) , 'Invalid p-parameter!' - if ( not cuts ) and isinstance ( data , ROOT.TTree ) : - - if p == 0 or iszero ( p ) : statobj = Ostap.Math.HarmonicMean ( ) - elif 2 * p == 1 or isequal ( p , 0.5 ) : statobj = Ostap.Math.GeometricMean ( ) - elif p == 1 or isequal ( p , 1.0 ) : statobj = Ostap.Math.ArithmeticMean ( ) - else : statobj = Ostap.Math.LehmerMean ( p ) + if p == 0 or iszero ( p ) : return data_harmonic_mean ( data , expression , cuts , *args ) + elif p == 1 or isequal ( p , 1.0 ) : return data_arithmetic_mean ( data , expression , cuts , *args ) + if ( not cuts ) and isinstance ( data , ROOT.TTree ) : + statobj = Ostap.Math.LehmerMean ( p ) return data_get_stat ( data , statobj , expression , '' , *args ) - if p == 0 or iszero ( p ) : statobj = Ostap.Math.WHarmonicMean ( ) - elif 2 * p == 1 or isequal ( p , 0.5 ) : statobj = Ostap.Math.WGeometricMean ( ) - elif p == 1 or isequal ( p , 1.0 ) : statobj = Ostap.Math.WArithmeticMean ( ) - else : statobj = Ostap.Math.WLehmerMean ( p ) - + statobj = Ostap.Math.WLehmerMean ( p ) return data_get_stat ( data , statobj , expression , cuts , *args ) + # ============================================================================= ## Get the moments or order order as Ostap::Math::(W)Moment_ # @code @@ -374,11 +376,11 @@ def data_lehmer_mean ( data , p , expression , cuts = '' , *args ) : def data_the_moment ( data , order , expression , cuts = '' , *args ) : """Get the moments or order order as Ostap::Math::(W)Moment_ >>> data = ... - >>> moment = data.the_momemnt ( 5 , 'x/y+z' , '0>> moment = data.the_moment ( 5 , 'x/y+z' , '0>> data = ... + >>> value = data_the_mean ( data , 'x' ,'0>> data = ... + >>> value = data_the_rmsn ( data , 'x' ,'0>> data = ... + >>> value = data_the_variance ( data , 'x' ,'0>> data = ... + >>> value = data_the_skewness ( data , 'x' ,'0>> data = ... + >>> value = data_the_kurtosis ( data , 'x' ,'0 - using StatAction = ROOT::Detail::RDF::StatAction ; + using StatAction = ROOT::Detail::RDF::StatAction ; template - using WStatAction = ROOT::Detail::RDF::WStatAction ; + using WStatAction = ROOT::Detail::RDF::WStatAction ; using StatVar = StatAction ; using WStatVar = WStatAction ; @@ -690,12 +690,18 @@ namespace Ostap template using WMoment_ = WStatAction > ; - using GeometricMean = StatAction ; - using ArithmeticMean = StatAction ; - using HarmonicMean = StatAction ; - using PowerMean = StatAction ; - using LehmerMean = StatAction ; - + using GeometricMean = StatAction ; + using ArithmeticMean = StatAction ; + using HarmonicMean = StatAction ; + using PowerMean = StatAction ; + using LehmerMean = StatAction ; + + using WGeometricMean = WStatAction ; + using WArithmeticMean = WStatAction ; + using WHarmonicMean = WStatAction ; + using WPowerMean = WStatAction ; + using WLehmerMean = WStatAction ; + template using Poly1Action = ROOT::Detail::RDF::Poly1Action ; template @@ -704,7 +710,6 @@ namespace Ostap using Poly3Action = ROOT::Detail::RDF::Poly4Action ; template using Poly4Action = ROOT::Detail::RDF::Poly4Action ; - using LegendrePoly = Poly1Action ; using ChebyshevPoly = Poly1Action ; diff --git a/source/include/Ostap/Moments.h b/source/include/Ostap/Moments.h index 5a292a4d..61c39ab0 100644 --- a/source/include/Ostap/Moments.h +++ b/source/include/Ostap/Moments.h @@ -25,20 +25,31 @@ namespace Ostap namespace Math { // ======================================================================== - /** @struct Moment - * Helper (empty) base class for moment-counters - * - it is not really neeeded for C++, but it simplifies python decorations + /** @class Statistic + * Helper abstract base class for statistic counters */ - class Moment + class Statistic { public : // ====================================================================== - virtual ~Moment() ; + virtual ~Statistic() ; /// add new value to the counter virtual void update ( const double x ) = 0 ; // ====================================================================== } ; // ======================================================================== + /** @class Moment + * Helper (empty) base class for weighted statistic counters + * - it is not really neeeded for C++, but it simplifies python decorations + */ + class Moment : public Statistic + { + public : + // ====================================================================== + virtual ~Moment() ; + // ====================================================================== + } ; + // ======================================================================== /// forward declaration template class Moment_ ; /// template spccialization for N=0 @@ -94,6 +105,8 @@ namespace Ostap // ====================================================================== /// get number of entries inline unsigned long long size () const { return m_prev.size () ; } + /// get effective number of entries + inline unsigned long long nEff () const { return m_prev.nEff () ; } /// get the mean value (if \f$ 1 \le N \$4) inline long double mu () const { return m_prev.mu () ; } /// empty ? @@ -465,7 +478,9 @@ namespace Ostap { return 0 == k || 2 == k ? 1 : 0 ; } // ====================================================================== /// get number of entries - inline unsigned long long size () const { return m_size ; } + inline unsigned long long size () const { return m_size ; } + /// get effective number of entries + inline unsigned long long nEff () const { return size () ; } /// empty ? inline bool empty () const { return 0 == m_size ; } /// ok ? @@ -575,7 +590,9 @@ namespace Ostap { return 0 == k || 2 == k ? 1 : 0 ; } // ====================================================================== /// get number of entries - inline unsigned long long size () const { return m_prev.size() ; } + inline unsigned long long size () const { return m_prev.size () ; } + /// get effective number of entries + inline unsigned long long nEff () const { return m_prev.nEff () ; } // get the mean value inline long double mu () const { return m_mu ; } /// empty ? @@ -731,18 +748,30 @@ namespace Ostap // Weighted moments // ======================================================================== + // ======================================================================== - /** @struct WMoment + /** @class WStatistic + * Helper (empty) base class for weighted statistics + */ + class WStatistic + { + public : + // ====================================================================== + virtual ~WStatistic () ; + /// add new value to the counter + virtual void update ( const double x , const double w = 1 ) = 0 ; + // ====================================================================== + } ; + // ======================================================================== + /** @class WMoment * Helper (empty) base class for weighted moment-counters * - it is not really neeeded for C++, but it simplifies python decorations */ - class WMoment + class WMoment : public WStatistic { public : // ====================================================================== virtual ~WMoment() ; - /// add new value to the counter - virtual void update ( const double x , const double w = 1 ) = 0 ; // ====================================================================== } ; // ======================================================================== @@ -905,7 +934,7 @@ namespace Ostap // if ( !this->ok() ) { return 0 ; } // - const auto n = this->nEff () ; + const long double n = this->w() ; // ATENTION! // const long double muo = this->template M_ () / n ; const long double mu2o = this->template M_<2*K> () / n ; @@ -1339,13 +1368,14 @@ namespace Ostap * \f$ \left(x_1x_2...x_n\right)^{\frac{1}{n}} \f$ * @see https://en.wikipedia.org/wiki/Geometric_mean */ - class GeometricMean : public Moment + class GeometricMean : public Statistic { public: // ====================================================================== /// get the geometric mean - inline double value () const { return std::pow ( 2 , m_log.mean() ) ; } - // ====================================================================== + inline double mean () const { return value () ; } + inline double value () const { return std::pow ( 2 , m_log.mean() ) ; } + // ====================================================================== public: // ====================================================================== inline GeometricMean& operator+=( const double x ) { return add ( x ) ; } @@ -1392,20 +1422,20 @@ namespace Ostap Moment_<1> m_log {} ; // ====================================================================== }; - // ======================================================================== /** @class HarmonicMean * Calcualet the harmonic mean * \f$ \frac{n}{ \frac{1}{x_1} + ... + \frac{1}{x_n}} \f$ * @see https://en.wikipedia.org/wiki/Harmonic_mean */ - class HarmonicMean : public Moment + class HarmonicMean : public Statistic { public: // ====================================================================== /// get the harmonic mean inline double value () const { return 1. / m_inv.mean() ; } - // ====================================================================== + inline double mean () const { return value () ; } + // ====================================================================== public: // ====================================================================== inline HarmonicMean& operator+=( const double x ) { return add ( x ) ; } @@ -1453,7 +1483,7 @@ namespace Ostap * \f$ \left(\frac{1}{n}\sum x_i^p \right)^{\frac{1}{p}}\f$ * @see https://en.wikipedia.org/wiki/Power_mean */ - class PowerMean : public Moment + class PowerMean : public Statistic { public: // ====================================================================== @@ -1464,6 +1494,7 @@ namespace Ostap // ====================================================================== /// get the power mean inline double value () const { return std::pow ( m_pow.mean() , 1 / m_p ) ; } + inline double mean () const { return value () ; } // ====================================================================== public: // ====================================================================== @@ -1522,7 +1553,7 @@ namespace Ostap * - \f$ p \rigtharrow + \infty\f$ : maximal value * @see https://en.wikipedia.org/wiki/Lehmer_mean */ - class LehmerMean : public Moment + class LehmerMean : public Statistic { public: // ====================================================================== @@ -1533,7 +1564,8 @@ namespace Ostap // ====================================================================== /// get the power mean inline double value () const { return m_lp.mean() / m_lpm1.mean () ; } - // ====================================================================== + inline double mean () const { return value () ; } + // ====================================================================== public: // ====================================================================== inline LehmerMean& operator+=( const double x ) { return add ( x ) ; } @@ -1584,19 +1616,19 @@ namespace Ostap /// get the counter of x^(p-1) Moment_<1> m_lpm1 {} ; // ====================================================================== - }; - + } ; // ======================================================================== /** @class WGeometricMean * Calculate the weighted geometric mean * @see https://en.wikipedia.org/wiki/Geometric_mean */ - class WGeometricMean : public WMoment + class WGeometricMean : public WStatistic { public: // ====================================================================== /// get the geometric mean inline double value () const { return std::pow ( 2 , m_log.mean() ) ; } + inline double mean () const { return value () ; } // ====================================================================== public: // ====================================================================== @@ -1654,12 +1686,13 @@ namespace Ostap * Calcualet the weighted harmonic mean * @see https://en.wikipedia.org/wiki/Harmonic_mean */ - class WHarmonicMean : public WMoment + class WHarmonicMean : public WStatistic { public: // ====================================================================== /// get the harmonic mean inline double value () const { return 1. / m_inv.mean() ; } + inline double mean () const { return value () ; } // ====================================================================== public: // ====================================================================== @@ -1711,7 +1744,7 @@ namespace Ostap * Calculate the weighted power mean * @see https://en.wikipedia.org/wiki/Power_mean */ - class WPowerMean : public WMoment + class WPowerMean : public WStatistic { public: // ====================================================================== @@ -1722,6 +1755,7 @@ namespace Ostap // ====================================================================== /// get the weighter power mean inline double value () const { return std::pow ( m_pow.mean() , 1 / m_p ) ; } + inline double mean () const { return value () ; } // ====================================================================== public: // ====================================================================== @@ -1768,14 +1802,12 @@ namespace Ostap WMoment_<1> m_pow {} ; // ====================================================================== }; - - // ======================================================================== /** @class WLehmerMean * Calculate the weighted Lehmer mean * @see https://en.wikipedia.org/wiki/Lehmer_mean */ - class WLehmerMean : public WMoment + class WLehmerMean : public WStatistic { public: // ====================================================================== @@ -1785,7 +1817,8 @@ namespace Ostap public: // ====================================================================== /// get the Lehmer mean - inline double value () const { return m_lp.mean() / m_lpm1.mean () ; } + inline double value () const { return m_lp.mean () / m_lpm1.mean () ; } + inline double mean () const { return value () ; } // ====================================================================== public: // ====================================================================== @@ -2443,7 +2476,8 @@ namespace Ostap { if ( !m.ok() || m.size() < 2 ) { return VE ( s_INVALID_MOMENT , -1 ) ; } // RETURN // - const double m2 = m.template moment_<2> () ; + const double m2 = m.template M_<2> () / m.w() ; + // if ( 0 > m2 ) { return VE ( s_INVALID_MOMENT , -1 ) ; } // RETURN // if ( m.size() < 4 ) { return m2 ; } // RETURN diff --git a/source/include/Ostap/StatVar.h b/source/include/Ostap/StatVar.h index 6344ed7a..431471ba 100644 --- a/source/include/Ostap/StatVar.h +++ b/source/include/Ostap/StatVar.h @@ -27,8 +27,8 @@ class RooAbsData ; // RooFit // ============================================================================ template class TMatrixTSym ; // ROOT // ============================================================================ -namespace Ostap { namespace Math { class Moment ; } } -namespace Ostap { namespace Math { class WMoment ; } } +namespace Ostap { namespace Math { class Statistic ; } } +namespace Ostap { namespace Math { class WStatistic ; } } // ============================================================================ namespace Ostap { @@ -804,7 +804,7 @@ namespace Ostap * @date 2023-02-28 */ static unsigned long statCov - ( const RooAbsData* tree , + ( const RooAbsData* tree , const std::vector& vars , std::vector& stats , TMatrixTSym& cov2 , @@ -1522,59 +1522,59 @@ namespace Ostap * @see Ostap::Math::Moment */ static Ostap::StatusCode the_moment - ( TTree* tree , - Ostap::Math::Moment& moment , - const std::string& expression , - const unsigned long first = 0 , - const unsigned long last = LAST ) ; + ( TTree* tree , + Ostap::Math::Statistic& moment , + const std::string& expression , + const unsigned long first = 0 , + const unsigned long last = LAST ) ; // ======================================================================== /** get the moment as Ostap::Math::WMoment_ * @see Ostap::Math::WMoment_ * @see Ostap::Math::WMoment */ static Ostap::StatusCode the_moment - ( TTree* tree , - Ostap::Math::WMoment& moment , - const std::string& expression , - const std::string& selection , - const unsigned long first = 0 , - const unsigned long last = LAST ) ; + ( TTree* tree , + Ostap::Math::WStatistic& moment , + const std::string& expression , + const std::string& selection , + const unsigned long first = 0 , + const unsigned long last = LAST ) ; // ======================================================================== /** get the moment as Ostap::Math::WMoment_ * @see Ostap::Math::WMoment_ * @see Ostap::Math::WMoment */ static Ostap::StatusCode the_moment - ( const RooAbsData* data , - Ostap::Math::WMoment& moment , - const std::string& expression , - const std::string& selection = "" , - const std::string& cutrange = "" , - const unsigned long first = 0 , - const unsigned long last = LAST ) ; + ( const RooAbsData* data , + Ostap::Math::WStatistic& moment , + const std::string& expression , + const std::string& selection = "" , + const std::string& cutrange = "" , + const unsigned long first = 0 , + const unsigned long last = LAST ) ; // ======================================================================== /** get the moment as Ostap::Math::WMoment_ * @see Ostap::Math::WMoment_ * @see Ostap::Math::WMoment */ static Ostap::StatusCode the_moment - ( const RooAbsData* data , - Ostap::Math::WMoment& moment , - const std::string& expression , - const std::string& selection , - const unsigned long first , - const unsigned long last = LAST ) ; + ( const RooAbsData* data , + Ostap::Math::WStatistic& moment , + const std::string& expression , + const std::string& selection , + const unsigned long first , + const unsigned long last = LAST ) ; // ======================================================================== /** get the moment as Ostap::Math::WMoment_ * @see Ostap::Math::WMoment_ * @see Ostap::Math::WMoment */ static Ostap::StatusCode the_moment - ( const RooAbsData* data , - Ostap::Math::WMoment& moment , - const std::string& expression , - const unsigned long first , - const unsigned long last = LAST ) ; + ( const RooAbsData* data , + Ostap::Math::WStatistic& moment , + const std::string& expression , + const unsigned long first , + const unsigned long last = LAST ) ; // ======================================================================== public: // ======================================================================== diff --git a/source/src/Moments.cpp b/source/src/Moments.cpp index 059f346b..b2936fba 100644 --- a/source/src/Moments.cpp +++ b/source/src/Moments.cpp @@ -26,6 +26,14 @@ const double Ostap::Math::Moments::s_INVALID_MOMENT = -1 * s_INFINITY ; // =========================================================================== // virtual destructor // =========================================================================== +Ostap::Math::Statistic::~Statistic(){} +// =========================================================================== +// virtual destructor +// =========================================================================== +Ostap::Math::WStatistic::~WStatistic(){} +// =========================================================================== +// virtual destructor +// =========================================================================== Ostap::Math::Moment::~Moment(){} // =========================================================================== // virtual destructor diff --git a/source/src/StatVar.cpp b/source/src/StatVar.cpp index 9d9d0f67..4e7f99df 100644 --- a/source/src/StatVar.cpp +++ b/source/src/StatVar.cpp @@ -592,8 +592,8 @@ namespace _quantiles_ ( TTree& tree , const std::set& quantiles , // 0