From e21511e0bc31c35ca60d3cd01610243c4003d292 Mon Sep 17 00:00:00 2001 From: Vanya Belyaev Date: Tue, 30 Jul 2024 08:00:07 +0200 Subject: [PATCH] fix? --- ostap/fitting/dataset.py | 399 ++++++++++++++-------- ostap/stats/statvars.py | 41 ++- ostap/tools/tests/test_tools_reweight2.py | 4 +- ostap/tools/tests/test_tools_reweight3.py | 8 +- ostap/trees/trees.py | 375 +++++++++----------- ostap/utils/utils.py | 40 ++- source/include/Ostap/StatEntity.h | 4 +- source/include/Ostap/WStatEntity.h | 6 + 8 files changed, 503 insertions(+), 374 deletions(-) diff --git a/ostap/fitting/dataset.py b/ostap/fitting/dataset.py index eaec9799..341ca9bd 100644 --- a/ostap/fitting/dataset.py +++ b/ostap/fitting/dataset.py @@ -26,19 +26,19 @@ # ============================================================================= from builtins import range from collections import defaultdict -from ostap.core.meta_info import root_info +from ostap.core.meta_info import root_info, ostap_version from ostap.core.core import ( Ostap, VE, SE , hID , dsID , strings , valid_pointer , split_string , ROOTCWD , var_separators , - loop_items ) + loop_items , cidict_fun ) from ostap.core.ostap_types import ( integer_types , string_types , - num_types , + num_types , dictlike_types , list_types , sequence_types ) -from ostap.math.base import islong +from ostap.math.base import islong, axis_range from ostap.fitting.variables import valid_formula, make_formula -from ostap.trees.cuts import expression_types, vars_and_cuts -import ostap.trees.trees +from ostap.trees.cuts import expression_types, vars_and_cuts +from ostap.utils.utils import evt_range, LAST_ENTRY import ostap.fitting.roocollections import ostap.fitting.printable import ROOT, random, math, sys, ctypes @@ -812,6 +812,9 @@ def _rds_make_unique_ ( dataset , ROOT.RooAbsDataStore . __iter__ , ROOT.RooAbsDataStore . __contains__ ] +# =============================================================================\ +## number of warninf prints +_printed = 10 # ============================================================================= ## Helper project method for RooDataSet/DataFrame/... and similar objects # @@ -827,14 +830,14 @@ def _rds_make_unique_ ( dataset , # @see RooDataSet # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2013-07-06 -def ds_project ( dataset , - histo , - what , - cuts = '' , - cut_range = '' , - first = 0 , - last = None , - progress = False ) : +def ds_project ( dataset , + histo , + what , + cuts = '' , + cut_range = '' , + first = 0 , + last = LAST_ENTRY , + progress = False ) : """Helper project method for RooDataSet/DataFrame/... and similar objects >>> h1 = ROOT.TH1D(... ) @@ -844,7 +847,8 @@ def ds_project ( dataset , >>> dataset.project ( h1 , 'm', 'chi2<10' ) ## use histo """ - ## 1) redirect to approproate methods + ## 1) redirect to approproate method + ## ROOT.Tree if isinstance ( dataset , ROOT.TTree ) : assert not cut_range, "ds_project(tree,...) cannot be used with cut_range %s" % cut_range from ostap.trees.trees import tree_project @@ -853,80 +857,93 @@ def ds_project ( dataset , cuts = cuts , first = first , last = last ) + target = histo ## 2) if the histogram is specified by the name, try to locate it ROOT memory - if isinstance ( histo , string_types ) : - hname = histo - groot = ROOT.ROOT.GetROOT() - h = groot.FindObject ( hname ) + if isinstance ( target , string_types ) : + hname = str ( target ) + groot = ROOT.ROOT.GetROOT() + h = groot.FindObject ( hname ) assert h , 'Cannot get locate histo by name %s' % histos assert isinstance ( h , ROOT.TH1 ) , 'the object %s i snot ROOT.TH1' % type ( h ) - histo = h - + target = h + - target = histo - - ## input historgam? + ## 3) input histogram? input_histo = isinstance ( target , ROOT.TH1 ) - from ostap.trees.param import param_types_nD + + ## 4) vali dtarget? + from ostap.trees.param import param_types_nD assert input_histo or isinstance ( target , param_types_nD ) , 'Invalid target/histo type %s' % type ( target ) - ## (4) dimension of the target + if isinstance ( histo , ROOT.TProfile2D ) : + histo.Reset() + logger.error ('ds_project: TProfile2D is not (yet) supported') + return histo + elif isinstance ( histo , ROOT.TProfile ) : + logger.error ('ds_project: TProfile is not (yet) supported') + return histo + + ## copy/clone the target + def target_copy ( t ) : return t.Clone() if isinstance ( t , ROOT.TH1 ) else type ( t ) ( t ) + ## reset the target + def target_reset ( t ) : + if isinstance ( t , ROOT.TH1 ) : t.Reset() + else : t *= 0.0 + return t + + ## 5) reset the target + target = target_reset ( target ) + + + ## (5) dimension of the target dim = target.dim () assert 1 <= dim <= 4 , 'Invalid dimension of target: %s' % dim - ## (5) parse input expressions + ## (6) parse input expressions varlst, cuts, input_string = vars_and_cuts ( what , cuts ) + if input_string and 2 <= len ( varlst ) and ostap_info < (1,11) : + if 0 < _to_print : + logger.attention ("From ostap v1.10.1.9 variables are treted in natural order (no reverse!)") + _to_print -= 1 print ( 'VARIABLES/1', varlst, cuts ) nvars = len ( varlst ) assert ( 1 == dim and dim <= nvars ) or dim == nvars , \ 'Mismatch between the target/histo dimension %d and input variables %s' % ( dim , varlst ) - + + print ( 'PROJECT-7' ) + + ## RooAbsData if isinstance ( dataset , ROOT.RooAbsData ) : - ## 3) adjust the first/last - length = len ( dataset ) - if not last : last = length + 1 - elif last < 0 : last += length - if first < 0 : first += length - assert 0 <= first <= last , 'Invalid first/last setting %s/^s' % ( first , last ) + + ## 3) adjust the first/last + first , last = evt_range ( len ( dataset ) , first , last ) + + if not first < last : return target ## RETURN + tail = cuts , cut_range , first , last + + ## soemthinug convertibel to DataFrame/Frame/Node else : - assert not cut_range , 'cut-range is not allowed!' - logger.warning ( 'ds_project(%s): ignored first/last %s/%s ' % ( type ( dataset ) , first , last ) ) + + assert not cut_range , 'cut-range is not allowed!' + if ( firts, last ) != ( 0 , LAST_RNTRY ) : + logger.warning ( "ds_project(%s): ignored `first'/`last' %s/%s " % ( type ( dataset ) , first , last ) ) from ostap.frames.frames import as_rnode frame = as_rnode ( dataset ) dataset = frame tail = () - - args = ( target , ) + varlst + tail - - ## copy/clone the target - def target_copy ( t ) : return t.Clone() if isinstance ( t , ROOT.TH1 ) else type ( t ) ( t ) - ## reset the target - def target_reset ( t ) : - if isinstance ( t , ROOT.TH1 ) : t.Reset() - else : t *= 0.0 - return t - - ## reset the target - target = target_reset ( target ) - from ostap.utils.progress_conf import progress_conf - if isinstance ( histo , ROOT.TProfile2D ) : - histo.Reset() - logger.error ('ds_project: TProfile2D is not (yet) supported') - return histo - elif isinstance ( histo , ROOT.TProfile ) : - logger.error ('ds_project: TProfile is not (yet) supported') - return histo - + the_args = ( target , ) + varlst + tail + print ( 'DS_PROJECT' , dim , nvars, varlst , the_args ) ## very special case of projection several expressions into the same target if 1 == dim and dim < nvars : + print ( 'DS_PROJECT/1' , dim , nvars, varlst , tail ) ## very special case of projection several expressions into the same target htmp = target_copy ( target ) ## prepare temporary object for var in varlst : @@ -939,20 +956,23 @@ def target_reset ( t ) : target += htmp del htmp elif 1 == dim : - if progress : sc = Ostap.HistoProject.project ( dataset , progress_conf () , *args ) - else : sc = Ostap.HistoProject.project ( dataset , *args ) + print ( 'DS_PROJECT/2' , dim , nvars, varlst , tail ) + if progress : sc = Ostap.HistoProject.project ( dataset , progress_conf () , *the_args ) + else : sc = Ostap.HistoProject.project ( dataset , *the_args ) if not sc.isSuccess() : logger.error ( "Error from Ostap.HistoProject.project %s" % sc ) elif 2 == dim : - if progress : sc = Ostap.HistoProject.project2 ( dataset , progress_conf () , *args ) - else : sc = Ostap.HistoProject.project2 ( dataset , *args ) + print ( 'DS_PROJECT/3' , dim , nvars, varlst , tail ) + if progress : sc = Ostap.HistoProject.project2 ( dataset , progress_conf () , *the_args ) + else : sc = Ostap.HistoProject.project2 ( dataset , *the_args ) if not sc.isSuccess() : logger.error ( "Error from Ostap.HistoProject.project2 %s" % sc ) + print ( 'DS_PROJECT/4' , dim , nvars, varlst , tail ) elif 3 == dim : - if progress : sc = Ostap.HistoProject.project3 ( dataset , progress_conf () , *args ) - else : sc = Ostap.HistoProject.project3 ( dataset , *args ) + if progress : sc = Ostap.HistoProject.project3 ( dataset , progress_conf () , *the_args ) + else : sc = Ostap.HistoProject.project3 ( dataset , *the_args ) if not sc.isSuccess() : logger.error ( "Error from Ostap.HistoProject.project2 %s" % sc ) elif 4 == dim : - if progress : sc = Ostap.HistoProject.project4 ( dataset , progress_conf () , *args ) - else : sc = Ostap.HistoProject.project4 ( dataset , *args ) + if progress : sc = Ostap.HistoProject.project4 ( dataset , progress_conf () , *the_args ) + else : sc = Ostap.HistoProject.project4 ( dataset , *the_args ) if not sc.isSuccess() : logger.error ( "Error from Ostap.HistoProject.project2 %s" % sc ) return target if sc.isSuccess() else None @@ -969,100 +989,124 @@ def target_reset ( t ) : # @see RooDataSet # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2013-07-06 -def ds_draw ( dataset , what , cuts = '' , opts = '' , *args , **kwargs ) : +def ds_draw ( dataset , + what , + cuts = '' , + opts = '' , + cut_range = '' , + first = 0 , + last = LAST_ENTRY , + delta = 0.05 , **kwargs ) : """Helper draw method for drawing of RooDataSet >>> dataset.draw ( 'm', 'chi2<10' ) ## cuts & weight >>> dataset.draw ( 'm', '(chi2<10)*weight' ) ## use drawing options >>> dataset.draw ( 'm', '(chi2<10)*weight' , 'e1' ) - ## start form event #1000 - >>> dataset.draw ( 'm', '(chi2<10)*weight' , 'e1' , 1000 ) + ## start form event #1000 >>> dataset.draw ( 'm', '(chi2<10)*weight' , 'e1' , 1000 ) ## for event in range 1000< i <10000 >>> dataset.draw ( 'm', '(chi2<10)*weight' , 'e1' , 1000 , 100000 ) """ - if isinstance ( cuts , ROOT.TCut ) : cuts = str ( cuts ).strip() - if isinstance ( what , str ) : what = what.strip() - if isinstance ( cuts , str ) : cuts = cuts.strip() - if isinstance ( opts , str ) : opts = opts.strip() - - ## delegate to TTree for non-weighted datasets with TTree-based storage type - if hasattr ( dataset , 'isWeighted') and not dataset.isWeighted() \ - and isinstance ( what , str ) \ - and isinstance ( cuts , str ) \ - and isinstance ( opts , str ) : - if hasattr ( dataset , 'store' ) : - store = dataset.store() - if store : - tree = store.tree() - if tree : return tree.Draw( what , cuts , opts , *args ) - - if isinstance ( what , string_types ) : - - ## ATTENTION reverse here! - what = tuple ( reversed ( [ v.strip() for v in split_string ( what , var_separators , strip = True , respect_groups = True ) ] ) ) - return ds_draw ( dataset , what , cuts , opts , *args , **kwargs ) - - elif isinstance ( what , ROOT.RooArgList ) : - - what = tuple ( v for v in what ) - return ds_draw ( dataset , what , cuts , opts , *args , **kwargs ) - - assert isinstance ( what , list_types ) and 1 <= len ( what ) <=3 , \ - "ds_draw: invalid 'what' %s" % what - - if not args : first, last = 0 , -1 - elif 2<= len ( args ) : - nentries , first = args[0] , args[1] - last = first + nentries if 0 < nentries else len ( dataset ) - elif 1<= len ( args ) : - nentries = args[0] - last = nentries if 0 < nentries else len ( dataset ) - - if 3 == len ( what ) : - w1 = what [ 0 ] - mn1 , mx1 = ds_var_range ( dataset , w1 , cuts ) - w2 = what [ 1 ] - mn2 , mx2 = ds_var_range ( dataset , w2 , cuts ) - w3 = what [ 2 ] - mn3 , mx3 = ds_var_range ( dataset , w3 , cuts ) - nbinsx = kwargs.pop ( 'nbinsx' , 20 ) - nbinsy = kwargs.pop ( 'nbinsy' , 20 ) - nbinsz = kwargs.pop ( 'nbinsz' , 20 ) - histo = ROOT.TH3F ( hID() , "%s:%s:%s" % ( w3 , w2 , w1 ) , - nbinsx , mn1 , mx1 , - nbinsy , mn2 , mx2 , - nbinsz , mn3 , mx3 ) ; histo.Sumw2() - - ds_project ( dataset , histo , what , cuts , first , last ) - histo.draw ( opts , **kwargs ) - return histo + + ## decode variables/cuts + varlst, cuts, input_string = vars_and_cuts ( what , cuts ) - elif 2 == len ( what ) : - w1 = what [ 0 ] - mn1 , mx1 = ds_var_range ( dataset , w1 , cuts ) - w2 = what [ 1 ] - mn2 , mx2 = ds_var_range ( dataset , w2 , cuts ) - nbinsx = kwargs.pop ( 'nbinsx' , 20 ) - nbinsy = kwargs.pop ( 'nbinsy' , 20 ) - histo = ROOT.TH2F ( hID() , "%s:%s" % ( w2 , w1 ) , - nbinsx , mn1 , mx1 , - nbinsy , mn2 , mx2 ) ; histo.Sumw2() - ds_project ( dataset , histo , what , cuts , first , last ) - histo.draw ( opts , **kwargs ) + nvars = len ( varlst ) + assert 1 <= nvars <= 3 , "Invalid number of variables: %s" % str ( varlst ) + + ## get the suitable ranges for the variables + ranges = ds_range ( dataset , varlst , cuts = cuts , cut_range = cut_range , first = first , last = last , delta = delta ) + for _, r in loop_items ( ranges ) : + mn , mx = r + ## no useful entries + if mx < mn : + logger.warning ("No entries, no draw, return None" ) + return None + + assert len ( ranges ) == nvars , 'Invalid ranges: %s' % str ( ranges ) + + print ('RANGES' , ranges ) + + + from ostap.utils.cidict import cidict + kw = cidict ( transform = cidict_fun , **kwargs ) + + if 1 == nvars : + xvar = varlst [ 0 ] + xmin, xmax = ranges [ xvar ] + # + xmin = kw.pop ( 'xmin' , xmin ) + xmax = kw.pop ( 'xmax' , xmax ) + assert xmin < xmax , "Invalid xmin/xmax setting!" + # + xbins = kw.pop ( 'xbins' , kw.pop ( 'bins' , kw.pop ( 'nbinsx' , kw.pop ( 'nbins' , kw.pop ( 'binsx' , 100 ) ) ) ) ) + # + # book the histogram + histo = ROOT.TH1F ( hID() , "%s" % ( xvar ) , xbins , xmin , xmax ) ; histo.Sumw2() + # + ds_project ( dataset , histo , varlst , cuts = cuts , cut_range = cut_range , first = first , last = last ) + histo.draw ( opts , **kw ) return histo - elif 1 == len ( what ) : - w1 = what [ 0 ] - mn1 , mx1 = ds_var_range ( dataset , w1 , cuts ) - nbinsx = kwargs.pop ( 'nbinsx' , 100 ) - histo = ROOT.TH1F ( hID() , "%s" % ( w1 ) , - nbinsx , mn1 , mx1 ) ; histo.Sumw2() - ds_project ( dataset , histo , what , cuts , first , last ) - histo.draw ( opts , **kwargs ) + elif 2 == nvars : + + xvar = varlst [ 0 ] + yvar = varlst [ 1 ] + xmin, xmax = ranges [ xvar ] + ymin, ymax = ranges [ yvar ] + # + xmin = kw.pop ( 'xmin' , xmin ) + xmax = kw.pop ( 'xmax' , xmax ) + assert xmin < xmax , "Invalid xmin/xmax setting!" + # + ymin = kw.pop ( 'ymin' , ymin ) + ymax = kw.pop ( 'ymax' , ymax ) + assert ymin < xmax , "Invalid ymin/ymax setting!" + # + xbins = kw.pop ( 'xbins' , kw.pop ( 'nbinsx' ,kw.pop ( 'binsx' , 50 ) ) ) + ybins = kw.pop ( 'ybins' , kw.pop ( 'nbinsy' ,kw.pop ( 'binsy' , 50 ) ) ) + # + histo = ROOT.TH12 ( hID() , "x = %s , y = %s" % ( xvar , yvar ) , + xbins , xmin , xmax , + ybins , ymin , ymax ) ; histo.Sumw2() + ds_project ( dataset , histo , varlst , cuts = cuts , cut_range = cut_range , first = first , last = last ) + histo.draw ( opts , **kw ) return histo - raise TypeError ( 'ds_draw, invalid case' ) + elif 3 == nvars : + + xvar = varlst [ 0 ] + yvar = varlst [ 1 ] + zvar = varlst [ 2 ] + # + xmin, xmax = ranges [ xvar ] + ymin, ymax = ranges [ yvar ] + zmin, zmax = ranges [ zvar ] + # + xmin = kw.pop ( 'xmin' , xmin ) + xmax = kw.pop ( 'xmax' , xmax ) + assert xmin < xmax , "Invalid xmin/xmax setting!" + # + ymin = kw.pop ( 'ymin' , ymin ) + ymax = kw.pop ( 'ymax' , ymax ) + assert ymin < xmax , "Invalid ymin/ymax setting!" + # + zmin = kw.pop ( 'zmin' , zmin ) + zmax = kw.pop ( 'zmax' , zmax ) + assert zmin < zmax , "Invalid zmin/zmax setting!" + # + # + xbins = kw.pop ( 'xbins' , kw.pop ( 'nbinsx' , kw.pop ( 'binsx' , 20 ) ) ) + ybins = kw.pop ( 'ybins' , kw.pop ( 'nbinsy' , kw.pop ( 'binsy' , 20 ) ) ) + zbins = kw.pop ( 'zbins' , kw.pop ( 'nbinsz' , kw.pop ( 'binsz' , 20 ) ) ) + # + histo = ROOT.TH13 ( hID() , "x = %s , y = %s , z = %s" % ( xvar , yvar , zvar ) , + xbins , xmin , xmax , + ybins , ymin , ymax , + zbins , zmin , zmax ) ; histo.Sumw2() + ds_project ( dataset , histo , varlst , cuts = cuts , cut_range = cut_range , first = first , last = last ) + histo.draw ( opts , **kw ) + return histo # ============================================================================= ## get the attibute for RooDataSet @@ -1077,11 +1121,12 @@ def _ds_getattr_ ( dataset , aname ) : return getattr ( _vars , aname ) ## get the attibute for RooDataSet # ============================================================================= - -def get_var( self, aname ) : +def get_var ( self, aname ) : _vars = self.get() return getattr ( _vars , aname ) + + # ============================================================================= ## Get min/max for the certain variable/expression in dataset # @code @@ -1133,6 +1178,9 @@ def ds_var_minmax ( dataset , var , cuts = '' , delta = 0.0 ) : ROOT.RooDataSet .vminmax , ] + + + # ============================================================================= ## Is there at least one entry that satisfy selection criteria? # @code @@ -1180,9 +1228,62 @@ def ds_var_range ( dataset , var , cuts = '' ) : """ ## min/max values mn , mx = ds_var_minmax ( dataset , var , cuts ) - from ostap.math.base import axis_range return axis_range ( mn , mx , delta = 0.05 ) + + +# =============================================================================\ +## get min/max for the expressions/variables +## @code +# dataset = ... +# result = ds_minmax ( dataset , 'sin(x)*100*y' , 'x<0' ) +# results = ds_minmax ( dataset , 'x,y,z,t,u,v' , 'x<0' ) +# @endcode +def ds_minmax ( dataset , + expressions , + cuts = '' , + cut_range = '' , + first = 0 , + last = LAST_ENTRY ) : + + if isinstance ( dataset , ROOT.RooAbsData ) : + args = dataset , expressions , cuts , cut_range , first, last + elif not cut_range : + args = dataset , expressions , cuts , first, last + else : + raise TypeError ( "dataset/cut_range : %s/`%s' are not consistent" % ( type ( dataset ), cut_range ) ) + import ostap.stats.statvars as SV + return SV.data_minmax ( *args ) + +# =============================================================================\ +## Get suitable ranges for drawing expressions/variables +## @code +# dataset = ... +# result = ds_range ( dataset , 'sin(x)*100*y' , 'x<0' ) +# results = ds_range ( dataset , 'x,y,z,t,u,v' , 'x<0' ) +# @endcode +def ds_range ( dataset , + expressions , + cuts = '' , + cut_range = '' , + first = 0 , + last = LAST_ENTRY , + delta = 0.05 ) : + + results = ds_minmax ( dataset , + expressions , + cuts = cuts , + cut_range = cut_range , + first = first , + last = last ) + + if isinstance ( results , dictlike_types ) : + for k , r in loop_items ( results ) : + results [ k ] = axis_range ( *r , delta = delta ) + else : results = axis_range ( *results , delta = delta ) + ## + return results + # ============================================================================= ## clear dataset storage if not hasattr ( ROOT.RooDataSet , '_old_reset_' ) : diff --git a/ostap/stats/statvars.py b/ostap/stats/statvars.py index c0500e1f..567f2540 100644 --- a/ostap/stats/statvars.py +++ b/ostap/stats/statvars.py @@ -11,12 +11,12 @@ - data_moment - get the moment (with uncertainty) - data_get_stat - get the momentt-based statistics - data_central_moment - get the central moment (with uncertainty) -- data_mean - get the mean (with uncertainty) - data_nEff - get the effective number of entries - data_sum - get the (weigted) sum +- data_mean - get the mean (with uncertainty) +- data_rms - get the RMS (with uncertainty) - data_variance - get the variance (with uncertainty) - data_dispersion - get the dispersion (with uncertainty) -- data_rms - get the RMS (with uncertainty) - data_skewness - get the skewness (with uncertainty) - data_kurtosis - get the (excess) kurtosis (with uncertainty) - data_quantile - get the quantile @@ -38,6 +38,8 @@ - data_arithmetic_mean - get the (weighted) geometric mean - data_power_mean - get the (weighted) power mean - data_lehmer_mean - get the (weighted) Lehmer mean +- data_statistics - get the statistics for variables +- data_minmax - get the (min/max) for variables """ # ============================================================================= __version__ = "$Revision$" @@ -51,6 +53,7 @@ 'data_nEff' , ## get the effective number of entries 'data_sum' , ## get the (weigted) sum 'data_statistics' , ## get the statistics + 'data_minmax' , ## get min/max values for variables 'data_covariance' , ## get the covariance 'data_covariances' , ## get the covariances 'data_statvector' , ## get the stat-vector @@ -264,7 +267,7 @@ def data_statistics ( data , expressions , cuts = '' , *args ) : with rootException() : return StatVar.statVar ( data , var_lst[0] , cuts , *args ) - ## several variables are specofied + ## several variables are specified from ostap.core.core import strings names = strings ( *var_lst ) results = StatVar.Statistics() @@ -278,6 +281,33 @@ def data_statistics ( data , expressions , cuts = '' , *args ) : for v,r in zip ( var_lst , results ) : result [ v ] = r return result +# ============================================================================== +## Get the minmax from data +# @code +# data = ... +# result = data_minmax ( data , 'x+y' , 'pt>1' ) +# results = data_minmax ( data , 'x;y;z' , 'pt>1' ) ## result is dictionary +# @encode +# @see Ostap::StatEntity +# @see Ostap::WStatEntity +# @see Ostap::StatVar::statVar +def data_minmax ( data , expressions , cuts = '' , *args ) : + """Get min/max from data + >>> data = ... + >>> result = data_minmax ( data , 'x/y+z' , '0>> results = data_minmax ( data , 'x/y;z' , '0>> tree = ... # get the tree @@ -108,8 +106,8 @@ def _iter_cuts_ ( tree , cuts = '' , first = 0 , last = _large , progress = Fals >>> for i in tree.withCuts ( 'pt>10' , active = ( 'pt' , 'y') ) : sum_y += i.y """ # - last = min ( last , len ( tree ) ) - first = max ( 0 , first ) + ## redefine first/last + first, last = evt_range ( len ( tree ) , first , last ) ## # if not cuts : cuts = '1' @@ -122,7 +120,7 @@ def _iter_cuts_ ( tree , cuts = '' , first = 0 , last = _large , progress = Fals else : context = NoContext () - from ostap.utils.progress_conf import progress_conf + from ostap.utils.progress_conf import progress_conf with context : if progress : pit = Ostap.PyIterator ( tree , progress_conf () , cuts , first , last ) @@ -150,7 +148,7 @@ def _iter_cuts_ ( tree , cuts = '' , first = 0 , last = _large , progress = Fals # ============================================================================= -## Iterator over ``good events'' in TTree/TChain: +## Iterator over `good events' in TTree/TChain: # @code # >>> tree = ... # get the tree # >>> for i in tree( 0, 100, 'pt>5' ) : print i.y @@ -159,18 +157,15 @@ def _iter_cuts_ ( tree , cuts = '' , first = 0 , last = _large , progress = Fals # @see Ostap::Formula # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2013-05-06 -def _tc_call_ ( tree , first = 0 , last = -1 , cuts = None , progress = False , active = () ) : +def _tc_call_ ( tree , first = 0 , last = LAST_ENTRY , cuts = None , progress = False , active = () ) : """Iterator over ``good events'' in TTree/TChain: >>> tree = ... # get the tree >>> for i in tree(0, 100 , 'pt>5' ) : print i.y """ - # - if last < 0 : last = _large - - last = min ( last , len ( tree ) ) - first = max ( 0 , first ) + + first, last = evt_range ( len ( tree ) , first , last ) if active : @@ -183,7 +178,7 @@ def _tc_call_ ( tree , first = 0 , last = -1 , cuts = None , progress = False , from ostap.utils.basic import NoContext context = NoContext () - from ostap.utils.progress_conf import progress_conf + from ostap.utils.progress_conf import progress_conf with context : if cuts : ## use Ostap.PyIterator @@ -244,16 +239,14 @@ def get_result ( vct ) : # for row in tree.rows ( 'a a+b/c sin(d)' , 'd>0' ) : # print ( row ) # @code -def _tt_rows_ ( tree , variables , cuts = '' , first = 0 , last = -1 , progress = False , active = () ) : +def _tt_rows_ ( tree , variables , cuts = '' , first = 0 , last = LAST_ENTRY , progress = False , active = () ) : """Iterate over tree entries and get a row/array of values for each good entry >>> tree = ... >>> for row in tree.rows ( 'a a+b/c sin(d)' , 'd>0' ) : >>> print ( row ) """ - - if last < 0 : last = _large - last = min ( last , len ( tree ) ) - first = max ( 0 , first ) + ## redefine first/last + first, last = evt_range ( len ( tree ) , first , last ) if isinstance ( variables , string_types ) : variables = split_string ( variables , var_separators , strip = True , respect_groups = True ) @@ -333,6 +326,9 @@ def _tt_rows_ ( tree , variables , cuts = '' , first = 0 , last = -1 , progress ROOT.TTree .rows = _tt_rows_ +# ============================================================================= +## number of warning prints +_to_print = 10 # ============================================================================= ## help project method for ROOT-trees and chains # @@ -363,14 +359,14 @@ def _tt_rows_ ( tree , variables , cuts = '' , first = 0 , last = -1 , progress # @see TTree::Project # @author Vanya BELYAEV Ivan.Belyaev@itep.ru # @date 2013-07-06 -def tree_project ( tree , - histo , - what , - cuts = '' , - first = 0 , - last = None , - use_frame = True , ## use DataFrame ? - progress = False ) : +def tree_project ( tree , + histo , + what , + cuts = '' , + first = 0 , + last = LAST_ENTRY , + use_frame = True , ## use DataFrame ? + progress = False ) : """Helper project method >>> tree = ... @@ -392,13 +388,8 @@ def tree_project ( tree , """ - print ( 'PROJECT-0' ) - ## 1) adjust the first/last - length = len ( tree ) - if not last : last = length + 1 - elif last < 0 : last += length - if first < 0 : first += length - assert 0 <= first <= last , 'Invalid first/last setting %s/^s' % ( first , last ) + ## 1) adjust the first/last + first , last = evt_range ( len ( tree ) , first , last ) print ( 'PROJECT-1' ) ## 2) if the histogram is specified by the name, try to locate it ROOT memory @@ -438,12 +429,12 @@ def tree_project ( tree , print ( 'PROJECT-5' ) ## use frame if requested and if/when possible - if use_frame and input_histo and 0 == first and len ( tree ) <= last : + if False and use_frame and input_histo and 0 == first and len ( tree ) <= last : if ( 6 , 19 ) <= root_info : import ostap.frames.frames as F frame = F.DataFrame ( tree ) - if progress : frame , _ = frame_progress ( frame , len ( tree ) ) - result = frame_project ( frame , target , expressions = what , cuts = cuts , lazy = False ) + if progress : frame , _ = F.frame_progress ( frame , len ( tree ) ) + result = F.frame_project ( frame , target , expressions = what , cuts = cuts , lazy = False ) return result print ( 'PROJECT-6' ) @@ -454,7 +445,11 @@ def tree_project ( tree , ## 3) parse input expressions varlst, cuts, input_string = vars_and_cuts ( what , cuts ) - + if input_string and 2 <= len ( varlst ) and ostap_info < (1,11) : + if 0 < _to_print : + logger.attention ("From ostap v1.10.1.9 variables are trested in natural order (no reverse!)") + _to_print -= 1 + print ( 'VARIABLES/1', varlst, cuts ) nvars = len ( varlst ) @@ -522,179 +517,117 @@ def target_reset ( t ) : ROOT.TTree .project = tree_project ROOT.TChain.project = tree_project -# ============================================================================= -## help project method for ROOT-trees and chains -# -# @code -# >>> h1 = ROOT.TH1D(... ) -# >>> tree.Project ( h1.GetName() , 'm', 'chi2<10' ) ## standart ROOT -# -# >>> h1 = ROOT.TH1D(... ) -# >>> tree.project ( h1.GetName() , 'm', 'chi2<10' ) ## ditto -# -# >>> h1 = ROOT.TH1D(... ) -# >>> tree.project ( h1 , 'm', 'chi2<10' ) ## use histo -# -# @endcode -# -# @param tree (INPUT) the tree -# @param histo (INPUT/UPDATE) the histogram or histogram name -# @param what (INPUT) variable/expression to be projected. -# It could be a list/tuple of variables/expressions -# or just a comma or semicolumn-separated expression -# @param cuts (INPUT) expression for cuts/weights -# @param options (INPUT) options to be propagated to TTree.Project -# @param nentries (INPUT) number of entries to process -# @param firstentry (INPUT) first entry to process -# @param use_frame (INPUT) use DataFrame for processing? -# @param silent (INPUT) silent processing? -# @see TTree::Project -# @author Vanya BELYAEV Ivan.Belyaev@itep.ru -# @date 2013-07-06 -def tree_project_old ( tree , - histo , - what , - cuts = '' , - options = '' , - nentries = -1 , - firstentry = 0 , - use_frame = False , ## use DataFrame ? - silent = False ) : - """Helper project method + + +def tree_draw ( tree , + what , + cuts = '' , + first = 0 , + last = LAST_ENTRY , + use_frame = False , + delta = 0.05 , **kwargs ) : ## use DataFrame ? - >>> tree = ... + ## adjust first/last indices + first , last = evt_range ( len ( tree ) , first , last ) - >>> h1 = ROOT.TH1D(... ) - >>> tree.Project ( h1.GetName() , 'm', 'chi2<10' ) ## standart ROOT + ## decode variables/cuts + varlst, cuts, input_string = vars_and_cuts ( what , cuts ) - >>> h1 = ROOT.TH1D(... ) - >>> tree.project ( h1.GetName() , 'm', 'chi2<10' ) ## ditto + nvars = len ( varlst ) + assert 1 <= nvars <= 3 , "Invalid number of variables: %s" % str ( varlst ) - >>> h1 = ROOT.TH1D(... ) - >>> tree.project ( h1 , 'm', 'chi2<10' ) ## use histo + ## get the suitable ranges for the variables + ranges = tree_range ( dataset , varlst , cuts = cuts , first = first , last = last , delta = delta ) + for _, r in loop_items ( ranges ) : + mn , mx = r + ## no useful entries + if mx < mn : + logger.warning ("No entries, no draw, return None" ) + return None - - histo : the histogram (or histogram name) - - what : variable/expression to project. It can be expression or list/tuple of expression or comma (or semicolumn) separated expression - - cuts : selection criteria/weights - """ - # + assert len ( ranges ) == nvars , 'Invalid ranges: %s' % str ( ranges ) - if nentries < 0 : nentries = _large - - args = () - if options or 0 < firstentry or 0 < nentries < _large : - args = options , nentries , firstentry - - - if isinstance ( histo , ROOT.TH1 ) : hname = histo.GetName() - elif isinstance ( histo , string_types ) : - hname = histo - groot = ROOT.ROOT.GetROOT() - h = groot.FindObject ( hname ) - if h and isinstance ( h , ROOT.TH1 ) : histo = h - else : histo = None - - assert histo and isinstance ( histo , ROOT.TH1 ) ,\ - "project: invalid type of ``histo'': %s " % type ( histo ) - - ## reset the histogram - histo.Reset() - - if isinstance ( cuts , ROOT.TCut ) : cuts = str ( cuts ) - - ## comma, column or semicolumn separated list - if isinstance ( what , string_types ) : - ## attention! note reversed here! - what = [ w.strip() for w in reversed ( split_string ( what , var_separators , strip = True , respect_groups = True ) ) ] ## attention! - - assert isinstance ( what , sequence_types ) and \ - isinstance ( what , sized_types ) and 1 <= len ( what ) ,\ - "project: invalid ``what''/1: %s" % str ( what ) + print ('RANGES' , ranges ) - what = [ w for w in what ] - assert all ( isinstance ( w , string_types ) for w in what ) , \ - "project: invalid ``what''/2: %s" % str ( what ) - what = [ w.strip() for w in what ] - - - ## special treatment for 1D histograms - if 1 == histo.dim() and 1 < len ( what ) : - nr = 0 - hh = histo.clone() - for i , w in enumerate ( what ) : - n , h = tree_project_old ( tree , hh , w , cuts , *args , use_frame = use_frame , silent = silent ) - histo += h - nr += n - del hh - return nr , histo - - ## number of variables must match the dimensionality of the histogram - assert len ( what ) == histo.dim(), \ - "project: dimension mismatch : ``what''/%d vs ``dim''/%d " % ( len ( what ) , histo.dim() ) - - ## use frame if requested and if possible - if use_frame and isinstance ( tree , ROOT.TTree ) and not args : - - from ostap.frames.frames import DataFrame, frame_project - frame = DataFrame ( tree , enable = False ) - counter = frame.Filter( cuts ).Count() if cuts else frame.Count() - hh = frame_project ( frame , histo , what , cuts ) - return counter.GetValue () , hh - - ## the basic case - with ROOTCWD() : - - groot = ROOT.ROOT.GetROOT() - groot.cd () - - ## attention! note reversed here! - vars = ' : '.join ( ( '(%s)' % w for w in reversed ( what ) ) ) ## attention! note reversed here! - - ## make temporary histogram - uid = vars , cuts , histo.GetName() , tree.GetName() , args , use_frame , silent - - htemp = histo.clone ( prefix = 'htmp_%X_' % ( hash ( uid ) % 2**32 ) ) - hname = htemp.GetName () - - ## make projection to temporary histogram - result = tree.Project ( hname , vars , cuts , *args ) - - if 0 == result : - - del htemp - return result , histo - - elif 0 < result and result == htemp.GetEntries() : - - histo += htemp - - del htemp - return result , histo - - ## make a try extract the temporary histogram from ROOT memory - hroot = groot.FindObject ( hname ) - if hroot and ( hroot is htemp ) : - - histo += hroot - - del hroot - del htemp - - return result, histo - - else : - - logger.error( "project: cannot access temporary histo") - - histo += htemp - - del hroot - del htemp - - return result, histo - -ROOT.TTree .project_old = tree_project_old -ROOT.TChain.project_old = tree_project_old + from ostap.utils.cidict import cidict + kw = cidict ( transform = cidict_fun , **kwargs ) + + if 1 == nvars : + xvar = varlst [ 0 ] + xmin, xmax = ranges [ xvar ] + # + xmin = kw.pop ( 'xmin' , xmin ) + xmax = kw.pop ( 'xmax' , xmax ) + assert xmin < xmax , "Invalid xmin/xmax setting!" + # + xbins = kw.pop ( 'xbins' , kw.pop ( 'bins' , kw.pop ( 'nbinsx' , kw.pop ( 'nbins' , kw.pop ( 'binsx' , 100 ) ) ) ) ) + # + # book the histogram + histo = ROOT.TH1F ( hID() , "%s" % ( xvar ) , xbins , xmin , xmax ) ; histo.Sumw2() + # + tree_project ( dataset , histo , varlst , cuts = cuts , first = first , last = last ) + histo.draw ( opts , **kw ) + return histo + + elif 2 == nvars : + + xvar = varlst [ 0 ] + yvar = varlst [ 1 ] + xmin, xmax = ranges [ xvar ] + ymin, ymax = ranges [ yvar ] + # + xmin = kw.pop ( 'xmin' , xmin ) + xmax = kw.pop ( 'xmax' , xmax ) + assert xmin < xmax , "Invalid xmin/xmax setting!" + # + ymin = kw.pop ( 'ymin' , ymin ) + ymax = kw.pop ( 'ymax' , ymax ) + assert ymin < xmax , "Invalid ymin/ymax setting!" + # + xbins = kw.pop ( 'xbins' , kw.pop ( 'nbinsx' ,kw.pop ( 'binsx' , 50 ) ) ) + ybins = kw.pop ( 'ybins' , kw.pop ( 'nbinsy' ,kw.pop ( 'binsy' , 50 ) ) ) + # + histo = ROOT.TH12 ( hID() , "x = %s , y = %s" % ( xvar , yvar ) , + xbins , xmin , xmax , + ybins , ymin , ymax ) ; histo.Sumw2() + tree_project ( dataset , histo , varlst , cuts = cuts , first = first , last = last ) + histo.draw ( opts , **kw ) + return histo + + elif 3 == nvars : + + xvar = varlst [ 0 ] + yvar = varlst [ 1 ] + zvar = varlst [ 2 ] + # + xmin, xmax = ranges [ xvar ] + ymin, ymax = ranges [ yvar ] + zmin, zmax = ranges [ zvar ] + # + xmin = kw.pop ( 'xmin' , xmin ) + xmax = kw.pop ( 'xmax' , xmax ) + assert xmin < xmax , "Invalid xmin/xmax setting!" + # + ymin = kw.pop ( 'ymin' , ymin ) + ymax = kw.pop ( 'ymax' , ymax ) + assert ymin < xmax , "Invalid ymin/ymax setting!" + # + zmin = kw.pop ( 'zmin' , zmin ) + zmax = kw.pop ( 'zmax' , zmax ) + assert zmin < zmax , "Invalid zmin/zmax setting!" + # + # + xbins = kw.pop ( 'xbins' , kw.pop ( 'nbinsx' , kw.pop ( 'binsx' , 20 ) ) ) + ybins = kw.pop ( 'ybins' , kw.pop ( 'nbinsy' , kw.pop ( 'binsy' , 20 ) ) ) + zbins = kw.pop ( 'zbins' , kw.pop ( 'nbinsz' , kw.pop ( 'binsz' , 20 ) ) ) + # + histo = ROOT.TH13 ( hID() , "x = %s , y = %s , z = %s" % ( xvar , yvar , zvar ) , + xbins , xmin , xmax , + ybins , ymin , ymax , + zbins , zmin , zmax ) ; histo.Sumw2() + tree_project ( dataset , histo , varlst , cuts = cuts , first = first , last = last ) + histo.draw ( opts , **kw ) + return histo # ============================================================================= @@ -1487,7 +1420,13 @@ def _rc_getitem_ ( self , index ) : # @see numpy.array # @author Albert BURSCHE # @date 2015-07-08 -def _rt_slice_ ( tree , varname , cut = '' , weight = '' , transpose = False , first = 0 , last = _large ) : +def _rt_slice_ ( tree , + varname , + cut = '' , + weight = '' , + transpose = False , + first = 0 , + last = LAST_ENTRY ) : """ Get ``slice'' from TTree in a form of numpy.array ## >>> tree = ... @@ -1495,6 +1434,9 @@ def _rt_slice_ ( tree , varname , cut = '' , weight = '' , transpose = False , f >>> print ( varr ) """ + ## adjust first/last indices + first, last = evt_range ( len ( tree ) , first , last ) + if isinstance ( varname , string_types ) : varname = split_string ( varname , var_separators , strip = True , respect_groups = True ) names = [] @@ -1559,7 +1501,13 @@ def _rt_slice_ ( tree , varname , cut = '' , weight = '' , transpose = False , f # @see numpy.array # @author Albert BURSCHE # @date 2015-07-08 -def _rt_slices_ ( tree , varnames , cut = '' , weight = '' , transpose = False , first = 0 , last = _large ) : +def _rt_slices_ ( tree , + varnames , + cut = '' , + weight = '' , + transpose = False , + first = 0 , + last = LAST_ENTRY ) : """ Get ``slices'' from TTree in a form of numpy.array >>> tree = ... @@ -1574,6 +1522,7 @@ def _rt_slices_ ( tree , varnames , cut = '' , weight = '' , transpose = False >>> print vars3 """ # + first , last = evt_range ( len ( tree ) , first , last ) return tree.slice ( varnames , cut , weight , transpose , first , last ) @@ -2758,7 +2707,7 @@ def __init__ ( self , tree = None , name = None , files = [] , first = 0 , neven "Invalid ``first'' %s/%s" % ( first , type ( first ) ) self.__first = int ( first ) - self.__nevents = nevents if 0 <= nevents < _large else -1 + self.__nevents = nevents if 0 <= nevents < LAST_ENTRY else -1 self.__chain = None self.__name = 'Unknown!' @@ -2822,7 +2771,7 @@ def __init__ ( self , tree = None , name = None , files = [] , first = 0 , neven # ===================================================================== ## The final adjustment self.__lens = () - if 0 < self.__first or 0 < nevents < _large : + if 0 < self.__first or 0 < nevents < LAST_ENTRY : _first = self.__first _files = [] @@ -2878,7 +2827,7 @@ def slow_split ( self , chunk_size = 200000 ) : >>> trees = tree.split ( chunk_size = 1000000 ) """ - if chunk_size <= 0 : chunk_size = _large + if chunk_size <= 0 : chunk_size = LAST_ENTRY trees = [] @@ -2922,7 +2871,7 @@ def split ( self , chunk_size = -1 , max_files = 10 ) : >>> tree = .... >>> trees = tree.split ( chunk_size = 1000000 ) """ - if chunk_size <= 0 : chunk_size = _large + if chunk_size <= 0 : chunk_size = LAST_ENTRY if max_files <= 0 : max_files = 1 if 0 != self.first or 0 < self.__nevents : @@ -3106,7 +3055,7 @@ def split ( self , chunk_size = 200000 ) : if 0 >= chunk_size : return self, ll = len ( self ) - last = min ( ll , self.first + self.nevents if 0 <= self.nevents else _large ) + last = min ( ll , self.first + self.nevents if 0 <= self.nevents else LAST_ENTRY ) result = [] for s in self.get_slices ( self.first , last , chunk_size ) : diff --git a/ostap/utils/utils.py b/ostap/utils/utils.py index acd20e8e..91446774 100644 --- a/ostap/utils/utils.py +++ b/ostap/utils/utils.py @@ -94,6 +94,8 @@ 'random_name' , ## get some random name 'short_hash_name' , ## get some short hash name ## + 'evt_range' , ## trivial function to adjut first/last indiced for collictios of given size + ## 'choices' , ## `random.choices` function ## 'memoize' , ## Simple lightweight unbounded cache @@ -489,7 +491,42 @@ def slow ( iterable , wait = 0 ) : if 0 < wait : time.sleep ( wait ) yield r - + +# ============================================================================= +## the last index for laooping over TTRee/RooAbsData +LAST_ENTRY = ROOT.TVirtualTreePlayer.kMaxEntries +# ============================================================================= +### Get the event range from first/last +# @code +# data = ... +# first, last = ... +# first, last = evt_range ( len ( data ) , first , last ) +# @endcode +def evt_range ( size , first = 0 , last = LAST_ENTRY ) : + """ Get the event range from first/last + >>> data = ... + >>> first, last = ... + >>> first, last = evt_range ( len ( data ) , first , last ) + """ + assert isinstance ( size , integer_types ) and 0 <= size , "evt_range: Invalid type of `size'!" + assert isinstance ( first , integer_types ) , "evt_range: Invalid type of `first' : %s " % type ( first ) + assert isinstance ( last , integer_types ) , "evt_range: Invalid type of `last' : %s " % type ( last ) + ## + if not size : return 0, 0 ## empty contained + # + if 0 <= first < size : pass + elif first < 0 and 0 < size : first += size + ## + if first <= last : pass + elif last < 0 and 0 <= size : last += size + #+ + assert 0 <= first <= last , 'evt_range(size=%d,first=%d,last=%d): invalid event range!' % ( size , first , last ) + # + return first, last +# ============================================================================= + + + # ============================================================================= ## @class KeepCanvas # helper class to keep the current canvas @@ -884,7 +921,6 @@ def vrange ( vmin , vmax , n = 100 , edges = True ) : """ return VRange ( vmin , vmax , n , edges ) - # ============================================================================= ## @class LRange # Helper looper over the values between vmin and vmax using log-steps diff --git a/source/include/Ostap/StatEntity.h b/source/include/Ostap/StatEntity.h index d434858a..aa9da4e0 100644 --- a/source/include/Ostap/StatEntity.h +++ b/source/include/Ostap/StatEntity.h @@ -79,7 +79,9 @@ namespace Ostap /// minimal value double min () const { return m_min ; } /// maximal valu e - double max () const { return m_max ; } + double max () const { return m_max ; } + /// get number of "good" (mnon-zero) entriesn + unsigned long long nGood () const { return n () ; } // ====================================================================== public: // derived quantities & aliases // ====================================================================== diff --git a/source/include/Ostap/WStatEntity.h b/source/include/Ostap/WStatEntity.h index 47c81bf6..be5bea97 100644 --- a/source/include/Ostap/WStatEntity.h +++ b/source/include/Ostap/WStatEntity.h @@ -53,6 +53,12 @@ namespace Ostap double rms () const ; /// get the effective number of entries double nEff () const ; + /// get number of "good" (mnon-zero) entriesn + unsigned long long nGood () const { return m_values. n () ; } + /// minimal value (for non-zero weights) + double min () const { return m_values. min () ; } + /// maximal value (for non-zero weights) + double max () const { return m_values. max () ; } // ====================================================================== public: // helper sums // ======================================================================