Skip to content

Commit

Permalink
1. Reduce code duplication
Browse files Browse the repository at this point in the history
  1. Large redesign of statistics/projection & other methods for RooAbsData/TTree/DataFrame
  1. Large redesign of `statvars.py` module
  1. Add `roc_curve` for making ROC curves, and the corresponding test module
  1. Add `eff_graph` for 1D histograms for creation of the efficiency graph
     from the 1D distribution.
  1. `project` (& `draw`) for 2 and 3 dimensions now follows the natural order of variables:
       `XX.project ( target , 'x,y,z' , ...) `
  1. For `eff`, `effic` and `efficiency` methods for 1D histograms
     the confusing optional argument `increasing=True` is replaced by the (less confusing)
     `cut_low` and the argument is not optional anymore
  • Loading branch information
VanyaBelyaev committed Aug 3, 2024
1 parent eedd016 commit bd44aef
Show file tree
Hide file tree
Showing 4 changed files with 198 additions and 68 deletions.
17 changes: 15 additions & 2 deletions ReleaseNotes/release_notes.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,22 @@
## New features

1. Add estimators for haromins, geometic. power, Lehmer means and their weighted analogues

1. Add estimators for harmonic, geometric, power & Lehmer means and their weighted analogues
1. Reduce code duplication
1. Large redesign of statistics/projection & other methods for RooAbsData/TTree/DataFrame
1. Large redesign of `statvars.py` module
1. Add `roc_curve` for making ROC curves, and the corresponding test module
1. Add `eff_graph` for 1D histograms for creation of the efficiency graph
from the 1D distribution.

## Backward incompatible

1. `project` (& `draw`) for 2 and 3 dimensions now follows the natural order of variables:
`XX.project ( target , 'x,y,z' , ...) `
1. For `eff`, `effic` and `efficiency` methods for 1D histograms
the confusing optional argument `increasing=True` is replaced by the (less confusing)
`cut_low` and the argument is not optional anymore


## Bug fixes

# v1.10.1.8
Expand Down
73 changes: 43 additions & 30 deletions ostap/histos/histos.py
Original file line number Diff line number Diff line change
Expand Up @@ -4186,10 +4186,10 @@ def h1_sumv ( histo , increasing = True ) :
## Calculate the "cut-efficiency from the histogram
# @author Vanya BELYAEV [email protected]
# @date 2011-06-07
def _h1_effic_ ( h , increasing = True ) :
def _h1_effic_ ( h , cut_low ) :
"""Calculate the cut efficiency for the histogram
>>> h = ...
>>> he = h.effic ( 14.2 )
>>> he = h.effic ( 14.2 , cut_low = True )
"""

result = h.Clone ( hID() )
Expand All @@ -4198,15 +4198,15 @@ def _h1_effic_ ( h , increasing = True ) :

for ibin in h :

s1 = VE(0,0)
s2 = VE(0,0)
s1 = VE ( 0 , 0 )
s2 = VE ( 0 , 0 )

for jbin in h :

if jbin < ibin : s1 += h [ jbin ]
else : s2 += h [ jbin ]

result [ibin] = s1.frac( s2 ) if increasing else s2.frac( s1 )
result [ibin] = s2.frac ( s1 ) if cut_low else s1.frac ( s2 )

result.ResetStats()
return result
Expand All @@ -4216,10 +4216,10 @@ def _h1_effic_ ( h , increasing = True ) :
## Calculate the "cut-efficiency from the histogram
# @author Vanya BELYAEV [email protected]
# @date 2011-06-07
def _h1_effic2_ ( h , value , increasing = True ) :
def _h1_effic2_ ( h , value , cut_low ) :
"""Calculate the cut efficiency for the histogram
>>> h = ...
>>> he = h.efficiency ( 14.2 )
>>> he = h.efficiency ( 14.2 , cut_low = True )
"""

s1 = VE ( 0 , 0 )
Expand All @@ -4230,7 +4230,7 @@ def _h1_effic2_ ( h , value , increasing = True ) :
if x.value() < value : s1 += y
else : s2 += y

return s1.frac( s2 ) if increasing else s2.frac ( s1 )
return s2.frac ( s1 ) if cut_low else s1.frac ( s2 )

# =============================================================================
## Convert historgam into "efficinecy" histogram
Expand All @@ -4239,7 +4239,7 @@ def _h1_effic2_ ( h , value , increasing = True ) :
# effic = histo.eff ( ... )
# @endcode
# It adds two extra narrow fake bins!
def _h1_effic3_ ( h1 , increasing = True ) :
def _h1_effic3_ ( h1 , cut_low ) :
"""Convert historgam into "efficinecy" histogram
>>> histo = ...
>>> effic = histo.eff ( ... )
Expand All @@ -4263,7 +4263,7 @@ def _h1_effic3_ ( h1 , increasing = True ) :

edges.insert ( 1 , xf )
edges.insert ( -1 , xl )

result = h1_axis ( edges , title = 'Efficiency histo for %s' % h1.title , double = type ( h1 ) )

def _my_eff_ ( a , r , c ) :
Expand All @@ -4288,49 +4288,62 @@ def _my_eff_ ( a , r , c ) :

return 1.0 / ( 1.0 + d )

N = len ( h1 )
sumi = VE(0,0)
N = len ( h1 )
sumi = VE ( 0 , 0 )
for i in h1 :

c = h1 [ i ]

rest = VE(0,0)
rest = VE ( 0 , 0 )
for j in range ( i + 1 , N + 1 ) : rest += h1 [ j ]

a = sumi
r = rest

result [ i + 1 ] = _my_eff_ ( a , r , c ) if increasing else _my_eff_ ( r , a , c )
result [ i + 1 ] = _my_eff_ ( r , a , c ) if cut_low else _my_eff_ ( a , r , c )

sumi += c

result [ 1 ] = VE ( 0.0 , 0.0 ) if increasing else VE ( 1.0 , 0.0 )
result [ -1 ] = VE ( 1.0 , 0.0 ) if increasing else VE ( 0.0 , 0.0 )


if h1.natural () :
nn = max ( 1 , math.ceil ( h1.Integral () ) )
e0 = binomEff ( 0 , nn )
e1 = binomEff ( nn , nn )
else :
e0 = VE ( 0 , 0 )
e1 = VE ( 1 , 0 )

result [ 1 ] = e1 if cut_low else e0
result [ -1 ] = e0 if cut_low else e1

return result

# ===============================================================================
## Get the cut effciency in form of graph
## Get the cut efficiency in graph form
# - useful for efficincy visualisation
# - a bit better treatment of binnig effects
# - a bit better treatment of binnig effects fo wide bins
# @code
# histo = ...
# eff_graph = histo.eff_graph ( increasing = True )
# eff_graph = histo.eff_graph ( cut_low )
# @endcode
def _h1_effic4_ ( histo , increasing = True ) :
"""Get the cut effciency in form fo graph
def _h1_effic4_ ( histo , cut_low ) :
"""Get the cut efficiency in graph forms
- useful for drawing,
- better treatment of binnig effects
- a bit better treatment of binnig effects for wide bins
>>> histo = ...
>>> eff_graph = histo.eff_graph ( increasing = True )
>>> eff_graph = histo.eff_graph ( cut_low = True )
>>> eff_graph = histo.eff_graph ( cut_low = False )
"""

c1 = [ histo [ i ] for i in histo ]
c2 = c1.copy()
c2.reverse ()

s1 = [ VE () ] + [ s for s in itertools.accumulate ( c1 ) ]
s2 = [ VE () ] + [ s for s in itertools.accumulate ( c2 ) ]

## for "natural" histograms make better treatment of first/last uncertainties
vz = VE ( 0 , 1 ) if histo.natural() else VE ( 0 , 0 )

s1 = [ vz ] + [ s for s in itertools.accumulate ( c1 ) ]
s2 = [ vz ] + [ s for s in itertools.accumulate ( c2 ) ]

s2.reverse ()

import ostap.histos.graphs
Expand All @@ -4341,15 +4354,15 @@ def _h1_effic4_ ( histo , increasing = True ) :
the_eff = lambda a,b : a.frac ( b )

## special treatment of the first point
e0 = the_eff ( s1 [ 0 ] , s2 [ 0 ] ) if increasing else the_eff ( s2 [ 0 ] , s1 [ 0 ] )
e0 = the_eff ( s2 [ 0 ] , s1 [ 0 ] ) if cut_low else the_eff ( s1 [ 0 ] , s2 [ 0 ] )
xmin , _ = histo.xminmax()

graph.SetPoint ( 0 , xmin , e0.value () )
graph.SetPointError ( 0 , 0 , e0.error () )

for i, x , _ in histo.items() :
xx = x.value() + x.error()
ei = the_eff ( s1 [ i ] , s2 [ i ] ) if increasing else the_eff ( s2 [ i ] , s1 [ i ] )
ei = the_eff ( s2 [ i ] , s1 [ i ] ) if cut_low else the_eff ( s1 [ i ] , s2 [ i ] )
graph.SetPoint ( i , xx , ei.value () )
graph.SetPointError ( i , 0 , ei.error () )

Expand Down
70 changes: 34 additions & 36 deletions ostap/histos/roc.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
__author__ = "Vanya BELYAEV [email protected]"
__date__ = "2024-08-02"
__all__ = (
'makeGraph' , # make graph from primitive data
'roc_curve' , # Make ROC curve form signal & backgrund distributions
)
# =============================================================================
from ostap.core.ostap_types import string_types
Expand All @@ -31,6 +31,11 @@
if '__main__' == __name__ : logger = getLogger( 'ostap.histos.roc' )
else : logger = getLogger( __name__ )
# =============================================================================
## symbols to indicate the efficiency
_effs = ( 'e' , 'eff' , 'effs' , 'effic' , 'efficiency' )
## symbols to indicate the rejection
_rejs = ( 'r' , 'rej' , 'reject' , 'rejection' )
# =============================================================================
## Build the ROC-curve from signal and background distributions
# @param signal (histogram) of signal distribution
# @param background (histogram) of background distribution
Expand All @@ -40,30 +45,30 @@
# hbkg = ... ## background distribution
# roc = roc_curve ( signal = hsig ,
# background = hbkg ,
# increasing = True , ## "keep valeus less than cut value"
# cut_low = True , ## "keep valeus less than cut value"
# show_signal = 'efficiency' ,
# show_background = 'rejection' )
# ## get AUC
#
# import ostap.math.integral as I
# auc = I.integral ( roc , xmin = 0 , xmax = 1.0 )
# @endcode
def roc_curve ( signal ,
background ,
increasing ,
cut_low ,
show_signal = 'efficiency' ,
show_background = 'rejection' ) :

"""Build the ROC-curve from signal and background disctributuions
- signal : (histogram) of signal distribution
- backgrund : (histogram) of background distribution
>>> hsignal = ... ## signal distribution
>>> hbkg = ... ## background distribution
>>> roc = roc_curve ( signal = hsig ,
... backgrund = hbkg ,
... increasing = True , ## "keep vaues that are less than the cut value"
... show_sinal = 'efficiency' ,
... show_backgrund = 'rejection' )
>>> roc = roc_curve ( signal = hsig ,
... backgrund = hbkg ,
... cut_low = True , ## "keep vaues that are less than the cut value"
... show_sinal = 'efficiency' ,
... show_background = 'rejection' )
>>> import ostap.math,integral as I
>>> auc = I.integral ( roc , xmin = 0 , xmax = 1.0 )
Expand All @@ -73,48 +78,41 @@ def roc_curve ( signal ,
"Invalid `signal' type: %s" % type ( signal )
assert isinstance ( background , ROOT.TH1 ) and 1 == background.dim() , \
"Invalid `background' type: %s" % type ( background )

def _fun_ ( obj ) :
if callable ( obj ) : return obj
assert isinstance ( obj , string_types ) , 'Invalid type: %s' % type ( obj )
obj = str ( obj ).strip ().lower ()
if obj in _effs : return lambda e : e
elif obj in _rejs : return lambda e : 1.0-e
raise TypeError ( 'Invalid object: %s' % obj )

sig_fun = show_signal
if callable ( sig_fun ) : pass
else :
assert isinstance ( sig_fun , string_types ) , "Invalid type of `show_signal' %s" % type ( sig_fun )
sig_fun = str(sig_fun).strip().lower()
if sig_fun in ( 'e' , 'eff' , 'effic' , 'efficiency' ) : sig_fun = lambda s : s
elif sig_fun in ( 'r' , 'rej' , 'rejec' , 'reject' , 'rejection' ) : sig_fun = lambda s : 1-s
else :
raise TypeError ("Unknown `show_signal' :%s" % show_signal )

bkg_fun = show_background
if callable ( bkg_fun ) : pass
else :
assert isinstance ( bkg_fun , string_types ) , "Invalid type of `show_background' %s" % type ( bkg_fun )
bkg_fun = str(bkg_fun).strip().lower()
if bkg_fun in ( 'e' , 'eff' , 'effic' , 'efficiency' ) : bkg_fun = lambda s : s
elif bkg_fun in ( 'r' , 'rej' , 'rejec' , 'reject' , 'rejection' ) : bkg_fun = lambda s : 1-s
else :
raise TypeError ("Unknown `show_background' :%s" % show_backgrund )
## transformations :

sig_fun = _fun_ ( show_signal )
bkg_fun = _fun_ ( show_background )


hs = signal
hb = background

a## signal efficiency
hse = hs.eff ( increasing = increasing )
## signal efficiency histogram
hse = hs.eff ( cut_low = cut_low )

## background efficiency histogram
hbe = hb.eff ( cut_low = cut_low )

## background efficiency
hbe = hb.eff ( increasing = increasing )

## output graph: ROC curve
np = len ( hse )
graph = ROOT.TGraphErrors ( np )

## loop over signal efficiency
for i , xs , es in hse.items() :

## backrgiund efficiency
## background efficiency
eb = hbe ( xs.value() )

## tarnsform if requested:
## transform if requested:

es = sig_fun ( es )
eb = bkg_fun ( eb )
Expand Down
Loading

0 comments on commit bd44aef

Please sign in to comment.