-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
1. Large redesign of staistics/projection& othe rmethids for RooAbdData/TTree/DataFrame 1. Large redesign if `statvars.py` module 1. Add `roc_curve` for making ROC curves, and corrresponsing test module 1. Add `eff_graph` for 1D historgams for creation of the efficiency graph from the 1D-distribution. 1. `project`(&`draw`) for 2 and 3-dimession now follows the natural order of varibales: `XX.project ( target , 'x,y,z' , ...) ` 1. For `eff` & effic' and `efficinecy` methods fo r1D histograms the confusing optional argument `increasing=True` is replced by (less-confusin) `cut_low` and the argument is not optionl anymore
- Loading branch information
1 parent
eedd016
commit bd44aef
Showing
4 changed files
with
198 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4186,10 +4186,10 @@ def h1_sumv ( histo , increasing = True ) : | |
## Calculate the "cut-efficiency from the histogram | ||
# @author Vanya BELYAEV [email protected] | ||
# @date 2011-06-07 | ||
def _h1_effic_ ( h , increasing = True ) : | ||
def _h1_effic_ ( h , cut_low ) : | ||
"""Calculate the cut efficiency for the histogram | ||
>>> h = ... | ||
>>> he = h.effic ( 14.2 ) | ||
>>> he = h.effic ( 14.2 , cut_low = True ) | ||
""" | ||
|
||
result = h.Clone ( hID() ) | ||
|
@@ -4198,15 +4198,15 @@ def _h1_effic_ ( h , increasing = True ) : | |
|
||
for ibin in h : | ||
|
||
s1 = VE(0,0) | ||
s2 = VE(0,0) | ||
s1 = VE ( 0 , 0 ) | ||
s2 = VE ( 0 , 0 ) | ||
|
||
for jbin in h : | ||
|
||
if jbin < ibin : s1 += h [ jbin ] | ||
else : s2 += h [ jbin ] | ||
|
||
result [ibin] = s1.frac( s2 ) if increasing else s2.frac( s1 ) | ||
result [ibin] = s2.frac ( s1 ) if cut_low else s1.frac ( s2 ) | ||
|
||
result.ResetStats() | ||
return result | ||
|
@@ -4216,10 +4216,10 @@ def _h1_effic_ ( h , increasing = True ) : | |
## Calculate the "cut-efficiency from the histogram | ||
# @author Vanya BELYAEV [email protected] | ||
# @date 2011-06-07 | ||
def _h1_effic2_ ( h , value , increasing = True ) : | ||
def _h1_effic2_ ( h , value , cut_low ) : | ||
"""Calculate the cut efficiency for the histogram | ||
>>> h = ... | ||
>>> he = h.efficiency ( 14.2 ) | ||
>>> he = h.efficiency ( 14.2 , cut_low = True ) | ||
""" | ||
|
||
s1 = VE ( 0 , 0 ) | ||
|
@@ -4230,7 +4230,7 @@ def _h1_effic2_ ( h , value , increasing = True ) : | |
if x.value() < value : s1 += y | ||
else : s2 += y | ||
|
||
return s1.frac( s2 ) if increasing else s2.frac ( s1 ) | ||
return s2.frac ( s1 ) if cut_low else s1.frac ( s2 ) | ||
|
||
# ============================================================================= | ||
## Convert historgam into "efficinecy" histogram | ||
|
@@ -4239,7 +4239,7 @@ def _h1_effic2_ ( h , value , increasing = True ) : | |
# effic = histo.eff ( ... ) | ||
# @endcode | ||
# It adds two extra narrow fake bins! | ||
def _h1_effic3_ ( h1 , increasing = True ) : | ||
def _h1_effic3_ ( h1 , cut_low ) : | ||
"""Convert historgam into "efficinecy" histogram | ||
>>> histo = ... | ||
>>> effic = histo.eff ( ... ) | ||
|
@@ -4263,7 +4263,7 @@ def _h1_effic3_ ( h1 , increasing = True ) : | |
|
||
edges.insert ( 1 , xf ) | ||
edges.insert ( -1 , xl ) | ||
|
||
result = h1_axis ( edges , title = 'Efficiency histo for %s' % h1.title , double = type ( h1 ) ) | ||
|
||
def _my_eff_ ( a , r , c ) : | ||
|
@@ -4288,49 +4288,62 @@ def _my_eff_ ( a , r , c ) : | |
|
||
return 1.0 / ( 1.0 + d ) | ||
|
||
N = len ( h1 ) | ||
sumi = VE(0,0) | ||
N = len ( h1 ) | ||
sumi = VE ( 0 , 0 ) | ||
for i in h1 : | ||
|
||
c = h1 [ i ] | ||
|
||
rest = VE(0,0) | ||
rest = VE ( 0 , 0 ) | ||
for j in range ( i + 1 , N + 1 ) : rest += h1 [ j ] | ||
|
||
a = sumi | ||
r = rest | ||
|
||
result [ i + 1 ] = _my_eff_ ( a , r , c ) if increasing else _my_eff_ ( r , a , c ) | ||
result [ i + 1 ] = _my_eff_ ( r , a , c ) if cut_low else _my_eff_ ( a , r , c ) | ||
|
||
sumi += c | ||
|
||
result [ 1 ] = VE ( 0.0 , 0.0 ) if increasing else VE ( 1.0 , 0.0 ) | ||
result [ -1 ] = VE ( 1.0 , 0.0 ) if increasing else VE ( 0.0 , 0.0 ) | ||
|
||
|
||
if h1.natural () : | ||
nn = max ( 1 , math.ceil ( h1.Integral () ) ) | ||
e0 = binomEff ( 0 , nn ) | ||
e1 = binomEff ( nn , nn ) | ||
else : | ||
e0 = VE ( 0 , 0 ) | ||
e1 = VE ( 1 , 0 ) | ||
|
||
result [ 1 ] = e1 if cut_low else e0 | ||
result [ -1 ] = e0 if cut_low else e1 | ||
|
||
return result | ||
|
||
# =============================================================================== | ||
## Get the cut effciency in form of graph | ||
## Get the cut efficiency in graph form | ||
# - useful for efficincy visualisation | ||
# - a bit better treatment of binnig effects | ||
# - a bit better treatment of binnig effects fo wide bins | ||
# @code | ||
# histo = ... | ||
# eff_graph = histo.eff_graph ( increasing = True ) | ||
# eff_graph = histo.eff_graph ( cut_low ) | ||
# @endcode | ||
def _h1_effic4_ ( histo , increasing = True ) : | ||
"""Get the cut effciency in form fo graph | ||
def _h1_effic4_ ( histo , cut_low ) : | ||
"""Get the cut efficiency in graph forms | ||
- useful for drawing, | ||
- better treatment of binnig effects | ||
- a bit better treatment of binnig effects for wide bins | ||
>>> histo = ... | ||
>>> eff_graph = histo.eff_graph ( increasing = True ) | ||
>>> eff_graph = histo.eff_graph ( cut_low = True ) | ||
>>> eff_graph = histo.eff_graph ( cut_low = False ) | ||
""" | ||
|
||
c1 = [ histo [ i ] for i in histo ] | ||
c2 = c1.copy() | ||
c2.reverse () | ||
|
||
s1 = [ VE () ] + [ s for s in itertools.accumulate ( c1 ) ] | ||
s2 = [ VE () ] + [ s for s in itertools.accumulate ( c2 ) ] | ||
|
||
## for "natural" histograms make better treatment of first/last uncertainties | ||
vz = VE ( 0 , 1 ) if histo.natural() else VE ( 0 , 0 ) | ||
|
||
s1 = [ vz ] + [ s for s in itertools.accumulate ( c1 ) ] | ||
s2 = [ vz ] + [ s for s in itertools.accumulate ( c2 ) ] | ||
|
||
s2.reverse () | ||
|
||
import ostap.histos.graphs | ||
|
@@ -4341,15 +4354,15 @@ def _h1_effic4_ ( histo , increasing = True ) : | |
the_eff = lambda a,b : a.frac ( b ) | ||
|
||
## special treatment of the first point | ||
e0 = the_eff ( s1 [ 0 ] , s2 [ 0 ] ) if increasing else the_eff ( s2 [ 0 ] , s1 [ 0 ] ) | ||
e0 = the_eff ( s2 [ 0 ] , s1 [ 0 ] ) if cut_low else the_eff ( s1 [ 0 ] , s2 [ 0 ] ) | ||
xmin , _ = histo.xminmax() | ||
|
||
graph.SetPoint ( 0 , xmin , e0.value () ) | ||
graph.SetPointError ( 0 , 0 , e0.error () ) | ||
|
||
for i, x , _ in histo.items() : | ||
xx = x.value() + x.error() | ||
ei = the_eff ( s1 [ i ] , s2 [ i ] ) if increasing else the_eff ( s2 [ i ] , s1 [ i ] ) | ||
ei = the_eff ( s2 [ i ] , s1 [ i ] ) if cut_low else the_eff ( s1 [ i ] , s2 [ i ] ) | ||
graph.SetPoint ( i , xx , ei.value () ) | ||
graph.SetPointError ( i , 0 , ei.error () ) | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,7 @@ | |
__author__ = "Vanya BELYAEV [email protected]" | ||
__date__ = "2024-08-02" | ||
__all__ = ( | ||
'makeGraph' , # make graph from primitive data | ||
'roc_curve' , # Make ROC curve form signal & backgrund distributions | ||
) | ||
# ============================================================================= | ||
from ostap.core.ostap_types import string_types | ||
|
@@ -31,6 +31,11 @@ | |
if '__main__' == __name__ : logger = getLogger( 'ostap.histos.roc' ) | ||
else : logger = getLogger( __name__ ) | ||
# ============================================================================= | ||
## symbols to indicate the efficiency | ||
_effs = ( 'e' , 'eff' , 'effs' , 'effic' , 'efficiency' ) | ||
## symbols to indicate the rejection | ||
_rejs = ( 'r' , 'rej' , 'reject' , 'rejection' ) | ||
# ============================================================================= | ||
## Build the ROC-curve from signal and background disctributuions | ||
# @param signal (histogram) of signal distribution | ||
# @param backgrund (histogram) of background distribution | ||
|
@@ -40,30 +45,30 @@ | |
# hbkg = ... ## background distribution | ||
# roc = roc_curve ( signal = hsig , | ||
# backgrund = hbkg , | ||
# increasing = True , ## "keep valeus less than cut value" | ||
# cut_low = True , ## "keep valeus less than cut value" | ||
# show_sinal = 'efficiency' , | ||
# show_backgrund = 'rejection' ) | ||
# ## get AUC | ||
# | ||
# import ostap.math,integral as I | ||
# auc = I.integral ( roc , xmin = 0 , xmax = 1.0 ) | ||
# @endcode | ||
def roc_curve ( signal , | ||
background , | ||
increasing , | ||
cut_low , | ||
show_signal = 'efficiency' , | ||
show_background = 'rejection' ) : | ||
|
||
"""Build the ROC-curve from signal and background disctributuions | ||
- signal : (histogram) of signal distribution | ||
- backgrund : (histogram) of background distribution | ||
>>> hsignal = ... ## signal distribution | ||
>>> hbkg = ... ## background distribution | ||
>>> roc = roc_curve ( signal = hsig , | ||
... backgrund = hbkg , | ||
... increasing = True , ## "keep vaues that are less than the cut value" | ||
... show_sinal = 'efficiency' , | ||
... show_backgrund = 'rejection' ) | ||
>>> roc = roc_curve ( signal = hsig , | ||
... backgrund = hbkg , | ||
... cut_low = True , ## "keep vaues that are less than the cut value" | ||
... show_sinal = 'efficiency' , | ||
... show_background = 'rejection' ) | ||
>>> import ostap.math,integral as I | ||
>>> auc = I.integral ( roc , xmin = 0 , xmax = 1.0 ) | ||
|
@@ -73,48 +78,41 @@ def roc_curve ( signal , | |
"Invalid `signal' type: %s" % type ( signal ) | ||
assert isinstance ( background , ROOT.TH1 ) and 1 == background.dim() , \ | ||
"Invalid `background' type: %s" % type ( background ) | ||
|
||
def _fun_ ( obj ) : | ||
if callable ( obj ) : return obj | ||
assert isinstance ( obj , string_types ) , 'Invalid type: %s' % type ( obj ) | ||
obj = str ( obj ).strip ().lower () | ||
if obj in _effs : return lambda e : e | ||
elif obj in _rejs : return lambda e : 1.0-e | ||
raise TypeError ( 'Invalid object: %s' % obj ) | ||
|
||
sig_fun = show_signal | ||
if callable ( sig_fun ) : pass | ||
else : | ||
assert isinstance ( sig_fun , string_types ) , "Invalid type of `show_signal' %s" % type ( sig_fun ) | ||
sig_fun = str(sig_fun).strip().lower() | ||
if sig_fun in ( 'e' , 'eff' , 'effic' , 'efficiency' ) : sig_fun = lambda s : s | ||
elif sig_fun in ( 'r' , 'rej' , 'rejec' , 'reject' , 'rejection' ) : sig_fun = lambda s : 1-s | ||
else : | ||
raise TypeError ("Unknown `show_signal' :%s" % show_signal ) | ||
|
||
bkg_fun = show_background | ||
if callable ( bkg_fun ) : pass | ||
else : | ||
assert isinstance ( bkg_fun , string_types ) , "Invalid type of `show_background' %s" % type ( bkg_fun ) | ||
bkg_fun = str(bkg_fun).strip().lower() | ||
if bkg_fun in ( 'e' , 'eff' , 'effic' , 'efficiency' ) : bkg_fun = lambda s : s | ||
elif bkg_fun in ( 'r' , 'rej' , 'rejec' , 'reject' , 'rejection' ) : bkg_fun = lambda s : 1-s | ||
else : | ||
raise TypeError ("Unknown `show_background' :%s" % show_backgrund ) | ||
## transformations : | ||
|
||
sig_fun = _fun_ ( show_signal ) | ||
bkg_fun = _fun_ ( show_background ) | ||
|
||
|
||
hs = signal | ||
hb = background | ||
|
||
a## signal efficiency | ||
hse = hs.eff ( increasing = increasing ) | ||
## signal efficiency histogram | ||
hse = hs.eff ( cut_low = cut_low ) | ||
|
||
## background efficiency histogram | ||
hbe = hb.eff ( cut_low = cut_low ) | ||
|
||
## background efficiency | ||
hbe = hb.eff ( increasing = increasing ) | ||
|
||
## output graph: ROC curve | ||
np = len ( hse ) | ||
graph = ROOT.TGraphErrors ( np ) | ||
|
||
## loop over signal efficiency | ||
for i , xs , es in hse.items() : | ||
|
||
## backrgiund efficiency | ||
## background efficiency | ||
eb = hbe ( xs.value() ) | ||
|
||
## tarnsform if requested: | ||
## transform if requested: | ||
|
||
es = sig_fun ( es ) | ||
eb = bkg_fun ( eb ) | ||
|
Oops, something went wrong.