Skip to content

Commit

Permalink
1. some improvements for SimFit.generate
Browse files Browse the repository at this point in the history
  1. require `nEvents` argument for `SimFit.generate` to be `dict`-like type
  1. fix `SimFit.generate`
  • Loading branch information
VanyaBelyaev committed Feb 9, 2024
1 parent 7f95f27 commit fd071b1
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 58 deletions.
3 changes: 3 additions & 0 deletions ReleaseNotes/release_notes.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,17 @@
1. improve `hadd` function from `ostap.utils.utils` module
1. add `mtime` function into `ostap.utils.basic` module - last creation/modification date for the path (dir/file)
1. add (much) better cleanup of the ancient tmp directories. Useful to remove leftovers from the parallel executions.
1. some improvements for `SimFit.generate`

## Backward incompatible:

1. move `hadd` function from `ostap.trees.data_utils.Files` to `ostap.trees.data_utils.Data`
1. require `nEvents` argument for `SimFit.generate` to be `dict`-like type

## Bug fixes:

1. fix numerous typos in documentation strings
1. fix `SimFit.generate`

# v1.10.0.6

Expand Down
64 changes: 25 additions & 39 deletions ostap/fitting/pdfbasic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1905,15 +1905,16 @@ def generate ( self ,
binning = {}

if not varset :
varset = ROOT.RooArgSet ( self.xvar )
varset = ROOT.RooArgSet ( self.vars )
elif isinstance ( varset , ROOT.RooAbsReal ) :
varset = ROOT.RooArgSet ( varset )

if not self.xvar in varset :
vs = ROOT.RooArgSet()
vs . add ( self.xvar )
for v in varset : vs.add ( v )
varset = vs
for v in self.vars :
if not v in varset :
vs = ROOT.RooArgSet()
vs . add ( v )
for vv in varset : vs.add ( vv )
varset = vs

from ostap.fitting.variables import KeepBinning

Expand Down Expand Up @@ -3112,7 +3113,8 @@ def generate ( self ,
nEvents ,
varset = None ,
binning = {} ,
sample = True ,
sample = True ,
storage = None ,
args = () ) :
"""Generate toy-sample according to PDF
>>> model = ....
Expand All @@ -3136,18 +3138,13 @@ def generate ( self ,
elif isinstance ( varset , ROOT.RooAbsReal ) :
varset = ROOT.RooArgSet( varset )

if not self.xvar in varset :
vs = ROOT.RooArgSet()
vs . add ( self.xvar )
for v in varset : vs.add ( v )
varset = vs

if not self.yvar in varset :
vs = ROOT.RooArgSet()
vs . add ( self.yvar )
for v in varset : vs.add ( v )
varset = vs

for v in self.vars :
if not v in varset :
vs = ROOT.RooArgSet()
vs . add ( v )
for vv in varset : vs.add ( vv )
varset = vs

from ostap.fitting.variables import KeepBinning
with KeepBinning ( self.xvar ) , KeepBinning ( self.yvar ) :

Expand Down Expand Up @@ -4296,7 +4293,8 @@ def generate ( self ,
nEvents ,
varset = None ,
binning = {} ,
sample = True ,
sample = True ,
storage = None ,
args = () ) :
"""Generate toy-sample according to PDF
>>> model = ....
Expand All @@ -4320,23 +4318,12 @@ def generate ( self ,
elif isinstance ( varset , ROOT.RooAbsReal ) :
varset = ROOT.RooArgSet( varset )

if not self.xvar in varset :
vs = ROOT.RooArgSet()
vs . add ( self.xvar )
for v in varset : vs.add ( v )
varset = vs

if not self.yvar in varset :
vs = ROOT.RooArgSet()
vs . add ( self.yvar )
for v in varset : vs.add ( v )
varset = vs

if not self.zvar in varset :
vs = ROOT.RooArgSet()
vs . add ( self.zvar )
for v in varset : vs.add ( v )
varset = vs
for v in self.vars :
if not v in varset :
vs = ROOT.RooArgSet()
vs . add ( v )
for vv in varset : vs.add ( vv )
varset = vs

from ostap.fitting.variables import KeepBinning
with KeepBinning ( self.xvar ) , KeepBinning ( self.yvar ), KeepBinning ( self.zvar ) :
Expand All @@ -4350,9 +4337,8 @@ def generate ( self ,
if xbins : self.xvar.bins = xbins
if ybins : self.yvar.bins = ybins
if zbins : self.zvar.bins = zbins

return self.pdf.generate ( varset , *args )

return self.pdf.generate ( varset , *args )

# ========================================================================
## check minmax of the PDF using the random shoots
Expand Down
4 changes: 3 additions & 1 deletion ostap/fitting/pyselectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1717,7 +1717,9 @@ def make_dataset_old ( tree ,
with TIMING ( 'Fill RooDataSet' , logger = logger ) :
with rooSilent ( ROOT.RooFit.ERROR , True ) :
with rootError ( ROOT.kWarning ) :
ds = ROOT.RooDataSet ( name , title , tree , varsete , str ( cuts ) )
if root_info <= ( 6, 31 ) : ds = ROOT.RooDataSet ( name , title , tree , varsete , str ( cuts ) )
else : ds = ROOT.RooDataSet ( name , title , varsete , ROOT.RooFit.Import ( tree ) , ROOT.RooFit.Cut ( str ( cuts ) ) )

varsete = ds.get()

if not silent :
Expand Down
38 changes: 24 additions & 14 deletions ostap/fitting/simfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -1158,10 +1158,10 @@ def minuit ( self , dataset ,
## generate toy-sample according to PDF
# @code
# model = ....
# data = model.generate ( 10000 ) ## generate dataset with 10000 events
# data = model.generate ( { 'A' : 100 , 'B' : 200 } ) ## generate dataset with 100+200 events
# varset = ....
# data = model.generate ( 100000 , varset , sample = False )
# data = model.generate ( 100000 , varset , sample = True )
# data = model.generate ( { 'A' : 100 , 'B' : 200 } , varset , sample = False )
# data = model.generate ( { 'A' : 100 , 'B' : 200 } , varset , sample = True )
# @endcode
def generate ( self ,
nEvents ,
Expand All @@ -1173,33 +1173,43 @@ def generate ( self ,
category_args = {} ) :
"""Generate toy-sample according to PDF
>>> model = ....
>>> data = model.generate ( 10000 ) ## generate dataset with 10000 events
>>> data = model.generate ( { 'A' : 100 , 'B' : 200 } ) ## generate dataset with 100+200 events
>>> varset = ....
>>> data = model.generate ( 100000 , varset , sample = False )
>>> data = model.generate ( 100000 , varset , sample = True )
>>> data = model.generate ( { 'A' : 100 , 'B' : 200 } , varset , sample = False )
>>> data = model.generate ( { 'A' : 100 , 'B' : 200 } , varset , sample = True )
"""

from ostap.core.ostap_types import dictlike_types

labels = self.sample.labels()

assert len ( labels ) == len ( nEvents ), 'Invalid length of nEvents array'
assert isinstance ( nEvents , dictlike_types ) and \
len ( labels ) == len ( nEvents ) and \
all ( k in labels for k in nEvents ) , \
'Invalid type/length of nEvents!'

vars = ROOT.RooArgSet()
data = {}

weight = None
wvar = None

## generate all categories separately:
for l , n in zip ( labels , nEvents ) :
for label in nEvents :

nevts = nEvents [ label ]

cargs = []
for a in args : cargs.append ( a )
for a in category_args.get ( l , () ) : cargs.append ( a )
for a in args : cargs.append ( a )
for a in category_args.get ( label , () ) : cargs.append ( a )
cargs = tuple ( cargs )

pdf = self.categories [ l ]
ds = pdf.generate ( n ,
varset = varset ,
pdf = self.categories [ label ]
vv = ROOT.RooArgSet ( [ v for v in self.vars if v in varset ] )
ds = pdf.generate ( nevts ,
varset = vv ,
## varset = varset ,
binning = binning ,
sample = sample ,
storage = storage ,
Expand All @@ -1209,7 +1219,7 @@ def generate ( self ,
ds , weight = ds.unWeighted ()
if weight : wvar = getattr ( ds , weight )

data [ l ] = ds
data [ label ] = ds
for v in ds.varset() :
if not v in vars : vars.add ( v )
## vars |= ds.varset()
Expand Down
16 changes: 15 additions & 1 deletion ostap/fitting/tests/test_fitting_simfit1.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,6 @@
graphs = []
# =============================================================================
def test_simfit1 () :
## if 1 < 2 :

logger = getLogger( 'test_simfit1' )

Expand Down Expand Up @@ -219,6 +218,21 @@ def test_simfit1 () :
with use_canvas ( 'test_simfit1: sPlot/xyz for B (background)' , wait = 1 ) :
dataset.draw ( 'test_xyz' , '(sample==1)*B_M2_sw' )


# =========================================================================
## test creation of dataset
# =========================================================================
ds_gen = model_sim.generate ( nEvents = { 'A' : len ( dataset1 ) ,
'B' : len ( dataset2 ) } ,
varset = vars )

rg , f = model_sim.fitTo ( ds_gen , silent = True )
rg , f = model_sim.fitTo ( ds_gen , silent = True )

title = 'Results of simultaneous fit to generated dataset'
logger.info ( '%s\n%s' % ( title , rg.table ( title = title , prefix = '# ' ) ) )


# =============================================================================
## check that everything is serializable
# =============================================================================
Expand Down
15 changes: 15 additions & 0 deletions ostap/fitting/tests/test_fitting_simfit2.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,21 @@ def test_simfit2 ( ) :
logger.info ( 'Significane: %.1f and %.1f ' % ( wilks , wilks2 ) )


# =========================================================================
## test creation of dataset
# =========================================================================
ds_gen = model_sim.generate ( nEvents = { 'A' : len ( dataset1 ) ,
'B' : len ( dataset2 ) } ,
varset = vars )

rg , f = model_sim.fitTo ( ds_gen , silent = True )
rg , f = model_sim.fitTo ( ds_gen , silent = True )

title = 'Results of simultaneous fit to generated dataset'
logger.info ( '%s\n%s' % ( title , rg.table ( title = title , prefix = '# ' ) ) )



# =============================================================================
if '__main__' == __name__ :

Expand Down
15 changes: 15 additions & 0 deletions ostap/fitting/tests/test_fitting_simfit3.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,21 @@ def test_simfit3() :
grs.draw('apl')
graphs.append ( grs )


# =========================================================================
## test creation of dataset
# =========================================================================
ds_gen = model_sim.generate ( nEvents = { 'N' : len ( dataset1 ) ,
'S' : len ( dataset2 ) } ,
varset = vars )

rg , f = model_sim.fitTo ( ds_gen , silent = True )
rg , f = model_sim.fitTo ( ds_gen , silent = True )

title = 'Results of simultaneous fit to generated dataset'
logger.info ( '%s\n%s' % ( title , rg.table ( title = title , prefix = '# ' ) ) )



# =============================================================================
if '__main__' == __name__ :
Expand Down
15 changes: 14 additions & 1 deletion ostap/fitting/tests/test_fitting_simfit4.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,20 @@ def test_simfit4() :
graphs.append ( grs )
title = 'Simultaneous fit'
logger.info ( 'Combined fit results are:\n%s ' % rC.table ( title = title ,
prefix = '#' ) )
prefix = '#' ) )
# =========================================================================
## test creation of dataset
# =========================================================================
ds_gen = model_sim.generate ( nEvents = { 'N' : len ( dataset1 ) ,
'S' : len ( dataset2 ) } ,
varset = vars )

rg , f = model_sim.fitTo ( ds_gen , silent = True )
rg , f = model_sim.fitTo ( ds_gen , silent = True )

title = 'Results of simultaneous fit to generated dataset'
logger.info ( '%s\n%s' % ( title , rg.table ( title = title , prefix = '# ' ) ) )


# =============================================================================
if '__main__' == __name__ :
Expand Down
18 changes: 17 additions & 1 deletion ostap/fitting/tests/test_fitting_simfit5.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,23 @@ def test_simfit5() :
with wait ( 1 ) : fdm3 = model_sim.draw ( 'dm3' , cdataset2 , nbins = 100 )
with wait ( 1 ) : fd = model_sim.draw ( 'data' , cdataset2 , nbins = 100 )

## tru to serialize everything
# =========================================================================
## test creation of dataset
# =========================================================================
ds_gen = model_sim.generate ( nEvents = { 'data' : len ( dataset ) ,
'dm1' : len ( dset1 ) ,
'dm2' : len ( dset2 ) ,
'dm3' : len ( dset3 ) } ,
varset = vars )

rg , f = model_sim.fitTo ( ds_gen , silent = True )
rg , f = model_sim.fitTo ( ds_gen , silent = True )

title = 'Results of simultaneous fit to generated dataset'
logger.info ( '%s\n%s' % ( title , rg.table ( title = title , prefix = '# ' ) ) )


## try to serialize everything
logger.info('Saving all objects into DBASE')
with timing ('Save everything to DBASE' , logger ), DBASE.tmpdb() as db :

Expand Down
3 changes: 2 additions & 1 deletion ostap/fitting/tests/test_fitting_toys_simfit.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,8 @@ def test_toys_simfit_1 () :
pdf = model_sim ,
nToys = 100 ,
data = [ mass ] ,
gen_config = { 'nEvents' : ( NS1 + NB1 , NS2 + NB2 ) , 'sample' : True } ,
gen_config = { 'nEvents' : { 'A' : len ( dataset1 ) ,
'B' : len ( dataset2 ) } , 'sample' : True } ,
fit_config = { 'silent' : True } ,
init_pars = { 'mean_G1' : mean1 , 'BM1' : NB1 , 'BM2' : NB2 ,
'sigma_G1' : sigma1 , 'SM1' : NS1 , 'SM2' : NS2 ,
Expand Down

0 comments on commit fd071b1

Please sign in to comment.