Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

12 4 8 new vars v1 #47

Open
wants to merge 11 commits into
base: 12_4_8
Choose a base branch
from
94 changes: 92 additions & 2 deletions plugins/JetConstituentTableProducer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,23 @@ class JetConstituentTableProducer : public edm::stream::EDProducer<> {
//const std::string name_;
const std::string name_;
const std::string nameSV_;
const std::string nameMu_;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Style] Indentation level seems to vary for the newly added lines, compared to the surrounding statements

const std::string idx_name_;
const std::string idx_nameSV_;
const std::string idx_nameMu_;
const bool readBtag_;
const double jet_radius_;

edm::EDGetTokenT<edm::View<T>> jet_token_;
edm::EDGetTokenT<VertexCollection> vtx_token_;
edm::EDGetTokenT<reco::CandidateView> cand_token_;
edm::EDGetTokenT<SVCollection> sv_token_;
edm::EDGetTokenT<edm::View<reco::Muon> > muon_token_;

edm::Handle<VertexCollection> vtxs_;
edm::Handle<reco::CandidateView> cands_;
edm::Handle<SVCollection> svs_;
edm::Handle<edm::View<reco::Muon> > muons_;
edm::ESHandle<TransientTrackBuilder> track_builder_;
edm::ESGetToken<TransientTrackBuilder, TransientTrackRecord> track_builder_token_;

Expand All @@ -75,18 +79,22 @@ template< typename T>
JetConstituentTableProducer<T>::JetConstituentTableProducer(const edm::ParameterSet &iConfig)
: name_(iConfig.getParameter<std::string>("name")),
nameSV_(iConfig.getParameter<std::string>("nameSV")),
nameMu_(iConfig.getParameter<std::string>("nameMu")),
idx_name_(iConfig.getParameter<std::string>("idx_name")),
idx_nameSV_(iConfig.getParameter<std::string>("idx_nameSV")),
idx_nameMu_(iConfig.getParameter<std::string>("idx_nameMu")),
readBtag_(iConfig.getParameter<bool>("readBtag")),
jet_radius_(iConfig.getParameter<double>("jet_radius")),
jet_token_(consumes<edm::View<T>>(iConfig.getParameter<edm::InputTag>("jets"))),
vtx_token_(consumes<VertexCollection>(iConfig.getParameter<edm::InputTag>("vertices"))),
cand_token_(consumes<reco::CandidateView>(iConfig.getParameter<edm::InputTag>("candidates"))),
sv_token_(consumes<SVCollection>(iConfig.getParameter<edm::InputTag>("secondary_vertices"))),
muon_token_(consumes<edm::View<reco::Muon> >(iConfig.getParameter<edm::InputTag>("muons"))),
track_builder_token_(
esConsumes<TransientTrackBuilder, TransientTrackRecord>(edm::ESInputTag("", "TransientTrackBuilder"))){
produces<nanoaod::FlatTable>(name_);
produces<nanoaod::FlatTable>(nameSV_);
produces<nanoaod::FlatTable>(nameMu_);
produces<std::vector<reco::CandidatePtr>>();
}

Expand All @@ -98,24 +106,40 @@ void JetConstituentTableProducer<T>::produce(edm::Event &iEvent, const edm::Even
// elements in all these collections must have the same order!
auto outCands = std::make_unique<std::vector<reco::CandidatePtr>>();
auto outSVs = std::make_unique<std::vector<const reco::VertexCompositePtrCandidate *>> ();
std::vector<int> jetIdx_pf, jetIdx_sv, pfcandIdx, svIdx;
auto outMuons = std::make_unique<std::vector<reco::Muon>>();
std::vector<int> jetIdx_pf, jetIdx_sv, jetIdx_mu, pfcandIdx, svIdx, muIdx;
// PF Cands
std::vector<float> btagEtaRel, btagPtRatio, btagPParRatio, btagSip3dVal, btagSip3dSig, btagJetDistVal, cand_pt;
// Secondary vertices
std::vector<float> sv_mass, sv_pt, sv_ntracks, sv_chi2, sv_normchi2, sv_dxy, sv_dxysig, sv_d3d, sv_d3dsig, sv_costhetasvpv;
std::vector<float> sv_ptrel, sv_phirel, sv_deltaR, sv_enratio;
// Muons
std::vector<float> muon_pt, muon_eta, muon_phi, muon_ptrel;
std::vector<double> muon_isGlobal, muon_chi2Tk, muon_chi2;
std::vector<int> muon_nMuHit, muon_nMatched, muon_nTkHit, muon_nPixHit, muon_nOutHit;

auto jets = iEvent.getHandle(jet_token_);
iEvent.getByToken(vtx_token_, vtxs_);
iEvent.getByToken(cand_token_, cands_);
iEvent.getByToken(sv_token_, svs_);
iEvent.getByToken(muon_token_, muons_);

if(readBtag_){
track_builder_ = iSetup.getHandle(track_builder_token_);
}

/*outMuons->clear();
jetIdx_mu.clear();
muIdx.clear();
muon_pt.clear();
muon_eta.clear();
muon_phi.clear();
muon_ptrel.clear();*/
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Style] Places with commented out code which could be removed for clarity



for (unsigned i_jet = 0; i_jet < jets->size(); ++i_jet) {
const auto &jet = jets->at(i_jet);
//std::cout<<"jet "<<i_jet<<std::endl;
math::XYZVector jet_dir = jet.momentum().Unit();
GlobalVector jet_ref_track_dir(jet.px(), jet.py(), jet.pz());
VertexDistance3D vdist;
Expand Down Expand Up @@ -214,6 +238,47 @@ void JetConstituentTableProducer<T>::produce(edm::Event &iEvent, const edm::Even
}
}
} // end jet loop


//Muons
uint idx_mu=0;
for (const auto &mu : *muons_) {
if(reco::deltaR2(mu, jet) < jet_radius_ * jet_radius_){
outMuons->push_back(mu);
jetIdx_mu.push_back(i_jet);
muIdx.push_back(idx_mu);
muon_pt.push_back(mu.pt());
//std::cout<<"muon pt: "<<mu.pt()<<std::endl;
muon_eta.push_back(mu.eta());
muon_phi.push_back(mu.phi());
muon_ptrel.push_back(mu.pt()/jet.pt());

if(mu.isGlobalMuon()){
muon_nMuHit.push_back(mu.outerTrack()->hitPattern().numberOfValidMuonHits());
muon_nMatched.push_back(mu.numberOfMatchedStations());
muon_nTkHit.push_back(mu.innerTrack()->hitPattern().numberOfValidHits());
muon_nPixHit.push_back(mu.innerTrack()->hitPattern().numberOfValidPixelHits());
muon_nOutHit.push_back(mu.innerTrack()->hitPattern().numberOfLostHits(reco::HitPattern::MISSING_OUTER_HITS));
muon_chi2.push_back(mu.globalTrack()->normalizedChi2());
muon_chi2Tk.push_back(mu.innerTrack()->normalizedChi2());
}
else{
muon_nMuHit.push_back(-1);
muon_nMatched.push_back(-1);
muon_nTkHit.push_back(-1);
muon_nPixHit.push_back(-1);
muon_nOutHit.push_back(-1);
muon_chi2.push_back(-1);
muon_chi2Tk.push_back(-1);

}
muon_isGlobal.push_back(mu.isGlobalMuon());



}
idx_mu++;
}
}

auto candTable = std::make_unique<nanoaod::FlatTable>(outCands->size(), name_, false);
Expand Down Expand Up @@ -256,22 +321,47 @@ void JetConstituentTableProducer<T>::produce(edm::Event &iEvent, const edm::Even
iEvent.put(std::move(svTable), nameSV_);

iEvent.put(std::move(outCands));


// Muon table
auto muonTable = std::make_unique<nanoaod::FlatTable>(outMuons->size(), nameMu_, false);
// We fill from here only stuff that cannot be created with the SimpleFlatTnameableProducer
muonTable->addColumn<int>("jetIdx", jetIdx_mu, "Index of the parent jet");
muonTable->addColumn<int>(idx_nameMu_, muIdx, "Index in the Muon list");
if (readBtag_) {
muonTable->addColumn<float>("pt", muon_pt, "pt", 20);
muonTable->addColumn<float>("eta", muon_eta, "eta", 20);
muonTable->addColumn<float>("phi", muon_phi, "phi", 20);
muonTable->addColumn<double>("isGlobal", muon_isGlobal, "isGlobal", 20);
muonTable->addColumn<float>("ptrel", muon_ptrel, "pt relative to parent jet", 20);
muonTable->addColumn<int>("nMuHit", muon_nMuHit, "number of muon hits", 20);
muonTable->addColumn<int>("nMatched", muon_nMatched, "number of matched stations", 20);
muonTable->addColumn<int>("nTkHit", muon_nTkHit, "number of tracker hits", 20);
muonTable->addColumn<int>("nPixHit", muon_nPixHit, "number of pixel hits", 20);
muonTable->addColumn<int>("nOutHit", muon_nOutHit, "number of missing outer hits", 20);
muonTable->addColumn<double>("chi2", muon_chi2, "chi2", 20);
muonTable->addColumn<double>("chi2Tk", muon_chi2Tk, "chi2 inner track", 20);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Implementation itself, technical details & storage]
The observables of type float or double have precision of 20, while the other variables in JetConstituentTableProducer all use 10. Was there a specific reason to effectively double the precision? Maybe we should stick to one level of precision for jet-constituent-related features.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[For my own education, but maybe to avoid we might be missing something here]
How were the stored Muon variables chosen, related to e.g. https://github.com/cms-btv-pog/RecoBTag-PerformanceMeasurements/blob/12_2_X/interface/JetInfoBranches.h#L344-L369?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For what concerns the precision, I tried with 10 but I obtained a segmentation violation error.
About the variables stored, I followed this list according to Soureek instructions https://github.com/cms-btv-pog/RecoBTag-PerformanceMeasurements/blob/10_6_X_UL2016_PreliminaryJECs/python/varGroups_cfi.py#L1738-L1890

}
iEvent.put(std::move(muonTable), nameMu_);
}

template< typename T>
void JetConstituentTableProducer<T>::fillDescriptions(edm::ConfigurationDescriptions &descriptions) {
edm::ParameterSetDescription desc;
desc.add<std::string>("name", "JetPFCands");
desc.add<std::string>("nameSV", "JetSV");
desc.add<std::string>("nameMu", "JetMuons");
desc.add<std::string>("idx_name", "candIdx");
desc.add<std::string>("idx_nameSV", "svIdx");
desc.add<std::string>("idx_nameMu", "muIdx");
desc.add<double>("jet_radius", true);
desc.add<bool>("readBtag", true);
desc.add<edm::InputTag>("jets", edm::InputTag("slimmedJetsAK8"));
desc.add<edm::InputTag>("vertices", edm::InputTag("offlineSlimmedPrimaryVertices"));
desc.add<edm::InputTag>("candidates", edm::InputTag("packedPFCandidates"));
desc.add<edm::InputTag>("secondary_vertices", edm::InputTag("slimmedSecondaryVertices"));
descriptions.addWithDefaultLabel(desc);
desc.add<edm::InputTag>("muons", edm::InputTag("slimmedMuons"));
descriptions.addWithDefaultLabel(desc);
}

typedef JetConstituentTableProducer<pat::Jet> PatJetConstituentTableProducer;
Expand Down
87 changes: 87 additions & 0 deletions python/addBTV.py
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,91 @@ def add_BTV(process, runOnMC=False, onlyAK4=False, onlyAK8=False, keepInputs=['D
doc="DeepCSV light btag discriminator",
precision=10),
)

FatJetVars = cms.PSet(
uncorrpt=Var("?availableJECSets().size()>0?correctedJet('Uncorrected').pt():pt()",
float,
doc="Uncorrected pT",
precision=10),
residual=Var("?availableJECSets().size()>0 ? pt()/correctedJet('L3Absolute').pt() : 1. ",
float,
doc="residual",
precision=10),
jes=Var("?availableJECSets().size()>0 ? pt()/correctedJet('Uncorrected').pt() : 1.",
float,
doc="jes (jet substructure)",
precision=10),
CombIVF=Var("bDiscriminator('pfCombinedInclusiveSecondaryVertexV2BJetTags')",
float,
doc="combinedIVF",
precision=10),
DeepCSVBDisc=Var("bDiscriminator('pfDeepCSVJetTags:probb')+bDiscriminator('pfDeepCSVJetTags:probbb')",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Implementation itself, technical details & storage]
For the b discriminator (the sum of the individual b, bb related outputs) something similar to the CvsL/CvsB discriminator approach could be done to avoid getting irregular -2 bins: https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/jetsAK4_Puppi_cff.py#L83 -> also add the ternary operator to test whether the sum is >=0 and assign either that sum or -1 finally.

float,
doc="DeepCSVBDisc",
precision=10),
btagDeepB_c=Var("bDiscriminator('pfDeepCSVJetTags:probc')",
float,
doc="DeepCSV c tag discriminator",
precision=10),
DeepCSVCvsLDisc=Var("? (bDiscriminator('pfDeepCSVJetTags:probc'))!=-1 ? bDiscriminator('pfDeepCSVJetTags:probc')/(bDiscriminator('pfDeepCSVJetTags:probc')+bDiscriminator('pfDeepCSVJetTags:probudsg')) : -1",
float,
doc="DeepCSV C vs L discriminator",
precision=10),
DeepCSVCvsBDisc=Var("? (bDiscriminator('pfDeepCSVJetTags:probc'))!=-1 ? bDiscriminator('pfDeepCSVJetTags:probc')/(bDiscriminator('pfDeepCSVJetTags:probc')+bDiscriminator('pfDeepCSVJetTags:probb')+bDiscriminator('pfDeepCSVJetTags:probbb')) : -1",
float,
doc="DeepCSV C vs B discriminator",
precision=10),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Implementation itself, technical details & storage]
We might want to synchronize this check for invalid values with the statement in NanoAOD, example here: https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/jetsAK4_Puppi_cff.py#L86-L87 —> switch from !=-1 to >=0. Although that should not make that big of a difference, probably it has some effects related to floating point precision.

#massSoftDrop=Var("? hasUserFloat('SoftDrop:Mass') ? userFloat('SoftDrop:Mass') : userFloat('ak8PFJetsPuppiSoftDropMass')",
# float,
# doc="mass SoftDrop",
# precision=10),
DoubleSV=Var("bDiscriminator('doubleSVBJetTags')",
float,
doc="DoubleSV discriminator",
precision=10),
)

SubJetVars = cms.PSet(
uncorrpt=Var("?availableJECSets().size()>0?correctedJet('Uncorrected').pt():pt()",
float,
doc="Uncorrected pT",
precision=10),
residual=Var("?availableJECSets().size()>0 ? pt()/correctedJet('L3Absolute').pt() : 1. ",
float,
doc="residual",
precision=10),
jes=Var("?availableJECSets().size()>0 ? pt()/correctedJet('Uncorrected').pt() : 1.",
float,
doc="jes (jet substructure)",
precision=10),
CombIVF=Var("bDiscriminator('pfCombinedInclusiveSecondaryVertexV2BJetTags')",
float,
doc="combinedIVF",
precision=10),
DeepCSVBDisc=Var("bDiscriminator('pfDeepCSVJetTags:probb')+bDiscriminator('pfDeepCSVJetTags:probbb')",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Implementation itself, technical details & storage]
Same comment as above

float,
doc="DeepCSVBDisc",
precision=10),
btagDeepB_c=Var("bDiscriminator('pfDeepCSVJetTags:probc')",
float,
doc="DeepCSV c tag discriminator",
precision=10),
DeepCSVCvsLDisc=Var("? (bDiscriminator('pfDeepCSVJetTags:probc'))!=-1 ? bDiscriminator('pfDeepCSVJetTags:probc')/(bDiscriminator('pfDeepCSVJetTags:probc')+bDiscriminator('pfDeepCSVJetTags:probudsg')) : -1",
float,
doc="DeepCSV C vs L discriminator",
precision=10),
DeepCSVCvsBDisc=Var("? (bDiscriminator('pfDeepCSVJetTags:probc'))!=-1 ? bDiscriminator('pfDeepCSVJetTags:probc')/(bDiscriminator('pfDeepCSVJetTags:probc')+bDiscriminator('pfDeepCSVJetTags:probb')+bDiscriminator('pfDeepCSVJetTags:probbb')) : -1",
float,
doc="DeepCSV C vs B discriminator",
precision=10),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Implementation itself, technical details & storage]
We might want to synchronize this check for invalid values with the statement in NanoAOD, example here: https://github.com/cms-sw/cmssw/blob/master/PhysicsTools/NanoAOD/python/jetsAK4_Puppi_cff.py#L86-L87 —> switch from !=-1 to >=0. Although that should not make that big of a difference, probably it has some effects related to floating point precision.

# massSoftDrop=Var("? hasUserFloat('SoftDrop:Mass') ? userFloat('SoftDrop:Mass') : userFloat('ak8PFJetsPuppiSoftDropMass')",
# float,
# doc="mass SoftDrop",
# precision=10),

)



# decouple these from CommonVars, not relevant for data
HadronCountingVars = cms.PSet(
Expand Down Expand Up @@ -348,6 +433,7 @@ def add_BTV(process, runOnMC=False, onlyAK4=False, onlyAK8=False, keepInputs=['D
extension=cms.bool(True), # this is the extension table for FatJets
variables=cms.PSet(
CommonVars,
FatJetVars,
#HadronCountingVars if runOnMC else cms.PSet(), # only necessary before 106x
get_DDX_vars() if ('DDX' in keepInputs) else cms.PSet(),
))
Expand All @@ -363,6 +449,7 @@ def add_BTV(process, runOnMC=False, onlyAK4=False, onlyAK8=False, keepInputs=['D
extension=cms.bool(True), # this is the extension table for FatJets
variables=cms.PSet(
CommonVars,
SubJetVars,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Style] Indentation level seems to vary for the newly added lines, compared to the surrounding statements

#HadronCountingVars if runOnMC else cms.PSet(), # only necessary before 106x
))

Expand Down
8 changes: 8 additions & 0 deletions python/addPFCands_cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ def addPFCands(process, runOnMC=False, allPF = False, onlyAK4=False, onlyAK8=Fal
idx_name = cms.string("pFCandsIdx"),
nameSV = cms.string("FatJetSVs"),
idx_nameSV = cms.string("sVIdx"),
nameMu = cms.string("FatJetMuons"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Style] Indentation level seems to vary for the newly added lines, compared to the surrounding statements
Especially here, this looked like an empty line and caused some confusion first

idx_nameMu = cms.string("MuIdx"),
)
process.customAK4ConstituentsTable = cms.EDProducer("PatJetConstituentTableProducer",
candidates = candInput,
Expand All @@ -71,6 +73,8 @@ def addPFCands(process, runOnMC=False, allPF = False, onlyAK4=False, onlyAK8=Fal
idx_name = cms.string("pFCandsIdx"),
nameSV = cms.string("JetSVs"),
idx_nameSV = cms.string("sVIdx"),
#nameMu = cms.string("JetMuons"),
#idx_nameMu = cms.string("MuIdx"),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[Style] Places with commented out code which could be removed for clarity
The default name JetMuons and Idx when adding the Muon table to AK4 jets could also be removed (currently commented out).

)
if not allPF:
process.customizedPFCandsTask.add(process.finalJetsConstituents)
Expand Down Expand Up @@ -122,6 +126,8 @@ def addPFCands(process, runOnMC=False, allPF = False, onlyAK4=False, onlyAK8=Fal
nameSV = cms.string("GenFatJetSVs"),
idx_name = cms.string("pFCandsIdx"),
idx_nameSV = cms.string("sVIdx"),
nameMu = cms.string("GenFatJetMuons"),
idx_nameMu = cms.string("MuIdx"),
readBtag = cms.bool(False))
process.genAK4ConstituentsTable = cms.EDProducer("GenJetConstituentTableProducer",
candidates = genCandInput,
Expand All @@ -130,6 +136,8 @@ def addPFCands(process, runOnMC=False, allPF = False, onlyAK4=False, onlyAK8=Fal
nameSV = cms.string("GenJetSVs"),
idx_name = cms.string("pFCandsIdx"),
idx_nameSV = cms.string("sVIdx"),
nameMu = cms.string("GenJetMuons"),
idx_nameMu = cms.string("MuIdx"),
readBtag = cms.bool(False))
process.customizedPFCandsTask.add(process.genJetsAK4Constituents) #Note: For gen need to add jets to the process to keep pt cuts.
process.customizedPFCandsTask.add(process.genJetsAK8Constituents)
Expand Down
4 changes: 2 additions & 2 deletions test/nano_mc_Run3_122X_NANO.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@
process.load('Configuration.StandardSequences.FrontierConditions_GlobalTag_cff')

process.maxEvents = cms.untracked.PSet(
input = cms.untracked.int32(-1),
input = cms.untracked.int32(1000),
output = cms.optional.untracked.allowed(cms.int32,cms.PSet)
)

# Input source
process.source = cms.Source("PoolSource",
fileNames = cms.untracked.vstring('/store/mc/Run3Winter22MiniAOD/TTTo2L2Nu_CP5_13p6TeV_powheg-pythia8/MINIAODSIM/122X_mcRun3_2021_realistic_v9-v2/2550000/0d44f6e9-6961-4d60-b2c1-0e21c1249100.root'),
fileNames = cms.untracked.vstring('/store/mc/Run3Winter22MiniAOD/ZprimeToTT_M3000_W30_TuneCP2_13p6TeV-madgraph-pythiaMLM-pythia8/MINIAODSIM/122X_mcRun3_2021_realistic_v9-v2/60000/0aca854d-fcdb-4523-834d-201c36e4a6ca.root'),
secondaryFileNames = cms.untracked.vstring()
)

Expand Down