diff --git a/Configuration/ProcessModifiers/python/trackingIters01_cff.py b/Configuration/ProcessModifiers/python/trackingIters01_cff.py new file mode 100644 index 0000000000000..9f7506d27b51c --- /dev/null +++ b/Configuration/ProcessModifiers/python/trackingIters01_cff.py @@ -0,0 +1,4 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier restricts iterative tracking to a minimal set of iterations: only the first two (InitialStep and HighPtTripletStep) +trackingIters01 = cms.Modifier() diff --git a/Configuration/ProcessModifiers/python/trackingLST_cff.py b/Configuration/ProcessModifiers/python/trackingLST_cff.py new file mode 100644 index 0000000000000..ae1dd83e20b0b --- /dev/null +++ b/Configuration/ProcessModifiers/python/trackingLST_cff.py @@ -0,0 +1,5 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier enables LST (Phase-2 line segment tracking) for track building +trackingLST = cms.Modifier() + diff --git a/Configuration/PyReleaseValidation/README.md b/Configuration/PyReleaseValidation/README.md index a3c4177c0fcb8..a114e81d7146c 100644 --- a/Configuration/PyReleaseValidation/README.md +++ b/Configuration/PyReleaseValidation/README.md @@ -65,6 +65,8 @@ The offsets currently in use are: * 0.7: trackingMkFit modifier * 0.701: DisplacedRegionalStep tracking iteration for Run-3 * 0.702: trackingMkFit modifier for Phase-2 (initialStep only) +* 0.703: LST tracking (Phase-2 only), initialStep+HighPtTripletStep only, on CPU +* 0.704: LST tracking (Phase-2 only), initialStep+HighPtTripletStep only, on GPU * 0.75: HLT phase-2 timing menu * 0.751: HLT phase-2 timing menu Alpaka variant * 0.752: HLT phase-2 timing menu ticl_v5 variant diff --git a/Configuration/PyReleaseValidation/python/relval_Run4.py b/Configuration/PyReleaseValidation/python/relval_Run4.py index 3b866ca55c000..95f75dcee7a6a 100644 --- a/Configuration/PyReleaseValidation/python/relval_Run4.py +++ b/Configuration/PyReleaseValidation/python/relval_Run4.py @@ -36,6 +36,9 @@ numWFIB.extend([31234.0]) #Run4D114 
numWFIB.extend([32034.0]) #Run4D115
+# Temporary placement for LST workflow to work around PR conflicts - to be formatted and placed in an upcoming PR
+numWFIB.extend([24834.703]) #Run4D98 LST tracking (initialStep+HighPtTripletStep only)
+
 #Additional sample for short matrix and IB
 #Default Phase-2 Det NoPU
 numWFIB.extend([prefixDet+34.911]) #DD4hep XML
diff --git a/Configuration/PyReleaseValidation/python/relval_gpu.py b/Configuration/PyReleaseValidation/python/relval_gpu.py
index f71ade3f646f1..e70d877c1daaa 100644
--- a/Configuration/PyReleaseValidation/python/relval_gpu.py
+++ b/Configuration/PyReleaseValidation/python/relval_gpu.py
@@ -72,6 +72,8 @@
     # Run4, Alpaka-based noPU
     29634.402, 29634.403, 29634.404, 29634.406, 29661.402,
+    # Run4, Alpaka-based noPU GPU LST tracking D98
+    24834.704,
     # Run4, Alpaka-based PU
     29834.402, 29834.403, 29834.404
diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
index fbfd6b28801aa..31543ebd3b380 100644
--- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
+++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py
@@ -490,6 +490,52 @@ def condition_(self, fragment, stepList, key, hasHarvest):
     '--procModifiers': 'trackingMkFitCommon,trackingMkFitInitialStep'
 }
+# LST on CPU, initialStep+highPtTripletStep-only tracking-only
+class UpgradeWorkflow_lstOnCPUIters01TrackingOnly(UpgradeWorkflowTracking):
+    """Run4-only tracking-only workflow using the trackingIters01 and trackingLST modifiers; also the base class of the GPU variant."""
+    def setup__(self, step, stepName, stepDict, k, properties):
+        if 'Reco' in step: stepDict[stepName][k] = merge([self.step3, stepDict[step][k]])  # combine this workflow's step3 settings with the existing step
+        elif 'HARVEST' in step: stepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@trackingOnlyDQM'}, stepDict[step][k]])
+        elif 'ALCA' in step: stepDict[stepName][k] = None  # ALCA steps are removed for this workflow
+    def condition_(self, fragment, stepList, key, hasHarvest):
+        return ('Run4' in key)
+upgradeWFs['lstOnCPUIters01TrackingOnly'] = UpgradeWorkflow_lstOnCPUIters01TrackingOnly(
+    steps = [
+        'RecoGlobal',
+        'HARVESTGlobal',
+        # Add ALCA steps explicitly, so that they can be properly removed
+        'ALCA',
+        'ALCAPhase2'
+    ],
+    PU = [],
+    suffix = '_lstOnCPUIters01TrackingOnly',
+    offset = 0.703,
+)
+upgradeWFs['lstOnCPUIters01TrackingOnly'].step3 = upgradeWFs['trackingOnly'].step3 | {
+    '--procModifiers': 'trackingIters01,trackingLST',
+    '--accelerators' : 'cpu'
+}
+
+# LST on GPU, initialStep+highPtTripletStep-only tracking-only
+class UpgradeWorkflow_lstOnGPUIters01TrackingOnly(UpgradeWorkflow_lstOnCPUIters01TrackingOnly):
+    """Same step handling and Run4-only condition as the CPU variant (inherited); only the step3 '--accelerators' value set below differs."""
+upgradeWFs['lstOnGPUIters01TrackingOnly'] = UpgradeWorkflow_lstOnGPUIters01TrackingOnly(
+    steps = [
+        'RecoGlobal',
+        'HARVESTGlobal',
+        # Add ALCA steps explicitly, so that they can be properly removed
+        'ALCA',
+        'ALCAPhase2'
+    ],
+    PU = [],
+    suffix = '_lstOnGPUIters01TrackingOnly',
+    offset = 0.704,
+)
+upgradeWFs['lstOnGPUIters01TrackingOnly'].step3 = upgradeWFs['trackingOnly'].step3 | {
+    '--procModifiers': 'trackingIters01,trackingLST',
+    '--accelerators' : 'gpu-*'
+}
+
 #DeepCore seeding for JetCore iteration workflow
 class UpgradeWorkflow_seedingDeepCore(UpgradeWorkflow):
     def setup_(self, step, stepName, stepDict, k, properties):
diff --git a/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py b/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py
index 256432c1180c8..6d44990855324 100644
--- a/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py
+++ b/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py
@@ -33,6 +33,16 
@@
     oldClusterRemovalInfo = 'detachedQuadStepClusters',
     overrideTrkQuals = 'detachedQuadStepSelector:detachedQuadStepTrk'
 ))
+from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01
+trackingIters01.toModify(convClusters,  # take inputs from highPtTripletStep, the last of the two iterations kept by trackingIters01
+    trajectories = "highPtTripletStepTracks",
+    oldClusterRemovalInfo = "highPtTripletStepClusters",
+    overrideTrkQuals = "highPtTripletStepSelector:highPtTripletStep"
+)
+from Configuration.ProcessModifiers.trackingLST_cff import trackingLST
+(trackingIters01 & trackingPhase2PU140 & trackingLST).toModify(convClusters,  # NOTE(review): presumably cleared because highPtTripletStepTracks becomes a merged LST collection in this setup - confirm
+    overrideTrkQuals = ""
+)
 _convLayerPairsStripOnlyLayers = ['TIB1+TID1_pos',
                                   'TIB1+TID1_neg',
diff --git a/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py b/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py
index 907e3126a5cd7..d5256c19a1756 100644
--- a/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py
+++ b/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py
@@ -17,6 +17,8 @@
     ttrhBuilderName = "WithAngleAndTemplate",
     chi2EstimatorName = "duplicateTrackCandidatesChi2Est"
 )
+from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01
+trackingIters01.toModify(duplicateTrackCandidates, source = "earlyGeneralTracks")  # with only two iterations, read tracks directly from earlyGeneralTracks
 import RecoTracker.TrackProducer.TrackProducer_cfi
 mergedDuplicateTracks = RecoTracker.TrackProducer.TrackProducer_cfi.TrackProducer.clone(
@@ -44,6 +46,10 @@
     candidateSource = "duplicateTrackCandidates:candidates",
     candidateComponents = "duplicateTrackCandidates:candidateMap"
 )
+trackingIters01.toModify(generalTracks,  # original tracks and their MVA values also come from earlyGeneralTracks
+    originalSource = "earlyGeneralTracks",
+    originalMVAVals = "earlyGeneralTracks:MVAValues"
+)
 generalTracksTask = cms.Task(
     duplicateTrackCandidates,
diff --git a/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py b/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py
index 525640861f3ea..d03744c8bdfe5 100644
--- a/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py
+++ b/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py
@@ -109,6 +109,16 @@ def _extend_displacedGeneral(x):
         makeReKeyedSeeds = cms.untracked.bool(False)
     )
 )
+from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01
+trackingIters01.toModify(earlyGeneralTracks,  # merge only the two iterations that are run
+    TrackProducers = ['initialStepTracks', 'highPtTripletStepTracks'],
+    hasSelector = [1,1],
+    indivShareFrac = [1,0.16],
+    selectedTrackQuals = ['initialStepSelector:initialStep',
+                          'highPtTripletStepSelector:highPtTripletStep'
+                          ],
+    setsToMerge = {0: dict(tLists = [0,1])}
+)
 from Configuration.ProcessModifiers.vectorHits_cff import vectorHits
 def _extend_pixelLess(x):
     x.TrackProducers += ['pixelLessStepTracks']
@@ -118,3 +128,13 @@ def _extend_pixelLess(x):
     x.setsToMerge[0].tLists += [6]
 (trackingPhase2PU140 & vectorHits).toModify(earlyGeneralTracks, _extend_pixelLess)
+from Configuration.ProcessModifiers.trackingLST_cff import trackingLST
+(trackingPhase2PU140 & trackingLST).toModify(earlyGeneralTracks,  # merge the two LST-based highPtTripletStep track collections
+    TrackProducers = ['highPtTripletStepLSTpTracks', 'highPtTripletStepLSTT5Tracks'],
+    hasSelector = [1,0],
+    indivShareFrac = [0.1,0.1],
+    selectedTrackQuals = ['highPtTripletStepSelector:highPtTripletStep',
+                          'highPtTripletStepSelectorLSTT5:highPtTripletStepLSTT5'
+                          ],
+    setsToMerge = {0: dict(tLists = [0,1])}
+)
diff --git a/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py b/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py
index 8f35832cc75d5..7c1dab22afd58 100644
--- a/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py
+++ b/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py
@@ -259,6 +259,10 @@
     phase2clustersToSkip = 'highPtTripletStepClusters'
 )
+from Configuration.ProcessModifiers.trackingLST_cff import trackingLST
+from RecoTracker.LST.lstOutputConverter_cfi import lstOutputConverter as _lstOutputConverter
+(trackingPhase2PU140 & trackingLST).toReplaceWith(highPtTripletStepTrackCandidates, _lstOutputConverter.clone())  # track candidates now come from the LST output converter
+
 #For FastSim phase1 tracking
 import FastSimulation.Tracking.TrackCandidateProducer_cfi
 _fastSim_highPtTripletStepTrackCandidates = FastSimulation.Tracking.TrackCandidateProducer_cfi.trackCandidateProducer.clone(
@@ -280,6 +284,25 @@
 from Configuration.Eras.Modifier_phase2_timing_layer_cff import phase2_timing_layer
 phase2_timing_layer.toModify(highPtTripletStepTracks, TrajectoryInEvent = True)
+highPtTripletStepLSTpTracks = highPtTripletStepTracks.clone(  # tracks from the non-T5 (pixel-seeded) LST candidates
+    src = 'highPtTripletStepTrackCandidates:pTCsLST'
+)
+highPtTripletStepLSTT5Tracks = highPtTripletStepTracks.clone(  # tracks from the T5 LST candidates
+    src = 'highPtTripletStepTrackCandidates:t5TCsLST'
+)
+_highPtTripletStepTracks_LST = RecoTracker.FinalTrackSelectors.trackListMerger_cfi.trackListMerger.clone(  # merger of the two LST track collections; replaces highPtTripletStepTracks below
+    TrackProducers = ['highPtTripletStepLSTpTracks',
+                      'highPtTripletStepLSTT5Tracks'],
+    hasSelector = [1,0],
+    indivShareFrac = [0.1,0.1],
+    selectedTrackQuals = ['highPtTripletStepSelector:highPtTripletStep',
+                          'highPtTripletStepSelectorLSTT5:highPtTripletStepLSTT5'],
+    copyExtras = True,
+    copyMVA = False,
+    setsToMerge = [cms.PSet( tLists=cms.vint32(0,1), pQual=cms.bool(True) )]
+)
+(trackingPhase2PU140 & trackingLST).toReplaceWith(highPtTripletStepTracks, _highPtTripletStepTracks_LST)
+
 # Final selection
 from RecoTracker.FinalTrackSelectors.TrackMVAClassifierPrompt_cfi import *
 highPtTripletStep = TrackMVAClassifierPrompt.clone(
@@ -357,6 +380,28 @@
 from Configuration.ProcessModifiers.vectorHits_cff import vectorHits
 vectorHits.toModify(highPtTripletStepSelector.trackSelectors[2], minNumberLayers = 3, minNumber3DLayers = 3, d0_par1 = ( 0.5, 4.0 ), dz_par1 = ( 0.6, 4.0 ))
+(trackingPhase2PU140 & trackingLST).toModify(highPtTripletStepSelector, src = 'highPtTripletStepLSTpTracks')  # the regular selector now runs on the pixel-seeded LST tracks
+# Passthrough selector to satisfy the TrackListMerger requirement for selector values
+highPtTripletStepSelectorLSTT5 = RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.multiTrackSelector.clone(
+    src = 'highPtTripletStepLSTT5Tracks',
+    trackSelectors = [
+        RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.looseMTS.clone(
+            name = 'highPtTripletStepLSTT5Loose',
+            minHitsToBypassChecks = 0
+        ), #end of pset
+        RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.tightMTS.clone(
+            name = 'highPtTripletStepLSTT5Tight',
+            preFilterName = 'highPtTripletStepLSTT5Loose',
+            minHitsToBypassChecks = 0
+        ),
+        RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.highpurityMTS.clone(
+            name = 'highPtTripletStepLSTT5',
+            preFilterName = 'highPtTripletStepLSTT5Tight',
+            minHitsToBypassChecks = 0
+        ),
+    ] #end of vpset
+) #end of clone
+
 # Final sequence
 HighPtTripletStepTask = cms.Task(highPtTripletStepClusters,
                                  highPtTripletStepSeedLayers,
@@ -378,6 +423,17 @@
 _HighPtTripletStep_Phase2PU140 = cms.Sequence(_HighPtTripletStepTask_Phase2PU140)
 trackingPhase2PU140.toReplaceWith(HighPtTripletStepTask, _HighPtTripletStepTask_Phase2PU140)
+_HighPtTripletStepTask_LST = HighPtTripletStepTask.copy()  # start from the task as configured above, then add the LST modules
+from RecoLocalTracker.Phase2TrackerRecHits.Phase2TrackerRecHits_cfi import siPhase2RecHits
+from RecoTracker.LST.lstSeedTracks_cff import lstInitialStepSeedTracks,lstHighPtTripletStepSeedTracks
+from RecoTracker.LST.lstPixelSeedInputProducer_cfi import lstPixelSeedInputProducer
+from RecoTracker.LST.lstPhase2OTHitsInputProducer_cfi import lstPhase2OTHitsInputProducer
+from RecoTracker.LST.lstProducerTask_cff import *
+
+_HighPtTripletStepTask_LST.add(siPhase2RecHits, lstInitialStepSeedTracks, lstHighPtTripletStepSeedTracks, lstPixelSeedInputProducer, lstPhase2OTHitsInputProducer,
+                               lstProducerTask, highPtTripletStepLSTpTracks, highPtTripletStepLSTT5Tracks, highPtTripletStepSelectorLSTT5)
+(trackingPhase2PU140 & trackingLST).toReplaceWith(HighPtTripletStepTask, _HighPtTripletStepTask_LST)
+
 # fast tracking mask producer
 from FastSimulation.Tracking.FastTrackerRecHitMaskProducer_cfi import maskProducerFromClusterRemover
 highPtTripletStepMasks = maskProducerFromClusterRemover(highPtTripletStepClusters)
diff --git a/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py b/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py
index 84d87c18c883a..40a1161b15f13 100644
--- a/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py
+++ b/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py
@@ -14,6 +14,9 @@
 for _eraName, _postfix, _era in _cfg.nonDefaultEras():
     _era.toReplaceWith(lowPtQuadStepClusters, _cfg.clusterRemoverForIter('LowPtQuadStep', _eraName, _postfix))
+from Configuration.ProcessModifiers.trackingLST_cff import trackingLST
+# with LST, this is the first iteration with proper cluster masking, so there is no earlier removal info to chain to
+trackingLST.toModify(lowPtQuadStepClusters, oldClusterRemovalInfo = "")
 # SEEDING LAYERS
 import RecoTracker.TkSeedingLayers.PixelLayerQuadruplets_cfi
diff --git a/RecoTracker/IterativeTracking/python/iterativeTkConfig.py b/RecoTracker/IterativeTracking/python/iterativeTkConfig.py
index d409cae8d3340..8d9fd5fb45824 100644
--- a/RecoTracker/IterativeTracking/python/iterativeTkConfig.py
+++ b/RecoTracker/IterativeTracking/python/iterativeTkConfig.py
@@ -53,16 +53,20 @@
 _iterations_trackingPhase1.append('JetCoreRegionalStep')
-_iterations_trackingPhase2PU140 = [
+_iterations_trackingPhase2PU140_VS = cms.PSet(names = cms.vstring(  # held in a PSet so that process modifiers can act on the list via toModify
     "InitialStep",
     "HighPtTripletStep",
     "LowPtQuadStep",
     "LowPtTripletStep",
     "DetachedQuadStep",
     "PixelPairStep",
-]
+))
 from Configuration.ProcessModifiers.vectorHits_cff import vectorHits
-vectorHits.toModify(_iterations_trackingPhase2PU140, func=lambda x: x.append('PixelLessStep'))
+vectorHits.toModify(_iterations_trackingPhase2PU140_VS.names, func=lambda x: x.append('PixelLessStep'))
+from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01
+trackingIters01.toModify(_iterations_trackingPhase2PU140_VS, names = ["InitialStep", "HighPtTripletStep"])  # keep only the first two iterations
+# apply all procModifiers before this point: the statement that follows takes the value of the names list
+_iterations_trackingPhase2PU140 = _iterations_trackingPhase2PU140_VS.names.value() from Configuration.ProcessModifiers.jetCoreInPhase2_cff import jetCoreInPhase2 jetCoreInPhase2.toModify(_iterations_trackingPhase2PU140, func=lambda x: x.append('JetCoreRegionalStep')) @@ -76,10 +80,13 @@ "MuonSeededStepOutIn", ] #Phase2 -_iterations_muonSeeded_trackingPhase2PU140 = [ +_iterations_muonSeeded_trackingPhase2PU140_VS = cms.PSet(names = cms.vstring( "MuonSeededStepInOut", "MuonSeededStepOutIn", -] +)) +trackingIters01.toModify(_iterations_muonSeeded_trackingPhase2PU140_VS, names = []) +_iterations_muonSeeded_trackingPhase2PU140 = _iterations_muonSeeded_trackingPhase2PU140_VS.names.value() + _multipleSeedProducers = { "MixedTripletStep": ["A", "B"], "TobTecStep": ["Pair", "Tripl"], diff --git a/RecoTracker/LST/BuildFile.xml b/RecoTracker/LST/BuildFile.xml new file mode 100644 index 0000000000000..07a6ae1d26eaf --- /dev/null +++ b/RecoTracker/LST/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/RecoTracker/LST/interface/LSTOutput.h b/RecoTracker/LST/interface/LSTOutput.h new file mode 100644 index 0000000000000..f50000b83cf21 --- /dev/null +++ b/RecoTracker/LST/interface/LSTOutput.h @@ -0,0 +1,40 @@ +#ifndef RecoTracker_LST_interface_LSTOutput_h +#define RecoTracker_LST_interface_LSTOutput_h + +#include +#include + +#include "RecoTracker/LSTCore/interface/Common.h" + +class LSTOutput { +public: + LSTOutput() = default; + LSTOutput(std::vector> const hitIdx, + std::vector const len, + std::vector const seedIdx, + std::vector const trackCandidateType) + : hitIdx_(std::move(hitIdx)), + len_(std::move(len)), + seedIdx_(std::move(seedIdx)), + trackCandidateType_(std::move(trackCandidateType)) {} + + using LSTTCType = lst::LSTObjType; + + // Hit indices of each of the LST track candidates. + std::vector> const& hitIdx() const { return hitIdx_; } + // Number of hits of each of the LST track candidates. 
+ std::vector const& len() const { return len_; } + // Index of the pixel track associated to each of the LST track candidates. + // If not associated to a pixel track, which is the case for T5s, it defaults to -1. + std::vector const& seedIdx() const { return seedIdx_; } + // LSTTCType from RecoTracker/LSTCore/interface/Common.h + std::vector const& trackCandidateType() const { return trackCandidateType_; } + +private: + std::vector> hitIdx_; + std::vector len_; + std::vector seedIdx_; + std::vector trackCandidateType_; +}; + +#endif diff --git a/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h new file mode 100644 index 0000000000000..00fd77846c4c3 --- /dev/null +++ b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h @@ -0,0 +1,33 @@ +#ifndef RecoTracker_LST_interface_LSTPhase2OTHitsInput_h +#define RecoTracker_LST_interface_LSTPhase2OTHitsInput_h + +#include +#include + +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" + +class LSTPhase2OTHitsInput { +public: + LSTPhase2OTHitsInput() = default; + LSTPhase2OTHitsInput(std::vector const detId, + std::vector const x, + std::vector const y, + std::vector const z, + std::vector const hits) + : detId_(std::move(detId)), x_(std::move(x)), y_(std::move(y)), z_(std::move(z)), hits_(std::move(hits)) {} + + std::vector const& detId() const { return detId_; } + std::vector const& x() const { return x_; } + std::vector const& y() const { return y_; } + std::vector const& z() const { return z_; } + std::vector const& hits() const { return hits_; } + +private: + std::vector detId_; + std::vector x_; + std::vector y_; + std::vector z_; + std::vector hits_; +}; + +#endif diff --git a/RecoTracker/LST/interface/LSTPixelSeedInput.h b/RecoTracker/LST/interface/LSTPixelSeedInput.h new file mode 100644 index 0000000000000..18d3768b2e0fc --- /dev/null +++ b/RecoTracker/LST/interface/LSTPixelSeedInput.h @@ -0,0 +1,75 @@ +#ifndef 
RecoTracker_LST_interface_LSTPixelSeedInput_h +#define RecoTracker_LST_interface_LSTPixelSeedInput_h + +#include +#include + +class LSTPixelSeedInput { +public: + LSTPixelSeedInput() = default; + LSTPixelSeedInput(std::vector const px, + std::vector const py, + std::vector const pz, + std::vector const dxy, + std::vector const dz, + std::vector const ptErr, + std::vector const etaErr, + std::vector const stateTrajGlbX, + std::vector const stateTrajGlbY, + std::vector const stateTrajGlbZ, + std::vector const stateTrajGlbPx, + std::vector const stateTrajGlbPy, + std::vector const stateTrajGlbPz, + std::vector const q, + std::vector> const hitIdx) + : px_(std::move(px)), + py_(std::move(py)), + pz_(std::move(pz)), + dxy_(std::move(dxy)), + dz_(std::move(dz)), + ptErr_(std::move(ptErr)), + etaErr_(std::move(etaErr)), + stateTrajGlbX_(std::move(stateTrajGlbX)), + stateTrajGlbY_(std::move(stateTrajGlbY)), + stateTrajGlbZ_(std::move(stateTrajGlbZ)), + stateTrajGlbPx_(std::move(stateTrajGlbPx)), + stateTrajGlbPy_(std::move(stateTrajGlbPy)), + stateTrajGlbPz_(std::move(stateTrajGlbPz)), + q_(std::move(q)), + hitIdx_(std::move(hitIdx)) {} + + std::vector const& px() const { return px_; } + std::vector const& py() const { return py_; } + std::vector const& pz() const { return pz_; } + std::vector const& dxy() const { return dxy_; } + std::vector const& dz() const { return dz_; } + std::vector const& ptErr() const { return ptErr_; } + std::vector const& etaErr() const { return etaErr_; } + std::vector const& stateTrajGlbX() const { return stateTrajGlbX_; } + std::vector const& stateTrajGlbY() const { return stateTrajGlbY_; } + std::vector const& stateTrajGlbZ() const { return stateTrajGlbZ_; } + std::vector const& stateTrajGlbPx() const { return stateTrajGlbPx_; } + std::vector const& stateTrajGlbPy() const { return stateTrajGlbPy_; } + std::vector const& stateTrajGlbPz() const { return stateTrajGlbPz_; } + std::vector const& q() const { return q_; } + std::vector> const& 
hitIdx() const { return hitIdx_; } + +private: + std::vector px_; + std::vector py_; + std::vector pz_; + std::vector dxy_; + std::vector dz_; + std::vector ptErr_; + std::vector etaErr_; + std::vector stateTrajGlbX_; + std::vector stateTrajGlbY_; + std::vector stateTrajGlbZ_; + std::vector stateTrajGlbPx_; + std::vector stateTrajGlbPy_; + std::vector stateTrajGlbPz_; + std::vector q_; + std::vector> hitIdx_; +}; + +#endif diff --git a/RecoTracker/LST/plugins/BuildFile.xml b/RecoTracker/LST/plugins/BuildFile.xml new file mode 100644 index 0000000000000..49e9ee77f5a3b --- /dev/null +++ b/RecoTracker/LST/plugins/BuildFile.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/RecoTracker/LST/plugins/LSTOutputConverter.cc b/RecoTracker/LST/plugins/LSTOutputConverter.cc new file mode 100644 index 0000000000000..0bbdd68051b87 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTOutputConverter.cc @@ -0,0 +1,273 @@ +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrackCandidate/interface/TrackCandidateCollection.h" +#include "DataFormats/TrackReco/interface/SeedStopInfo.h" +#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "Geometry/CommonDetUnit/interface/GeomDet.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include 
"RecoTracker/LST/interface/LSTOutput.h" +#include "RecoTracker/TkSeedingLayers/interface/SeedingHitSet.h" + +#include "RecoTracker/TkSeedGenerator/interface/SeedCreator.h" +#include "RecoTracker/TkSeedGenerator/interface/SeedCreatorFactory.h" + +#include "RecoTracker/TkTrackingRegions/interface/GlobalTrackingRegion.h" +#include "TrackingTools/GeomPropagators/interface/Propagator.h" +#include "TrackingTools/Records/interface/TrackingComponentsRecord.h" +#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" + +class LSTOutputConverter : public edm::stream::EDProducer<> { +public: + explicit LSTOutputConverter(edm::ParameterSet const& iConfig); + ~LSTOutputConverter() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + const edm::EDGetTokenT lstOutputToken_; + const edm::EDGetTokenT lstPhase2OTHitsInputToken_; + const edm::EDGetTokenT lstPixelSeedToken_; + const bool includeT5s_; + const bool includeNonpLSTSs_; + const edm::ESGetToken mfToken_; + const edm::ESGetToken propagatorAlongToken_; + const edm::ESGetToken propagatorOppositeToken_; + const edm::ESGetToken tGeomToken_; + std::unique_ptr seedCreator_; + const edm::EDPutTokenT trajectorySeedPutToken_; + const edm::EDPutTokenT trajectorySeedpLSPutToken_; + const edm::EDPutTokenT trackCandidatePutToken_; + const edm::EDPutTokenT trackCandidatepTCPutToken_; + const edm::EDPutTokenT trackCandidateT5TCPutToken_; + const edm::EDPutTokenT trackCandidateNopLSTCPutToken_; + const edm::EDPutTokenT trackCandidatepTTCPutToken_; + const edm::EDPutTokenT trackCandidatepLSTCPutToken_; + const edm::EDPutTokenT> seedStopInfoPutToken_; +}; + +LSTOutputConverter::LSTOutputConverter(edm::ParameterSet const& iConfig) + : lstOutputToken_(consumes(iConfig.getParameter("lstOutput"))), + lstPhase2OTHitsInputToken_{consumes(iConfig.getParameter("phase2OTHits"))}, + 
lstPixelSeedToken_{consumes(iConfig.getParameter("lstPixelSeeds"))}, + includeT5s_(iConfig.getParameter("includeT5s")), + includeNonpLSTSs_(iConfig.getParameter("includeNonpLSTSs")), + mfToken_(esConsumes()), + propagatorAlongToken_{esConsumes(iConfig.getParameter("propagatorAlong"))}, + propagatorOppositeToken_{esConsumes(iConfig.getParameter("propagatorOpposite"))}, + tGeomToken_(esConsumes()), + seedCreator_(SeedCreatorFactory::get()->create("SeedFromConsecutiveHitsCreator", + iConfig.getParameter("SeedCreatorPSet"), + consumesCollector())), + // FIXME: need to make creation configurable: + // - A toggle to not produce TSs at all could be useful to save memory; + // it won't affect speed though + // - The minimal set for TCs is t5TCsLST, pTTCsLST and pLSTCsLST. + // That would complicate the handling of collections though, + // so it is deferred to when we have a clearer picture of what's needed. + trajectorySeedPutToken_(produces("")), + trajectorySeedpLSPutToken_(produces("pLSTSsLST")), + trackCandidatePutToken_(produces("")), + trackCandidatepTCPutToken_(produces("pTCsLST")), + trackCandidateT5TCPutToken_(produces("t5TCsLST")), + trackCandidateNopLSTCPutToken_(produces("nopLSTCsLST")), + trackCandidatepTTCPutToken_(produces("pTTCsLST")), + trackCandidatepLSTCPutToken_(produces("pLSTCsLST")), + seedStopInfoPutToken_(produces()) {} + +void LSTOutputConverter::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("lstOutput", edm::InputTag("lstProducer")); + desc.add("phase2OTHits", edm::InputTag("lstPhase2OTHitsInputProducer")); + desc.add("lstPixelSeeds", edm::InputTag("lstPixelSeedInputProducer")); + desc.add("includeT5s", true); + desc.add("includeNonpLSTSs", false); + desc.add("propagatorAlong", edm::ESInputTag{"", "PropagatorWithMaterial"}); + desc.add("propagatorOpposite", edm::ESInputTag{"", "PropagatorWithMaterialOpposite"}); + + edm::ParameterSetDescription psd0; + psd0.add("ComponentName", 
std::string("SeedFromConsecutiveHitsCreator")); + psd0.add("propagator", std::string("PropagatorWithMaterial")); + psd0.add("SeedMomentumForBOFF", 5.0); + psd0.add("OriginTransverseErrorMultiplier", 1.0); + psd0.add("MinOneOverPtError", 1.0); + psd0.add("magneticField", std::string("")); + psd0.add("TTRHBuilder", std::string("WithTrackAngle")); + psd0.add("forceKinematicWithRegionDirection", false); + desc.add("SeedCreatorPSet", psd0); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTOutputConverter::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // Setup + auto const& lstOutput = iEvent.get(lstOutputToken_); + auto const& phase2OTRecHits = iEvent.get(lstPhase2OTHitsInputToken_); + auto const& pixelSeeds = iEvent.get(lstPixelSeedToken_); + auto const& mf = iSetup.getData(mfToken_); + auto const& propAlo = iSetup.getData(propagatorAlongToken_); + auto const& propOppo = iSetup.getData(propagatorOppositeToken_); + auto const& tracker = iSetup.getData(tGeomToken_); + + // Vector definitions + std::vector> const& lstTC_hitIdx = lstOutput.hitIdx(); + std::vector const& lstTC_len = lstOutput.len(); + std::vector const& lstTC_seedIdx = lstOutput.seedIdx(); + std::vector const& lstTC_trackCandidateType = lstOutput.trackCandidateType(); + + TrajectorySeedCollection outputTS, outputpLSTS; + outputTS.reserve(lstTC_len.size()); + outputpLSTS.reserve(lstTC_len.size()); + TrackCandidateCollection outputTC, outputpTC, outputT5TC, outputNopLSTC, outputpTTC, outputpLSTC; + outputTC.reserve(lstTC_len.size()); + outputpTC.reserve(lstTC_len.size()); + outputT5TC.reserve(lstTC_len.size()); + outputNopLSTC.reserve(lstTC_len.size()); + outputpTTC.reserve(lstTC_len.size()); + outputpLSTC.reserve(lstTC_len.size()); + + auto const& OTHits = phase2OTRecHits.hits(); + + LogDebug("LSTOutputConverter") << "lstTC size " << lstTC_len.size(); + for (unsigned int i = 0; i < lstTC_len.size(); i++) { + LogDebug("LSTOutputConverter") << " cand " << i << " " << lstTC_len[i] << " 
" << lstTC_seedIdx[i]; + TrajectorySeed seed; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::T5) + seed = pixelSeeds[lstTC_seedIdx[i]]; + + edm::OwnVector recHits; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::T5) { + for (auto const& hit : seed.recHits()) + recHits.push_back(hit.clone()); + } + + unsigned int const nPixelHits = lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5 ? 0 : recHits.size(); + for (unsigned int j = nPixelHits; j < lstTC_hitIdx[i].size(); j++) + recHits.push_back(OTHits[lstTC_hitIdx[i][j]]->clone()); + + recHits.sort([](const auto& a, const auto& b) { + const auto asub = a.det()->subDetector(); + const auto bsub = b.det()->subDetector(); + if (GeomDetEnumerators::isInnerTracker(asub) && GeomDetEnumerators::isOuterTracker(bsub)) { + return true; + } else if (GeomDetEnumerators::isOuterTracker(asub) && GeomDetEnumerators::isInnerTracker(bsub)) { + return false; + } else if (asub != bsub) { + return asub < bsub; + } else { + const auto& apos = a.surface(); + const auto& bpos = b.surface(); + if (GeomDetEnumerators::isBarrel(asub)) { + return apos->rSpan().first < bpos->rSpan().first; + } else { + return std::abs(apos->zSpan().first) < std::abs(bpos->zSpan().first); + } + } + }); + + TrajectorySeedCollection seeds; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::pLS) { + // Construct a full-length TrajectorySeed always for T5s, + // only when required by a flag for other pT objects. 
+ if (includeNonpLSTSs_ || lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + using Hit = SeedingHitSet::ConstRecHitPointer; + std::vector hitsForSeed; + hitsForSeed.reserve(lstTC_len[i]); + int nHits = 0; + for (auto const& hit : recHits) { + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + auto hType = tracker.getDetectorType(hit.geographicalId()); + if (hType != TrackerGeometry::ModuleType::Ph2PSP && nHits < 2) + continue; // the first two should be P + } + hitsForSeed.emplace_back(dynamic_cast(&hit)); + nHits++; + } + + seedCreator_->init(GlobalTrackingRegion(), iSetup, nullptr); + seedCreator_->makeSeed(seeds, hitsForSeed); + if (seeds.empty()) { + edm::LogInfo("LSTOutputConverter") + << "failed to convert a LST object to a seed" << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i]; + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) + continue; + } + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) + seed = seeds[0]; + + auto trajectorySeed = (seeds.empty() ? 
seed : seeds[0]); + outputTS.emplace_back(trajectorySeed); + auto const& ss = trajectorySeed.startingState(); + LogDebug("LSTOutputConverter") << "Created a seed with " << seed.nHits() << " " << ss.detId() << " " << ss.pt() + << " " << ss.parameters().vector() << " " << ss.error(0); + } + } else { + outputTS.emplace_back(seed); + outputpLSTS.emplace_back(seed); + } + + TrajectoryStateOnSurface tsos = + trajectoryStateTransform::transientState(seed.startingState(), (seed.recHits().end() - 1)->surface(), &mf); + tsos.rescaleError(100.); + auto tsosPair = propOppo.propagateWithPath(tsos, *recHits[0].surface()); + if (!tsosPair.first.isValid()) { + LogDebug("LSTOutputConverter") << "Propagating to startingState opposite to momentum failed, trying along next"; + tsosPair = propAlo.propagateWithPath(tsos, *recHits[0].surface()); + } + if (tsosPair.first.isValid()) { + PTrajectoryStateOnDet st = + trajectoryStateTransform::persistentState(tsosPair.first, recHits[0].det()->geographicalId().rawId()); + + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + if (!includeT5s_) { + continue; + } else { + auto tc = TrackCandidate(recHits, seed, st); + outputTC.emplace_back(tc); + outputT5TC.emplace_back(tc); + outputNopLSTC.emplace_back(tc); + } + } else { + auto tc = TrackCandidate(recHits, seed, st); + outputTC.emplace_back(tc); + outputpTC.emplace_back(tc); + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::pLS) { + outputNopLSTC.emplace_back(tc); + outputpTTC.emplace_back(tc); + } else { + outputpLSTC.emplace_back(tc); + } + } + } else { + edm::LogInfo("LSTOutputConverter") << "Failed to make a candidate initial state. 
Seed state is " << tsos + << " TC cand " << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i] + << " first hit " << recHits.front().globalPosition() << " last hit " + << recHits.back().globalPosition(); + } + } + + LogDebug("LSTOutputConverter") << "done with conversion: Track candidate output size = " << outputpTC.size() + << " (p* objects) + " << outputT5TC.size() << " (T5 objects)"; + iEvent.emplace(trajectorySeedPutToken_, std::move(outputTS)); + iEvent.emplace(trajectorySeedpLSPutToken_, std::move(outputpLSTS)); + iEvent.emplace(trackCandidatePutToken_, std::move(outputTC)); + iEvent.emplace(trackCandidatepTCPutToken_, std::move(outputpTC)); + iEvent.emplace(trackCandidateT5TCPutToken_, std::move(outputT5TC)); + iEvent.emplace(trackCandidateNopLSTCPutToken_, std::move(outputNopLSTC)); + iEvent.emplace(trackCandidatepTTCPutToken_, std::move(outputpTTC)); + iEvent.emplace(trackCandidatepLSTCPutToken_, std::move(outputpLSTC)); + iEvent.emplace(seedStopInfoPutToken_, 0U); //dummy stop info +} + +DEFINE_FWK_MODULE(LSTOutputConverter); diff --git a/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc b/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc new file mode 100644 index 0000000000000..a0fcc72f598b6 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc @@ -0,0 +1,67 @@ +#include "FWCore/Framework/interface/global/EDProducer.h" + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" + +class LSTPhase2OTHitsInputProducer : public edm::global::EDProducer<> { +public: + explicit LSTPhase2OTHitsInputProducer(edm::ParameterSet const& iConfig); + ~LSTPhase2OTHitsInputProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) 
const override; + + const edm::EDGetTokenT phase2OTRecHitToken_; /* input: collection selected by the "phase2OTRecHits" parameter */ + const edm::EDPutTokenT lstPhase2OTHitsInputPutToken_; /* output: used in produce() to emplace the LSTPhase2OTHitsInput */ +}; + +/* Constructor: registers the consumed collection named by "phase2OTRecHits" and the produced output. */ LSTPhase2OTHitsInputProducer::LSTPhase2OTHitsInputProducer(edm::ParameterSet const& iConfig) + : phase2OTRecHitToken_(consumes(iConfig.getParameter("phase2OTRecHits"))), + lstPhase2OTHitsInputPutToken_(produces()) {} + +/* Describes the single parameter "phase2OTRecHits", whose default is InputTag "siPhase2RecHits". */ void LSTPhase2OTHitsInputProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("phase2OTRecHits", edm::InputTag("siPhase2RecHits")); + + descriptions.addWithDefaultLabel(desc); +} + +/* Flattens every hit of the input into parallel vectors (raw detId, global x/y/z, hit address) and emplaces an LSTPhase2OTHitsInput built from them; iID and iSetup are not used. */ void LSTPhase2OTHitsInputProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // Setup + auto const& phase2OTHits = iEvent.get(phase2OTRecHitToken_); + + // Vector definitions + /* every vector is reserved with dataSize(); presumably the total hit count over all detector sets - TODO confirm */ std::vector ph2_detId; + ph2_detId.reserve(phase2OTHits.dataSize()); + std::vector ph2_x; + ph2_x.reserve(phase2OTHits.dataSize()); + std::vector ph2_y; + ph2_y.reserve(phase2OTHits.dataSize()); + std::vector ph2_z; + ph2_z.reserve(phase2OTHits.dataSize()); + std::vector ph2_hits; + ph2_hits.reserve(phase2OTHits.dataSize()); + + /* outer loop: one entry per detId; inner loop: the hits of that entry, so all five vectors stay index-aligned */ for (auto const& it : phase2OTHits) { + const DetId hitId = it.detId(); + for (auto const& hit : it) { + ph2_detId.push_back(hitId.rawId()); + ph2_x.push_back(hit.globalPosition().x()); + ph2_y.push_back(hit.globalPosition().y()); + ph2_z.push_back(hit.globalPosition().z()); + ph2_hits.push_back(&hit); /* address of the hit inside the input collection, not a copy */ + } + } + + LSTPhase2OTHitsInput phase2OTHitsInput( + std::move(ph2_detId), std::move(ph2_x), std::move(ph2_y), std::move(ph2_z), std::move(ph2_hits)); + iEvent.emplace(lstPhase2OTHitsInputPutToken_, std::move(phase2OTHitsInput)); +} + +DEFINE_FWK_MODULE(LSTPhase2OTHitsInputProducer); diff --git a/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc b/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc new file mode 100644 index 0000000000000..819baf78c6aa4 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc @@ 
-0,0 +1,171 @@ +#include "FWCore/Framework/interface/global/EDProducer.h" + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "FWCore/Utilities/interface/transform.h" + +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" + +#include "DataFormats/TrackerRecHit2D/interface/SiStripMatchedRecHit2DCollection.h" +#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" + +#include "Validation/RecoTrack/interface/trackFromSeedFitFailed.h" + +#include "TrackingTools/Records/interface/TransientRecHitRecord.h" +#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" +#include "TrackingTools/TransientTrackingRecHit/interface/TransientTrackingRecHitBuilder.h" + +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" + +class LSTPixelSeedInputProducer : public edm::global::EDProducer<> { +public: + explicit LSTPixelSeedInputProducer(edm::ParameterSet const& iConfig); + ~LSTPixelSeedInputProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken mfToken_; + const edm::EDGetTokenT beamSpotToken_; + std::vector>> seedTokens_; + const edm::EDPutTokenT lstPixelSeedInputPutToken_; + const edm::EDPutTokenT lstPixelSeedsPutToken_; +}; + +LSTPixelSeedInputProducer::LSTPixelSeedInputProducer(edm::ParameterSet const& iConfig) + : mfToken_(esConsumes()), + beamSpotToken_(consumes(iConfig.getParameter("beamSpot"))), + lstPixelSeedInputPutToken_(produces()), + lstPixelSeedsPutToken_(produces()) { + seedTokens_ = edm::vector_transform(iConfig.getParameter>("seedTracks"), + [&](const edm::InputTag& tag) { return consumes>(tag); }); +} + +void 
LSTPixelSeedInputProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + + desc.add>("seedTracks", + std::vector{edm::InputTag("lstInitialStepSeedTracks"), + edm::InputTag("lstHighPtTripletStepSeedTracks")}); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTPixelSeedInputProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // Setup + auto const& mf = iSetup.getData(mfToken_); + auto const& bs = iEvent.get(beamSpotToken_); + + // Vector definitions + std::vector see_px; + std::vector see_py; + std::vector see_pz; + std::vector see_dxy; + std::vector see_dz; + std::vector see_ptErr; + std::vector see_etaErr; + std::vector see_stateTrajGlbX; + std::vector see_stateTrajGlbY; + std::vector see_stateTrajGlbZ; + std::vector see_stateTrajGlbPx; + std::vector see_stateTrajGlbPy; + std::vector see_stateTrajGlbPz; + std::vector see_q; + std::vector> see_hitIdx; + TrajectorySeedCollection see_seeds; + + for (size_t iColl = 0; iColl < seedTokens_.size(); ++iColl) { + // Get seed tokens + auto const& seedToken = seedTokens_[iColl]; + auto const& seedTracks = iEvent.get(seedToken); + + if (seedTracks.empty()) + continue; + + // Get seed track refs + edm::RefToBaseVector seedTrackRefs; + for (edm::View::size_type i = 0; i < seedTracks.size(); ++i) { + seedTrackRefs.push_back(seedTracks.refAt(i)); + } + + edm::ProductID id = seedTracks[0].seedRef().id(); + + for (size_t iSeed = 0; iSeed < seedTrackRefs.size(); ++iSeed) { + auto const& seedTrackRef = seedTrackRefs[iSeed]; + auto const& seedTrack = *seedTrackRef; + auto const& seedRef = seedTrack.seedRef(); + auto const& seed = *seedRef; + + if (seedRef.id() != id) + throw cms::Exception("LogicError") + << "All tracks in 'TracksFromSeeds' collection should point to seeds in the same collection. 
Now the " + "element 0 had ProductID " + << id << " while the element " << seedTrackRef.key() << " had " << seedRef.id() << "."; /* report the seed collection's ProductID, i.e. the value that failed the comparison against id */ + + const bool seedFitOk = !trackFromSeedFitFailed(seedTrack); + + /* NOTE(review): end() - 1 assumes the seed carries at least one hit - confirm this is guaranteed upstream */ const TrackingRecHit* lastRecHit = &*(seed.recHits().end() - 1); + TrajectoryStateOnSurface tsos = + trajectoryStateTransform::transientState(seed.startingState(), lastRecHit->surface(), &mf); /* seed starting state expressed on the surface of the seed's last hit */ + auto const& stateGlobal = tsos.globalParameters(); + + /* collect one pixel-cluster key per seed hit; any hit outside PixelBarrel/PixelEndcap aborts with an exception */ std::vector hitIdx; + for (auto const& hit : seed.recHits()) { + int subid = hit.geographicalId().subdetId(); + if (subid == (int)PixelSubdetector::PixelBarrel || subid == (int)PixelSubdetector::PixelEndcap) { + const BaseTrackerRecHit* bhit = dynamic_cast(&hit); /* NOTE(review): dereferenced below without a null check; presumably every pixel-subdetector hit is a BaseTrackerRecHit - confirm */ + const auto& clusterRef = bhit->firstClusterRef(); + const auto clusterKey = clusterRef.cluster_pixel().key(); + hitIdx.push_back(clusterKey); + } else { + throw cms::Exception("LSTPixelSeedInputProducer") << "Not pixel hits found!"; + } + } + + // Fill output + see_px.push_back(seedFitOk ? seedTrack.px() : 0); /* all track-derived values are stored as 0 when the seed fit failed */ + see_py.push_back(seedFitOk ? seedTrack.py() : 0); + see_pz.push_back(seedFitOk ? seedTrack.pz() : 0); + see_dxy.push_back(seedFitOk ? seedTrack.dxy(bs.position()) : 0); + see_dz.push_back(seedFitOk ? seedTrack.dz(bs.position()) : 0); + see_ptErr.push_back(seedFitOk ? seedTrack.ptError() : 0); + see_etaErr.push_back(seedFitOk ? 
seedTrack.etaError() : 0); + see_stateTrajGlbX.push_back(stateGlobal.position().x()); + see_stateTrajGlbY.push_back(stateGlobal.position().y()); + see_stateTrajGlbZ.push_back(stateGlobal.position().z()); + see_stateTrajGlbPx.push_back(stateGlobal.momentum().x()); + see_stateTrajGlbPy.push_back(stateGlobal.momentum().y()); + see_stateTrajGlbPz.push_back(stateGlobal.momentum().z()); + see_q.push_back(seedTrack.charge()); + see_hitIdx.push_back(hitIdx); + see_seeds.push_back(seed); + } + } + + LSTPixelSeedInput pixelSeedInput(std::move(see_px), + std::move(see_py), + std::move(see_pz), + std::move(see_dxy), + std::move(see_dz), + std::move(see_ptErr), + std::move(see_etaErr), + std::move(see_stateTrajGlbX), + std::move(see_stateTrajGlbY), + std::move(see_stateTrajGlbZ), + std::move(see_stateTrajGlbPx), + std::move(see_stateTrajGlbPy), + std::move(see_stateTrajGlbPz), + std::move(see_q), + std::move(see_hitIdx)); + iEvent.emplace(lstPixelSeedInputPutToken_, std::move(pixelSeedInput)); + iEvent.emplace(lstPixelSeedsPutToken_, std::move(see_seeds)); +} + +DEFINE_FWK_MODULE(LSTPixelSeedInputProducer); diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc new file mode 100644 index 0000000000000..d0e103b1e315b --- /dev/null +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -0,0 +1,31 @@ +// LST includes +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class LSTModulesDevESProducer : public ESProducer { + public: + 
LSTModulesDevESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { setWhatProduced(this); } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr> produce(TrackerRecoGeometryRecord const& iRecord) { + return lst::loadAndFillESHost(); + } + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(LSTModulesDevESProducer); diff --git a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc new file mode 100644 index 0000000000000..7eb6c57ade05c --- /dev/null +++ b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc @@ -0,0 +1,99 @@ +#include + +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "RecoTracker/LST/interface/LSTOutput.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" + +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class LSTProducer : public stream::SynchronizingEDProducer<> { + public: + LSTProducer(edm::ParameterSet const& config) + : 
lstPixelSeedInputToken_{consumes(config.getParameter("pixelSeedInput"))}, + lstPhase2OTHitsInputToken_{consumes(config.getParameter("phase2OTHitsInput"))}, + lstESToken_{esConsumes()}, + verbose_(config.getParameter("verbose")), + nopLSDupClean_(config.getParameter("nopLSDupClean")), + tcpLSTriplets_(config.getParameter("tcpLSTriplets")), + lstOutputToken_{produces()} {} + + void acquire(device::Event const& event, device::EventSetup const& setup) override { + // Inputs + auto const& pixelSeeds = event.get(lstPixelSeedInputToken_); + auto const& phase2OTHits = event.get(lstPhase2OTHitsInputToken_); + + auto const& lstESDeviceData = setup.getData(lstESToken_); + + lst_.run(event.queue(), + verbose_, + &lstESDeviceData, + pixelSeeds.px(), + pixelSeeds.py(), + pixelSeeds.pz(), + pixelSeeds.dxy(), + pixelSeeds.dz(), + pixelSeeds.ptErr(), + pixelSeeds.etaErr(), + pixelSeeds.stateTrajGlbX(), + pixelSeeds.stateTrajGlbY(), + pixelSeeds.stateTrajGlbZ(), + pixelSeeds.stateTrajGlbPx(), + pixelSeeds.stateTrajGlbPy(), + pixelSeeds.stateTrajGlbPz(), + pixelSeeds.q(), + pixelSeeds.hitIdx(), + phase2OTHits.detId(), + phase2OTHits.x(), + phase2OTHits.y(), + phase2OTHits.z(), + nopLSDupClean_, + tcpLSTriplets_); + } + + void produce(device::Event& event, device::EventSetup const&) override { + // Output + LSTOutput lstOutput(lst_.hits(), lst_.len(), lst_.seedIdx(), lst_.trackCandidateType()); + event.emplace(lstOutputToken_, std::move(lstOutput)); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelSeedInput", edm::InputTag{"lstPixelSeedInputProducer"}); + desc.add("phase2OTHitsInput", edm::InputTag{"lstPhase2OTHitsInputProducer"}); + desc.add("verbose", false); + desc.add("nopLSDupClean", false); + desc.add("tcpLSTriplets", false); + descriptions.addWithDefaultLabel(desc); + } + + private: + edm::EDGetTokenT lstPixelSeedInputToken_; + edm::EDGetTokenT lstPhase2OTHitsInputToken_; + 
device::ESGetToken, TrackerRecoGeometryRecord> lstESToken_; + const bool verbose_, nopLSDupClean_, tcpLSTriplets_; + edm::EDPutTokenT lstOutputToken_; + + lst::LST lst_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(LSTProducer); diff --git a/RecoTracker/LST/python/lstProducerTask_cff.py b/RecoTracker/LST/python/lstProducerTask_cff.py new file mode 100644 index 0000000000000..588b354788635 --- /dev/null +++ b/RecoTracker/LST/python/lstProducerTask_cff.py @@ -0,0 +1,7 @@ +import FWCore.ParameterSet.Config as cms + +from RecoTracker.LST.lstProducer_cfi import lstProducer + +from RecoTracker.LST.lstModulesDevESProducer_cfi import lstModulesDevESProducer + +lstProducerTask = cms.Task(lstModulesDevESProducer, lstProducer) diff --git a/RecoTracker/LST/python/lstSeedTracks_cff.py b/RecoTracker/LST/python/lstSeedTracks_cff.py new file mode 100644 index 0000000000000..7046c616b0054 --- /dev/null +++ b/RecoTracker/LST/python/lstSeedTracks_cff.py @@ -0,0 +1,15 @@ +import FWCore.ParameterSet.Config as cms + +lstInitialStepSeedTracks = cms.EDProducer( + "TrackFromSeedProducer", + src = cms.InputTag("initialStepSeeds"), + beamSpot = cms.InputTag("offlineBeamSpot"), + TTRHBuilder = cms.string("WithoutRefit") +) + +lstHighPtTripletStepSeedTracks = cms.EDProducer( + "TrackFromSeedProducer", + src = cms.InputTag("highPtTripletStepSeeds"), + beamSpot = cms.InputTag("offlineBeamSpot"), + TTRHBuilder = cms.string("WithoutRefit") +) diff --git a/RecoTracker/LST/python/lst_cff.py b/RecoTracker/LST/python/lst_cff.py new file mode 100644 index 0000000000000..af3a80ae77e18 --- /dev/null +++ b/RecoTracker/LST/python/lst_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +from RecoTracker.LST.lstSeedTracks_cff import * +from RecoTracker.LST.lstPixelSeedInputProducer_cfi import * +from RecoTracker.LST.lstPhase2OTHitsInputProducer_cfi import * +from 
RecoTracker.LST.lstOutputConverter_cfi import * diff --git a/RecoTracker/LST/src/ES_ModulesDev.cc b/RecoTracker/LST/src/ES_ModulesDev.cc new file mode 100644 index 0000000000000..06a357860a7d5 --- /dev/null +++ b/RecoTracker/LST/src/ES_ModulesDev.cc @@ -0,0 +1,5 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(lst::LSTESData); diff --git a/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc b/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc new file mode 100644 index 0000000000000..54ded5e7a7c98 --- /dev/null +++ b/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc @@ -0,0 +1,4 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_TEMPLATED_DATA_REG(lst::LSTESData); diff --git a/RecoTracker/LST/src/classes.h b/RecoTracker/LST/src/classes.h new file mode 100644 index 0000000000000..6a6817d9b538e --- /dev/null +++ b/RecoTracker/LST/src/classes.h @@ -0,0 +1,9 @@ +#ifndef RecoTracker_LST_classes_h +#define RecoTracker_LST_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTOutput.h" + +#endif diff --git a/RecoTracker/LST/src/classes_def.xml b/RecoTracker/LST/src/classes_def.xml new file mode 100644 index 0000000000000..d386e7b92a215 --- /dev/null +++ b/RecoTracker/LST/src/classes_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/RecoTracker/LSTCore/BuildFile.xml b/RecoTracker/LSTCore/BuildFile.xml new file mode 100644 index 0000000000000..a58a1898046ae --- /dev/null +++ b/RecoTracker/LSTCore/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/RecoTracker/LSTCore/interface/Common.h b/RecoTracker/LSTCore/interface/Common.h new file mode 100644 index 
0000000000000..f65ca7a50d867 --- /dev/null +++ b/RecoTracker/LSTCore/interface/Common.h @@ -0,0 +1,107 @@ +#ifndef RecoTracker_LSTCore_interface_Common_h +#define RecoTracker_LSTCore_interface_Common_h + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Common/interface/StdArray.h" + +#if defined(FP16_Base) +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED +#include +#elif defined ALPAKA_ACC_GPU_HIP_ENABLED +#include +#endif +#endif + +namespace lst { + + // Named constants for pixelTypes + enum PixelType : int8_t { kInvalid = -1, kHighPt = 0, kLowPtPosCurv = 1, kLowPtNegCurv = 2 }; + + // Named types for LST objects + enum LSTObjType { T5 = 4, pT3 = 5, pT5 = 7, pLS = 8 }; + +// If a compile time flag does not define PT_CUT, default to 0.8 (GeV) +#ifndef PT_CUT + constexpr float PT_CUT = 0.8f; +#endif + + constexpr unsigned int max_blocks = 80; + constexpr unsigned int max_connected_modules = 40; + + constexpr unsigned int n_max_pixel_segments_per_module = 50000; + + constexpr unsigned int n_max_pixel_md_per_modules = 2 * n_max_pixel_segments_per_module; + + constexpr unsigned int n_max_pixel_triplets = 5000; + constexpr unsigned int n_max_pixel_quintuplets = 15000; + + constexpr unsigned int n_max_pixel_track_candidates = 30000; + constexpr unsigned int n_max_nonpixel_track_candidates = 1000; + + constexpr unsigned int size_superbins = 45000; + +// Half precision wrapper functions. +#if defined(FP16_Base) +#define __F2H __float2half +#define __H2F __half2float + typedef __half FPX; +#else +#define __F2H +#define __H2F + typedef float FPX; +#endif + +// Needed for files that are compiled by g++ to not throw an error. +// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. 
+#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; +#endif + + // Defining the constant host device variables right up here + // Currently pixel tracks treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. To be potentially handled better in the future. + struct Params_Modules { + using ArrayU16xMaxConnected = edm::StdArray; + }; + struct Params_pLS { + static constexpr int kLayers = 2, kHits = 4; + }; + struct Params_LS { + static constexpr int kLayers = 2, kHits = 4; + using ArrayUxLayers = edm::StdArray; + }; + struct Params_T3 { + static constexpr int kLayers = 3, kHits = 6; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + struct Params_pT3 { + static constexpr int kLayers = 5, kHits = 10; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + struct Params_T5 { + static constexpr int kLayers = 5, kHits = 10; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + struct Params_pT5 { + static constexpr int kLayers = 7, kHits = 14; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + + using ArrayIx2 = edm::StdArray; + using ArrayUx2 = edm::StdArray; + +} //namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometry.h b/RecoTracker/LSTCore/interface/EndcapGeometry.h new file mode 100644 index 0000000000000..b8c44c14fb143 --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometry.h @@ -0,0 +1,29 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometry_h +#define RecoTracker_LSTCore_interface_EndcapGeometry_h + +#include +#include +#include + +namespace lst { + class EndcapGeometry { + private: + std::map 
dxdy_slope_; // dx/dy slope + std::map centroid_phis_; // centroid phi + + public: + std::vector geoMapDetId_buf; + std::vector geoMapPhi_buf; + + unsigned int nEndCapMap; + + EndcapGeometry() = default; + EndcapGeometry(std::string const& filename); + + void load(std::string const&); + void fillGeoMapArraysExplicit(); + float getdxdy_slope(unsigned int detid) const; + }; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h b/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h new file mode 100644 index 0000000000000..e761ac5942bf8 --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometryDevHostCollection_h +#define RecoTracker_LSTCore_interface_EndcapGeometryDevHostCollection_h + +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using EndcapGeometryDevHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h b/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h new file mode 100644 index 0000000000000..587abfdaec66a --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h @@ -0,0 +1,18 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometryDevSoA_h +#define RecoTracker_LSTCore_interface_EndcapGeometryDevSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(EndcapGeometryDevSoALayout, SOA_COLUMN(unsigned int, geoMapDetId), SOA_COLUMN(float, geoMapPhi)) + + using EndcapGeometryDevSoA = EndcapGeometryDevSoALayout<>; + + using EndcapGeometryDev = EndcapGeometryDevSoA::View; + using EndcapGeometryDevConst = EndcapGeometryDevSoA::ConstView; + +} // namespace lst + +#endif diff --git 
a/RecoTracker/LSTCore/interface/HitsHostCollection.h b/RecoTracker/LSTCore/interface/HitsHostCollection.h new file mode 100644 index 0000000000000..f26c98c36e069 --- /dev/null +++ b/RecoTracker/LSTCore/interface/HitsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_HitsHostCollection_h +#define RecoTracker_LSTCore_interface_HitsHostCollection_h + +#include "RecoTracker/LSTCore/interface/HitsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using HitsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/HitsSoA.h b/RecoTracker/LSTCore/interface/HitsSoA.h new file mode 100644 index 0000000000000..b1f5de9eff46e --- /dev/null +++ b/RecoTracker/LSTCore/interface/HitsSoA.h @@ -0,0 +1,43 @@ +#ifndef RecoTracker_LSTCore_interface_HitsSoA_h +#define RecoTracker_LSTCore_interface_HitsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(HitsSoALayout, + SOA_COLUMN(float, xs), + SOA_COLUMN(float, ys), + SOA_COLUMN(float, zs), + SOA_COLUMN(uint16_t, moduleIndices), + SOA_COLUMN(unsigned int, idxs), + SOA_COLUMN(unsigned int, detid), + SOA_COLUMN(float, rts), + SOA_COLUMN(float, phis), + SOA_COLUMN(float, etas), + SOA_COLUMN(float, highEdgeXs), + SOA_COLUMN(float, highEdgeYs), + SOA_COLUMN(float, lowEdgeXs), + SOA_COLUMN(float, lowEdgeYs)) + + GENERATE_SOA_LAYOUT(HitsRangesSoALayout, + SOA_COLUMN(ArrayIx2, hitRanges), + SOA_COLUMN(int, hitRangesLower), + SOA_COLUMN(int, hitRangesUpper), + SOA_COLUMN(int8_t, hitRangesnLower), + SOA_COLUMN(int8_t, hitRangesnUpper)) + + using HitsSoA = HitsSoALayout<>; + using HitsRangesSoA = HitsRangesSoALayout<>; + + using Hits = HitsSoA::View; + using HitsConst = HitsSoA::ConstView; + using HitsRanges = HitsRangesSoA::View; + using 
HitsRangesConst = HitsRangesSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/LSTESData.h b/RecoTracker/LSTCore/interface/LSTESData.h new file mode 100644 index 0000000000000..45887d3cb1fea --- /dev/null +++ b/RecoTracker/LSTCore/interface/LSTESData.h @@ -0,0 +1,80 @@ +#ifndef RecoTracker_LSTCore_interface_LSTESData_h +#define RecoTracker_LSTCore_interface_LSTESData_h + +#include "RecoTracker/LSTCore/interface/Common.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" + +#include + +namespace lst { + + template + struct LSTESData { + uint16_t nModules; + uint16_t nLowerModules; + unsigned int nPixels; + unsigned int nEndCapMap; + // Using shared_ptr so that for the serial backend all streams can use the same data + std::shared_ptr> modules; + std::shared_ptr> endcapGeometry; + // Host-side object that is shared between the LSTESData objects for different devices + std::shared_ptr pixelMapping; + + LSTESData(uint16_t const& nModulesIn, + uint16_t const& nLowerModulesIn, + unsigned int const& nPixelsIn, + unsigned int const& nEndCapMapIn, + std::shared_ptr> modulesIn, + std::shared_ptr> endcapGeometryIn, + std::shared_ptr const& pixelMappingIn) + : nModules(nModulesIn), + nLowerModules(nLowerModulesIn), + nPixels(nPixelsIn), + nEndCapMap(nEndCapMapIn), + modules(std::move(modulesIn)), + endcapGeometry(std::move(endcapGeometryIn)), + pixelMapping(pixelMappingIn) {} + }; + + std::unique_ptr> loadAndFillESHost(); + +} // namespace lst + +namespace cms::alpakatools { + + template <> + struct CopyToDevice> { + template + static lst::LSTESData> copyAsync(TQueue& queue, + lst::LSTESData const& srcData) { + using TDev = alpaka::Dev; + std::shared_ptr> deviceModules; + std::shared_ptr> deviceEndcapGeometry; + + if 
constexpr (std::is_same_v) { + deviceModules = srcData.modules; + deviceEndcapGeometry = srcData.endcapGeometry; + } else { + deviceModules = std::make_shared>( + CopyToDevice>::copyAsync( + queue, *srcData.modules)); + deviceEndcapGeometry = std::make_shared>( + CopyToDevice>::copyAsync(queue, *srcData.endcapGeometry)); + } + + return lst::LSTESData>(srcData.nModules, + srcData.nLowerModules, + srcData.nPixels, + srcData.nEndCapMap, + std::move(deviceModules), + std::move(deviceEndcapGeometry), + srcData.pixelMapping); + } + }; +} // namespace cms::alpakatools + +#endif diff --git a/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h new file mode 100644 index 0000000000000..33169a07b9e51 --- /dev/null +++ b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_MiniDoubletsHostCollection_h +#define RecoTracker_LSTCore_interface_MiniDoubletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using MiniDoubletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h new file mode 100644 index 0000000000000..84375502c34b0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h @@ -0,0 +1,58 @@ +#ifndef RecoTracker_LSTCore_interface_MiniDoubletsSoA_h +#define RecoTracker_LSTCore_interface_MiniDoubletsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(MiniDoubletsSoALayout, + SOA_COLUMN(unsigned int, anchorHitIndices), + SOA_COLUMN(unsigned int, outerHitIndices), + SOA_COLUMN(uint16_t, moduleIndices), + SOA_COLUMN(float, dphichanges), + SOA_COLUMN(float, dzs), + 
SOA_COLUMN(float, dphis), + SOA_COLUMN(float, shiftedXs), + SOA_COLUMN(float, shiftedYs), + SOA_COLUMN(float, shiftedZs), + SOA_COLUMN(float, noShiftedDphis), + SOA_COLUMN(float, noShiftedDphiChanges), + SOA_COLUMN(float, anchorX), + SOA_COLUMN(float, anchorY), + SOA_COLUMN(float, anchorZ), + SOA_COLUMN(float, anchorRt), + SOA_COLUMN(float, anchorPhi), + SOA_COLUMN(float, anchorEta), + SOA_COLUMN(float, anchorHighEdgeX), + SOA_COLUMN(float, anchorHighEdgeY), + SOA_COLUMN(float, anchorLowEdgeX), + SOA_COLUMN(float, anchorLowEdgeY), + SOA_COLUMN(float, anchorLowEdgePhi), + SOA_COLUMN(float, anchorHighEdgePhi), + SOA_COLUMN(float, outerX), + SOA_COLUMN(float, outerY), + SOA_COLUMN(float, outerZ), + SOA_COLUMN(float, outerRt), + SOA_COLUMN(float, outerPhi), + SOA_COLUMN(float, outerEta), + SOA_COLUMN(float, outerHighEdgeX), + SOA_COLUMN(float, outerHighEdgeY), + SOA_COLUMN(float, outerLowEdgeX), + SOA_COLUMN(float, outerLowEdgeY)) + + GENERATE_SOA_LAYOUT(MiniDoubletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nMDs), + SOA_COLUMN(unsigned int, totOccupancyMDs)) + + using MiniDoubletsSoA = MiniDoubletsSoALayout<>; + using MiniDoubletsOccupancySoA = MiniDoubletsOccupancySoALayout<>; + + using MiniDoublets = MiniDoubletsSoA::View; + using MiniDoubletsConst = MiniDoubletsSoA::ConstView; + using MiniDoubletsOccupancy = MiniDoubletsOccupancySoA::View; + using MiniDoubletsOccupancyConst = MiniDoubletsOccupancySoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/ModuleConnectionMap.h b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h new file mode 100644 index 0000000000000..63c3496523c0d --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h @@ -0,0 +1,29 @@ +#ifndef RecoTracker_LSTCore_interface_ModuleConnectionMap_h +#define RecoTracker_LSTCore_interface_ModuleConnectionMap_h + +#include +#include +#include +#include + +namespace lst { + class ModuleConnectionMap { + private: + std::map> moduleConnections_; + + 
public: + ModuleConnectionMap(); + ModuleConnectionMap(std::string const& filename); + + void load(std::string const&); + void add(std::string const&); + void print(); + + const std::vector& getConnectedModuleDetIds(unsigned int detid) const; + int size() const; + }; + + using MapPLStoLayer = std::array, 3>; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/ModulesHostCollection.h b/RecoTracker/LSTCore/interface/ModulesHostCollection.h new file mode 100644 index 0000000000000..4119fb6ffb1a2 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModulesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_ModulesHostCollection_h +#define RecoTracker_LSTCore_interface_ModulesHostCollection_h + +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using ModulesHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/ModulesSoA.h b/RecoTracker/LSTCore/interface/ModulesSoA.h new file mode 100644 index 0000000000000..241dce953b293 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModulesSoA.h @@ -0,0 +1,57 @@ +#ifndef RecoTracker_LSTCore_interface_ModulesSoA_h +#define RecoTracker_LSTCore_interface_ModulesSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + enum SubDet { InnerPixel = 0, Barrel = 5, Endcap = 4 }; + + enum Side { NegZ = 1, PosZ = 2, Center = 3 }; + + enum ModuleType { PS, TwoS, PixelModule }; + + enum ModuleLayerType { Pixel, Strip, InnerPixelLayer }; + + GENERATE_SOA_LAYOUT(ModulesSoALayout, + SOA_COLUMN(unsigned int, detIds), + SOA_COLUMN(Params_Modules::ArrayU16xMaxConnected, moduleMap), + SOA_COLUMN(unsigned int, mapdetId), + SOA_COLUMN(uint16_t, mapIdx), + SOA_COLUMN(uint16_t, nConnectedModules), + SOA_COLUMN(float, 
drdzs), + SOA_COLUMN(float, dxdys), + SOA_COLUMN(uint16_t, partnerModuleIndices), + SOA_COLUMN(short, layers), + SOA_COLUMN(short, rings), + SOA_COLUMN(short, modules), + SOA_COLUMN(short, rods), + SOA_COLUMN(short, subdets), + SOA_COLUMN(short, sides), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, r), + SOA_COLUMN(bool, isInverted), + SOA_COLUMN(bool, isLower), + SOA_COLUMN(bool, isAnchor), + SOA_COLUMN(ModuleType, moduleType), + SOA_COLUMN(ModuleLayerType, moduleLayerType), + SOA_COLUMN(int, lstLayers), + SOA_SCALAR(uint16_t, nModules), + SOA_SCALAR(uint16_t, nLowerModules)) + + GENERATE_SOA_LAYOUT(ModulesPixelSoALayout, SOA_COLUMN(unsigned int, connectedPixels)) + + using ModulesSoA = ModulesSoALayout<>; + using ModulesPixelSoA = ModulesPixelSoALayout<>; + + using Modules = ModulesSoA::View; + using ModulesConst = ModulesSoA::ConstView; + using ModulesPixel = ModulesPixelSoA::View; + using ModulesPixelConst = ModulesPixelSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h b/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h new file mode 100644 index 0000000000000..5a6d3e8ca13b4 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_ObjectRangesHostCollection_h +#define RecoTracker_LSTCore_interface_ObjectRangesHostCollection_h + +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using ObjectRangesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/ObjectRangesSoA.h b/RecoTracker/LSTCore/interface/ObjectRangesSoA.h new file mode 100644 index 0000000000000..ccab6b23909f6 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ObjectRangesSoA.h @@ -0,0 +1,38 @@ +#ifndef RecoTracker_LSTCore_interface_ObjectRangesSoA_h +#define 
RecoTracker_LSTCore_interface_ObjectRangesSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(ObjectRangesSoALayout, + SOA_COLUMN(ArrayIx2, mdRanges), + SOA_COLUMN(ArrayIx2, segmentRanges), + SOA_COLUMN(ArrayIx2, tripletRanges), + SOA_COLUMN(ArrayIx2, quintupletRanges), + SOA_COLUMN(int, miniDoubletModuleIndices), + SOA_COLUMN(int, miniDoubletModuleOccupancy), + SOA_COLUMN(int, segmentModuleIndices), + SOA_COLUMN(int, segmentModuleOccupancy), + SOA_COLUMN(int, tripletModuleIndices), + SOA_COLUMN(int, tripletModuleOccupancy), + SOA_COLUMN(int, quintupletModuleIndices), + SOA_COLUMN(int, quintupletModuleOccupancy), + SOA_COLUMN(uint16_t, indicesOfEligibleT5Modules), + SOA_SCALAR(unsigned int, nTotalMDs), + SOA_SCALAR(unsigned int, nTotalSegs), + SOA_SCALAR(unsigned int, nTotalTrips), + SOA_SCALAR(unsigned int, nTotalQuints), + SOA_SCALAR(uint16_t, nEligibleT5Modules)) + + using ObjectRangesSoA = ObjectRangesSoALayout<>; + + using ObjectRanges = ObjectRangesSoA::View; + using ObjectRangesConst = ObjectRangesSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/PixelMap.h b/RecoTracker/LSTCore/interface/PixelMap.h new file mode 100644 index 0000000000000..763686142056c --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelMap.h @@ -0,0 +1,31 @@ +#ifndef RecoTracker_LSTCore_interface_PixelMap_h +#define RecoTracker_LSTCore_interface_PixelMap_h + +#include +#include + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + struct PixelMap { // one module index plus six vectors; the constructor below gives every vector sizef elements + uint16_t pixelModuleIndex; // set to 0 by the constructor + + std::vector connectedPixelsIndex; + std::vector connectedPixelsSizes; + std::vector connectedPixelsIndexPos; // NOTE(review): the Pos/Neg variants are presumably split by charge or by side - confirm against the code that fills them + std::vector connectedPixelsSizesPos; + std::vector connectedPixelsIndexNeg; + std::vector connectedPixelsSizesNeg; + + PixelMap(unsigned int sizef = size_superbins) // sizef: length given to all six vectors; NOTE(review): not explicit, so an unsigned int converts implicitly to PixelMap + : 
pixelModuleIndex(0), + connectedPixelsIndex(sizef), + connectedPixelsSizes(sizef), + connectedPixelsIndexPos(sizef), + connectedPixelsSizesPos(sizef), + connectedPixelsIndexNeg(sizef), + connectedPixelsSizesNeg(sizef) {} /* each vector now holds sizef value-initialised elements */ + }; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h new file mode 100644 index 0000000000000..afb2560680621 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h +#define RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using PixelQuintupletsHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h new file mode 100644 index 0000000000000..e8ea89b9a2547 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h @@ -0,0 +1,35 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h +#define RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(PixelQuintupletsSoALayout, + SOA_COLUMN(unsigned int, pixelSegmentIndices), + SOA_COLUMN(unsigned int, quintupletIndices), + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // lower module index (OT part) + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(float, rPhiChiSquared), // chi2 from pLS to T5 + SOA_COLUMN(float, 
rPhiChiSquaredInwards), // chi2 from T5 to pLS + SOA_COLUMN(float, rzChiSquared), + SOA_COLUMN(FPX, pixelRadius), // pLS pt converted + SOA_COLUMN(FPX, quintupletRadius), // T5 circle + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, score), // used for ranking (in e.g. duplicate cleaning) + SOA_COLUMN(FPX, centerX), // T3-based circle center x + SOA_COLUMN(FPX, centerY), // T3-based circle center y + SOA_COLUMN(bool, isDup), + SOA_SCALAR(unsigned int, nPixelQuintuplets), + SOA_SCALAR(unsigned int, totOccupancyPixelQuintuplets)); + + using PixelQuintupletsSoA = PixelQuintupletsSoALayout<>; + using PixelQuintuplets = PixelQuintupletsSoA::View; + using PixelQuintupletsConst = PixelQuintupletsSoA::ConstView; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h new file mode 100644 index 0000000000000..67678e64bfc03 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h +#define RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using PixelTripletsHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelTripletsSoA.h b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h new file mode 100644 index 0000000000000..a0f2c9c416539 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h @@ -0,0 +1,39 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsSoA_h +#define RecoTracker_LSTCore_interface_PixelTripletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + 
GENERATE_SOA_LAYOUT(PixelTripletsSoALayout, + SOA_COLUMN(unsigned int, pixelSegmentIndices), + SOA_COLUMN(unsigned int, tripletIndices), + SOA_COLUMN(Params_pT3::ArrayU16xLayers, lowerModuleIndices), // lower module index (OT part) + SOA_COLUMN(Params_pT3::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_pT3::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(float, rPhiChiSquared), // chi2 from pLS to T3 + SOA_COLUMN(float, rPhiChiSquaredInwards), // chi2 from T3 to pLS + SOA_COLUMN(float, rzChiSquared), + SOA_COLUMN(FPX, pixelRadius), // pLS pt converted + SOA_COLUMN(FPX, tripletRadius), // T3 circle + SOA_COLUMN(FPX, pt), + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, eta_pix), // eta from pLS + SOA_COLUMN(FPX, phi_pix), // phi from pLS + SOA_COLUMN(FPX, score), // used for ranking (in e.g. duplicate cleaning) + SOA_COLUMN(FPX, centerX), // T3-based circle center x + SOA_COLUMN(FPX, centerY), // T3-based circle center y + SOA_COLUMN(bool, isDup), + SOA_SCALAR(unsigned int, nPixelTriplets), + SOA_SCALAR(unsigned int, totOccupancyPixelTriplets)); + + using PixelTripletsSoA = PixelTripletsSoALayout<>; + using PixelTriplets = PixelTripletsSoA::View; + using PixelTripletsConst = PixelTripletsSoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h new file mode 100644 index 0000000000000..734ce03057be7 --- /dev/null +++ b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsHostCollection_h +#define RecoTracker_LSTCore_interface_QuintupletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using QuintupletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git 
a/RecoTracker/LSTCore/interface/QuintupletsSoA.h b/RecoTracker/LSTCore/interface/QuintupletsSoA.h new file mode 100644 index 0000000000000..4ece80cd11ddd --- /dev/null +++ b/RecoTracker/LSTCore/interface/QuintupletsSoA.h @@ -0,0 +1,46 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsSoA_h +#define RecoTracker_LSTCore_interface_QuintupletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(QuintupletsSoALayout, + SOA_COLUMN(ArrayUx2, tripletIndices), // inner and outer triplet indices + SOA_COLUMN(Params_T5::ArrayU16xLayers, lowerModuleIndices), // lower module index in each layer + SOA_COLUMN(Params_T5::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_T5::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(FPX, innerRadius), // inner triplet circle radius + SOA_COLUMN(FPX, bridgeRadius), // "middle"/bridge triplet radius + SOA_COLUMN(FPX, outerRadius), // outer triplet radius + SOA_COLUMN(FPX, pt), + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, score_rphisum), // r-phi based score + SOA_COLUMN(char, isDup), // duplicate flag + SOA_COLUMN(bool, tightCutFlag), // tight pass to be a TC + SOA_COLUMN(bool, partOfPT5), + SOA_COLUMN(float, regressionRadius), + SOA_COLUMN(float, regressionG), + SOA_COLUMN(float, regressionF), + SOA_COLUMN(float, rzChiSquared), // r-z only chi2 + SOA_COLUMN(float, chiSquared), + SOA_COLUMN(float, nonAnchorChiSquared)); + + using QuintupletsSoA = QuintupletsSoALayout<>; + using Quintuplets = QuintupletsSoA::View; + using QuintupletsConst = QuintupletsSoA::ConstView; + + GENERATE_SOA_LAYOUT(QuintupletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nQuintuplets), + SOA_COLUMN(unsigned int, totOccupancyQuintuplets)); + + using QuintupletsOccupancySoA = QuintupletsOccupancySoALayout<>; + using QuintupletsOccupancy = 
QuintupletsOccupancySoA::View; + using QuintupletsOccupancyConst = QuintupletsOccupancySoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/SegmentsHostCollection.h b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h new file mode 100644 index 0000000000000..2fa6ac912a732 --- /dev/null +++ b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_SegmentsHostCollection_h +#define RecoTracker_LSTCore_interface_SegmentsHostCollection_h + +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using SegmentsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/SegmentsSoA.h b/RecoTracker/LSTCore/interface/SegmentsSoA.h new file mode 100644 index 0000000000000..24df2fb5a42a0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/SegmentsSoA.h @@ -0,0 +1,63 @@ +#ifndef RecoTracker_LSTCore_interface_SegmentsSoA_h +#define RecoTracker_LSTCore_interface_SegmentsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(SegmentsSoALayout, + SOA_COLUMN(FPX, dPhis), + SOA_COLUMN(FPX, dPhiMins), + SOA_COLUMN(FPX, dPhiMaxs), + SOA_COLUMN(FPX, dPhiChanges), + SOA_COLUMN(FPX, dPhiChangeMins), + SOA_COLUMN(FPX, dPhiChangeMaxs), + SOA_COLUMN(uint16_t, innerLowerModuleIndices), + SOA_COLUMN(uint16_t, outerLowerModuleIndices), + SOA_COLUMN(Params_LS::ArrayUxLayers, mdIndices), + SOA_COLUMN(unsigned int, innerMiniDoubletAnchorHitIndices), + SOA_COLUMN(unsigned int, outerMiniDoubletAnchorHitIndices)) + + GENERATE_SOA_LAYOUT(SegmentsOccupancySoALayout, + SOA_COLUMN(unsigned int, nSegments), //number of segments per inner lower module + SOA_COLUMN(unsigned int, totOccupancySegments)) + 
+ GENERATE_SOA_LAYOUT(SegmentsPixelSoALayout, + SOA_COLUMN(unsigned int, seedIdx), + SOA_COLUMN(int, charge), + SOA_COLUMN(int, superbin), + SOA_COLUMN(uint4, pLSHitsIdxs), + SOA_COLUMN(PixelType, pixelType), + SOA_COLUMN(char, isQuad), /* NOTE(review): isQuad and isDup are stored as char although partOfPT5 below is bool */ + SOA_COLUMN(char, isDup), + SOA_COLUMN(bool, partOfPT5), + SOA_COLUMN(float, ptIn), + SOA_COLUMN(float, ptErr), + SOA_COLUMN(float, px), + SOA_COLUMN(float, py), + SOA_COLUMN(float, pz), + SOA_COLUMN(float, etaErr), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, phi), + SOA_COLUMN(float, score), + SOA_COLUMN(float, circleCenterX), + SOA_COLUMN(float, circleCenterY), + SOA_COLUMN(float, circleRadius)) + + using SegmentsSoA = SegmentsSoALayout<>; + using SegmentsOccupancySoA = SegmentsOccupancySoALayout<>; + using SegmentsPixelSoA = SegmentsPixelSoALayout<>; + + using Segments = SegmentsSoA::View; + using SegmentsConst = SegmentsSoA::ConstView; + using SegmentsOccupancy = SegmentsOccupancySoA::View; + using SegmentsOccupancyConst = SegmentsOccupancySoA::ConstView; + using SegmentsPixel = SegmentsPixelSoA::View; + using SegmentsPixelConst = SegmentsPixelSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/TiltedGeometry.h b/RecoTracker/LSTCore/interface/TiltedGeometry.h new file mode 100644 index 0000000000000..7a17106195522 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TiltedGeometry.h @@ -0,0 +1,26 @@ +#ifndef RecoTracker_LSTCore_interface_TiltedGeometry_h +#define RecoTracker_LSTCore_interface_TiltedGeometry_h + +#include +#include +#include + +namespace lst { + class TiltedGeometry { // two slope tables plus const float accessors that take a detid + private: + std::map drdzs_; // dr/dz slope + std::map dxdys_; // dx/dy slope + + public: + TiltedGeometry() = default; // leaves both maps empty + TiltedGeometry(std::string const& filename); // NOTE(review): not explicit; presumably forwards to load(filename) - confirm in the implementation + + void load(std::string const&); // NOTE(review): parameter unnamed; presumably the same kind of file name as the constructor takes - confirm + + float getDrDz(unsigned int detid) const; // presumably reads drdzs_; the result for a detid with no entry is not visible in this header + float getDxDy(unsigned int detid) const; // presumably reads dxdys_; same open question for a missing detid + }; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h 
b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h new file mode 100644 index 0000000000000..3ffd2bedf945e --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TrackCandidatesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h new file mode 100644 index 0000000000000..b1fdecf75526a --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h @@ -0,0 +1,32 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesSoA_h +#define RecoTracker_LSTCore_interface_TrackCandidatesSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TrackCandidatesSoALayout, + SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS + SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers + SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // layer ID; array sized with the pT5 parameters + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // hit indices; array sized with the pT5 parameters + SOA_COLUMN(int, pixelSeedIndex), // NOTE(review): presumably the seed index of the pixel segment - confirm the value used for T5-only candidates + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // lower module indices; array sized with the pT5 parameters + SOA_COLUMN(FPX, centerX), // circle center x + SOA_COLUMN(FPX, centerY), // circle center y + SOA_COLUMN(FPX, radius), // circle radius + SOA_SCALAR(unsigned int, nTrackCandidates), // scalar; NOTE(review): presumably the total over the four per-type counters below - confirm + SOA_SCALAR(unsigned int, nTrackCandidatespT3), // scalar; per-type counter, type codes are listed on trackCandidateType + SOA_SCALAR(unsigned int, nTrackCandidatespT5), // scalar; per-type counter + SOA_SCALAR(unsigned int, nTrackCandidatespLS), // scalar; per-type counter + SOA_SCALAR(unsigned int, 
nTrackCandidatesT5)) // scalar; per-type counter + + using TrackCandidatesSoA = TrackCandidatesSoALayout<>; + using TrackCandidates = TrackCandidatesSoA::View; + using TrackCandidatesConst = TrackCandidatesSoA::ConstView; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TripletsHostCollection.h b/RecoTracker/LSTCore/interface/TripletsHostCollection.h new file mode 100644 index 0000000000000..6eaebd97e5bf6 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TripletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsHostCollection_h +#define RecoTracker_LSTCore_interface_TripletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TripletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TripletsSoA.h b/RecoTracker/LSTCore/interface/TripletsSoA.h new file mode 100644 index 0000000000000..69c2d97449df3 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TripletsSoA.h @@ -0,0 +1,42 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsSoA_h +#define RecoTracker_LSTCore_interface_TripletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TripletsSoALayout, + SOA_COLUMN(ArrayUx2, segmentIndices), // inner and outer segment indices + SOA_COLUMN(Params_T3::ArrayU16xLayers, lowerModuleIndices), // lower module index in each layer + SOA_COLUMN(Params_T3::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_T3::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(FPX, betaIn), // beta/chord angle of the inner segment + SOA_COLUMN(float, centerX), // lower/anchor-hit based circle center x + SOA_COLUMN(float, centerY), // lower/anchor-hit based circle center y + SOA_COLUMN(float, radius), // 
lower/anchor-hit based circle radius +#ifdef CUT_VALUE_DEBUG /* NOTE(review): this directive sits inside the GENERATE_SOA_LAYOUT(...) argument list; ISO C++ leaves directives within macro arguments undefined, so this relies on compiler behaviour */ + SOA_COLUMN(float, zOut), + SOA_COLUMN(float, rtOut), + SOA_COLUMN(float, betaInCut), +#endif + SOA_COLUMN(bool, partOfPT5), // is it used in a pT5 + SOA_COLUMN(bool, partOfT5), // is it used in a T5 + SOA_COLUMN(bool, partOfPT3)); // is it used in a pT3 + + using TripletsSoA = TripletsSoALayout<>; + using Triplets = TripletsSoA::View; + using TripletsConst = TripletsSoA::ConstView; + + GENERATE_SOA_LAYOUT(TripletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nTriplets), + SOA_COLUMN(unsigned int, totOccupancyTriplets)); + + using TripletsOccupancySoA = TripletsOccupancySoALayout<>; + using TripletsOccupancy = TripletsOccupancySoA::View; + using TripletsOccupancyConst = TripletsOccupancySoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Common.h b/RecoTracker/LSTCore/interface/alpaka/Common.h new file mode 100644 index 0000000000000..7a1feabfcf076 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/Common.h @@ -0,0 +1,82 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_Common_h +#define RecoTracker_LSTCore_interface_alpaka_Common_h + +#include + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + using namespace ::lst; + + Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); // every component set to 1 + + // Adjust grid and block sizes based on backend configuration. Returns WorkDiv(blocks, threads, elementsPerThreadArg) where blocks/threads start as the caller's values and may be overridden below. + template > + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThreadArg) { // elementsPerThreadArg is passed to WorkDiv unchanged + Vec adjustedBlocks = blocksPerGrid; // local copies, so the caller's arguments are never modified + Vec adjustedThreads = threadsPerBlock; + + // special overrides for CPU/host cases + if constexpr (std::is_same_v) { // compile-time branch: only one path is instantiated per backend + adjustedBlocks = Vec::all(static_cast(1)); // the requested grid size is discarded, every component becomes 1 + + if constexpr (alpaka::accMatchesTags) { + // Serial execution, set threads to 1 as well + adjustedThreads = Vec::all(static_cast(1)); // probably redundant + } + } + + return 
WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); + } + + // The constants below are usually used in functions like alpaka::math::min(), + // expecting a reference (T const&) in the arguments. Hence, + // ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr. + + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPi = std::numbers::pi_v; + // 15 MeV constant from the approximate Bethe-Bloch formula + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMulsInGeV = 0.015; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleBarrel[6] = { + 0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanBarrel[6] = { // the initialisers are double literals (no f suffix) stored into float + 25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanEndcap[5] = { + 130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2; // NOTE(review): 3.8 is presumably the magnetic field in tesla and 2.99792458e-3 a speed-of-light factor - confirm units + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kR1GeVf = 1. 
/ (2.99792458e-3 * 3.8); + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kSinAlphaMax = 0.95; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float ptCut = PT_CUT; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kDeltaZLum = 15.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPixelPSZpitch = 0.15; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStripPSZpitch = 2.4; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStrip2SZpitch = 5.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidth2S = 0.009; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidthPS = 0.01; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPt_betaMax = 7.0; + // To be updated with std::numeric_limits::infinity() in the code and data files + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kVerticalModuleSlope = 123456789.0; + + namespace t5dnn { + + // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp1 = 0.3418833f; // 94.0% TPR, 43.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp2 = 0.6177366f; // 82.0% TPR, 20.0% FPR + // Other working points + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp70 = 0.7776195f; // 70.0% TPR, 10.0% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp75 = 0.7181118f; // 75.0% TPR, 13.5% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp80 = 0.6492643f; // 80.0% TPR, 17.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp85 = 0.5655319f; // 85.0% TPR, 23.8% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp90 = 0.4592205f; // 90.0% TPR, 32.6% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp95 = 0.3073708f; // 95.0% TPR, 47.7% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp97p5 = 0.2001348f; // 97.5% TPR, 61.2% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99 = 0.1120605f; // 99.0% TPR, 75.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99p9 = 0.0218196f; // 99.9% TPR, 95.4% FPR + + } // namespace t5dnn + +} // namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h new file mode 100644 index 0000000000000..12f510cf6b367 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_EndcapGeometryDevDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_EndcapGeometryDevDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using EndcapGeometryDevDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h new file mode 100644 index 0000000000000..5bafd9df246bc --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_HitsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_HitsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/HitsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using HitsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h new file mode 100644 index 0000000000000..40d912de3f291 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -0,0 +1,102 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_LST_h +#define 
RecoTracker_LSTCore_interface_alpaka_LST_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/LSTESData.h" + +#include +#include +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + class LSTEvent; /* forward declaration: this header only uses LSTEvent by reference, in getOutput() */ + + class LST { /* run() takes the see_* and ph2_* input vectors; the four const accessors below expose the out_tc_* members */ + public: + LST() = default; + + void run(Queue& queue, + bool verbose, + LSTESData const* deviceESData, /* NOTE(review): raw pointer; whether nullptr is tolerated is not visible in this header */ + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets); + std::vector> const& hits() const { return out_tc_hitIdxs_; } /* all four accessors return references to members, so they are valid only while this LST object lives; NOTE(review): presumably refreshed by each run() - confirm */ + std::vector const& len() const { return out_tc_len_; } + std::vector const& seedIdx() const { return out_tc_seedIdx_; } + std::vector const& trackCandidateType() const { return out_tc_trackCandidateType_; } + + private: + void prepareInput(std::vector const& see_px, /* same see_* / ph2_* parameter list as run(), without queue, verbose, deviceESData and the two trailing flags */ + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z); + + 
void getOutput(LSTEvent& event); /* private; takes the event by non-const reference */ + + // Input and output vectors: the out_tc_* members back the public accessors; the in_* members are the inputs + std::vector in_trkX_; + std::vector in_trkY_; + std::vector in_trkZ_; + std::vector in_hitId_; + std::vector in_hitIdxs_; + std::vector in_hitIndices_vec0_; + std::vector in_hitIndices_vec1_; + std::vector in_hitIndices_vec2_; + std::vector in_hitIndices_vec3_; + std::vector in_deltaPhi_vec_; + std::vector in_ptIn_vec_; + std::vector in_ptErr_vec_; + std::vector in_px_vec_; + std::vector in_py_vec_; + std::vector in_pz_vec_; + std::vector in_eta_vec_; + std::vector in_etaErr_vec_; + std::vector in_phi_vec_; + std::vector in_charge_vec_; + std::vector in_seedIdx_vec_; + std::vector in_superbin_vec_; + std::vector in_pixelType_vec_; + std::vector in_isQuad_vec_; + std::vector> out_tc_hitIdxs_; + std::vector out_tc_len_; + std::vector out_tc_seedIdx_; + std::vector out_tc_trackCandidateType_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h new file mode 100644 index 0000000000000..7751f75ac5ec9 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using MiniDoubletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h new file mode 100644 index 0000000000000..73152a47b6a42 --- /dev/null +++ 
b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_ModulesDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_ModulesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using ModulesDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h new file mode 100644 index 0000000000000..36c6584280fe0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using ObjectRangesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h new file mode 100644 index 0000000000000..e5feed7677c38 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_PixelQuintupletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_PixelQuintupletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include 
"RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using PixelQuintupletsDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h new file mode 100644 index 0000000000000..a5938ed82bd8b --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_PixelTripletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_PixelTripletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using PixelTripletsDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h new file mode 100644 index 0000000000000..13fb5484ea0fd --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_QuintupletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_QuintupletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using QuintupletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h 
b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h new file mode 100644 index 0000000000000..934e6314ae320 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using SegmentsDeviceCollection = PortableCollection3; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h new file mode 100644 index 0000000000000..387ca5a108453 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_TrackCandidatesDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_TrackCandidatesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TrackCandidatesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h new file mode 100644 index 0000000000000..6db827680cee3 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_TripletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_TripletsDeviceCollection_h + +#include 
"DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TripletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/EndcapGeometry.cc b/RecoTracker/LSTCore/src/EndcapGeometry.cc new file mode 100644 index 0000000000000..17e72379bb2ec --- /dev/null +++ b/RecoTracker/LSTCore/src/EndcapGeometry.cc @@ -0,0 +1,59 @@ +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" + +#include +#include +#include +#include + +lst::EndcapGeometry::EndcapGeometry(std::string const& filename) { load(filename); } + +// Load (detid, dxdy_slope, centroid_phi) records from a binary file, replacing the previously loaded maps. +// Throws std::runtime_error if the file cannot be opened or a read fails before end of file. +void lst::EndcapGeometry::load(std::string const& filename) { + dxdy_slope_.clear(); + centroid_phis_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid; + float dxdy_slope, centroid_phi; + + // Read the detid, dxdy_slope, and centroid_phi from binary file + ifile.read(reinterpret_cast<char*>(&detid), sizeof(detid)); + ifile.read(reinterpret_cast<char*>(&dxdy_slope), sizeof(dxdy_slope)); + ifile.read(reinterpret_cast<char*>(&centroid_phi), sizeof(centroid_phi)); + + if (ifile) { + dxdy_slope_[detid] = dxdy_slope; + centroid_phis_[detid] = centroid_phi; + } else { + // End of file or read failed + if (!ifile.eof()) { + throw std::runtime_error("Failed to read Endcap Geometry binary data."); + } + } + } + + fillGeoMapArraysExplicit(); +} + +// Flatten centroid_phis_ into the parallel geoMapDetId_buf / geoMapPhi_buf arrays and store their length in nEndCapMap. +void lst::EndcapGeometry::fillGeoMapArraysExplicit() { + nEndCapMap = centroid_phis_.size(); + + // Drop entries left over from a previous load() so the buffers hold exactly nEndCapMap elements + geoMapDetId_buf.clear(); + geoMapPhi_buf.clear(); + geoMapDetId_buf.reserve(nEndCapMap); + geoMapPhi_buf.reserve(nEndCapMap); + + for (auto it = centroid_phis_.begin(); it != centroid_phis_.end(); ++it) { + unsigned int detId = it->first; + float Phi = it->second; + geoMapPhi_buf.push_back(Phi); + 
geoMapDetId_buf.push_back(detId); + } +} + +float lst::EndcapGeometry::getdxdy_slope(unsigned int detid) const { + auto res = dxdy_slope_.find(detid); + return res == dxdy_slope_.end() ? 0.f : res->second; +} diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc new file mode 100644 index 0000000000000..66163d39beb2e --- /dev/null +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -0,0 +1,120 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "ModuleMethods.h" + +#include + +namespace { + std::string geometryDataDir() { + const char* path_lst_base = std::getenv("LST_BASE"); + const char* path_tracklooperdir = std::getenv("TRACKLOOPERDIR"); + std::string path_str; + if (path_lst_base != nullptr) { + path_str = path_lst_base; + } else if (path_tracklooperdir != nullptr) { + path_str = path_tracklooperdir; + path_str += "/../"; + } else { + std::stringstream search_path(std::getenv("CMSSW_SEARCH_PATH")); + std::string path; + while (std::getline(search_path, path, ':')) { + if (std::filesystem::exists(path + "/RecoTracker/LSTCore/data")) { + path_str = path; + break; + } + } + path_str += "/RecoTracker/LSTCore"; + } + return path_str; + } + + std::string get_absolute_path_after_check_file_exists(std::string const& name) { + std::filesystem::path fullpath = std::filesystem::absolute(name); + if (not std::filesystem::exists(fullpath)) { + throw std::runtime_error("Could not find the file = " + fullpath.string()); + } + return fullpath.string(); + } + + void loadMapsHost(lst::MapPLStoLayer& pLStoLayer, + lst::EndcapGeometry& endcapGeometry, + lst::TiltedGeometry& tiltedGeometry, + lst::ModuleConnectionMap& moduleConnectionMap) { + // Module orientation information (DrDz or phi angles) + auto 
endcap_geom = + get_absolute_path_after_check_file_exists(geometryDataDir() + "/data/OT800_IT615_pt0.8/endcap_orientation.bin"); + auto tilted_geom = get_absolute_path_after_check_file_exists( + geometryDataDir() + "/data/OT800_IT615_pt0.8/tilted_barrel_orientation.bin"); + // Module connection map (for line segment building) + auto mappath = get_absolute_path_after_check_file_exists( + geometryDataDir() + "/data/OT800_IT615_pt0.8/module_connection_tracing_merged.bin"); + + endcapGeometry.load(endcap_geom); + tiltedGeometry.load(tilted_geom); + moduleConnectionMap.load(mappath); + + auto pLSMapDir = geometryDataDir() + "/data/OT800_IT615_pt0.8/pixelmap/pLS_map"; + const std::array connects{ + {"_layer1_subdet5", "_layer2_subdet5", "_layer1_subdet4", "_layer2_subdet4"}}; + std::string path; + + static_assert(connects.size() == std::tuple_size>{}); + for (unsigned int i = 0; i < connects.size(); i++) { + auto connectData = connects[i].data(); + + path = pLSMapDir + connectData + ".bin"; + pLStoLayer[0][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + + path = pLSMapDir + "_pos" + connectData + ".bin"; + pLStoLayer[1][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + + path = pLSMapDir + "_neg" + connectData + ".bin"; + pLStoLayer[2][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + } + } +} // namespace + +std::unique_ptr> lst::loadAndFillESHost() { + uint16_t nModules; + uint16_t nLowerModules; + unsigned int nPixels; + MapPLStoLayer pLStoLayer; + EndcapGeometry endcapGeometry; + TiltedGeometry tiltedGeometry; + PixelMap pixelMapping; + ModuleConnectionMap moduleConnectionMap; + ::loadMapsHost(pLStoLayer, endcapGeometry, tiltedGeometry, moduleConnectionMap); + + auto endcapGeometryDev = + std::make_shared(endcapGeometry.nEndCapMap, cms::alpakatools::host()); + std::memcpy(endcapGeometryDev->view().geoMapDetId(), + endcapGeometry.geoMapDetId_buf.data(), + 
endcapGeometry.nEndCapMap * sizeof(unsigned int)); + std::memcpy(endcapGeometryDev->view().geoMapPhi(), + endcapGeometry.geoMapPhi_buf.data(), + endcapGeometry.nEndCapMap * sizeof(float)); + + auto path = + get_absolute_path_after_check_file_exists(geometryDataDir() + "/data/OT800_IT615_pt0.8/sensor_centroids.bin"); + auto modulesBuffers = lst::loadModulesFromFile(pLStoLayer, + path.c_str(), + nModules, + nLowerModules, + nPixels, + pixelMapping, + endcapGeometry, + tiltedGeometry, + moduleConnectionMap); + auto pixelMappingPtr = std::make_shared(std::move(pixelMapping)); + return std::make_unique>(nModules, + nLowerModules, + nPixels, + endcapGeometry.nEndCapMap, + std::move(modulesBuffers), + std::move(endcapGeometryDev), + pixelMappingPtr); +} diff --git a/RecoTracker/LSTCore/src/ModuleConnectionMap.cc b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc new file mode 100644 index 0000000000000..0da0f4cc4ac6f --- /dev/null +++ b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc @@ -0,0 +1,108 @@ +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" + +#include +#include +#include +#include + +lst::ModuleConnectionMap::ModuleConnectionMap() {} + +lst::ModuleConnectionMap::ModuleConnectionMap(std::string const& filename) { load(filename); } + +void lst::ModuleConnectionMap::load(std::string const& filename) { + moduleConnections_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid, number_of_connections; + + // Read the detid and the number of connections from the binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + ifile.read(reinterpret_cast(&number_of_connections), sizeof(number_of_connections)); + + if (ifile) { + std::vector connected_detids; + connected_detids.reserve(number_of_connections); + + // Read the connections for the given detid + for (unsigned int i = 0; i < 
number_of_connections; ++i) { + unsigned int connected_detid; + ifile.read(reinterpret_cast(&connected_detid), sizeof(connected_detid)); + if (ifile) { + connected_detids.push_back(connected_detid); + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read connection data."); + } + break; // EOF in the middle of a record: stop reading, the partial record is not stored below + } + } + + if (ifile) { + moduleConnections_[detid] = std::move(connected_detids); + } + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read module connection binary data."); + } + } + } +} + +// Merge extra connections from a text file (one "detid n id_1 ... id_n" record per line) into the map, +// keeping each module's connection list sorted and free of duplicates. +// Throws std::runtime_error if the file cannot be opened or a line cannot be parsed. +void lst::ModuleConnectionMap::add(std::string const& filename) { + std::ifstream ifile(filename); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + std::string line; + + while (std::getline(ifile, line)) { + unsigned int detid; + int number_of_connections; + std::vector<unsigned int> connected_detids; + unsigned int connected_detid; + + std::stringstream ss(line); + + // Reject a malformed record header instead of continuing with unparsed values + if (!(ss >> detid >> number_of_connections) || number_of_connections < 0) { + throw std::runtime_error("Failed to parse module connection line: " + line); + } + connected_detids.reserve(number_of_connections); + + for (int ii = 0; ii < number_of_connections; ++ii) { + if (!(ss >> connected_detid)) { + throw std::runtime_error("Failed to parse module connection line: " + line); + } + connected_detids.push_back(connected_detid); + } + + // operator[] creates the entry for a detid that was not loaded before (map::at would throw std::out_of_range) + auto& thisModuleConnections = moduleConnections_[detid]; + + // Concatenate + thisModuleConnections.insert(thisModuleConnections.end(), connected_detids.begin(), connected_detids.end()); + + // Sort + std::sort(thisModuleConnections.begin(), thisModuleConnections.end()); + + // Unique + thisModuleConnections.erase(std::unique(thisModuleConnections.begin(), thisModuleConnections.end()), + thisModuleConnections.end()); + } +} + +void lst::ModuleConnectionMap::print() { + std::cout << "Printing ModuleConnectionMap" << std::endl; + for (auto& pair : moduleConnections_) { + unsigned int detid = pair.first; + std::vector const& connected_detids = pair.second; + std::cout << " detid: " << detid << std::endl; + for (auto& connected_detid : connected_detids) { + std::cout << " connected_detid: " << connected_detid << std::endl; + } + } +} + +const std::vector& 
lst::ModuleConnectionMap::getConnectedModuleDetIds(unsigned int detid) const { + static const std::vector dummy; + auto const mList = moduleConnections_.find(detid); + return mList != moduleConnections_.end() ? mList->second : dummy; +} +int lst::ModuleConnectionMap::size() const { return moduleConnections_.size(); } diff --git a/RecoTracker/LSTCore/src/ModuleMethods.h b/RecoTracker/LSTCore/src/ModuleMethods.h new file mode 100644 index 0000000000000..d2cf81be21d02 --- /dev/null +++ b/RecoTracker/LSTCore/src/ModuleMethods.h @@ -0,0 +1,395 @@ +#ifndef RecoTracker_LSTCore_src_ModuleMethods_h +#define RecoTracker_LSTCore_src_ModuleMethods_h + +#include +#include + +#include "RecoTracker/LSTCore/interface/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +namespace lst { + struct ModuleMetaData { + std::map detIdToIndex; + std::map module_x; + std::map module_y; + std::map module_z; + std::map module_type; // 23 : Ph2PSP, 24 : Ph2PSS, 25 : Ph2SS + // https://github.com/cms-sw/cmssw/blob/5e809e8e0a625578aa265dc4b128a93830cb5429/Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h#L29 + }; + + // A module is "lower" when the lowest detId bit is set for non-inverted modules, or clear for inverted ones. + // inline: defined in a header, so it must be safe to include from several translation units. + inline bool parseIsLower(bool isInvertedx, unsigned int detId) { return (isInvertedx) ? !(detId & 1) : (detId & 1); } + + // detId of the paired module: detId + 1 or detId - 1, selected by the lower/inverted flags. + inline unsigned int parsePartnerModuleId(unsigned int detId, bool isLowerx, bool isInvertedx) { + return isLowerx ? (isInvertedx ? detId - 1 : detId + 1) : (isInvertedx ? 
detId + 1 : detId - 1); + } + + // Inversion flag from the parity of the module number; which parity counts as inverted depends on + // subdet, side and layer. Any subdet/side/layer combination not listed below yields false. + inline bool parseIsInverted(short subdet, short side, short module, short layer) { + if (subdet == Endcap) { + if (side == NegZ) { + return module % 2 == 1; + } else if (side == PosZ) { + return module % 2 == 0; + } else { + return false; + } + } else if (subdet == Barrel) { + if (side == Center) { + if (layer <= 3) { + return module % 2 == 1; + } else if (layer >= 4) { + return module % 2 == 0; + } else { + return false; + } + } else if (side == NegZ or side == PosZ) { + if (layer <= 2) { + return module % 2 == 1; + } else if (layer == 3) { + return module % 2 == 0; + } else { + return false; + } + } else { + return false; + } + } else { + return false; + } + } + + inline std::tuple, + unsigned int, + std::vector, + unsigned int, + std::vector> + getConnectedPixels(uint16_t nModules, unsigned int& nPixels, PixelMap& pixelMapping, MapPLStoLayer const& pLStoLayer) { + std::vector connectedModuleDetIds; + std::vector connectedModuleDetIds_pos; + std::vector connectedModuleDetIds_neg; + + unsigned int totalSizes = 0; + unsigned int totalSizes_pos = 0; + unsigned int totalSizes_neg = 0; + for (unsigned int isuperbin = 0; isuperbin < size_superbins; isuperbin++) { + int sizes = 0; + for (auto const& mCM_pLS : pLStoLayer[0]) { + std::vector connectedModuleDetIds_pLS = + mCM_pLS.getConnectedModuleDetIds(isuperbin + size_superbins); + connectedModuleDetIds.insert( + connectedModuleDetIds.end(), connectedModuleDetIds_pLS.begin(), connectedModuleDetIds_pLS.end()); + sizes += connectedModuleDetIds_pLS.size(); + } + pixelMapping.connectedPixelsIndex[isuperbin] = totalSizes; + pixelMapping.connectedPixelsSizes[isuperbin] = sizes; + totalSizes += sizes; + + int sizes_pos = 0; + for (auto const& mCM_pLS : pLStoLayer[1]) { + std::vector connectedModuleDetIds_pLS_pos = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_pos.insert(connectedModuleDetIds_pos.end(), + connectedModuleDetIds_pLS_pos.begin(), + 
connectedModuleDetIds_pLS_pos.end()); + sizes_pos += connectedModuleDetIds_pLS_pos.size(); + } + pixelMapping.connectedPixelsIndexPos[isuperbin] = totalSizes_pos; + pixelMapping.connectedPixelsSizesPos[isuperbin] = sizes_pos; + totalSizes_pos += sizes_pos; + + int sizes_neg = 0; + for (auto const& mCM_pLS : pLStoLayer[2]) { + std::vector connectedModuleDetIds_pLS_neg = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_neg.insert(connectedModuleDetIds_neg.end(), + connectedModuleDetIds_pLS_neg.begin(), + connectedModuleDetIds_pLS_neg.end()); + sizes_neg += connectedModuleDetIds_pLS_neg.size(); + } + pixelMapping.connectedPixelsIndexNeg[isuperbin] = totalSizes_neg; + pixelMapping.connectedPixelsSizesNeg[isuperbin] = sizes_neg; + totalSizes_neg += sizes_neg; + } + + nPixels = totalSizes + totalSizes_pos + totalSizes_neg; + + return {totalSizes, + connectedModuleDetIds, + totalSizes_pos, + connectedModuleDetIds_pos, + totalSizes_neg, + connectedModuleDetIds_neg}; + } + + inline void fillConnectedModuleArrayExplicit(Modules modules, + ModuleMetaData const& mmd, + ModuleConnectionMap const& moduleConnectionMap) { + Params_Modules::ArrayU16xMaxConnected* moduleMap = modules.moduleMap(); + uint16_t* nConnectedModules = modules.nConnectedModules(); + + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { + unsigned int detId = it->first; + uint16_t index = it->second; + auto& connectedModules = moduleConnectionMap.getConnectedModuleDetIds(detId); + nConnectedModules[index] = connectedModules.size(); + for (uint16_t i = 0; i < nConnectedModules[index]; i++) { + moduleMap[index][i] = mmd.detIdToIndex.at(connectedModules[i]); + } + } + } + + inline void fillMapArraysExplicit(Modules modules, ModuleMetaData const& mmd) { + uint16_t* mapIdx = modules.mapIdx(); + unsigned int* mapdetId = modules.mapdetId(); + + unsigned int counter = 0; + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { + unsigned int 
detId = it->first; + unsigned int index = it->second; + mapIdx[counter] = index; + mapdetId[counter] = detId; + counter++; + } + } + + inline void setDerivedQuantities(unsigned int detId, + unsigned short& layer, + unsigned short& ring, + unsigned short& rod, + unsigned short& module, + unsigned short& subdet, + unsigned short& side, + float m_x, + float m_y, + float m_z, + float& eta, + float& r) { + subdet = (detId & (7 << 25)) >> 25; + side = (subdet == Endcap) ? (detId & (3 << 23)) >> 23 : (detId & (3 << 18)) >> 18; + layer = (subdet == Endcap) ? (detId & (7 << 18)) >> 18 : (detId & (7 << 20)) >> 20; + ring = (subdet == Endcap) ? (detId & (15 << 12)) >> 12 : 0; + module = (detId & (127 << 2)) >> 2; + rod = (subdet == Endcap) ? 0 : (detId & (127 << 10)) >> 10; + + r = std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z); + eta = ((m_z > 0) - (m_z < 0)) * std::acosh(r / std::sqrt(m_x * m_x + m_y * m_y)); + } + + inline void loadCentroidsFromFile(const char* filePath, ModuleMetaData& mmd, uint16_t& nModules) { + std::ifstream ifile(filePath, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + std::string(filePath)); + } + + uint16_t counter = 0; + while (!ifile.eof()) { + unsigned int temp_detId; + float module_x, module_y, module_z; + int module_type; + + ifile.read(reinterpret_cast(&temp_detId), sizeof(temp_detId)); + ifile.read(reinterpret_cast(&module_x), sizeof(module_x)); + ifile.read(reinterpret_cast(&module_y), sizeof(module_y)); + ifile.read(reinterpret_cast(&module_z), sizeof(module_z)); + ifile.read(reinterpret_cast(&module_type), sizeof(module_type)); + + if (ifile) { + mmd.detIdToIndex[temp_detId] = counter; + mmd.module_x[temp_detId] = module_x; + mmd.module_y[temp_detId] = module_y; + mmd.module_z[temp_detId] = module_z; + mmd.module_type[temp_detId] = module_type; + counter++; + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read data for detId: " + std::to_string(temp_detId)); + } + } + } 
+ + mmd.detIdToIndex[1] = counter; //pixel module is the last module in the module list + counter++; + nModules = counter; + } + + inline std::shared_ptr loadModulesFromFile(MapPLStoLayer const& pLStoLayer, + const char* moduleMetaDataFilePath, + uint16_t& nModules, + uint16_t& nLowerModules, + unsigned int& nPixels, + PixelMap& pixelMapping, + const EndcapGeometry& endcapGeometry, + const TiltedGeometry& tiltedGeometry, + const ModuleConnectionMap& moduleConnectionMap) { + ModuleMetaData mmd; + + loadCentroidsFromFile(moduleMetaDataFilePath, mmd, nModules); + + // TODO: this whole section could use some refactoring + auto [totalSizes, + connectedModuleDetIds, + totalSizes_pos, + connectedModuleDetIds_pos, + totalSizes_neg, + connectedModuleDetIds_neg] = getConnectedPixels(nModules, nPixels, pixelMapping, pLStoLayer); + + std::array const modules_sizes{{static_cast(nModules), static_cast(nPixels)}}; + + auto modulesHC = std::make_shared(modules_sizes, cms::alpakatools::host()); + + auto modules_view = modulesHC->view(); + + // Getting the underlying data pointers + unsigned int* host_detIds = modules_view.detIds(); + short* host_layers = modules_view.layers(); + short* host_rings = modules_view.rings(); + short* host_rods = modules_view.rods(); + short* host_modules = modules_view.modules(); + short* host_subdets = modules_view.subdets(); + short* host_sides = modules_view.sides(); + float* host_eta = modules_view.eta(); + float* host_r = modules_view.r(); + bool* host_isInverted = modules_view.isInverted(); + bool* host_isLower = modules_view.isLower(); + bool* host_isAnchor = modules_view.isAnchor(); + ModuleType* host_moduleType = modules_view.moduleType(); + ModuleLayerType* host_moduleLayerType = modules_view.moduleLayerType(); + float* host_dxdys = modules_view.dxdys(); + float* host_drdzs = modules_view.drdzs(); + uint16_t* host_nModules = &modules_view.nModules(); + uint16_t* host_nLowerModules = &modules_view.nLowerModules(); + uint16_t* 
host_partnerModuleIndices = modules_view.partnerModuleIndices(); + int* host_lstLayers = modules_view.lstLayers(); + + //reassign detIdToIndex indices here + nLowerModules = (nModules - 1) / 2; + uint16_t lowerModuleCounter = 0; + uint16_t upperModuleCounter = nLowerModules + 1; + //0 to nLowerModules - 1 => only lower modules, nLowerModules - pixel module, nLowerModules + 1 to nModules => upper modules + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); it++) { + unsigned int detId = it->first; + float m_x = mmd.module_x[detId]; + float m_y = mmd.module_y[detId]; + float m_z = mmd.module_z[detId]; + unsigned int m_t = mmd.module_type[detId]; + + float eta, r; + + uint16_t index; + unsigned short layer, ring, rod, module, subdet, side; + bool isInverted, isLower; + if (detId == 1) { + layer = 0; + ring = 0; + rod = 0; + module = 0; + subdet = 0; + side = 0; + isInverted = false; + isLower = false; + eta = 0; + r = 0; + } else { + setDerivedQuantities(detId, layer, ring, rod, module, subdet, side, m_x, m_y, m_z, eta, r); + isInverted = parseIsInverted(subdet, side, module, layer); + isLower = parseIsLower(isInverted, detId); + } + if (isLower) { + index = lowerModuleCounter; + lowerModuleCounter++; + } else if (detId != 1) { + index = upperModuleCounter; + upperModuleCounter++; + } else { + index = nLowerModules; //pixel + } + //reassigning indices! + mmd.detIdToIndex[detId] = index; + host_detIds[index] = detId; + host_layers[index] = layer; + host_rings[index] = ring; + host_rods[index] = rod; + host_modules[index] = module; + host_subdets[index] = subdet; + host_sides[index] = side; + host_eta[index] = eta; + host_r[index] = r; + host_isInverted[index] = isInverted; + host_isLower[index] = isLower; + + //assigning other variables! 
+ if (detId == 1) { + host_moduleType[index] = PixelModule; + host_moduleLayerType[index] = lst::InnerPixelLayer; + host_dxdys[index] = 0; + host_drdzs[index] = 0; + host_isAnchor[index] = false; + } else { + host_moduleType[index] = (m_t == 25 ? lst::TwoS : lst::PS); + host_moduleLayerType[index] = (m_t == 23 ? lst::Pixel : lst::Strip); + + if (host_moduleType[index] == lst::PS and host_moduleLayerType[index] == lst::Pixel) { + host_isAnchor[index] = true; + } else if (host_moduleType[index] == lst::TwoS and host_isLower[index]) { + host_isAnchor[index] = true; + } else { + host_isAnchor[index] = false; + } + + host_dxdys[index] = (subdet == Endcap) ? endcapGeometry.getdxdy_slope(detId) : tiltedGeometry.getDxDy(detId); + host_drdzs[index] = (subdet == Barrel) ? tiltedGeometry.getDrDz(detId) : 0; + } + + host_lstLayers[index] = + layer + 6 * (subdet == lst::Endcap) + 5 * (subdet == lst::Endcap and host_moduleType[index] == lst::TwoS); + } + + //partner module stuff, and slopes and drdz move around + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); it++) { + auto& detId = it->first; + auto& index = it->second; + if (detId != 1) { + host_partnerModuleIndices[index] = + mmd.detIdToIndex[parsePartnerModuleId(detId, host_isLower[index], host_isInverted[index])]; + //add drdz and slope importing stuff here! 
+ if (host_drdzs[index] == 0) { + host_drdzs[index] = host_drdzs[host_partnerModuleIndices[index]]; + } + if (host_dxdys[index] == 0) { + host_dxdys[index] = host_dxdys[host_partnerModuleIndices[index]]; + } + } + } + + *host_nModules = nModules; + *host_nLowerModules = nLowerModules; + + // Fill pixel part + pixelMapping.pixelModuleIndex = mmd.detIdToIndex.at(1); + + auto modulesPixel_view = modulesHC->view(); + auto connectedPixels = + cms::alpakatools::make_host_view(modulesPixel_view.connectedPixels(), modulesPixel_view.metadata().size()); + for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { + connectedPixels[icondet] = mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); + } + for (unsigned int icondet = 0; icondet < totalSizes_pos; icondet++) { + connectedPixels[icondet + totalSizes] = mmd.detIdToIndex.at(connectedModuleDetIds_pos[icondet]); + } + for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { + connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); + } + + fillConnectedModuleArrayExplicit(modules_view, mmd, moduleConnectionMap); + fillMapArraysExplicit(modules_view, mmd); + + return modulesHC; + } +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/src/TiltedGeometry.cc b/RecoTracker/LSTCore/src/TiltedGeometry.cc new file mode 100644 index 0000000000000..d65a9a4a5f7b9 --- /dev/null +++ b/RecoTracker/LSTCore/src/TiltedGeometry.cc @@ -0,0 +1,48 @@ +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" + +#include +#include +#include +#include + +lst::TiltedGeometry::TiltedGeometry(std::string const& filename) { load(filename); } + +void lst::TiltedGeometry::load(std::string const& filename) { + drdzs_.clear(); + dxdys_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid; + float drdz, dxdy; + + // Read 
the detid, drdz, and dxdy from binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + ifile.read(reinterpret_cast(&drdz), sizeof(drdz)); + ifile.read(reinterpret_cast(&dxdy), sizeof(dxdy)); + + if (ifile) { + drdzs_[detid] = drdz; + dxdys_[detid] = dxdy; + } else { + // End of file or read failed + if (!ifile.eof()) { + throw std::runtime_error("Failed to read Tilted Geometry binary data."); + } + } + } +} + +float lst::TiltedGeometry::getDrDz(unsigned int detid) const { + auto res = drdzs_.find(detid); + return res == drdzs_.end() ? 0.f : res->second; +} + +float lst::TiltedGeometry::getDxDy(unsigned int detid) const { + auto res = dxdys_.find(detid); + return res == dxdys_.end() ? 0.f : res->second; +} diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h new file mode 100644 index 0000000000000..166be95cb432f --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -0,0 +1,164 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h +#define RecoTracker_LSTCore_src_alpaka_Hit_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float eta(TAcc const& acc, float x, float y, float z) { + float r3 = alpaka::math::sqrt(acc, x * x + y * y + z * z); + float rt = alpaka::math::sqrt(acc, x * x + y * y); + float eta = ((z > 0) - (z < 0)) * alpaka::math::acosh(acc, r3 / rt); + return eta; + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi_mpi_pi(TAcc const& acc, float x) { + if (alpaka::math::abs(acc, x) <= kPi) + return x; + + constexpr float o2pi = 1.f / (2.f * kPi); + float n = alpaka::math::round(acc, x * o2pi); + return x - n * float(2.f * kPi); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi(TAcc const& acc, float x, float y) { + return phi_mpi_pi(acc, kPi + 
alpaka::math::atan2(acc, -y, -x)); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhi(TAcc const& acc, float x1, float y1, float x2, float y2) { + float phi1 = phi(acc, x1, y1); + float phi2 = phi(acc, x2, y2); + return phi_mpi_pi(acc, (phi2 - phi1)); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhiChange(TAcc const& acc, float x1, float y1, float x2, float y2) { + return deltaPhi(acc, x1, y1, x2 - x1, y2 - y1); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float calculate_dPhi(float phi1, float phi2) { + // Calculate dPhi + float dPhi = phi1 - phi2; + + // Normalize dPhi to be between -pi and pi + if (dPhi > kPi) { + dPhi -= 2 * kPi; + } else if (dPhi < -kPi) { + dPhi += 2 * kPi; + } + + return dPhi; + } + + // Binary search for search_val in data, which must hold ndata values sorted in ascending order. + // Returns the index of a matching element, or -1 if search_val is not present (always -1 when ndata is 0). + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int binary_search(const unsigned int* data, // Array that we are searching over + unsigned int search_val, // Value we want to find in data array + unsigned int ndata) // Number of elements in data array + { + // Search the half-open range [low, high). An inclusive upper bound (ndata - 1, mid - 1) wraps around with + // unsigned indices when ndata is 0 or search_val is below data[0], which leads to out-of-bounds reads. + unsigned int low = 0; + unsigned int high = ndata; + + while (low < high) { + unsigned int mid = low + (high - low) / 2; // Cannot overflow, unlike (low + high) / 2 + unsigned int test_val = data[mid]; + if (test_val == search_val) + return mid; + else if (test_val > search_val) + high = mid; + else + low = mid + 1; + } + // Couldn't find search value in array.
+ return -1; + } + + struct ModuleRangesKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + HitsRanges hitsRanges, + int nLowerModules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) { + uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex]; + if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) { + hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0]; + hitsRanges.hitRangesUpper()[lowerIndex] = hitsRanges.hitRanges()[upperIndex][0]; + hitsRanges.hitRangesnLower()[lowerIndex] = + hitsRanges.hitRanges()[lowerIndex][1] - hitsRanges.hitRanges()[lowerIndex][0] + 1; + hitsRanges.hitRangesnUpper()[lowerIndex] = + hitsRanges.hitRanges()[upperIndex][1] - hitsRanges.hitRanges()[upperIndex][0] + 1; + } + } + } + }; + + struct HitLoopKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t Endcap, // Integer corresponding to endcap in module subdets + uint16_t TwoS, // Integer corresponding to TwoS in moduleType + unsigned int nModules, // Number of modules + unsigned int nEndCapMap, // Number of elements in endcap map + EndcapGeometryDevConst endcapGeometry, + ModulesConst modules, + Hits hits, + HitsRanges hitsRanges, + unsigned int nHits) const // Total number of hits in event + { + auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map + auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) { + float ihit_x = hits.xs()[ihit]; + float ihit_y = hits.ys()[ihit]; + float ihit_z = hits.zs()[ihit]; + int iDetId = 
hits.detid()[ihit]; + + hits.rts()[ihit] = alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y); + hits.phis()[ihit] = phi(acc, ihit_x, ihit_y); + hits.etas()[ihit] = + ((ihit_z > 0) - (ihit_z < 0)) * + alpaka::math::acosh( + acc, alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y + ihit_z * ihit_z) / hits.rts()[ihit]); + int found_index = binary_search(modules.mapdetId(), iDetId, nModules); + uint16_t lastModuleIndex = modules.mapIdx()[found_index]; + + hits.moduleIndices()[ihit] = lastModuleIndex; + + if (modules.subdets()[lastModuleIndex] == Endcap && modules.moduleType()[lastModuleIndex] == TwoS) { + found_index = binary_search(geoMapDetId, iDetId, nEndCapMap); + float phi = geoMapPhi[found_index]; + float cos_phi = alpaka::math::cos(acc, phi); + hits.highEdgeXs()[ihit] = ihit_x + 2.5f * cos_phi; + hits.lowEdgeXs()[ihit] = ihit_x - 2.5f * cos_phi; + float sin_phi = alpaka::math::sin(acc, phi); + hits.highEdgeYs()[ihit] = ihit_y + 2.5f * sin_phi; + hits.lowEdgeYs()[ihit] = ihit_y - 2.5f * sin_phi; + } + // Need to set initial value if index hasn't been seen before. + int old = alpaka::atomicCas(acc, + &(hitsRanges.hitRanges()[lastModuleIndex][0]), + -1, + static_cast(ihit), + alpaka::hierarchy::Threads{}); + // For subsequent visits, stores the min value. 
+ if (old != -1) + alpaka::atomicMin( + acc, &hitsRanges.hitRanges()[lastModuleIndex][0], static_cast(ihit), alpaka::hierarchy::Threads{}); + + alpaka::atomicMax( + acc, &hitsRanges.hitRanges()[lastModuleIndex][1], static_cast(ihit), alpaka::hierarchy::Threads{}); + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h new file mode 100644 index 0000000000000..c642f2427fa84 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -0,0 +1,421 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h +#define RecoTracker_LSTCore_src_alpaka_Kernels_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets quintuplets, + unsigned int quintupletIndex, + bool secondpass = false) { + quintuplets.isDup()[quintupletIndex] |= 1 + secondpass; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets pixelTriplets, + unsigned int pixelTripletIndex) { + pixelTriplets.isDup()[pixelTripletIndex] = true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets pixelQuintuplets, + unsigned int pixelQuintupletIndex) { + pixelQuintuplets.isDup()[pixelQuintupletIndex] = true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(SegmentsPixel segmentsPixel, + unsigned int pixelSegmentArrayIndex, + bool secondpass = 
false) { + segmentsPixel.isDup()[pixelSegmentArrayIndex] |= 1 + secondpass; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, QuintupletsConst quintuplets) { + unsigned int hits1[Params_T5::kHits]; + unsigned int hits2[Params_T5::kHits]; + + for (int i = 0; i < Params_T5::kHits; i++) { + hits1[i] = quintuplets.hitIndices()[ix][i]; + hits2[i] = quintuplets.hitIndices()[jx][i]; + } + + int nMatched = 0; + for (int i = 0; i < Params_T5::kHits; i++) { + bool matched = false; + for (int j = 0; j < Params_T5::kHits; j++) { + if (hits1[i] == hits2[j]) { + matched = true; + break; + } + } + if (matched) { + nMatched++; + } + } + return nMatched; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, + unsigned int jx, + PixelQuintupletsConst pixelQuintuplets) { + unsigned int hits1[Params_pT5::kHits]; + unsigned int hits2[Params_pT5::kHits]; + + for (int i = 0; i < Params_pT5::kHits; i++) { + hits1[i] = pixelQuintuplets.hitIndices()[ix][i]; + hits2[i] = pixelQuintuplets.hitIndices()[jx][i]; + } + + int nMatched = 0; + for (int i = 0; i < Params_pT5::kHits; i++) { + bool matched = false; + for (int j = 0; j < Params_pT5::kHits; j++) { + if (hits1[i] == hits2[j]) { + matched = true; + break; + } + } + if (matched) { + nMatched++; + } + } + return nMatched; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, + unsigned int jx, + PixelTripletsConst pixelTriplets, + int* matched) { + int phits1[Params_pLS::kHits]; + int phits2[Params_pLS::kHits]; + + for (int i = 0; i < Params_pLS::kHits; i++) { + phits1[i] = pixelTriplets.hitIndices()[ix][i]; + phits2[i] = pixelTriplets.hitIndices()[jx][i]; + } + + int npMatched = 0; + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + for (int j = 0; j < Params_pLS::kHits; j++) { + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) { + npMatched++; + } + } + + int hits1[Params_T3::kHits]; + int 
hits2[Params_T3::kHits]; + + for (int i = 0; i < Params_T3::kHits; i++) { + hits1[i] = pixelTriplets.hitIndices()[ix][i + 4]; // Omitting the pLS hits + hits2[i] = pixelTriplets.hitIndices()[jx][i + 4]; // Omitting the pLS hits + } + + int nMatched = 0; + for (int i = 0; i < Params_T3::kHits; i++) { + bool tmatched = false; + for (int j = 0; j < Params_T3::kHits; j++) { + if (hits1[i] == hits2[j]) { + tmatched = true; + break; + } + } + if (tmatched) { + nMatched++; + } + } + + matched[0] = npMatched; + matched[1] = nMatched; + } + + struct RemoveDupQuintupletsAfterBuild { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) { + unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod]; + int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod]; + + for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) { + unsigned int ix = quintupletModuleIndices_lowmod + ix1; + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = __H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); + + for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) { + unsigned int jx = quintupletModuleIndices_lowmod + jx1; + + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); + + if (dEta > 0.1f) + continue; + + if (alpaka::math::abs(acc, dPhi) > 0.1f) + continue; 
+ + int nMatched = checkHitsT5(ix, jx, quintuplets); + const int minNHitsForDup_T5 = 7; + if (nMatched >= minNHitsForDup_T5) { + if (score_rphisum1 >= score_rphisum2) { + rmQuintupletFromMemory(quintuplets, ix); + } else { + rmQuintupletFromMemory(quintuplets, jx); + } + } + } + } + } + } + }; + + struct RemoveDupQuintupletsBeforeTC { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules(); + lowmodIdx1 += gridThreadExtent[1]) { + uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1]; + unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1]; + if (nQuintuplets_lowmod1 == 0) + continue; + + unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1]; + + for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules(); + lowmodIdx2 += gridThreadExtent[2]) { + uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2]; + unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2]; + if (nQuintuplets_lowmod2 == 0) + continue; + + unsigned int quintupletModuleIndices_lowmod2 = ranges.quintupletModuleIndices()[lowmod2]; + + for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) { + unsigned int ix = quintupletModuleIndices_lowmod1 + ix1; + if (quintuplets.partOfPT5()[ix] || (quintuplets.isDup()[ix] & 1)) + continue; + + for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) { + unsigned int jx = quintupletModuleIndices_lowmod2 + jx1; + if (ix == jx) + continue; + + if (quintuplets.partOfPT5()[jx] || (quintuplets.isDup()[jx] & 1)) + continue; + + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = 
__H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); + + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); + + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + if (dEta > 0.1f) + continue; + + if (alpaka::math::abs(acc, dPhi) > 0.1f) + continue; + + float dR2 = dEta * dEta + dPhi * dPhi; + int nMatched = checkHitsT5(ix, jx, quintuplets); + const int minNHitsForDup_T5 = 5; + if (dR2 < 0.001f || nMatched >= minNHitsForDup_T5) { + if (score_rphisum1 > score_rphisum2) { + rmQuintupletFromMemory(quintuplets, ix, true); + } else if (score_rphisum1 < score_rphisum2) { + rmQuintupletFromMemory(quintuplets, jx, true); + } else { + rmQuintupletFromMemory(quintuplets, (ix < jx ? ix : jx), true); + } + } + } + } + } + } + } + }; + + struct RemoveDupPixelTripletsFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) { + for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) { + if (ix == jx) + continue; + + int nMatched[2]; + checkHitspT3(ix, jx, pixelTriplets, nMatched); + const int minNHitsForDup_pT3 = 5; + if ((nMatched[0] + nMatched[1]) >= minNHitsForDup_pT3) { + // Check the layers + if (pixelTriplets.logicalLayers()[jx][2] < pixelTriplets.logicalLayers()[ix][2]) { + rmPixelTripletFromMemory(pixelTriplets, ix); + break; + } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + __H2F(pixelTriplets.score()[ix]) > __H2F(pixelTriplets.score()[jx])) { + rmPixelTripletFromMemory(pixelTriplets, ix); + break; + } else if 
(pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + (__H2F(pixelTriplets.score()[ix]) == __H2F(pixelTriplets.score()[jx])) && (ix < jx)) { + rmPixelTripletFromMemory(pixelTriplets, ix); + break; + } + } + } + } + } + }; + + struct RemoveDupPixelQuintupletsFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); + for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) { + float score1 = __H2F(pixelQuintuplets.score()[ix]); + for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) { + if (ix == jx) + continue; + + int nMatched = checkHitspT5(ix, jx, pixelQuintuplets); + float score2 = __H2F(pixelQuintuplets.score()[jx]); + const int minNHitsForDup_pT5 = 7; + if (nMatched >= minNHitsForDup_pT5) { + if (score1 > score2 or ((score1 == score2) and (ix > jx))) { + rmPixelQuintupletFromMemory(pixelQuintuplets, ix); + break; + } + } + } + } + } + }; + + struct CheckHitspLS { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, + bool secondpass) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int pixelModuleIndex = modules.nLowerModules(); + unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex]; + + if (nPixelSegments > n_max_pixel_segments_per_module) + nPixelSegments = n_max_pixel_segments_per_module; + + for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) { + if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1))) + continue; + + unsigned int 
phits1[Params_pLS::kHits]; + phits1[0] = segmentsPixel.pLSHitsIdxs()[ix].x; + phits1[1] = segmentsPixel.pLSHitsIdxs()[ix].y; + phits1[2] = segmentsPixel.pLSHitsIdxs()[ix].z; + phits1[3] = segmentsPixel.pLSHitsIdxs()[ix].w; + float eta_pix1 = segmentsPixel.eta()[ix]; + float phi_pix1 = segmentsPixel.phi()[ix]; + + for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) { + float eta_pix2 = segmentsPixel.eta()[jx]; + float phi_pix2 = segmentsPixel.phi()[jx]; + + if (alpaka::math::abs(acc, eta_pix2 - eta_pix1) > 0.1f) + continue; + + if (secondpass && (!segmentsPixel.isQuad()[jx] || (segmentsPixel.isDup()[jx] & 1))) + continue; + + int8_t quad_diff = segmentsPixel.isQuad()[ix] - segmentsPixel.isQuad()[jx]; + float score_diff = segmentsPixel.score()[ix] - segmentsPixel.score()[jx]; + // Always keep quads over trips. If they are the same, we want the object with better score + int idxToRemove; + if (quad_diff > 0) + idxToRemove = jx; + else if (quad_diff < 0) + idxToRemove = ix; + else if (score_diff < 0) + idxToRemove = jx; + else if (score_diff > 0) + idxToRemove = ix; + else + idxToRemove = ix; + + unsigned int phits2[Params_pLS::kHits]; + phits2[0] = segmentsPixel.pLSHitsIdxs()[jx].x; + phits2[1] = segmentsPixel.pLSHitsIdxs()[jx].y; + phits2[2] = segmentsPixel.pLSHitsIdxs()[jx].z; + phits2[3] = segmentsPixel.pLSHitsIdxs()[jx].w; + + int npMatched = 0; + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + for (int j = 0; j < Params_pLS::kHits; j++) { + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) { + npMatched++; + // Only one hit is enough + if (secondpass) + break; + } + } + const int minNHitsForDup_pLS = 3; + if (npMatched >= minNHitsForDup_pLS) { + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); + } + if (secondpass) { + float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2); + float dPhi = calculate_dPhi(phi_pix1, phi_pix2); + + float dR2 = 
dEta * dEta + dPhi * dPhi; + if ((npMatched >= 1) || (dR2 < 1e-5f)) { + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); + } + } + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/LST.cc b/RecoTracker/LSTCore/src/alpaka/LST.cc new file mode 100644 index 0000000000000..3c1638677eab2 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/LST.cc @@ -0,0 +1,414 @@ +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "LSTEvent.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + +#include "Math/Vector3D.h" +#include "Math/VectorUtil.h" +using XYZVector = ROOT::Math::XYZVector; + +namespace { + XYZVector calculateR3FromPCA(const XYZVector& p3, float dxy, float dz) { + const float pt = p3.rho(); + const float p = p3.r(); + const float vz = dz * pt * pt / p / p; + + const float vx = -dxy * p3.y() / pt - p3.x() / p * p3.z() / p * dz; + const float vy = dxy * p3.x() / pt - p3.y() / p * p3.z() / p * dz; + return {vx, vy, vz}; + } + + using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + // Collects the hit indices of one track candidate. + // The number of entries read from tcHitIndices is chosen by trackCandidateType (pT5, pT3, T5 or pLS); + // any other type yields an empty result. For pLS the stored value is used as is, for the other types + // it is used as an index into hitIndices. For non-T5 types a 4th hit equal to the 3rd one is skipped. + std::vector getHitIdxs(short trackCandidateType, + Params_pT5::ArrayUxHits const& tcHitIndices, + unsigned int const* hitIndices) { + std::vector hits; + + unsigned int maxNHits = 0; + if (trackCandidateType == LSTObjType::pT5) + maxNHits = Params_pT5::kHits; + else if (trackCandidateType == LSTObjType::pT3) + maxNHits = Params_pT3::kHits; + else if (trackCandidateType == LSTObjType::T5) + maxNHits = Params_T5::kHits; + else if (trackCandidateType == LSTObjType::pLS) + maxNHits = Params_pLS::kHits; + + for (unsigned int i = 0; i < maxNHits; i++) { + unsigned int hitIdxDev = tcHitIndices[i]; + unsigned int hitIdx = + (trackCandidateType == LSTObjType::pLS) + ? hitIdxDev + : hitIndices[hitIdxDev]; // Hit indices are stored differently in the standalone for pLS. + + // For p objects, the 3rd and 4th hit may be the same, + // due to the way pLS hits are stored in the standalone.
+ // This is because pixel seeds can be either triplets or quadruplets. + if (trackCandidateType != LSTObjType::T5 && hits.size() == 3 && + hits.back() == hitIdx) // Remove duplicate 4th hits. + continue; + + hits.push_back(hitIdx); + } + + return hits; + } + +} // namespace + +void LST::prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z) { + in_trkX_.clear(); + in_trkY_.clear(); + in_trkZ_.clear(); + in_hitId_.clear(); + in_hitIdxs_.clear(); + in_hitIndices_vec0_.clear(); + in_hitIndices_vec1_.clear(); + in_hitIndices_vec2_.clear(); + in_hitIndices_vec3_.clear(); + in_deltaPhi_vec_.clear(); + in_ptIn_vec_.clear(); + in_ptErr_vec_.clear(); + in_px_vec_.clear(); + in_py_vec_.clear(); + in_pz_vec_.clear(); + in_eta_vec_.clear(); + in_etaErr_vec_.clear(); + in_phi_vec_.clear(); + in_charge_vec_.clear(); + in_seedIdx_vec_.clear(); + in_superbin_vec_.clear(); + in_pixelType_vec_.clear(); + in_isQuad_vec_.clear(); + + unsigned int count = 0; + auto n_see = see_stateTrajGlbPx.size(); + in_px_vec_.reserve(n_see); + in_py_vec_.reserve(n_see); + in_pz_vec_.reserve(n_see); + in_hitIndices_vec0_.reserve(n_see); + in_hitIndices_vec1_.reserve(n_see); + in_hitIndices_vec2_.reserve(n_see); + in_hitIndices_vec3_.reserve(n_see); + in_ptIn_vec_.reserve(n_see); + in_ptErr_vec_.reserve(n_see); + in_etaErr_vec_.reserve(n_see); + in_eta_vec_.reserve(n_see); + in_phi_vec_.reserve(n_see); + 
in_charge_vec_.reserve(n_see); + in_seedIdx_vec_.reserve(n_see); + in_deltaPhi_vec_.reserve(n_see); + in_trkX_ = ph2_x; + in_trkY_ = ph2_y; + in_trkZ_ = ph2_z; + in_hitId_ = ph2_detId; + in_hitIdxs_.resize(ph2_detId.size()); + + std::iota(in_hitIdxs_.begin(), in_hitIdxs_.end(), 0); + const int hit_size = in_trkX_.size(); + + for (size_t iSeed = 0; iSeed < n_see; iSeed++) { + XYZVector p3LH(see_stateTrajGlbPx[iSeed], see_stateTrajGlbPy[iSeed], see_stateTrajGlbPz[iSeed]); + float ptIn = p3LH.rho(); + float eta = p3LH.eta(); + float ptErr = see_ptErr[iSeed]; + + if ((ptIn > 0.8 - 2 * ptErr)) { + XYZVector r3LH(see_stateTrajGlbX[iSeed], see_stateTrajGlbY[iSeed], see_stateTrajGlbZ[iSeed]); + XYZVector p3PCA(see_px[iSeed], see_py[iSeed], see_pz[iSeed]); + XYZVector r3PCA(calculateR3FromPCA(p3PCA, see_dxy[iSeed], see_dz[iSeed])); + + // The charge could be used directly in the line below + float pixelSegmentDeltaPhiChange = ROOT::Math::VectorUtil::DeltaPhi(p3LH, r3LH); + float etaErr = see_etaErr[iSeed]; + float px = p3LH.x(); + float py = p3LH.y(); + float pz = p3LH.z(); + + int charge = see_q[iSeed]; + PixelType pixtype = PixelType::kInvalid; + + if (ptIn >= 2.0) + pixtype = PixelType::kHighPt; + else if (ptIn >= (0.8 - 2 * ptErr) and ptIn < 2.0) { + if (pixelSegmentDeltaPhiChange >= 0) + pixtype = PixelType::kLowPtPosCurv; + else + pixtype = PixelType::kLowPtNegCurv; + } else + continue; + + unsigned int hitIdx0 = hit_size + count; + count++; + unsigned int hitIdx1 = hit_size + count; + count++; + unsigned int hitIdx2 = hit_size + count; + count++; + unsigned int hitIdx3; + if (see_hitIdx[iSeed].size() <= 3) + hitIdx3 = hitIdx2; + else { + hitIdx3 = hit_size + count; + count++; + } + + in_trkX_.push_back(r3PCA.x()); + in_trkY_.push_back(r3PCA.y()); + in_trkZ_.push_back(r3PCA.z()); + in_trkX_.push_back(p3PCA.rho()); + float p3PCA_Eta = p3PCA.eta(); + in_trkY_.push_back(p3PCA_Eta); + float p3PCA_Phi = p3PCA.phi(); + in_trkZ_.push_back(p3PCA_Phi); + 
in_trkX_.push_back(r3LH.x()); + in_trkY_.push_back(r3LH.y()); + in_trkZ_.push_back(r3LH.z()); + in_hitId_.push_back(1); + in_hitId_.push_back(1); + in_hitId_.push_back(1); + if (see_hitIdx[iSeed].size() > 3) { + in_trkX_.push_back(r3LH.x()); + in_trkY_.push_back(see_dxy[iSeed]); + in_trkZ_.push_back(see_dz[iSeed]); + in_hitId_.push_back(1); + } + in_px_vec_.push_back(px); + in_py_vec_.push_back(py); + in_pz_vec_.push_back(pz); + + in_hitIndices_vec0_.push_back(hitIdx0); + in_hitIndices_vec1_.push_back(hitIdx1); + in_hitIndices_vec2_.push_back(hitIdx2); + in_hitIndices_vec3_.push_back(hitIdx3); + in_ptIn_vec_.push_back(ptIn); + in_ptErr_vec_.push_back(ptErr); + in_etaErr_vec_.push_back(etaErr); + in_eta_vec_.push_back(eta); + float phi = p3LH.phi(); + in_phi_vec_.push_back(phi); + in_charge_vec_.push_back(charge); + in_seedIdx_vec_.push_back(iSeed); + in_deltaPhi_vec_.push_back(pixelSegmentDeltaPhiChange); + + in_hitIdxs_.push_back(see_hitIdx[iSeed][0]); + in_hitIdxs_.push_back(see_hitIdx[iSeed][1]); + in_hitIdxs_.push_back(see_hitIdx[iSeed][2]); + char isQuad = false; + if (see_hitIdx[iSeed].size() > 3) { + isQuad = true; + in_hitIdxs_.push_back(see_hitIdx[iSeed][3]); + } + float neta = 25.; + float nphi = 72.; + float nz = 25.; + int etabin = (p3PCA_Eta + 2.6) / ((2 * 2.6) / neta); + int phibin = (p3PCA_Phi + kPi) / ((2. 
* kPi) / nphi); + int dzbin = (see_dz[iSeed] + 30) / (2 * 30 / nz); + int isuperbin = (nz * nphi) * etabin + (nz)*phibin + dzbin; + in_superbin_vec_.push_back(isuperbin); + in_pixelType_vec_.push_back(pixtype); + in_isQuad_vec_.push_back(isQuad); + } + } +} + +void LST::getOutput(LSTEvent& event) { + out_tc_hitIdxs_.clear(); + out_tc_len_.clear(); + out_tc_seedIdx_.clear(); + out_tc_trackCandidateType_.clear(); + + auto const hits = event.getHits(/*inCMSSW*/ true, /*sync*/ false); // sync on next line + auto const& trackCandidates = event.getTrackCandidates(/*inCMSSW*/ true, /*sync*/ true); + + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); + + for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { + short trackCandidateType = trackCandidates.trackCandidateType()[idx]; + std::vector hit_idx = getHitIdxs(trackCandidateType, trackCandidates.hitIndices()[idx], hits.idxs()); + + out_tc_hitIdxs_.push_back(hit_idx); + out_tc_len_.push_back(hit_idx.size()); + out_tc_seedIdx_.push_back(trackCandidates.pixelSeedIndex()[idx]); + out_tc_trackCandidateType_.push_back(trackCandidateType); + } +} + +void LST::run(Queue& queue, + bool verbose, + LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets) { + auto event = LSTEvent(verbose, queue, deviceESData); + prepareInput(see_px, + see_py, + see_pz, + see_dxy, + 
see_dz, + see_ptErr, + see_etaErr, + see_stateTrajGlbX, + see_stateTrajGlbY, + see_stateTrajGlbZ, + see_stateTrajGlbPx, + see_stateTrajGlbPy, + see_stateTrajGlbPz, + see_q, + see_hitIdx, + ph2_detId, + ph2_x, + ph2_y, + ph2_z); + + event.addHitToEvent(in_trkX_, in_trkY_, in_trkZ_, in_hitId_, in_hitIdxs_); + event.addPixelSegmentToEvent(in_hitIndices_vec0_, + in_hitIndices_vec1_, + in_hitIndices_vec2_, + in_hitIndices_vec3_, + in_deltaPhi_vec_, + in_ptIn_vec_, + in_ptErr_vec_, + in_px_vec_, + in_py_vec_, + in_pz_vec_, + in_eta_vec_, + in_etaErr_vec_, + in_phi_vec_, + in_charge_vec_, + in_seedIdx_vec_, + in_superbin_vec_, + in_pixelType_vec_, + in_isQuad_vec_); + event.createMiniDoublets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Mini-doublets produced: %d\n", event.getNumberOfMiniDoublets()); + printf("# of Mini-doublets produced barrel layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(0)); + printf("# of Mini-doublets produced barrel layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(1)); + printf("# of Mini-doublets produced barrel layer 3: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(2)); + printf("# of Mini-doublets produced barrel layer 4: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(3)); + printf("# of Mini-doublets produced barrel layer 5: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(4)); + printf("# of Mini-doublets produced barrel layer 6: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(5)); + printf("# of Mini-doublets produced endcap layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(0)); + printf("# of Mini-doublets produced endcap layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(1)); + printf("# of Mini-doublets produced endcap layer 3: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(2)); + printf("# of Mini-doublets produced endcap layer 4: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(3)); + printf("# of Mini-doublets produced 
endcap layer 5: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(4)); + } + + event.createSegmentsWithModuleMap(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Segments produced: %d\n", event.getNumberOfSegments()); + printf("# of Segments produced layer 1-2: %d\n", event.getNumberOfSegmentsByLayerBarrel(0)); + printf("# of Segments produced layer 2-3: %d\n", event.getNumberOfSegmentsByLayerBarrel(1)); + printf("# of Segments produced layer 3-4: %d\n", event.getNumberOfSegmentsByLayerBarrel(2)); + printf("# of Segments produced layer 4-5: %d\n", event.getNumberOfSegmentsByLayerBarrel(3)); + printf("# of Segments produced layer 5-6: %d\n", event.getNumberOfSegmentsByLayerBarrel(4)); + printf("# of Segments produced endcap layer 1: %d\n", event.getNumberOfSegmentsByLayerEndcap(0)); + printf("# of Segments produced endcap layer 2: %d\n", event.getNumberOfSegmentsByLayerEndcap(1)); + printf("# of Segments produced endcap layer 3: %d\n", event.getNumberOfSegmentsByLayerEndcap(2)); + printf("# of Segments produced endcap layer 4: %d\n", event.getNumberOfSegmentsByLayerEndcap(3)); + printf("# of Segments produced endcap layer 5: %d\n", event.getNumberOfSegmentsByLayerEndcap(4)); + } + + event.createTriplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of T3s produced: %d\n", event.getNumberOfTriplets()); + printf("# of T3s produced layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerBarrel(0)); + printf("# of T3s produced layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerBarrel(1)); + printf("# of T3s produced layer 3-4-5: %d\n", event.getNumberOfTripletsByLayerBarrel(2)); + printf("# of T3s produced layer 4-5-6: %d\n", event.getNumberOfTripletsByLayerBarrel(3)); + printf("# of T3s produced endcap layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerEndcap(0)); + printf("# of T3s produced endcap layer 2-3-4: %d\n", 
event.getNumberOfTripletsByLayerEndcap(1)); + printf("# of T3s produced endcap layer 3-4-5: %d\n", event.getNumberOfTripletsByLayerEndcap(2)); + printf("# of T3s produced endcap layer 1: %d\n", event.getNumberOfTripletsByLayerEndcap(0)); + printf("# of T3s produced endcap layer 2: %d\n", event.getNumberOfTripletsByLayerEndcap(1)); + printf("# of T3s produced endcap layer 3: %d\n", event.getNumberOfTripletsByLayerEndcap(2)); + printf("# of T3s produced endcap layer 4: %d\n", event.getNumberOfTripletsByLayerEndcap(3)); + printf("# of T3s produced endcap layer 5: %d\n", event.getNumberOfTripletsByLayerEndcap(4)); + } + + event.createQuintuplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Quintuplets produced: %d\n", event.getNumberOfQuintuplets()); + printf("# of Quintuplets produced layer 1-2-3-4-5-6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(0)); + printf("# of Quintuplets produced layer 2: %d\n", event.getNumberOfQuintupletsByLayerBarrel(1)); + printf("# of Quintuplets produced layer 3: %d\n", event.getNumberOfQuintupletsByLayerBarrel(2)); + printf("# of Quintuplets produced layer 4: %d\n", event.getNumberOfQuintupletsByLayerBarrel(3)); + printf("# of Quintuplets produced layer 5: %d\n", event.getNumberOfQuintupletsByLayerBarrel(4)); + printf("# of Quintuplets produced layer 6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(5)); + printf("# of Quintuplets produced endcap layer 1: %d\n", event.getNumberOfQuintupletsByLayerEndcap(0)); + printf("# of Quintuplets produced endcap layer 2: %d\n", event.getNumberOfQuintupletsByLayerEndcap(1)); + printf("# of Quintuplets produced endcap layer 3: %d\n", event.getNumberOfQuintupletsByLayerEndcap(2)); + printf("# of Quintuplets produced endcap layer 4: %d\n", event.getNumberOfQuintupletsByLayerEndcap(3)); + printf("# of Quintuplets produced endcap layer 5: %d\n", event.getNumberOfQuintupletsByLayerEndcap(4)); + } + + 
event.pixelLineSegmentCleaning(no_pls_dupclean); + + event.createPixelQuintuplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Pixel Quintuplets produced: %d\n", event.getNumberOfPixelQuintuplets()); + } + + event.createPixelTriplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Pixel T3s produced: %d\n", event.getNumberOfPixelTriplets()); + } + + event.createTrackCandidates(no_pls_dupclean, tc_pls_triplets); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of TrackCandidates produced: %d\n", event.getNumberOfTrackCandidates()); + printf(" # of Pixel TrackCandidates produced: %d\n", event.getNumberOfPixelTrackCandidates()); + printf(" # of pT5 TrackCandidates produced: %d\n", event.getNumberOfPT5TrackCandidates()); + printf(" # of pT3 TrackCandidates produced: %d\n", event.getNumberOfPT3TrackCandidates()); + printf(" # of pLS TrackCandidates produced: %d\n", event.getNumberOfPLSTrackCandidates()); + printf(" # of T5 TrackCandidates produced: %d\n", event.getNumberOfT5TrackCandidates()); + } + + getOutput(event); +} diff --git a/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc new file mode 100644 index 0000000000000..be6c2b88b73c8 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc @@ -0,0 +1,1680 @@ +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "LSTEvent.h" + +#include "MiniDoublet.h" +#include "PixelQuintuplet.h" +#include "PixelTriplet.h" +#include "Quintuplet.h" +#include "Segment.h" +#include "TrackCandidate.h" +#include "Triplet.h" + +using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device; +using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; +using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; +using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; + +using namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst; + +void LSTEvent::initSync() { + alpaka::wait(queue_); // other calls can be asynchronous + + //reset the arrays + for (int i = 0; i < 6; i++) { + n_minidoublets_by_layer_barrel_[i] = 0; + n_segments_by_layer_barrel_[i] = 0; + n_triplets_by_layer_barrel_[i] = 0; + n_quintuplets_by_layer_barrel_[i] = 0; + if (i < 5) { + n_minidoublets_by_layer_endcap_[i] = 0; + n_segments_by_layer_endcap_[i] = 0; + n_triplets_by_layer_endcap_[i] = 0; + n_quintuplets_by_layer_endcap_[i] = 0; + } + } +} + +void LSTEvent::resetEventSync() { + alpaka::wait(queue_); // synchronize to reset consistently + //reset the arrays + for (int i = 0; i < 6; i++) { + n_minidoublets_by_layer_barrel_[i] = 0; + n_segments_by_layer_barrel_[i] = 0; + n_triplets_by_layer_barrel_[i] = 0; + n_quintuplets_by_layer_barrel_[i] = 0; + if (i < 5) { + n_minidoublets_by_layer_endcap_[i] = 0; + n_segments_by_layer_endcap_[i] = 0; + n_triplets_by_layer_endcap_[i] = 0; + n_quintuplets_by_layer_endcap_[i] = 0; + } + } + hitsDC_.reset(); + miniDoubletsDC_.reset(); + rangesDC_.reset(); + segmentsDC_.reset(); + tripletsDC_.reset(); + quintupletsDC_.reset(); + trackCandidatesDC_.reset(); + pixelTripletsDC_.reset(); + pixelQuintupletsDC_.reset(); + + hitsHC_.reset(); + rangesHC_.reset(); + miniDoubletsHC_.reset(); + segmentsHC_.reset(); + tripletsHC_.reset(); + quintupletsHC_.reset(); + pixelTripletsHC_.reset(); + pixelQuintupletsHC_.reset(); + trackCandidatesHC_.reset(); + modulesHC_.reset(); +} + +void LSTEvent::addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple) { + // Use the actual number of hits instead of a max. + unsigned int nHits = x.size(); + + // Initialize space on device/host for next event. 
+ if (!hitsDC_) { + std::array const hits_sizes{{static_cast(nHits), static_cast(nModules_)}}; + hitsDC_.emplace(hits_sizes, queue_); + + auto hitsRanges = hitsDC_->view(); + auto hitRanges_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRanges(), hitsRanges.metadata().size()); + auto hitRangesLower_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesLower(), hitsRanges.metadata().size()); + auto hitRangesUpper_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesUpper(), hitsRanges.metadata().size()); + auto hitRangesnLower_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesnLower(), hitsRanges.metadata().size()); + auto hitRangesnUpper_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesnUpper(), hitsRanges.metadata().size()); + alpaka::memset(queue_, hitRanges_view, 0xff); + alpaka::memset(queue_, hitRangesLower_view, 0xff); + alpaka::memset(queue_, hitRangesUpper_view, 0xff); + alpaka::memset(queue_, hitRangesnLower_view, 0xff); + alpaka::memset(queue_, hitRangesnUpper_view, 0xff); + } + + if (!rangesDC_) { + rangesDC_.emplace(nLowerModules_ + 1, queue_); + auto buf = rangesDC_->buffer(); + alpaka::memset(queue_, buf, 0xff); + } + + // Copy the host arrays to the GPU. 
+ auto hits = hitsDC_->view(); + auto xs_d = cms::alpakatools::make_device_view(queue_, hits.xs(), (Idx)hits.metadata().size()); + auto ys_d = cms::alpakatools::make_device_view(queue_, hits.ys(), (Idx)hits.metadata().size()); + auto zs_d = cms::alpakatools::make_device_view(queue_, hits.zs(), (Idx)hits.metadata().size()); + auto detId_d = cms::alpakatools::make_device_view(queue_, hits.detid(), (Idx)hits.metadata().size()); + auto idxs_d = cms::alpakatools::make_device_view(queue_, hits.idxs(), (Idx)hits.metadata().size()); + alpaka::memcpy(queue_, xs_d, x, (Idx)nHits); + alpaka::memcpy(queue_, ys_d, y, (Idx)nHits); + alpaka::memcpy(queue_, zs_d, z, (Idx)nHits); + alpaka::memcpy(queue_, detId_d, detId, (Idx)nHits); + alpaka::memcpy(queue_, idxs_d, idxInNtuple, (Idx)nHits); + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory + + Vec3D const threadsPerBlock1{1, 1, 256}; + Vec3D const blocksPerGrid1{1, 1, max_blocks}; + WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); + + alpaka::exec(queue_, + hit_loop_workdiv, + HitLoopKernel{}, + Endcap, + TwoS, + nModules_, + nEndCapMap_, + endcapGeometry_.const_view(), + modules_.const_view(), + hitsDC_->view(), + hitsDC_->view(), + nHits); + + Vec3D const threadsPerBlock2{1, 1, 256}; + Vec3D const blocksPerGrid2{1, 1, max_blocks}; + WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); + + alpaka::exec(queue_, + module_ranges_workdiv, + ModuleRangesKernel{}, + modules_.const_view(), + hitsDC_->view(), + nLowerModules_); +} + +void LSTEvent::addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, 
+ std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad) { + unsigned int size = ptIn.size(); + + if (size > n_max_pixel_segments_per_module) { + printf( + "*********************************************************\n" + "* Warning: Pixel line segments will be truncated. *\n" + "* You need to increase n_max_pixel_segments_per_module. *\n" + "*********************************************************\n"); + size = n_max_pixel_segments_per_module; + } + + unsigned int mdSize = 2 * size; + uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex; + + if (!miniDoubletsDC_) { + // Create a view for the element nLowerModules_ inside rangesOccupancy->miniDoubletModuleOccupancy + auto rangesOccupancy = rangesDC_->view(); + auto dst_view_miniDoubletModuleOccupancy = + cms::alpakatools::make_device_view(queue_, rangesOccupancy.miniDoubletModuleOccupancy()[pixelModuleIndex]); + + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createMDArrayRangesGPU_workDiv, + CreateMDArrayRangesGPU{}, + modules_.const_view(), + rangesDC_->view()); + + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nTotalMDs_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalMDs()); + alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d); + alpaka::wait(queue_); // wait to get the data before manipulation + + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); + + std::array const mds_sizes{{static_cast(nTotalMDs), 
static_cast(nLowerModules_ + 1)}}; + miniDoubletsDC_.emplace(mds_sizes, queue_); + + auto mdsOccupancy = miniDoubletsDC_->view(); + auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size()); + auto totOccupancyMDs_view = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size()); + alpaka::memset(queue_, nMDs_view, 0u); + alpaka::memset(queue_, totOccupancyMDs_view, 0u); + } + if (!segmentsDC_) { + // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. + // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them + + WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createSegmentArrayRanges_workDiv, + CreateSegmentArrayRanges{}, + modules_.const_view(), + rangesDC_->view(), + miniDoubletsDC_->const_view()); + + auto rangesOccupancy = rangesDC_->view(); + auto nTotalSegments_view_h = cms::alpakatools::make_host_view(nTotalSegments_); + auto nTotalSegments_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalSegs()); + alpaka::memcpy(queue_, nTotalSegments_view_h, nTotalSegments_view_d); + alpaka::wait(queue_); // wait to get the value before manipulation + + nTotalSegments_ += n_max_pixel_segments_per_module; + + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = + cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size()); + auto totOccupancySegments_view = cms::alpakatools::make_device_view( + queue_, segmentsOccupancy.totOccupancySegments(), 
segmentsOccupancy.metadata().size()); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); + } + + auto hitIndices0_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto hitIndices1_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto hitIndices2_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto hitIndices3_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto dPhiChange_dev = cms::alpakatools::make_device_buffer(queue_, size); + + alpaka::memcpy(queue_, hitIndices0_dev, hitIndices0, size); + alpaka::memcpy(queue_, hitIndices1_dev, hitIndices1, size); + alpaka::memcpy(queue_, hitIndices2_dev, hitIndices2, size); + alpaka::memcpy(queue_, hitIndices3_dev, hitIndices3, size); + alpaka::memcpy(queue_, dPhiChange_dev, dPhiChange, size); + + SegmentsPixel segmentsPixel = segmentsDC_->view(); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.ptIn(), size), ptIn, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.ptErr(), size), ptErr, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.px(), size), px, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.py(), size), py, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.pz(), size), pz, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.etaErr(), size), etaErr, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.isQuad(), size), isQuad, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.eta(), size), eta, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.phi(), size), phi, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.charge(), size), charge, size); + 
alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.seedIdx(), size), seedIdx, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.superbin(), size), superbin, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.pixelType(), size), pixelType, size); + + // Create source views for size and mdSize + auto src_view_size = cms::alpakatools::make_host_view(size); + auto src_view_mdSize = cms::alpakatools::make_host_view(mdSize); + + auto segmentsOccupancy = segmentsDC_->view(); + auto dst_view_segments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_segments, src_view_size); + + auto dst_view_totOccupancySegments = + cms::alpakatools::make_device_view(queue_, segmentsOccupancy.totOccupancySegments()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size); + + auto mdsOccupancy = miniDoubletsDC_->view(); + auto dst_view_nMDs = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize); + + auto dst_view_totOccupancyMDs = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize); + + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory + + Vec3D const threadsPerBlock{1, 1, 256}; + Vec3D const blocksPerGrid{1, 1, max_blocks}; + WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + + alpaka::exec(queue_, + addPixelSegmentToEvent_workdiv, + AddPixelSegmentToEventKernel{}, + modules_.const_view(), + rangesDC_->const_view(), + hitsDC_->view(), + miniDoubletsDC_->view(), + segmentsDC_->view(), + segmentsDC_->view(), + hitIndices0_dev.data(), + hitIndices1_dev.data(), + 
hitIndices2_dev.data(), + hitIndices3_dev.data(), + dPhiChange_dev.data(), + pixelModuleIndex, + size); +} + +void LSTEvent::createMiniDoublets() { + // Create a view for the element nLowerModules_ inside rangesOccupancy->miniDoubletModuleOccupancy + auto rangesOccupancy = rangesDC_->view(); + auto dst_view_miniDoubletModuleOccupancy = + cms::alpakatools::make_device_view(queue_, rangesOccupancy.miniDoubletModuleOccupancy()[nLowerModules_]); + + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createMDArrayRangesGPU_workDiv, + CreateMDArrayRangesGPU{}, + modules_.const_view(), + rangesDC_->view()); + + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nTotalMDs_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalMDs()); + alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d); + alpaka::wait(queue_); // wait to get the data before manipulation + + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); + + if (!miniDoubletsDC_) { + std::array const mds_sizes{{static_cast(nTotalMDs), static_cast(nLowerModules_ + 1)}}; + miniDoubletsDC_.emplace(mds_sizes, queue_); + + auto mdsOccupancy = miniDoubletsDC_->view(); + auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size()); + auto totOccupancyMDs_view = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size()); + alpaka::memset(queue_, nMDs_view, 0u); + alpaka::memset(queue_, totOccupancyMDs_view, 0u); + } + + Vec3D const threadsPerBlockCreateMD{1, 16, 32}; + Vec3D const 
blocksPerGridCreateMD{1, nLowerModules_ / threadsPerBlockCreateMD[1], 1}; + WorkDiv3D const createMiniDoublets_workDiv = + createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); + + alpaka::exec(queue_, + createMiniDoublets_workDiv, + CreateMiniDoublets{}, + modules_.const_view(), + hitsDC_->const_view(), + hitsDC_->const_view(), + miniDoubletsDC_->view(), + miniDoubletsDC_->view(), + rangesDC_->const_view()); + + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addMiniDoubletRangesToEventExplicit_workDiv, + AddMiniDoubletRangesToEventExplicit{}, + modules_.const_view(), + miniDoubletsDC_->view(), + rangesDC_->view(), + hitsDC_->const_view()); + + if (addObjects_) { + addMiniDoubletsToEventExplicit(); + } +} + +void LSTEvent::createSegmentsWithModuleMap() { + if (!segmentsDC_) { + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = + cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size()); + auto totOccupancySegments_view = cms::alpakatools::make_device_view( + queue_, segmentsOccupancy.totOccupancySegments(), segmentsOccupancy.metadata().size()); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); + } + + Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; + Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; + WorkDiv3D const createSegments_workDiv = + createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); + + alpaka::exec(queue_, + createSegments_workDiv, + CreateSegments{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->view(), + segmentsDC_->view(), + 
rangesDC_->const_view()); + + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addSegmentRangesToEventExplicit_workDiv, + AddSegmentRangesToEventExplicit{}, + modules_.const_view(), + segmentsDC_->view(), + rangesDC_->view()); + + if (addObjects_) { + addSegmentsToEventExplicit(); + } +} + +void LSTEvent::createTriplets() { + if (!tripletsDC_) { + WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createTripletArrayRanges_workDiv, + CreateTripletArrayRanges{}, + modules_.const_view(), + rangesDC_->view(), + segmentsDC_->const_view()); + + // TODO: Why are we pulling this back down only to put it back on the device in a new struct? + auto rangesOccupancy = rangesDC_->view(); + auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto maxTriplets_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalTrips()); + alpaka::memcpy(queue_, maxTriplets_buf_h, maxTriplets_buf_d); + alpaka::wait(queue_); // wait to get the value before using it + + std::array const triplets_sizes{ + {static_cast(*maxTriplets_buf_h.data()), static_cast(nLowerModules_)}}; + tripletsDC_.emplace(triplets_sizes, queue_); + + auto tripletsOccupancy = tripletsDC_->view(); + auto nTriplets_view = + cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, nTriplets_view, 0u); + auto totOccupancyTriplets_view = cms::alpakatools::make_device_view( + queue_, tripletsOccupancy.totOccupancyTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyTriplets_view, 0u); + auto triplets = tripletsDC_->view(); + auto partOfPT5_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT5(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); + auto partOfT5_view = cms::alpakatools::make_device_view(queue_, 
triplets.partOfT5(), triplets.metadata().size()); + alpaka::memset(queue_, partOfT5_view, 0u); + auto partOfPT3_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT3(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT3_view, 0u); + } + + uint16_t nonZeroModules = 0; + unsigned int max_InnerSeg = 0; + + // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) + auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto nSegments_buf_d = cms::alpakatools::make_device_view( + queue_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegments_buf_h, nSegments_buf_d, nLowerModules_); + + // ... same for module_nConnectedModules + // FIXME: replace by ES host data + auto modules = modules_.const_view(); + auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_nConnectedModules_buf_d = + cms::alpakatools::make_device_view(queue_, modules.nConnectedModules(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_nConnectedModules_buf_h, module_nConnectedModules_buf_d, nLowerModules_); + + alpaka::wait(queue_); // wait for nSegments and module_nConnectedModules before using + + auto const* nSegments = nSegments_buf_h.data(); + auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data(); + + // Allocate host index and fill it directly + auto index_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto* index = index_buf_h.data(); + + for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) { + uint16_t nConnectedModules = module_nConnectedModules[innerLowerModuleIndex]; + unsigned int nInnerSegments = nSegments[innerLowerModuleIndex]; + if (nConnectedModules != 0 and nInnerSegments != 0) { + index[nonZeroModules] = innerLowerModuleIndex; + nonZeroModules++; + } + max_InnerSeg = 
std::max(max_InnerSeg, nInnerSegments); + } + + // Allocate and copy to device index + auto index_gpu_buf = cms::alpakatools::make_device_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, index_gpu_buf, index_buf_h, nonZeroModules); + + Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; + Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; + WorkDiv3D const createTriplets_workDiv = + createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); + + alpaka::exec(queue_, + createTriplets_workDiv, + CreateTriplets{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->view(), + rangesDC_->const_view(), + index_gpu_buf.data(), + nonZeroModules); + + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addTripletRangesToEventExplicit_workDiv, + AddTripletRangesToEventExplicit{}, + modules_.const_view(), + tripletsDC_->const_view(), + rangesDC_->view()); + + if (addObjects_) { + addTripletsToEventExplicit(); + } +} + +void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); + } + + Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; + Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; + WorkDiv3D const crossCleanpT3_workDiv = + createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); + + alpaka::exec(queue_, + crossCleanpT3_workDiv, + CrossCleanpT3{}, + modules_.const_view(), + rangesDC_->const_view(), + pixelTripletsDC_->view(), + segmentsDC_->const_view(), + pixelQuintupletsDC_->const_view()); + + WorkDiv1D const addpT3asTrackCandidates_workDiv = createWorkDiv({1}, {512}, {1}); + + alpaka::exec(queue_, + 
addpT3asTrackCandidates_workDiv, + AddpT3asTrackCandidates{}, + nLowerModules_, + pixelTripletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + rangesDC_->const_view()); + + // Pull nEligibleT5Modules from the device. + auto rangesOccupancy = rangesDC_->view(); + auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nEligibleModules_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nEligibleT5Modules()); + alpaka::memcpy(queue_, nEligibleModules_buf_h, nEligibleModules_buf_d); + alpaka::wait(queue_); // wait to get the value before using + auto const nEligibleModules = *nEligibleModules_buf_h.data(); + + Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; + Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; + WorkDiv3D const removeDupQuintupletsBeforeTC_workDiv = + createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); + + alpaka::exec(queue_, + removeDupQuintupletsBeforeTC_workDiv, + RemoveDupQuintupletsBeforeTC{}, + quintupletsDC_->view(), + quintupletsDC_->view(), + rangesDC_->const_view()); + + Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; + Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; + WorkDiv3D const crossCleanT5_workDiv = + createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); + + alpaka::exec(queue_, + crossCleanT5_workDiv, + CrossCleanT5{}, + modules_.const_view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->const_view(), + pixelTripletsDC_->const_view(), + rangesDC_->const_view()); + + Vec3D const threadsPerBlock_addT5asTrackCandidate{1, 8, 128}; + Vec3D const blocksPerGrid_addT5asTrackCandidate{1, 8, 10}; + WorkDiv3D const addT5asTrackCandidate_workDiv = + createWorkDiv(blocksPerGrid_addT5asTrackCandidate, threadsPerBlock_addT5asTrackCandidate, elementsPerThread); 
+ + alpaka::exec(queue_, + addT5asTrackCandidate_workDiv, + AddT5asTrackCandidate{}, + nLowerModules_, + quintupletsDC_->const_view(), + quintupletsDC_->const_view(), + trackCandidatesDC_->view(), + rangesDC_->const_view()); + + if (!no_pls_dupclean) { + Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + WorkDiv3D const checkHitspLS_workDiv = + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + modules_.const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + true); + } + + Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; + Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20}; + WorkDiv3D const crossCleanpLS_workDiv = + createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); + + alpaka::exec(queue_, + crossCleanpLS_workDiv, + CrossCleanpLS{}, + modules_.const_view(), + rangesDC_->const_view(), + pixelTripletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + miniDoubletsDC_->const_view(), + hitsDC_->const_view(), + quintupletsDC_->const_view()); + + Vec3D const threadsPerBlock_addpLSasTrackCandidate{1, 1, 384}; + Vec3D const blocksPerGrid_addpLSasTrackCandidate{1, 1, max_blocks}; + WorkDiv3D const addpLSasTrackCandidate_workDiv = + createWorkDiv(blocksPerGrid_addpLSasTrackCandidate, threadsPerBlock_addpLSasTrackCandidate, elementsPerThread); + + alpaka::exec(queue_, + addpLSasTrackCandidate_workDiv, + AddpLSasTrackCandidate{}, + nLowerModules_, + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + tc_pls_triplets); + + // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached + auto nTrackCanpT5Host_buf = cms::alpakatools::make_host_buffer(queue_); + auto nTrackCanpT3Host_buf = 
cms::alpakatools::make_host_buffer(queue_); + auto nTrackCanpLSHost_buf = cms::alpakatools::make_host_buffer(queue_); + auto nTrackCanT5Host_buf = cms::alpakatools::make_host_buffer(queue_); + alpaka::memcpy(queue_, + nTrackCanpT5Host_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT5())); + alpaka::memcpy(queue_, + nTrackCanpT3Host_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT3())); + alpaka::memcpy(queue_, + nTrackCanpLSHost_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespLS())); + alpaka::memcpy(queue_, + nTrackCanT5Host_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5())); + alpaka::wait(queue_); // wait to get the values before using them + + auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); + auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); + auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); + auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); + if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || + (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { + printf( + "****************************************************************************************************\n" + "* Warning: Track candidates were possibly truncated. *\n" + "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n" + "* Run the code with the WARNINGS flag activated for more details. 
*\n" + "****************************************************************************************************\n"); + } +} + +void LSTEvent::createPixelTriplets() { + if (!pixelTripletsDC_) { + pixelTripletsDC_.emplace(n_max_pixel_triplets, queue_); + auto nPixelTriplets_view = cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets()); + alpaka::memset(queue_, nPixelTriplets_view, 0u); + auto totOccupancyPixelTriplets_view = + cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->totOccupancyPixelTriplets()); + alpaka::memset(queue_, totOccupancyPixelTriplets_view, 0u); + } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); + + auto superbins_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + auto pixelTypes_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + + alpaka::memcpy(queue_, + superbins_buf, + cms::alpakatools::make_device_view(queue_, segmentsPixel.superbin(), n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, + pixelTypes_buf, + cms::alpakatools::make_device_view(queue_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + + unsigned int nInnerSegments; + auto nInnerSegments_src_view = cms::alpakatools::make_host_view(nInnerSegments); + + // Create a sub-view for the device buffer + auto dev_view_nSegments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[nLowerModules_]); + + alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); + alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using + + auto connectedPixelSize_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto 
connectedPixelSize_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + + unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); + + int pixelIndexOffsetPos = + pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + + // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel + // the current selection still leaves a significant fraction of unmatchable pLSs + for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS + PixelType pixelType = pixelTypes[i]; // Get pixel type for this pLS + int superbin = superbins[i]; // Get superbin for this pixel + if ((superbin < 0) or (superbin >= (int)size_superbins) or + ((pixelType != PixelType::kHighPt) and (pixelType != PixelType::kLowPtPosCurv) and + (pixelType != PixelType::kLowPtNegCurv))) { + connectedPixelSize_host[i] = 0; + connectedPixelIndex_host[i] = 0; + continue; + } + + // Used pixel type to select correct size-index arrays + switch (pixelType) { + case PixelType::kInvalid: + break; + case PixelType::kHighPt: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin]; + break; + case PixelType::kLowPtPosCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin]; + // index to get start of connected modules for this superbin in map + 
connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; + break; + case PixelType::kLowPtNegCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; + break; + } + } + + alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); + + Vec3D const threadsPerBlock{1, 4, 32}; + Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; + WorkDiv3D const createPixelTripletsFromMap_workDiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + + alpaka::exec(queue_, + createPixelTripletsFromMap_workDiv, + CreatePixelTripletsFromMap{}, + modules_.const_view(), + modules_.const_view(), + rangesDC_->const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->const_view(), + pixelTripletsDC_->view(), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), + nInnerSegments); + +#ifdef WARNINGS + auto nPixelTriplets_buf = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy( + queue_, nPixelTriplets_buf, cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets())); + alpaka::wait(queue_); // wait to get the value before using it + + std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; +#endif + + //pT3s can be cleaned here because they're not used in making pT5s! 
+ Vec3D const threadsPerBlockDupPixTrip{1, 16, 16}; + //seems like more blocks lead to conflicting writes + Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; + WorkDiv3D const removeDupPixelTripletsFromMap_workDiv = + createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); + + alpaka::exec( + queue_, removeDupPixelTripletsFromMap_workDiv, RemoveDupPixelTripletsFromMap{}, pixelTripletsDC_->view()); +} + +void LSTEvent::createQuintuplets() { + WorkDiv1D const createEligibleModulesListForQuintuplets_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createEligibleModulesListForQuintuplets_workDiv, + CreateEligibleModulesListForQuintuplets{}, + modules_.const_view(), + tripletsDC_->const_view(), + rangesDC_->view()); + + auto nEligibleT5Modules_buf = cms::alpakatools::make_host_buffer(queue_); + auto nTotalQuintuplets_buf = cms::alpakatools::make_host_buffer(queue_); + auto rangesOccupancy = rangesDC_->view(); + auto nEligibleT5Modules_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nEligibleT5Modules()); + auto nTotalQuintuplets_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalQuints()); + alpaka::memcpy(queue_, nEligibleT5Modules_buf, nEligibleT5Modules_view_d); + alpaka::memcpy(queue_, nTotalQuintuplets_buf, nTotalQuintuplets_view_d); + alpaka::wait(queue_); // wait for the values before using them + + auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); + auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); + + if (!quintupletsDC_) { + std::array const quintuplets_sizes{{static_cast(nTotalQuintuplets), static_cast(nLowerModules_)}}; + quintupletsDC_.emplace(quintuplets_sizes, queue_); + auto quintupletsOccupancy = quintupletsDC_->view(); + auto nQuintuplets_view = cms::alpakatools::make_device_view( + queue_, quintupletsOccupancy.nQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, nQuintuplets_view, 0u); + auto 
totOccupancyQuintuplets_view = cms::alpakatools::make_device_view( + queue_, quintupletsOccupancy.totOccupancyQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyQuintuplets_view, 0u); + auto quintuplets = quintupletsDC_->view(); + auto isDup_view = cms::alpakatools::make_device_view(queue_, quintuplets.isDup(), quintuplets.metadata().size()); + alpaka::memset(queue_, isDup_view, 0u); + auto tightCutFlag_view = + cms::alpakatools::make_device_view(queue_, quintuplets.tightCutFlag(), quintuplets.metadata().size()); + alpaka::memset(queue_, tightCutFlag_view, 0u); + auto partOfPT5_view = + cms::alpakatools::make_device_view(queue_, quintuplets.partOfPT5(), quintuplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); + } + + Vec3D const threadsPerBlockQuints{1, 8, 32}; + Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; + WorkDiv3D const createQuintuplets_workDiv = + createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); + + alpaka::exec(queue_, + createQuintuplets_workDiv, + CreateQuintuplets{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->const_view(), + quintupletsDC_->view(), + quintupletsDC_->view(), + rangesDC_->const_view(), + nEligibleT5Modules); + + Vec3D const threadsPerBlockDupQuint{1, 16, 16}; + Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; + WorkDiv3D const removeDupQuintupletsAfterBuild_workDiv = + createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); + + alpaka::exec(queue_, + removeDupQuintupletsAfterBuild_workDiv, + RemoveDupQuintupletsAfterBuild{}, + modules_.const_view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + rangesDC_->const_view()); + + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addQuintupletRangesToEventExplicit_workDiv, + 
AddQuintupletRangesToEventExplicit{}, + modules_.const_view(), + quintupletsDC_->const_view(), + rangesDC_->view()); + + if (addObjects_) { + addQuintupletsToEventExplicit(); + } +} + +void LSTEvent::pixelLineSegmentCleaning(bool no_pls_dupclean) { + if (!no_pls_dupclean) { + Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + WorkDiv3D const checkHitspLS_workDiv = + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + modules_.const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + false); + } +} + +void LSTEvent::createPixelQuintuplets() { + if (!pixelQuintupletsDC_) { + pixelQuintupletsDC_.emplace(n_max_pixel_quintuplets, queue_); + auto nPixelQuintuplets_view = + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets()); + alpaka::memset(queue_, nPixelQuintuplets_view, 0u); + auto totOccupancyPixelQuintuplets_view = + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->totOccupancyPixelQuintuplets()); + alpaka::memset(queue_, totOccupancyPixelQuintuplets_view, 0u); + } + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); + } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); + + auto superbins_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + auto pixelTypes_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + + alpaka::memcpy(queue_, + superbins_buf, + cms::alpakatools::make_device_view(queue_, segmentsPixel.superbin(), n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, + pixelTypes_buf, + 
cms::alpakatools::make_device_view(queue_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + + unsigned int nInnerSegments; + auto nInnerSegments_src_view = cms::alpakatools::make_host_view(nInnerSegments); + + // Create a sub-view for the device buffer + unsigned int totalModules = nLowerModules_ + 1; + auto dev_view_nSegments_buf = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), totalModules); + auto dev_view_nSegments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[nLowerModules_]); + + alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); + alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using + + auto connectedPixelSize_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto connectedPixelSize_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + + auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); + + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::size_superbins - 1] + pixelIndexOffsetPos; + + // Loop over # pLS + for (unsigned int i = 0; i < nInnerSegments; i++) { + PixelType pixelType = pixelTypes[i]; // Get pixel type for this pLS + int superbin = superbins[i]; // Get superbin for this pixel + if ((superbin < 0) or (superbin >= (int)size_superbins) or + ((pixelType != PixelType::kHighPt) 
and (pixelType != PixelType::kLowPtPosCurv) and + (pixelType != PixelType::kLowPtNegCurv))) { + connectedPixelSize_host[i] = 0; + connectedPixelIndex_host[i] = 0; + continue; + } + + // Used pixel type to select correct size-index arrays + switch (pixelType) { + case PixelType::kInvalid: + break; + case PixelType::kHighPt: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin]; + break; + case PixelType::kLowPtPosCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; + break; + case PixelType::kLowPtNegCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; + break; + } + } + + alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); + + Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; + Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; + WorkDiv3D const createPixelQuintupletsFromMap_workDiv = + createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); + + alpaka::exec(queue_, + createPixelQuintupletsFromMap_workDiv, + CreatePixelQuintupletsFromMap{}, + modules_.const_view(), + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + 
segmentsDC_->view(), + tripletsDC_->view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->view(), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), + nInnerSegments, + rangesDC_->const_view()); + + Vec3D const threadsPerBlockDupPix{1, 16, 16}; + Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; + WorkDiv3D const removeDupPixelQuintupletsFromMap_workDiv = + createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); + + alpaka::exec(queue_, + removeDupPixelQuintupletsFromMap_workDiv, + RemoveDupPixelQuintupletsFromMap{}, + pixelQuintupletsDC_->view()); + + WorkDiv1D const addpT5asTrackCandidate_workDiv = createWorkDiv({1}, {256}, {1}); + + alpaka::exec(queue_, + addpT5asTrackCandidate_workDiv, + AddpT5asTrackCandidate{}, + nLowerModules_, + pixelQuintupletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + rangesDC_->const_view()); + +#ifdef WARNINGS + auto nPixelQuintuplets_buf = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nPixelQuintuplets_buf, + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets())); + alpaka::wait(queue_); // wait to get the value before using it + + std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl; +#endif +} + +void LSTEvent::addMiniDoubletsToEventExplicit() { + auto nMDsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto mdsOccupancy = miniDoubletsDC_->const_view(); + auto nMDs_view = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), nLowerModules_); // exclude pixel part + alpaka::memcpy(queue_, nMDsCPU_buf, nMDs_view, nLowerModules_); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = + cms::alpakatools::make_device_view(queue_, modules.subdets(), 
nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + auto module_hitRanges_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto hits = hitsDC_->view(); + auto hitRanges_view = + cms::alpakatools::make_device_view(queue_, hits.hitRanges(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_hitRanges_buf, hitRanges_view, nLowerModules_); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nMDsCPU = nMDsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_hitRanges = module_hitRanges_buf.data(); + + for (unsigned int i = 0; i < nLowerModules_; i++) { + if (!(nMDsCPU[i] == 0 or module_hitRanges[i][0] == -1)) { + if (module_subdets[i] == Barrel) { + n_minidoublets_by_layer_barrel_[module_layers[i] - 1] += nMDsCPU[i]; + } else { + n_minidoublets_by_layer_endcap_[module_layers[i] - 1] += nMDsCPU[i]; + } + } + } +} + +void LSTEvent::addSegmentsToEventExplicit() { + auto nSegmentsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto nSegments_buf = cms::alpakatools::make_device_view( + queue_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegmentsCPU_buf, nSegments_buf, nLowerModules_); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = + cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); // only lower modules + 
alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nSegmentsCPU = nSegmentsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + + for (unsigned int i = 0; i < nLowerModules_; i++) { + if (!(nSegmentsCPU[i] == 0)) { + if (module_subdets[i] == Barrel) { + n_segments_by_layer_barrel_[module_layers[i] - 1] += nSegmentsCPU[i]; + } else { + n_segments_by_layer_endcap_[module_layers[i] - 1] += nSegmentsCPU[i]; + } + } + } +} + +void LSTEvent::addQuintupletsToEventExplicit() { + auto quintupletsOccupancy = quintupletsDC_->const_view(); + auto nQuintuplets_view = + cms::alpakatools::make_device_view(queue_, quintupletsOccupancy.nQuintuplets(), nLowerModules_); + auto nQuintupletsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, nQuintupletsCPU_buf, nQuintuplets_view); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = cms::alpakatools::make_device_view(queue_, modules.subdets(), modules.metadata().size()); + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nModules_); + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + auto 
module_quintupletModuleIndices_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto rangesOccupancy = rangesDC_->view(); + auto quintupletModuleIndices_view_d = + cms::alpakatools::make_device_view(queue_, rangesOccupancy.quintupletModuleIndices(), nLowerModules_); + alpaka::memcpy(queue_, module_quintupletModuleIndices_buf, quintupletModuleIndices_view_d); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_quintupletModuleIndices = module_quintupletModuleIndices_buf.data(); + + for (uint16_t i = 0; i < nLowerModules_; i++) { + if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) { + if (module_subdets[i] == Barrel) { + n_quintuplets_by_layer_barrel_[module_layers[i] - 1] += nQuintupletsCPU[i]; + } else { + n_quintuplets_by_layer_endcap_[module_layers[i] - 1] += nQuintupletsCPU[i]; + } + } + } +} + +void LSTEvent::addTripletsToEventExplicit() { + auto tripletsOccupancy = tripletsDC_->const_view(); + auto nTriplets_view = cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets(), nLowerModules_); + auto nTripletsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, nTripletsCPU_buf, nTriplets_view); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = + cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), 
nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nTripletsCPU = nTripletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + + for (uint16_t i = 0; i < nLowerModules_; i++) { + if (nTripletsCPU[i] != 0) { + if (module_subdets[i] == Barrel) { + n_triplets_by_layer_barrel_[module_layers[i] - 1] += nTripletsCPU[i]; + } else { + n_triplets_by_layer_endcap_[module_layers[i] - 1] += nTripletsCPU[i]; + } + } + } +} + +unsigned int LSTEvent::getNumberOfMiniDoublets() { + unsigned int miniDoublets = 0; + for (auto& it : n_minidoublets_by_layer_barrel_) { + miniDoublets += it; + } + for (auto& it : n_minidoublets_by_layer_endcap_) { + miniDoublets += it; + } + + return miniDoublets; +} + +unsigned int LSTEvent::getNumberOfMiniDoubletsByLayer(unsigned int layer) { + if (layer == 6) + return n_minidoublets_by_layer_barrel_[layer]; + else + return n_minidoublets_by_layer_barrel_[layer] + n_minidoublets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { + return n_minidoublets_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { + return n_minidoublets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfSegments() { + unsigned int segments = 0; + for (auto& it : n_segments_by_layer_barrel_) { + segments += it; + } + for (auto& it : n_segments_by_layer_endcap_) { + segments += it; + } + + return segments; +} + +unsigned int LSTEvent::getNumberOfSegmentsByLayer(unsigned int layer) { + if (layer == 6) + return n_segments_by_layer_barrel_[layer]; + else + return n_segments_by_layer_barrel_[layer] + n_segments_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { + return 
n_segments_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { + return n_segments_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfTriplets() { + unsigned int triplets = 0; + for (auto& it : n_triplets_by_layer_barrel_) { + triplets += it; + } + for (auto& it : n_triplets_by_layer_endcap_) { + triplets += it; + } + + return triplets; +} + +unsigned int LSTEvent::getNumberOfTripletsByLayer(unsigned int layer) { + if (layer == 6) + return n_triplets_by_layer_barrel_[layer]; + else + return n_triplets_by_layer_barrel_[layer] + n_triplets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfTripletsByLayerBarrel(unsigned int layer) { + return n_triplets_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfTripletsByLayerEndcap(unsigned int layer) { + return n_triplets_by_layer_endcap_[layer]; +} + +int LSTEvent::getNumberOfPixelTriplets() { + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy( + queue_, nPixelTriplets_buf_h, cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets())); + alpaka::wait(queue_); + + return *nPixelTriplets_buf_h.data(); +} + +int LSTEvent::getNumberOfPixelQuintuplets() { + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nPixelQuintuplets_buf_h, + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets())); + alpaka::wait(queue_); + + return *nPixelQuintuplets_buf_h.data(); +} + +unsigned int LSTEvent::getNumberOfQuintuplets() { + unsigned int quintuplets = 0; + for (auto& it : n_quintuplets_by_layer_barrel_) { + quintuplets += it; + } + for (auto& it : n_quintuplets_by_layer_endcap_) { + quintuplets += it; + } + + return quintuplets; +} + +unsigned int LSTEvent::getNumberOfQuintupletsByLayer(unsigned int layer) { + if (layer == 6) + return n_quintuplets_by_layer_barrel_[layer]; + else + return 
n_quintuplets_by_layer_barrel_[layer] + n_quintuplets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { + return n_quintuplets_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { + return n_quintuplets_by_layer_endcap_[layer]; +} + +int LSTEvent::getNumberOfTrackCandidates() { + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidates_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates())); + alpaka::wait(queue_); + + return *nTrackCandidates_buf_h.data(); +} + +int LSTEvent::getNumberOfPT5TrackCandidates() { + auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesPT5_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT5())); + alpaka::wait(queue_); + + return *nTrackCandidatesPT5_buf_h.data(); +} + +int LSTEvent::getNumberOfPT3TrackCandidates() { + auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesPT3_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT3())); + alpaka::wait(queue_); + + return *nTrackCandidatesPT3_buf_h.data(); +} + +int LSTEvent::getNumberOfPLSTrackCandidates() { + auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesPLS_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespLS())); + alpaka::wait(queue_); + + return *nTrackCandidatesPLS_buf_h.data(); +} + +int LSTEvent::getNumberOfPixelTrackCandidates() { + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + 
nTrackCandidates_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates())); + alpaka::memcpy(queue_, + nTrackCandidatesT5_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5())); + alpaka::wait(queue_); + + return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); +} + +int LSTEvent::getNumberOfT5TrackCandidates() { + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesT5_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5())); + alpaka::wait(queue_); + + return *nTrackCandidatesT5_buf_h.data(); +} + +template +typename TSoA::ConstView LSTEvent::getHits(bool inCMSSW, bool sync) { + if constexpr (std::is_same_v) { + return hitsDC_->const_view(); + } else { + if (!hitsHC_) { + if (inCMSSW) { + auto hits_d = hitsDC_->view(); + auto nHits = hits_d.metadata().size(); + std::array const hits_sizes{{static_cast(nHits), static_cast(nModules_)}}; + hitsHC_.emplace(hits_sizes, queue_); + auto hits_h = hitsHC_->view(); + auto idxs_h = cms::alpakatools::make_host_view(hits_h.idxs(), nHits); + auto idxs_d = cms::alpakatools::make_device_view(queue_, hits_d.idxs(), nHits); + alpaka::memcpy(queue_, idxs_h, idxs_d); + } else { + hitsHC_.emplace(cms::alpakatools::CopyToHost>::copyAsync( + queue_, *hitsDC_)); + } + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return hitsHC_->const_view(); + } +} +template HitsConst LSTEvent::getHits(bool, bool); +template HitsRangesConst LSTEvent::getHits(bool, bool); + +template +ObjectRangesConst LSTEvent::getRanges(bool sync) { + if constexpr (std::is_same_v) { + return rangesDC_->const_view(); + } else { + if (!rangesHC_) { + rangesHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync(queue_, *rangesDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return 
rangesHC_->const_view(); + } +} +template ObjectRangesConst LSTEvent::getRanges<>(bool); + +template +typename TSoA::ConstView LSTEvent::getMiniDoublets(bool sync) { + if constexpr (std::is_same_v) { + return miniDoubletsDC_->const_view(); + } else { + if (!miniDoubletsHC_) { + miniDoubletsHC_.emplace( + cms::alpakatools::CopyToHost< + PortableMultiCollection>::copyAsync(queue_, + *miniDoubletsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return miniDoubletsHC_->const_view(); + } +} +template MiniDoubletsConst LSTEvent::getMiniDoublets(bool); +template MiniDoubletsOccupancyConst LSTEvent::getMiniDoublets(bool); + +template +typename TSoA::ConstView LSTEvent::getSegments(bool sync) { + if constexpr (std::is_same_v) { + return segmentsDC_->const_view(); + } else { + if (!segmentsHC_) { + segmentsHC_.emplace( + cms::alpakatools:: + CopyToHost>::copyAsync( + queue_, *segmentsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return segmentsHC_->const_view(); + } +} +template SegmentsConst LSTEvent::getSegments(bool); +template SegmentsOccupancyConst LSTEvent::getSegments(bool); +template SegmentsPixelConst LSTEvent::getSegments(bool); + +template +typename TSoA::ConstView LSTEvent::getTriplets(bool sync) { + if constexpr (std::is_same_v) { + return tripletsDC_->const_view(); + } else { + if (!tripletsHC_) { + tripletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, *tripletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return tripletsHC_->const_view(); +} +template TripletsConst LSTEvent::getTriplets(bool); +template TripletsOccupancyConst LSTEvent::getTriplets(bool); + +template +typename TSoA::ConstView LSTEvent::getQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return quintupletsDC_->const_view(); + } else { + if (!quintupletsHC_) { + quintupletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, 
*quintupletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return quintupletsHC_->const_view(); +} +template QuintupletsConst LSTEvent::getQuintuplets(bool); +template QuintupletsOccupancyConst LSTEvent::getQuintuplets(bool); + +template +PixelTripletsConst LSTEvent::getPixelTriplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelTripletsDC_->const_view(); + } else { + if (!pixelTripletsHC_) { + pixelTripletsHC_.emplace(cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelTripletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return pixelTripletsHC_->const_view(); +} +template PixelTripletsConst LSTEvent::getPixelTriplets<>(bool); + +template +PixelQuintupletsConst LSTEvent::getPixelQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelQuintupletsDC_->const_view(); + } else { + if (!pixelQuintupletsHC_) { + pixelQuintupletsHC_.emplace( + cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelQuintupletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return pixelQuintupletsHC_->const_view(); +} +template PixelQuintupletsConst LSTEvent::getPixelQuintuplets<>(bool); + +const TrackCandidatesConst& LSTEvent::getTrackCandidates(bool inCMSSW, bool sync) { + if (!trackCandidatesHC_) { + // Get nTrackCanHost parameter to initialize host based instance + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_); + alpaka::memcpy(queue_, + nTrackCanHost_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates())); + alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable + + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); + trackCandidatesHC_.emplace(nTrackCanHost, queue_); + + (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; + alpaka::memcpy(queue_, + 
cms::alpakatools::make_host_view((*trackCandidatesHC_)->hitIndices()->data(), + Params_pT5::kHits * nTrackCanHost), + cms::alpakatools::make_device_view( + queue_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); + alpaka::memcpy(queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); + if (not inCMSSW) { + alpaka::memcpy(queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->logicalLayers()->data(), + Params_pT5::kLayers * nTrackCanHost), + cms::alpakatools::make_device_view( + queue_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); + alpaka::memcpy( + queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost), + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost)); + alpaka::memcpy( + queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost), + cms::alpakatools::make_device_view( + queue_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost)); + } + alpaka::memcpy( + queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return trackCandidatesHC_.value().const_view(); +} + +template +typename TSoA::ConstView LSTEvent::getModules(bool sync) { + if constexpr (std::is_same_v) { + return modules_.const_view(); + } else { + if (!modulesHC_) { + modulesHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, modules_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return modulesHC_->const_view(); + } +} 
+template ModulesConst LSTEvent::getModules(bool); +template ModulesPixelConst LSTEvent::getModules(bool); diff --git a/RecoTracker/LSTCore/src/alpaka/LSTEvent.h b/RecoTracker/LSTCore/src/alpaka/LSTEvent.h new file mode 100644 index 0000000000000..59f249aa9405f --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/LSTEvent.h @@ -0,0 +1,195 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_LSTEvent_h +#define RecoTracker_LSTCore_src_alpaka_LSTEvent_h + +#include + +#include "RecoTracker/LSTCore/interface/HitsHostCollection.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/QuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/SegmentsHostCollection.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" +#include "RecoTracker/LSTCore/interface/TripletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h" +#include 
"RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h" + +#include "Hit.h" +#include "Kernels.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + class LSTEvent { + private: + Queue& queue_; + + std::array n_minidoublets_by_layer_barrel_{}; + std::array n_minidoublets_by_layer_endcap_{}; + std::array n_segments_by_layer_barrel_{}; + std::array n_segments_by_layer_endcap_{}; + std::array n_triplets_by_layer_barrel_{}; + std::array n_triplets_by_layer_endcap_{}; + std::array n_quintuplets_by_layer_barrel_{}; + std::array n_quintuplets_by_layer_endcap_{}; + unsigned int nTotalSegments_; + + //Device stuff + std::optional rangesDC_; + std::optional hitsDC_; + std::optional miniDoubletsDC_; + std::optional segmentsDC_; + std::optional tripletsDC_; + std::optional quintupletsDC_; + std::optional trackCandidatesDC_; + std::optional pixelTripletsDC_; + std::optional pixelQuintupletsDC_; + + //CPU interface stuff + std::optional rangesHC_; + std::optional hitsHC_; + std::optional miniDoubletsHC_; + std::optional segmentsHC_; + std::optional tripletsHC_; + std::optional trackCandidatesHC_; + std::optional modulesHC_; + std::optional quintupletsHC_; + std::optional pixelTripletsHC_; + std::optional pixelQuintupletsHC_; + + const uint16_t nModules_; + const uint16_t nLowerModules_; + const unsigned int nPixels_; + const unsigned int nEndCapMap_; + ModulesDeviceCollection const& modules_; + PixelMap const& pixelMapping_; + EndcapGeometryDevDeviceCollection const& endcapGeometry_; + bool addObjects_; + + public: + // Constructor used for CMSSW integration. Uses an external queue. 
+ LSTEvent(bool verbose, Queue& q, const LSTESData* deviceESData) + : queue_(q), + nModules_(deviceESData->nModules), + nLowerModules_(deviceESData->nLowerModules), + nPixels_(deviceESData->nPixels), + nEndCapMap_(deviceESData->nEndCapMap), + modules_(*deviceESData->modules), + pixelMapping_(*deviceESData->pixelMapping), + endcapGeometry_(*deviceESData->endcapGeometry), + addObjects_(verbose) {} + void initSync(); // synchronizes, for standalone usage + void resetEventSync(); // synchronizes, for standalone usage + void wait() const { alpaka::wait(queue_); } + + // Calls the appropriate hit function, then increments the counter + void addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple); + void addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad); + + void createMiniDoublets(); + void createSegmentsWithModuleMap(); + void createTriplets(); + void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); + void createPixelTriplets(); + void createQuintuplets(); + void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); + + unsigned int getNumberOfMiniDoublets(); + unsigned int 
getNumberOfMiniDoubletsByLayer(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfSegments(); + unsigned int getNumberOfSegmentsByLayer(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfTriplets(); + unsigned int getNumberOfTripletsByLayer(unsigned int layer); + unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); + + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); + + unsigned int getNumberOfQuintuplets(); + unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); + + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); + + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + template + typename TSoA::ConstView getHits(bool inCMSSW = false, bool sync = true); + template + ObjectRangesConst getRanges(bool sync = true); + template + typename TSoA::ConstView getMiniDoublets(bool sync = true); + template + typename TSoA::ConstView getSegments(bool sync = true); + template + typename TSoA::ConstView getTriplets(bool sync = true); + template + typename TSoA::ConstView getQuintuplets(bool sync = true); + template + PixelTripletsConst getPixelTriplets(bool sync = true); + template + PixelQuintupletsConst 
getPixelQuintuplets(bool sync = true); + const TrackCandidatesConst& getTrackCandidates(bool inCMSSW = false, bool sync = true); + template + typename TSoA::ConstView getModules(bool sync = true); + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h new file mode 100644 index 0000000000000..0a0abff8b6986 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -0,0 +1,914 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_MiniDoublet_h +#define RecoTracker_LSTCore_src_alpaka_MiniDoublet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +#include "Hit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, + MiniDoublets mds, + HitsConst hits, + ModulesConst modules, + unsigned int lowerHitIdx, + unsigned int upperHitIdx, + uint16_t lowerModuleIdx, + float dz, + float dPhi, + float dPhiChange, + float shiftedX, + float shiftedY, + float shiftedZ, + float noShiftedDphi, + float noShiftedDPhiChange, + unsigned int idx) { + //the index into which this MD needs to be written will be computed in the kernel + //nMDs variable will be incremented in the kernel, no need to worry about that here + + mds.moduleIndices()[idx] = lowerModuleIdx; + unsigned int anchorHitIndex, outerHitIndex; + if (modules.moduleType()[lowerModuleIdx] == PS and modules.moduleLayerType()[lowerModuleIdx] == Strip) { + mds.anchorHitIndices()[idx] = upperHitIdx; + mds.outerHitIndices()[idx] = lowerHitIdx; + + 
anchorHitIndex = upperHitIdx; + outerHitIndex = lowerHitIdx; + } else { + mds.anchorHitIndices()[idx] = lowerHitIdx; + mds.outerHitIndices()[idx] = upperHitIdx; + + anchorHitIndex = lowerHitIdx; + outerHitIndex = upperHitIdx; + } + + mds.dphichanges()[idx] = dPhiChange; + + mds.dphis()[idx] = dPhi; + mds.dzs()[idx] = dz; + mds.shiftedXs()[idx] = shiftedX; + mds.shiftedYs()[idx] = shiftedY; + mds.shiftedZs()[idx] = shiftedZ; + + mds.noShiftedDphis()[idx] = noShiftedDphi; + mds.noShiftedDphiChanges()[idx] = noShiftedDPhiChange; + + mds.anchorX()[idx] = hits.xs()[anchorHitIndex]; + mds.anchorY()[idx] = hits.ys()[anchorHitIndex]; + mds.anchorZ()[idx] = hits.zs()[anchorHitIndex]; + mds.anchorRt()[idx] = hits.rts()[anchorHitIndex]; + mds.anchorPhi()[idx] = hits.phis()[anchorHitIndex]; + mds.anchorEta()[idx] = hits.etas()[anchorHitIndex]; + mds.anchorHighEdgeX()[idx] = hits.highEdgeXs()[anchorHitIndex]; + mds.anchorHighEdgeY()[idx] = hits.highEdgeYs()[anchorHitIndex]; + mds.anchorLowEdgeX()[idx] = hits.lowEdgeXs()[anchorHitIndex]; + mds.anchorLowEdgeY()[idx] = hits.lowEdgeYs()[anchorHitIndex]; + mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]); + mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]); + + mds.outerX()[idx] = hits.xs()[outerHitIndex]; + mds.outerY()[idx] = hits.ys()[outerHitIndex]; + mds.outerZ()[idx] = hits.zs()[outerHitIndex]; + mds.outerRt()[idx] = hits.rts()[outerHitIndex]; + mds.outerPhi()[idx] = hits.phis()[outerHitIndex]; + mds.outerEta()[idx] = hits.etas()[outerHitIndex]; + mds.outerHighEdgeX()[idx] = hits.highEdgeXs()[outerHitIndex]; + mds.outerHighEdgeY()[idx] = hits.highEdgeYs()[outerHitIndex]; + mds.outerLowEdgeX()[idx] = hits.lowEdgeXs()[outerHitIndex]; + mds.outerLowEdgeY()[idx] = hits.lowEdgeYs()[outerHitIndex]; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules(ModulesConst modules, uint16_t moduleIndex) { + 
// The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + short subdet = modules.subdets()[moduleIndex]; + short layer = modules.layers()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + short rod = modules.rods()[moduleIndex]; + + if (subdet == Barrel) { + if ((side != Center and layer == 3) or (side == NegZ and layer == 2 and rod > 5) or + (side == PosZ and layer == 2 and rod < 8) or (side == NegZ and layer == 1 and rod > 9) or + (side == PosZ and layer == 1 and rod < 4)) + return true; + else + return false; + } else + return false; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(ModulesConst modules, uint16_t moduleIndex) { + float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + float miniDeltaEndcap[5][15]; + + for (size_t i = 0; i < 5; i++) { + for (size_t j = 0; j < 15; j++) { + if (i == 0 || i == 1) { + if (j < 10) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } else if (i == 2 || i == 3) { + if (j < 8) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } else { + if (j < 9) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } + } + } + + unsigned int iL = modules.layers()[moduleIndex] - 1; + unsigned int iR = modules.rings()[moduleIndex] - 1; + short subdet = modules.subdets()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules(modules, moduleIndex)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = 
miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( + TAcc const& acc, float rt, ModulesConst modules, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { + // ================================================================= + // Various constants + // ================================================================= + //mean of the horizontal layer position in y; treat this as R below + + // ================================================================= + // Computing some components that make up the cut threshold + // ================================================================= + + unsigned int iL = modules.layers()[moduleIndex] - 1; + const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float rLayNominal = + ((modules.subdets()[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); + const float miniPVoff = 0.1f / rLayNominal; + const float miniMuls = ((modules.subdets()[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); + const bool isTilted = modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] != Center; + //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly + + float drdz; + if (isTilted) { + if (modules.moduleType()[moduleIndex] == PS and modules.moduleLayerType()[moduleIndex] == Strip) { + drdz = modules.drdzs()[moduleIndex]; + } else { + drdz = modules.drdzs()[modules.partnerModuleIndices()[moduleIndex]]; + } + } else { + drdz = 0; + } + const float miniTilt2 = ((isTilted) ? 
(0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdz * drdz) / + (1.f + drdz * drdz) / moduleGapSize(modules, moduleIndex) + : 0); + + // Compute luminous region requirement for endcap + const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz); // Balaji's new error + + // ================================================================= + // Return the threshold value + // ================================================================= + // Following condition is met if the module is central and flatly lying + if (modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] == Center) { + return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); + } + // Following condition is met if the module is central and tilted + else if (modules.subdets()[moduleIndex] == Barrel and + modules.sides()[moduleIndex] != Center) //all types of tilted modules + { + return miniSlope + + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); + } + // If not barrel, it is Endcap + else { + return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum); + } + } + + template + ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float* shiftedCoords, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + // This is the strip shift scheme that is explained in http://uaf-10.t2.ucsd.edu/~phchang/talks/PhilipChang20190607_SDL_Update.pdf (see backup slides) + // The main feature of this shifting is that the strip hits are shifted to be "aligned" in the line of sight from interaction point to the the pixel hit. 
+ // (since pixel hit is well defined in 3-d) + // The strip hit is shifted along the strip detector to be placed in a guessed position where we think they would have actually crossed + // The size of the radial direction shift due to module separation gap is computed in "radial" size, while the shift is done along the actual strip orientation + // This means that there may be very very subtle edge effects coming from whether the strip hit is center of the module or the at the edge of the module + // But this should be relatively minor effect + + // dependent variables for this if statement + // lowerModule + // lowerHit + // upperHit + // endcapGeometry + // tiltedGeometry + + // Some variables relevant to the function + float xp; // pixel x (pixel hit x) + float yp; // pixel y (pixel hit y) + float zp; // pixel z (pixel hit z) + float rtp; // pixel rt (pixel hit rt) + float xa; // "anchor" x (the anchor position on the strip module plane from pixel hit) + float ya; // "anchor" y (the anchor position on the strip module plane from pixel hit) + float xo; // old x (before the strip hit is moved up or down) + float yo; // old y (before the strip hit is moved up or down) + float xn; // new x (after the strip hit is moved up or down) + float yn; // new y (after the strip hit is moved up or down) + float abszn; // new z in absolute value + float zn; // new z with the sign (+/-) accounted + float angleA; // in r-z plane the theta of the pixel hit in polar coordinate is the angleA + float angleB; // this is the angle of tilted module in r-z plane ("drdz"), for endcap this is 90 degrees + bool isEndcap; // If endcap, drdz = infinity + float moduleSeparation; + float drprime; // The radial shift size in x-y plane projection + float drprime_x; // x-component of drprime + float drprime_y; // y-component of drprime + const float& slope = + modules.dxdys()[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane + float absArctanSlope; + float 
angleM; // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees + float absdzprime; // The distance between the two points after shifting + const float& drdz_ = modules.drdzs()[lowerModuleIndex]; + // Assign hit pointers based on their hit type + if (modules.moduleType()[lowerModuleIndex] == PS) { + // TODO: This is somewhat of an mystery.... somewhat confused why this is the case + if (modules.subdets()[lowerModuleIndex] == Barrel ? modules.moduleLayerType()[lowerModuleIndex] != Pixel + : modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + xo = xUpper; + yo = yUpper; + xp = xLower; + yp = yLower; + zp = zLower; + rtp = rtLower; + } else { + xo = xLower; + yo = yLower; + xp = xUpper; + yp = yUpper; + zp = zUpper; + rtp = rtUpper; + } + } else { + xo = xUpper; + yo = yUpper; + xp = xLower; + yp = yLower; + zp = zLower; + rtp = rtLower; + } + + // If it is endcap some of the math gets simplified (and also computers don't like infinities) + isEndcap = modules.subdets()[lowerModuleIndex] == Endcap; + + // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos + // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time + angleA = alpaka::math::abs(acc, alpaka::math::atan(acc, rtp / zp)); + angleB = + ((isEndcap) + ? 
kPi / 2.f + : alpaka::math::atan( + acc, + drdz_)); // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa + + moduleSeparation = moduleGapSize(modules, lowerModuleIndex); + + // Sign flips if the pixel is later layer + if (modules.moduleType()[lowerModuleIndex] == PS and modules.moduleLayerType()[lowerModuleIndex] != Pixel) { + moduleSeparation *= -1; + } + + drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA); + + // Compute arctan of the slope and take care of the slope = infinity case + absArctanSlope = ((slope != kVerticalModuleSlope) ? fabs(alpaka::math::atan(acc, slope)) : kPi / 2.f); + + // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently + if (xp > 0 and yp > 0) { + angleM = absArctanSlope; + } else if (xp > 0 and yp < 0) { + angleM = kPi - absArctanSlope; + } else if (xp < 0 and yp < 0) { + angleM = kPi + absArctanSlope; + } else // if (xp < 0 and yp > 0) + { + angleM = 2.f * kPi - absArctanSlope; + } + + // Then since the angleM sign is taken care of properly + drprime_x = drprime * alpaka::math::sin(acc, angleM); + drprime_y = drprime * alpaka::math::cos(acc, angleM); + + // The new anchor position is + xa = xp + drprime_x; + ya = yp + drprime_y; + + // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions) + if (slope == + kVerticalModuleSlope) // Designated for tilted module when the slope is infinity (module lying along y-axis) + { + xn = xa; // New x point is simply where the anchor is + yn = yo; // No shift in y + } else if (slope == 0) { + xn = xo; // No shift in x + yn = ya; // New y point is simply where the anchor is + } else { + xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope)); // new xn + yn = (xn - xa) * slope + ya; // new yn + } + + // Computing new Z position + absdzprime = alpaka::math::abs( + acc, + 
moduleSeparation / alpaka::math::sin(acc, angleA + angleB) * + alpaka::math::cos( + acc, + angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later + + // Depending on which one is closer to the interaction point, compute the new z with respect to the pixel properly + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + abszn = alpaka::math::abs(acc, zp) + absdzprime; + } else { + abszn = alpaka::math::abs(acc, zp) - absdzprime; + } + + zn = abszn * ((zp > 0) ? 1 : -1); // Apply the sign of the zn + + shiftedCoords[0] = xn; + shiftedCoords[1] = yn; + shiftedCoords[2] = zn; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + dz = zLower - zUpper; + const float dzCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f; + const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); + const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; + + if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) + return false; + + float miniCut = 0; + + miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel + ? 
dPhiThreshold(acc, rtLower, modules, lowerModuleIndex) + : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex); + + // Cut #2: dphi difference + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 + float xn = 0.f, yn = 0.f; // , zn = 0; + float shiftedRt2; + if (modules.sides()[lowerModuleIndex] != Center) // If barrel and not center it is tilted + { + // Shift the hits and calculate new xn, yn position + float shiftedCoords[3]; + shiftStripHits(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + shiftedCoords, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + xn = shiftedCoords[0]; + yn = shiftedCoords[1]; + + // Lower or the upper hit needs to be modified depending on which one was actually shifted + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + shiftedRt2 = xn * xn + yn * yn; + + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zLower; + shiftedRt2 = xn * xn + yn * yn; + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + } else { + shiftedX = 0; + shiftedY = 0; + shiftedZ = 0; + dPhi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + noShiftedDphi = dPhi; + } + + if (alpaka::math::abs(acc, dPhi) >= miniCut) + return false; + + // Cut #3: The dphi change going from lower Hit to upper Hit + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 + if (modules.sides()[lowerModuleIndex] != Center) { + // When it is tilted, use the new shifted positions + // TODO: This is somewhat of an mystery.... 
somewhat confused why this is the case + if (modules.moduleLayerType()[lowerModuleIndex] != Pixel) { + // dPhi Change should be calculated so that the upper hit has higher rt. + // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. + // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) + // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) + // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit)); + + dPhiChange = (rtLower * rtLower < shiftedRt2) ? deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) + : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); + noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + } else { + // dPhi Change should be calculated so that the upper hit has higher rt. + // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. + // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) + // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) + + dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); + noShiftedDphiChange = rtLower < rtUpper ? 
deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + } + } else { + // When it is flat lying module, whichever is the lowerSide will always have rt lower + dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); + noShiftedDphiChange = dPhiChange; + } + + return alpaka::math::abs(acc, dPhiChange) < miniCut; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& drt, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphichange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + // Applies the series of cuts that a mini-doublet candidate must pass in an "endcap" region and returns true only if all of them pass. The float& parameters are outputs; a cut that fails returns immediately, so outputs assigned after that cut are left unwritten. + // Cut #1 : dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3093 + // For PS module in case when it is tilted a different dz (after the strip hit shift) is calculated later. + + float dz = zLower - zUpper; // Not const since later it might change depending on the type of module + + const float dzCut = 1.f; + + if (alpaka::math::abs(acc, dz) >= dzCut) + return false; + // Cut #2 : drt cut. |rtLower - rtUpper| must stay below drtCut (2.f for PS modules, 10.f for every other module type) + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 + const float drtCut = modules.moduleType()[lowerModuleIndex] == PS ? 
2.f : 10.f; + drt = rtLower - rtUpper; + if (alpaka::math::abs(acc, drt) >= drtCut) + return false; + // The new scheme shifts strip hits to be "aligned" along the line of sight from interaction point to the pixel hit (if it is a PS module) + float xn = 0, yn = 0, zn = 0; + + float shiftedCoords[3]; + shiftStripHits(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + shiftedCoords, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + + xn = shiftedCoords[0]; + yn = shiftedCoords[1]; + zn = shiftedCoords[2]; + + if (modules.moduleType()[lowerModuleIndex] == PS) { + // Appropriate lower or upper hit is modified after checking which one was actually shifted + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zLower; + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + dPhi = deltaPhi(acc, xLower, yLower, xn, yn); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + + // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" + // if it was an endcap it will have zero effect + if (modules.moduleType()[lowerModuleIndex] == PS) { + dz = modules.moduleLayerType()[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn; + } + + float miniCut = 0; + miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel + ? 
dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, dPhi, dz) + : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, dPhi, dz); + + /* dPhi cut (the unnumbered step between Cut #2 and Cut #4): |dPhi| must stay below the threshold returned by dPhiThreshold */ if (alpaka::math::abs(acc, dPhi) >= miniCut) + return false; + + // Cut #4: Another cut on the dphi after some modification: dPhi is rescaled by (1+dzFrac)/dzFrac, with dzFrac = |dz| / |zLower|, and compared against the same miniCut + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3119-L3124 + + float dzFrac = alpaka::math::abs(acc, dz) / alpaka::math::abs(acc, zLower); + dPhiChange = dPhi / dzFrac * (1.f + dzFrac); + noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac); + + return alpaka::math::abs(acc, dPhiChange) < miniCut; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + /* Dispatches on the sub-detector of the lower module: the barrel algorithm for Barrel, the endcap algorithm for everything else. Note that in the endcap branch the dz output is bound to the callee's drt parameter, so it receives rtLower - rtUpper rather than a z difference. */ if (modules.subdets()[lowerModuleIndex] == Barrel) { + return runMiniDoubletDefaultAlgoBarrel(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } else { + return runMiniDoubletDefaultAlgoEndcap(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } + } + + struct CreateMiniDoublets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, 
+ HitsConst hits, + HitsRangesConst hitsRanges, + MiniDoublets mds, + MiniDoubletsOccupancy mdsOccupancy, + ObjectRangesConst ranges) const { /* Kernel body: index 1 of the thread grid walks the lower modules, index 2 walks the flattened (lower hit, upper hit) pairs of that module; every pair accepted by runMiniDoubletDefaultAlgo is handed to addMDToMemory unless the module is already full. */ + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t lowerModuleIndex = globalThreadIdx[1]; lowerModuleIndex < modules.nLowerModules(); + lowerModuleIndex += gridThreadExtent[1]) { + uint16_t upperModuleIndex = modules.partnerModuleIndices()[lowerModuleIndex]; + int nLowerHits = hitsRanges.hitRangesnLower()[lowerModuleIndex]; + int nUpperHits = hitsRanges.hitRangesnUpper()[lowerModuleIndex]; + if (hitsRanges.hitRangesLower()[lowerModuleIndex] == -1) /* -1: presumably "no lower hits for this module" - confirm against the code that fills hitsRanges */ + continue; + unsigned int upHitArrayIndex = hitsRanges.hitRangesUpper()[lowerModuleIndex]; + unsigned int loHitArrayIndex = hitsRanges.hitRangesLower()[lowerModuleIndex]; + int limit = nUpperHits * nLowerHits; /* number of (lower, upper) hit pairs to test */ + + for (int hitIndex = globalThreadIdx[2]; hitIndex < limit; hitIndex += gridThreadExtent[2]) { + int lowerHitIndex = hitIndex / nUpperHits; + int upperHitIndex = hitIndex % nUpperHits; /* decode the flattened pair index */ + if (upperHitIndex >= nUpperHits) /* NOTE(review): for non-negative hit counts neither of these two bound checks can fire, because hitIndex < limit */ + continue; + if (lowerHitIndex >= nLowerHits) + continue; + unsigned int lowerHitArrayIndex = loHitArrayIndex + lowerHitIndex; + float xLower = hits.xs()[lowerHitArrayIndex]; + float yLower = hits.ys()[lowerHitArrayIndex]; + float zLower = hits.zs()[lowerHitArrayIndex]; + float rtLower = hits.rts()[lowerHitArrayIndex]; + unsigned int upperHitArrayIndex = upHitArrayIndex + upperHitIndex; + float xUpper = hits.xs()[upperHitArrayIndex]; + float yUpper = hits.ys()[upperHitArrayIndex]; + float zUpper = hits.zs()[upperHitArrayIndex]; + float rtUpper = hits.rts()[upperHitArrayIndex]; + + float dz, dphi, dphichange, shiftedX, shiftedY, shiftedZ, noShiftedDphi, noShiftedDphiChange; + bool success = runMiniDoubletDefaultAlgo(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitArrayIndex, + upperHitArrayIndex, + dz, + dphi, + dphichange, + shiftedX, + shiftedY, + shiftedZ, + 
noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + if (success) { /* totOccupancyMDs is incremented for every accepted pair, nMDs only for the pairs that are actually stored below */ + int totOccupancyMDs = alpaka::atomicAdd( + acc, &mdsOccupancy.totOccupancyMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyMDs >= (ranges.miniDoubletModuleOccupancy()[lowerModuleIndex])) { +#ifdef WARNINGS + printf("Mini-doublet excess alert! Module index = %d\n", lowerModuleIndex); +#endif + } else { + int mdModuleIndex = + alpaka::atomicAdd(acc, &mdsOccupancy.nMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int mdIndex = ranges.miniDoubletModuleIndices()[lowerModuleIndex] + mdModuleIndex; + + addMDToMemory(acc, + mds, + hits, + modules, + lowerHitArrayIndex, + upperHitArrayIndex, + lowerModuleIndex, + dz, + dphi, + dphichange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + mdIndex); + } + } + } + } + } + }; + + struct CreateMDArrayRangesGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, ModulesConst modules, ObjectRanges ranges) const { + // implementation is 1D with a single block; assigns every lower module a capacity (miniDoubletModuleOccupancy) and a starting offset (miniDoubletModuleIndices) in the mini-doublet array + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Declare variables in shared memory and set to 0 + int& nTotalMDs = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalMDs = 0; + } + alpaka::syncBlockThreads(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + /* Bin the module by layer, subdet and ring; -1 when no rule matches */ int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && 
module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + /* |eta| bin: four 0.75-wide bins up to 3.0, -1 beyond */ int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + /* Capacity for the (category, eta) bin; combinations that are not listed get 0 */ int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 49; + else if (category_number == 0 && eta_number == 1) + occupancy = 42; + else if (category_number == 0 && eta_number == 2) + occupancy = 37; + else if (category_number == 0 && eta_number == 3) + occupancy = 41; + else if (category_number == 1) + occupancy = 100; + else if (category_number == 2 && eta_number == 1) + occupancy = 16; + else if (category_number == 2 && eta_number == 2) + occupancy = 19; + else if (category_number == 3 && eta_number == 1) + occupancy = 14; + else if (category_number == 3 && eta_number == 2) + occupancy = 20; + else if (category_number == 3 && eta_number == 3) + occupancy = 25; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createMDArrayRangesGPU! 
Module index = %i\n", i); +#endif + } + + /* the value returned by atomicAdd is stored as this module's starting offset */ unsigned int nTotMDs = alpaka::atomicAdd(acc, &nTotalMDs, occupancy, alpaka::hierarchy::Threads{}); + + ranges.miniDoubletModuleIndices()[i] = nTotMDs; + ranges.miniDoubletModuleOccupancy()[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + ranges.miniDoubletModuleIndices()[modules.nLowerModules()] = nTotalMDs; + ranges.nTotalMDs() = nTotalMDs; + } + } + }; + + struct AddMiniDoubletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsOccupancy mdsOccupancy, + ObjectRanges ranges, + HitsRangesConst hitsRanges) const { + // implementation is 1D with a single block; writes the inclusive [first, last] MD index range of every lower module into mdRanges, or (-1, -1) when the module stored no MDs or its hitRanges()[i][0] is -1 + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (mdsOccupancy.nMDs()[i] == 0 or hitsRanges.hitRanges()[i][0] == -1) { + ranges.mdRanges()[i][0] = -1; + ranges.mdRanges()[i][1] = -1; + } else { + ranges.mdRanges()[i][0] = ranges.miniDoubletModuleIndices()[i]; + ranges.mdRanges()[i][1] = ranges.miniDoubletModuleIndices()[i] + mdsOccupancy.nMDs()[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h new file mode 100644 index 0000000000000..42605c80e9434 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -0,0 +1,165 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_NeuralNetwork_h +#define RecoTracker_LSTCore_src_alpaka_NeuralNetwork_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include 
"RecoTracker/LSTCore/interface/HitsSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +#include "NeuralNetworkWeights.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + namespace t5dnn { + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + TripletsConst triplets, + const float* xVec, + const float* yVec, + const unsigned int* mdIndices, + const uint16_t* lowerModuleIndices, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float innerRadius, + float outerRadius, + float bridgeRadius) { + // runInference: builds the 38-element DNN input for one T5 candidate, evaluates Linear(38,32)-ReLU-Linear(32,32)-ReLU-Linear(32,1)-Sigmoid and returns the sigmoid output. xVec, yVec, mdIndices and lowerModuleIndices are each read at indices 0..4. First, unpack x-coordinates of hits + float x1 = xVec[0]; + float x2 = xVec[1]; + float x3 = xVec[2]; + float x4 = xVec[3]; + float x5 = xVec[4]; + // Unpack y-coordinates of hits + float y1 = yVec[0]; + float y2 = yVec[1]; + float y3 = yVec[2]; + float y4 = yVec[3]; + float y5 = yVec[4]; + // Unpack mini-doublet (MD) indices + unsigned int mdIndex1 = mdIndices[0]; + unsigned int mdIndex2 = mdIndices[1]; + unsigned int mdIndex3 = mdIndices[2]; + unsigned int mdIndex4 = mdIndices[3]; + unsigned int mdIndex5 = mdIndices[4]; + // Unpack lower-module indices + uint16_t lowerModuleIndex1 = lowerModuleIndices[0]; + uint16_t lowerModuleIndex2 = lowerModuleIndices[1]; + uint16_t lowerModuleIndex3 = lowerModuleIndices[2]; + uint16_t lowerModuleIndex4 = lowerModuleIndices[3]; + uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; + // Compute some convenience variables + short layer2_adjustment = 0; + if (modules.layers()[lowerModuleIndex1] == 1) { + layer2_adjustment = 1; // get upper segment to be in second layer + } + unsigned int md_idx_for_t5_eta_phi = + segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]][layer2_adjustment]; + bool is_endcap1 = (modules.subdets()[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap + 
bool is_endcap2 = (modules.subdets()[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap + bool is_endcap3 = (modules.subdets()[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap + bool is_endcap4 = (modules.subdets()[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap + bool is_endcap5 = (modules.subdets()[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap + + // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) + float x[38] = { + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) + mds.anchorEta()[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) + mds.anchorPhi()[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) + mds.anchorZ()[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) + alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) + float(modules.layers()[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) + mds.anchorEta()[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) + mds.anchorPhi()[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) + mds.anchorZ()[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) + alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) + float(modules.layers()[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) + mds.anchorEta()[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) + mds.anchorPhi()[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) + mds.anchorZ()[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) + float(modules.layers()[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) + mds.anchorEta()[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_eta) + mds.anchorPhi()[mdIndex3], // outer T3 anchor hit 3 
phi (t3_0_phi) + mds.anchorZ()[mdIndex3], // outer T3 anchor hit 3 z (t3_0_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) + float(modules.layers()[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) + mds.anchorEta()[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) + mds.anchorPhi()[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) + mds.anchorZ()[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) + alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) + float(modules.layers()[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) + mds.anchorEta()[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) + mds.anchorPhi()[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) + mds.anchorZ()[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) + alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) + float(modules.layers()[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) + alpaka::math::log10(acc, (innerRadius + outerRadius) * k2Rinv1GeVf), // T5 pT (t5_pt) + mds.anchorEta()[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) + mds.anchorPhi()[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) + alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) + alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) + alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) + }; + + // (0): Linear(in_features=38, out_features=32, bias=True) => x = x*W_T + b + float x_0[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_0[col] = 0; + for (unsigned int inner = 0; inner < 38; ++inner) { + x_0[col] += x[inner] * wgtT_0[inner][col]; + } + x_0[col] += bias_0[col]; + } + + // (1): ReLU() + float x_1[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_1[col] = (x_0[col] > 0.f) ? 
x_0[col] : 0.f; + } + + // (2): Linear(in_features=32, out_features=32, bias=True) => x = x*W_T + b + float x_2[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_2[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_2[col] += x_1[inner] * wgtT_2[inner][col]; + } + x_2[col] += bias_2[col]; + } + + // (3): ReLU() + float x_3[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_3[col] = (x_2[col] > 0.f) ? x_2[col] : 0.f; + } + + // (4): Linear(in_features=32, out_features=1, bias=True) => x = x*W_T + b + float x_4[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_4[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_4[col] += x_3[inner] * wgtT_4[inner][col]; + } + x_4[col] += bias_4[col]; + } + + // (5): Sigmoid(), evaluated in the form 1/(1+exp(-x)): the form exp(x)/(exp(x)+1) overflows to inf/inf = NaN for large positive x + float x_5[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_5[col] = 1.f / (1.f + alpaka::math::exp(acc, -x_4[col])); + } + + return x_5[0]; + } + + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h new file mode 100644 index 0000000000000..d5321fea07a6e --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h @@ -0,0 +1,315 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_NeuralNetworkWeights_h +#define RecoTracker_LSTCore_src_alpaka_NeuralNetworkWeights_h + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + namespace t5dnn { + + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { + -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, + 0.6211641f, 4.9630723f, 3.4310920f, -0.8856288f, 4.5843782f, -6.0180559f, 0.0126438f, -1.5725276f, + -0.8549317f, -6.8545237f, -1.2129461f, 3.0617838f, -0.3911322f, 0.0799793f, -2.5398655f, -0.5780622f, + 2.8533990f, -0.1777968f, -2.6457164f, -0.7976936f, 4.5644889f, -2.1747942f, 3.4286616f, -10.1073380f}; + 
ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_0[38][32] = { + {6.1269712f, -10.6625051f, 17.4907818f, -0.0019928f, -3.4468415f, 1.6674044f, -7.8957767f, 2.2077549f, + 9.5517254f, -5.1345053f, -30.1643391f, 4.0148559f, -19.8330841f, -18.3806915f, 0.1334764f, 1.6213616f, + -4.1423774f, -15.3062429f, -1.0209556f, 1.5580219f, 0.7426265f, 0.0033929f, 1.3924170f, 0.9196110f, + -0.8995734f, 1.0594707f, 39.4390869f, 8.7642002f, 28.4583893f, -5.9235659f, 3.7221889f, 14.4167147f}, + {1.7863803f, -0.6068707f, 0.3166098f, -0.0608759f, 0.5939785f, 0.4870262f, -3.1375074f, -17.7147388f, + -0.7231818f, -9.3808413f, 2.2070611f, 15.7461920f, 0.9355862f, 2.3942475f, -0.0671409f, 3.5954301f, + -3.0463996f, -2.0748904f, -0.5450584f, -4.4800100f, 0.6074556f, -0.0161482f, 3.0624702f, -4.5688419f, + 2.9881518f, -0.3714012f, -0.0387531f, -0.7699140f, 4.4028845f, 5.0333014f, -4.7350726f, -8.6568584f}, + {5.6548429f, -0.0207700f, 0.1785973f, 0.0881671f, 0.2530097f, -0.1893259f, -0.1105739f, -0.5183877f, + 1.0728362f, 0.1833011f, 1.7765219f, 0.3127359f, 0.0455277f, -0.1442616f, -0.1048361f, -0.1235604f, + -0.1217661f, -0.5487315f, 0.7575656f, -0.1177454f, -17.0993137f, 0.1628031f, 0.2789381f, 0.5304270f, + 0.0837841f, -3.1120780f, 0.0074821f, -0.1648044f, -0.3395336f, 0.3958135f, 0.8718957f, -1.1980486f}, + {0.2401041f, -0.0585765f, -0.0144584f, 0.0411095f, 0.0752229f, 0.0292672f, -0.2437613f, -1.4396472f, + -0.0971315f, -1.7181139f, 0.2417643f, 2.2030578f, 0.0566049f, 0.1081589f, -0.1060181f, 0.3473758f, + -0.7095683f, -0.0345675f, 0.2794849f, -1.1702278f, 0.2622930f, -0.0072611f, 0.5026371f, -1.2882922f, + -0.4712771f, 0.0597130f, -0.0039970f, -0.6050836f, 0.1554724f, 1.0991164f, -0.4975886f, 0.2597970f}, + {0.0766028f, 0.0218421f, -0.1739017f, -0.0076569f, 0.0384461f, -0.1841756f, 0.9677940f, -3.1114254f, + 2.3830564f, 2.0706992f, -0.9643140f, 0.7361387f, -0.0060253f, -0.1554846f, -0.0831100f, 2.8754771f, + -1.4403527f, -0.5281797f, 0.5157787f, 4.2405987f, 0.4807618f, 0.0217647f, 
-1.2626950f, 0.9145837f, + -0.3931780f, 0.3426280f, -0.0065206f, -0.7510439f, -0.4555758f, 2.7724340f, -1.2173026f, 0.1039017f}, + {0.5685715f, 0.3927337f, 0.4942532f, -0.0671033f, -0.2808350f, -0.0336000f, -1.3983957f, 0.9876546f, + -2.3840380f, 0.7315395f, -2.2009561f, -1.4631602f, -0.4672308f, -0.4994236f, 0.1169335f, -1.1894208f, + -1.2692982f, 0.3303853f, -2.0147655f, -0.9912014f, 1.0042895f, 0.1121151f, -1.0789106f, -2.2821584f, + -6.6459913f, -0.0959398f, -0.0068429f, -2.8177626f, 0.3213172f, -2.6832986f, -4.7613306f, -0.9985733f}, + {1.4419515f, -0.3864825f, -0.6756768f, -0.1273375f, 0.4321181f, 0.3354745f, -0.8236564f, -2.8190827f, + 0.7090831f, 1.9072700f, -3.1834064f, -2.6938572f, 0.5051147f, 1.4382831f, 0.1241910f, -0.7352629f, + 0.7703634f, -1.7556250f, -2.1104112f, 3.0603442f, 1.9873468f, -0.0358815f, -1.0087154f, 3.8253262f, + -0.5466214f, 0.0875162f, 0.2691758f, 0.7121435f, 1.9314718f, -0.1580560f, 3.6484149f, -5.3173709f}, + {6.9104381f, -0.0033664f, -1.4405546f, -0.1768288f, 0.2028089f, -0.1012344f, -4.4735684f, 0.6354278f, + 4.3039737f, 0.2056303f, 1.8338999f, -1.1351355f, 0.1015760f, -0.0733253f, -0.0561627f, 2.5292397f, + 1.6314448f, -0.9333628f, -0.7773662f, 0.8313186f, -0.7829623f, 0.1265118f, 0.5922315f, -0.3463379f, + -1.3269740f, -3.3302619f, -0.0061799f, 2.3374722f, 0.0880938f, 0.7470241f, -0.4205743f, -4.7557602f}, + {0.0380794f, 0.0947470f, 0.0419397f, 0.0582226f, -0.0603404f, 0.0234028f, -0.2575402f, 0.4125248f, + 0.3035339f, 0.2663808f, -0.6092452f, -1.4727812f, 0.0247187f, -0.0539688f, -0.0150413f, 0.2094955f, + 0.5379737f, -0.3255228f, -0.5639279f, 0.0786276f, 0.6703192f, 0.1557026f, -0.2753083f, 1.1463971f, + -0.9372965f, 0.5657740f, 0.0041413f, 0.0870248f, 0.0101520f, -0.8214461f, 0.1212932f, 1.5648646f}, + {-0.0969819f, 0.0137566f, 1.3515147f, -0.0155047f, -0.1416170f, -0.1636726f, 0.5184190f, 0.4732984f, + 0.6815788f, -1.0522166f, -0.4486531f, -0.0516016f, 0.0201894f, -0.0849667f, -0.0861271f, -1.2027841f, + 1.2458711f, 
-0.7061657f, 1.0381308f, -0.3450044f, -0.1300479f, -0.0828402f, 0.6859242f, -1.0575374f, + 0.6947553f, -0.0922188f, 0.0199132f, 0.8038982f, -0.1734094f, -0.1057449f, 1.6305015f, -0.0688597f}, + {-1.8151448f, 0.1024327f, 1.7063105f, 0.1130912f, -0.1081472f, -0.2904744f, -1.3465070f, -1.0455177f, + -0.4581082f, -3.2220871f, 0.5221398f, -5.1637673f, 0.0811146f, -0.1326323f, -0.0379338f, -3.0439703f, + -2.4246936f, -0.3670847f, -3.1256330f, -1.6595014f, -3.4715190f, -0.1526113f, -1.0420206f, 0.9536474f, + -3.2932863f, 1.6048199f, 0.0025162f, -3.6049840f, 0.0604250f, -2.2404826f, 1.8406851f, -3.1381185f}, + {1.2985691f, -1.1044264f, 0.9062797f, -0.0788333f, 0.2694912f, 0.0032800f, -0.0574267f, 0.9734111f, + 1.1532565f, 2.6786125f, -3.8574269f, -2.2871449f, -0.1261243f, 1.0545347f, -0.1454154f, -0.5609738f, + 1.8385800f, -0.8035598f, -1.7668265f, 5.1665063f, 0.7966110f, 0.0940206f, -2.3943975f, 2.3344002f, + 1.0342182f, 0.4806454f, -0.3880928f, 0.6998246f, 1.4011886f, -1.7313483f, 4.9702630f, -6.0058608f}, + {1.0300356f, 0.0616315f, -0.1113776f, -0.1694220f, 0.7159944f, 0.0626456f, 2.0994680f, 0.3452290f, + -3.0487001f, 0.0654031f, -1.1510723f, 0.5370992f, -0.0290704f, -0.0300795f, 0.0751569f, -0.2345951f, + -0.3472281f, 0.4424143f, 1.2444530f, -0.2114656f, 0.7865694f, -0.0709381f, -0.1839961f, -0.0529834f, + 0.5867608f, -3.8793530f, -0.0814745f, -0.6368676f, 0.0361213f, -0.5549288f, 0.5661780f, 1.8374584f}, + {0.3345098f, 0.0068199f, -0.4205509f, -0.1088801f, -0.1043202f, -0.0040804f, 0.3400922f, 0.2673528f, + -0.6050695f, 0.4443954f, -0.4319905f, -0.6044132f, -0.0260679f, 0.0137036f, 0.0765494f, -0.0095099f, + 0.5880439f, -0.0083854f, -0.2407522f, 0.1942379f, 0.6554548f, -0.1322891f, -0.8298992f, 0.7909554f, + 1.0528831f, 0.1970959f, 0.0754069f, -0.0947960f, -0.0279494f, -0.5888316f, 0.8919419f, 0.4828835f}, + {0.3995822f, -0.2139665f, 0.3982936f, -0.1285759f, -0.3445527f, -0.1167238f, -0.1263519f, 0.8393803f, + -0.7758383f, 0.0719291f, -0.0134762f, 0.1715237f, 
0.0796666f, 0.1023507f, -0.1172728f, -1.2364722f, + 1.2592632f, -0.3168479f, 0.7487004f, -1.5170647f, -0.2235429f, -0.1620898f, 1.4064828f, -1.0821995f, + 0.0740103f, -1.0412805f, -0.0621277f, 0.2439800f, 0.2684972f, -1.1661061f, 0.7859434f, -0.6170313f}, + {2.1615884f, 0.1431713f, 0.0642652f, -0.0522325f, -0.2658786f, -0.0245810f, -1.6857448f, -0.6685011f, + -0.6978170f, -0.8716729f, 0.3129902f, -2.5870812f, -0.2855283f, -0.3205920f, -0.0084069f, 1.3182145f, + -0.6923816f, -0.3730274f, -2.3638811f, -1.1128502f, -2.4709859f, 0.1349022f, -0.3574466f, -0.6597407f, + -4.1122031f, 0.2240651f, 0.1806145f, -1.6836300f, -0.0766231f, -3.2611966f, 0.0091456f, -0.0997367f}, + {5.2476101f, -0.1966512f, 4.8935304f, -0.1551689f, 1.6919724f, -0.8324367f, 14.3318472f, -0.3503132f, + 10.3614969f, -9.1522884f, -0.2543063f, -1.8476851f, 16.7961140f, 9.9541416f, -0.0434563f, -9.6973553f, + -5.0469398f, 6.1688442f, 7.6429725f, -7.3149266f, 1.2345183f, 0.1412155f, 0.7114770f, -1.6378664f, + 5.1548996f, 0.3686100f, -45.3027611f, 3.0492647f, -37.3445892f, 2.7421410f, -2.7958770f, -25.2034016f}, + {1.4597454f, -1.0561740f, 0.9751291f, 0.0446527f, 0.3691662f, 0.1006782f, 0.1418435f, 0.8871480f, + 1.1603093f, 2.8034730f, -4.0856910f, -1.9786842f, -0.2206208f, 0.9539357f, 0.0868183f, -0.6811873f, + 1.9642411f, -0.8065316f, -2.0244894f, 5.2936082f, 0.6120632f, -0.1194160f, -2.3925939f, 2.5555069f, + 1.0149733f, 0.4607603f, -0.2197217f, 0.5703423f, 1.4049014f, -1.5900208f, 5.1645074f, -6.0569463f}, + {0.9000676f, -0.0028781f, -0.1967366f, 0.1039593f, 0.7993248f, 0.0655172f, 2.2296758f, 0.4391927f, + -3.0292840f, 0.0334536f, -1.1728534f, 0.3479103f, -0.1190938f, 0.0410203f, 0.1146637f, -0.2958017f, + -0.3240463f, 0.4361866f, 1.0564958f, -0.1989332f, 0.5194008f, -0.0628912f, -0.1733121f, -0.1255383f, + 0.5990249f, -3.7692382f, 0.0995128f, -0.7101220f, -0.0785123f, -0.3514554f, 0.6662078f, 2.0991604f}, + {0.1781942f, -0.1873588f, -0.4653996f, -0.0153059f, -0.1399561f, -0.0498718f, 0.4552556f, 
0.2300792f, + -0.7682312f, 0.4342302f, -0.3787803f, -0.6089386f, -0.1049337f, 0.0395331f, 0.0220332f, 0.0114750f, + 0.4672548f, 0.1284784f, -0.2472819f, 0.2892784f, 0.4788667f, 0.0472555f, -0.6593549f, 0.6508777f, + 0.9286987f, 0.3043948f, -0.0635985f, 0.0814399f, -0.1168853f, -0.6688027f, 0.8876534f, 0.4865684f}, + {0.4024099f, 0.0480259f, 0.4588822f, -0.1793082f, -0.2151573f, -0.1871128f, -0.1502780f, 1.1011307f, + -0.9467706f, 0.2632496f, -0.1257263f, -0.0241331f, 0.2280627f, 0.0878608f, -0.1334262f, -1.1642927f, + 1.0943586f, -0.4799654f, 0.5981907f, -1.5051398f, -0.4235946f, 0.0012827f, 1.2342577f, -0.8281875f, + 0.2776567f, -1.0362227f, 0.0408372f, 0.1540821f, 0.1777556f, -1.2684357f, 0.8836584f, -0.4001710f}, + {2.1558056f, 0.2082023f, 0.0863442f, 0.0364868f, -0.3985825f, 0.0307202f, -1.8889453f, -0.5614714f, + -0.7311882f, -0.8075573f, 0.4895108f, -2.7770483f, -0.3121874f, -0.1671291f, -0.1281284f, 1.3212786f, + -0.5310181f, -0.1974759f, -2.6240873f, -0.8320529f, -2.3875966f, -0.0286360f, -0.6263188f, -0.6553424f, + -4.1658955f, -0.0601300f, 0.0946256f, -1.6795633f, -0.1251303f, -3.0974686f, 0.2412274f, -0.0687501f}, + {2.0523887f, -0.6387668f, 2.0633900f, -0.0550964f, 0.5181718f, -0.4202190f, 1.8569367f, 0.8295385f, + 0.8555872f, 2.4727983f, -0.2072828f, -1.9006120f, 0.5379534f, 0.4463673f, 0.1468820f, 0.4918649f, + -3.4016700f, 0.2884440f, -1.9418719f, 4.5157170f, -0.5160927f, -0.0199372f, 3.1353824f, -0.9863126f, + -1.5135859f, 0.7576568f, 0.6715558f, 2.7409093f, 0.9291748f, -0.3247162f, 1.8204515f, -8.9181070f}, + {-0.1428107f, -0.0829889f, 0.4213613f, 0.0225415f, 1.2238166f, 0.0477106f, 0.3031853f, -0.7466553f, + 2.0663500f, 0.7588379f, 0.3689216f, -0.2003786f, 0.1242338f, 0.1693589f, -0.0351716f, -0.0186597f, + -0.0189417f, 0.5468715f, -0.2862698f, -0.1311738f, 3.0747476f, -0.0310747f, 0.0943165f, 0.3139819f, + 0.6274695f, -1.8314874f, 0.0147495f, 0.3554756f, 0.3829916f, 0.4891713f, 0.1328600f, 1.0535098f}, + {0.0534900f, 0.1787969f, -0.0571320f, 
-0.0685673f, 0.1968977f, 0.0374476f, 0.7876674f, 0.0828491f, + 0.6444036f, -0.2203166f, -0.2383427f, 0.5397566f, 0.0106769f, -0.1230072f, -0.0135021f, -0.5691944f, + -1.5040319f, 0.0406933f, -0.0025478f, 0.9251419f, -1.7180276f, -0.1112956f, 1.4840862f, 0.0407115f, + -0.0100329f, 0.0583593f, -0.0110524f, 0.7431355f, -0.0971857f, -0.5501527f, -0.6371027f, -0.1935233f}, + {-0.6455778f, 0.2317368f, 0.9285696f, -0.1415854f, 0.0822560f, 0.2488030f, -2.6992166f, 0.0884904f, + 0.6735302f, -0.1467820f, 0.5641044f, 0.6436581f, 0.0818401f, -0.0336634f, -0.0729000f, -0.1206900f, + -2.5739892f, 0.5776953f, 0.9531668f, -1.2362405f, -0.0615577f, -0.0143544f, -2.7525210f, 1.3738545f, + 0.2751348f, -1.7463943f, -0.0020144f, 2.4814103f, 0.1716725f, -0.7055540f, -0.3474010f, 0.4482578f}, + {-0.2526205f, -0.7463821f, -3.6076138f, -0.1511098f, 0.1216256f, 0.0888247f, -1.0190924f, -1.3260181f, + -0.0443211f, -4.8911066f, -3.4385188f, -6.0057454f, 0.3340450f, 0.2997236f, -0.0907855f, 0.7500492f, + -0.4007562f, 1.9382039f, 0.5687234f, 2.6511824f, 4.7703862f, 0.0006749f, -0.0201394f, -3.5885489f, + -4.1518898f, 0.0807014f, -0.0584071f, -0.8100027f, 0.7697087f, -0.8038046f, -1.2945876f, -4.0110312f}, + {0.4337017f, -1.1532011f, 2.0740633f, 0.0271806f, 0.6654227f, 0.1012998f, -4.0791736f, 1.2631345f, + 1.9511020f, 2.3272331f, 1.2707534f, 1.6306664f, 0.4936035f, 0.8285242f, 0.0807625f, 3.8652387f, + 0.0281145f, 1.6877037f, 1.2557380f, -0.3036775f, 0.5604967f, 0.1551418f, -0.9599600f, -6.3067718f, + -0.6352320f, 0.8058553f, 0.3657880f, -2.0491202f, -0.3926269f, 2.5650854f, 1.3697821f, -8.3070078f}, + {5.1334143f, -0.0351738f, -0.4774780f, -0.0679726f, 1.4569254f, 0.0580191f, -0.3649136f, -0.2298838f, + -3.3826666f, -0.7392708f, -0.6036060f, -0.2612940f, -0.1877640f, -0.1145124f, -0.0042578f, -0.0311193f, + -0.0320479f, 0.5270581f, -0.4324475f, 0.2681437f, 4.7813129f, -0.0222701f, -0.0525629f, -0.2861001f, + -0.1251072f, 3.9112861f, 0.0045046f, -0.0426071f, -0.3299106f, -0.0686970f, 
-0.1602017f, -0.0070103f}, + {-0.6633690f, 0.0103367f, 0.5998458f, 0.1256577f, -0.0359184f, -0.0176820f, -0.6458368f, -0.0370536f, + 0.3542259f, 0.1394724f, 0.8255956f, 0.2501569f, 0.0320156f, -0.0256806f, 0.0277949f, 0.0036392f, + 0.2825173f, 0.1400358f, 1.0011463f, -0.6792242f, 0.0672508f, 0.0728705f, -0.1089695f, -1.0414587f, + -0.4135485f, 0.4293025f, -0.0041241f, -0.9564193f, 0.0314900f, 0.8658463f, -0.7734696f, -0.7610567f}, + {-0.0200122f, -0.0749178f, -1.5026549f, -0.0387432f, -0.0713735f, 0.1214790f, 1.8730290f, -0.0552839f, + -1.6867150f, 0.2282097f, 0.7161849f, -0.1018546f, -0.1092003f, 0.0365504f, -0.1326883f, 1.2310545f, + 0.1800210f, 0.7024739f, -2.9606545f, 1.2275347f, -0.2050014f, 0.0940569f, 0.4761694f, 0.8812068f, + -0.0083424f, -1.5406264f, 0.0061815f, -2.7606382f, 0.0248556f, 1.1086880f, -1.3608936f, 1.0795454f}, + {0.9734020f, 0.3905411f, -3.7008634f, 0.0013557f, 0.1649124f, 0.9935362f, 1.3489184f, 0.9505764f, + 0.7966231f, -0.1627246f, -2.5754328f, 1.4892205f, 0.8586300f, 0.6974363f, 0.1320204f, -0.7840260f, + 0.3121157f, 0.0966901f, 2.7447381f, 1.8256680f, 0.7229405f, -0.1723188f, 0.9145948f, -2.1376033f, + 0.5259342f, 0.0731194f, -0.2908303f, -0.2603913f, -0.2326528f, 3.6684167f, -0.2883157f, -2.8546307f}, + {-4.8917460f, 6.7944999f, -0.2255474f, 0.1051999f, 3.9000113f, 2.0624907f, 5.3019547f, 10.0209141f, + 1.1268179f, 2.2669628f, -6.5002980f, 1.8408583f, 5.3039579f, 2.2055962f, 0.1055369f, 1.7230233f, + 6.9605255f, 7.7025104f, 2.9880707f, -0.9274251f, -0.2287160f, -0.0206735f, 0.6885675f, 2.8179996f, + -7.1129837f, -1.3772345f, 3.8655453f, -5.9388318f, -0.0469947f, 7.2763596f, -6.3536129f, -17.0069847f}, + {1.8787041f, -0.9953383f, -1.4839923f, 0.1308209f, 0.3657510f, 0.3106483f, -1.4158971f, -6.7449651f, + 0.6553892f, -4.5046172f, -3.5489719f, 3.5363002f, 0.5454772f, 2.3521471f, 0.1612140f, -0.9744226f, + 0.6546553f, -2.7179255f, -1.7758157f, 0.3089439f, 1.7462813f, 0.1654593f, -0.2440207f, 3.9501827f, + 1.3750844f, 0.0596805f, 
-0.1977254f, 0.0264880f, 2.6396444f, 1.0816911f, 3.6413448f, -6.0299959f}, + {-4.1295738f, 0.1044480f, 0.2131937f, 0.0420826f, 0.5292229f, 0.0090477f, -0.0973486f, 0.9596778f, + 2.9579651f, -0.6364226f, -1.7556342f, 0.1539868f, -0.1273174f, -0.1348504f, 0.1257833f, -1.4168571f, + -1.0960362f, 0.0482449f, -1.4395387f, -0.2524115f, -2.9162085f, -0.0451428f, -0.4021681f, -0.5756381f, + 0.0515293f, -3.1996479f, -0.0007676f, -1.3878343f, -0.2864279f, -0.9579773f, -1.0999249f, 1.6500067f}, + {-2.4806111f, -6.8115449f, 3.2805641f, 0.1187415f, -0.9950783f, 6.2553434f, -1.6450261f, -6.1463733f, + 2.7507148f, 4.2995782f, 0.0461297f, -0.5417359f, 2.4306326f, -7.3530145f, 0.0698273f, -0.9394333f, + -1.3595498f, -7.5141478f, -1.4911395f, 3.2300410f, 0.1203540f, 0.0314884f, -2.0116949f, -0.8167119f, + 2.4133310f, 0.1920709f, 1.0619365f, 0.2459123f, 6.9166069f, -2.6384118f, 3.6829739f, -7.2385545f}, + {0.9408096f, 14.9067144f, 1.7709646f, 0.1105646f, -0.5600107f, -15.3188124f, -12.3718462f, -1.8893757f, + 13.6364670f, -5.7327847f, -14.1805468f, 1.0581509f, -14.2186184f, 14.8948650f, 0.0190344f, 5.4395180f, + 6.7243400f, 9.8468456f, 4.5144215f, -1.4551491f, 1.1032411f, -0.0317988f, 2.3398454f, -3.1671596f, + -7.7541409f, 1.1255593f, 6.7340465f, -4.4448423f, -9.1472626f, -3.1959128f, 4.4181323f, -2.7904994f}, + {-2.1621978f, -4.7202382f, 1.7378219f, 0.1417439f, -0.5000908f, 5.4468708f, 1.4260571f, -6.6136570f, + 1.5713804f, 3.4479704f, 2.7354901f, -0.7388076f, 5.4666147f, -3.8697338f, -0.1368596f, -2.7903373f, + -1.2043713f, -4.9554005f, 0.3324645f, 1.6767365f, 0.1156244f, -0.0326964f, -2.0945346f, -0.4590589f, + 3.0942657f, 0.0015020f, -6.2626700f, -0.3969755f, 0.7717427f, -1.9667094f, 2.9664171f, -11.9477053f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_2[32] = { + 9.8383608f, 3.6922295f, 3.5774977f, -4.4619012f, 6.5087032f, -0.9540017f, -0.5059246f, 0.0706402f, + 14.3396597f, -0.2771132f, -4.8409863f, -8.3581600f, -3.5078344f, 4.3287506f, -5.7808843f, 3.9264839f, + 
-2.1697845f, -0.0040514f, -0.2095029f, -6.8678174f, 1.7911285f, -0.4510343f, 1.2410443f, -4.5678806f, + -0.5693849f, 2.3320096f, 4.4606552f, -6.3771009f, -4.3149071f, -0.1905672f, -3.5726390f, -1.0744030f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_2[32][32] = { + {-0.0155548f, 0.0243339f, 0.0037967f, -0.2771824f, 0.0111955f, -0.0115980f, 0.0079653f, -2.9803498f, + -0.0061037f, -0.0956634f, 0.0332446f, 0.0179244f, -0.0080377f, -9.0180779f, 0.1720033f, 0.0350694f, + -0.0146588f, -0.2135506f, -0.3158041f, 1.3697664f, 0.0119146f, 0.0119120f, -0.0986927f, 0.0297492f, + 0.0355827f, -0.1196868f, -0.0745119f, 0.0281862f, -0.0422190f, -0.3069138f, -0.0477367f, -0.0550450f}, + {-1.7374619f, 1.4822800f, -2.1885235f, 1.8354234f, -0.5380136f, 1.6621803f, 0.6251035f, 0.1008954f, + -0.8387129f, -0.2063313f, 1.0661691f, -0.9799694f, -5.1710258f, -3.2260630f, -1.5073707f, -1.0792168f, + 1.8569958f, -0.2289213f, 0.0563821f, -1.6398847f, -4.1649504f, -2.7527378f, -0.0134577f, 3.0424533f, + 0.0364320f, 0.6762254f, -3.1551330f, 2.4888904f, 1.4757305f, -0.3141717f, -2.0126467f, -0.1675602f}, + {-0.9571826f, 0.0914152f, 0.0404339f, 0.2927902f, 0.2933607f, 0.0619171f, 0.0772318f, -1.3796169f, + -0.8194544f, -0.2179988f, -1.1241078f, -0.1443964f, 0.0559355f, -1.2914546f, -0.3445117f, 0.2031156f, + 0.0273864f, -0.0193422f, -0.2136522f, 0.0429592f, 0.0212854f, 0.0414394f, -1.1734651f, 0.0582848f, + 0.0136039f, -0.1892604f, 0.0764908f, -0.0130132f, -0.1272559f, -0.0818855f, -0.0408583f, -0.1563294f}, + {-0.0213695f, 0.0596942f, -0.0641309f, -0.0146449f, 0.0416586f, -0.0378931f, 0.1234860f, 0.1622967f, + 0.0794091f, -0.0639933f, -0.1030663f, 0.0579078f, 0.1050275f, -0.0136866f, 0.0149978f, 0.0876813f, + 0.0693554f, 0.1612417f, -0.0595916f, -0.1008234f, -0.0579058f, 0.0915138f, 0.1321436f, -0.1484535f, + -0.0920316f, -0.0024532f, -0.1045300f, 0.0924260f, 0.0277524f, -0.0287276f, -0.1271127f, 0.1164243f}, + {0.0713067f, 0.0198056f, -0.3023696f, -0.0025908f, -0.0085885f, -1.1157553f, 
0.0236462f, -0.0704844f, + -0.0189257f, -0.0997382f, 0.3379845f, -0.1229390f, -0.0616165f, -0.8968034f, 0.0401445f, -0.1144476f, + -0.0532077f, 0.0604580f, 0.0609454f, -0.1613472f, 0.0103525f, -0.1653874f, 0.0205189f, 0.0758978f, + -0.1514593f, 0.0151441f, 0.2043469f, 0.0349607f, -0.1361278f, -0.1255922f, 0.0631648f, 0.3570991f}, + {0.3371337f, -3.7541580f, 2.2215877f, -0.3390516f, 0.1912718f, -4.1861577f, -1.2264019f, 2.8179801f, + 0.0667294f, -0.0093539f, 2.3029909f, 3.1814916f, 3.9780347f, 0.2310601f, 0.3986159f, -0.8544636f, + 0.4139664f, -0.1876569f, -0.2448732f, -2.8053334f, 4.0488625f, 2.1094146f, -6.7310257f, -4.9950023f, + -0.8315823f, 0.0555959f, 2.4573720f, -3.7234364f, -4.2910552f, -0.2995245f, -3.2605181f, 2.3620574f}, + {-1.5522735f, -0.1866350f, -0.0067679f, 0.3196557f, 1.4052233f, 2.8143549f, -0.9992948f, -0.5309914f, + -25.8852596f, -0.1218249f, 0.6625420f, 0.3007106f, -0.2767264f, -0.1847300f, -0.5313534f, -0.0383462f, + -0.1987552f, 0.0581405f, -0.3376078f, 1.2621028f, 0.0818709f, -0.1401216f, -0.4550788f, -0.1592657f, + 0.0597123f, 0.1344101f, -0.1005317f, -0.1538406f, 2.9142656f, -0.0806051f, -0.4267367f, -31.9512234f}, + {0.6859627f, 0.1212986f, 0.1291616f, 0.0459838f, -0.0899920f, 0.0287645f, 0.1987007f, -2.7079368f, + -0.2628384f, -0.1402464f, -0.6302179f, -0.2923960f, -0.1106663f, 0.8256195f, -2.8054097f, -0.0296494f, + -0.5632019f, -0.1335654f, -0.1558440f, -6.8611612f, 0.0203786f, 0.0046566f, -0.4401442f, -0.0471430f, + 0.4535986f, -0.8657981f, 0.0684740f, 0.0518814f, -0.0123748f, -0.2270164f, 0.0922878f, -0.3863277f}, + {0.0127175f, 2.3346109f, -0.4390767f, -0.4657893f, 0.1659466f, -0.1132782f, -0.4928388f, 0.7652873f, + 1.1510741f, -0.0879600f, 0.2721785f, -0.1878961f, -0.3477249f, -0.8473209f, -0.8931856f, -0.4328294f, + -11.9181929f, -0.0282545f, -0.0217915f, 1.6676594f, -0.2122232f, -0.6190930f, 1.9053432f, -0.7592348f, + -1.0739189f, -0.7170524f, 0.3864411f, -0.8849231f, 0.1393488f, 0.0738489f, 0.4460345f, 1.9020857f}, + 
{0.4453296f, -0.0767821f, 0.1638939f, 1.6997167f, -0.1098599f, -0.0551604f, 0.0040561f, -13.5290670f, + -0.1285677f, -0.0590394f, 0.6499141f, -0.7617344f, 0.0453151f, 0.3104213f, -1.0711143f, 0.1361838f, + -0.4365610f, -0.1300649f, 0.2013344f, -0.5308123f, 0.1451896f, 0.1030715f, -0.6487910f, -0.3136590f, + -0.0280079f, 0.5394178f, 0.1318262f, -0.0159292f, 0.0636870f, -0.3224248f, -0.1868187f, -0.2468304f}, + {-0.0333494f, -0.0834255f, -0.1221875f, 0.6861304f, 0.0521738f, -0.0416543f, -0.4437352f, -19.3246250f, + -0.1520821f, 0.0528602f, -0.6375434f, -0.5803806f, -0.0958465f, -2.0058544f, -0.8282642f, 0.0259000f, + 0.4846996f, 0.1211179f, 0.0356884f, 1.0009497f, 0.0635682f, -0.0314105f, -0.0011147f, 0.0131714f, + -0.3410152f, 0.2798154f, 0.0961889f, 0.1266228f, -0.0934717f, -0.0904307f, 0.1355542f, 0.5722573f}, + {0.2146454f, 0.2143834f, 0.1290650f, -0.9063646f, 0.2100945f, 0.1331054f, -0.2620614f, -0.1264993f, + 0.1313979f, 0.0455465f, -0.8395286f, -0.4967833f, -0.0538581f, 0.9155380f, 0.6627046f, 0.1691243f, + 0.9887002f, -0.1597013f, -0.1236713f, -1.9041336f, 0.0427585f, 0.0849747f, -5.2559652f, -0.3133100f, + 0.0141170f, -0.1635530f, 0.4938746f, 0.0162943f, 0.2107756f, -0.3413893f, -0.0657575f, 1.0542560f}, + {-2.8868380f, -2.0837426f, -1.0611480f, -0.6143807f, -0.6398501f, -2.8018746f, 0.5166737f, -1.0814301f, + -1.9272422f, -0.1017482f, -0.4651161f, -1.4021232f, 1.8854499f, 0.1815407f, 0.5965426f, -2.3344259f, + -0.0690846f, -0.1678239f, -0.4219488f, 0.6215640f, 1.0270095f, -0.3473049f, -0.3926674f, -0.7942593f, + 1.1305071f, -1.4621233f, -0.8051161f, -0.7698632f, -2.6038630f, -0.3090037f, -1.6365144f, -1.0179478f}, + {0.0046026f, 1.1319581f, -2.6405678f, -2.0353596f, -2.1687336f, 0.3364883f, 2.1122196f, 0.2584647f, + -2.4344857f, -0.0378498f, 0.6158544f, -0.6060749f, -4.9598379f, 0.1570698f, 2.2436838f, -2.6198347f, + -2.0935996f, -0.1845744f, -0.0716080f, -1.9338604f, -4.1995640f, -3.6706774f, -1.6762524f, 3.9646862f, + -0.9677961f, 1.8319578f, -3.1916575f, 
3.7312632f, 0.0820446f, -0.0497568f, -0.0898171f, -0.2499462f}, + {-0.0780375f, -0.0286571f, 0.1007227f, 0.0012229f, -0.0531285f, 0.0840718f, 0.1013894f, 0.1312424f, + -0.0673772f, 0.1603183f, 0.0074385f, -0.0718321f, -0.1549873f, 0.1616689f, 0.0405887f, -0.1558588f, + 0.0740745f, 0.1696893f, -0.0064026f, -0.1656420f, -0.1186674f, -0.1262667f, -0.0784757f, -0.1280154f, + 0.0909976f, 0.0853046f, -0.1075811f, 0.1310615f, 0.0610194f, 0.0647223f, 0.1360559f, 0.0440074f}, + {-0.2106480f, 0.0087131f, 0.1119385f, -1.0611318f, 0.5250220f, 0.0525479f, -0.2733742f, -1.0799565f, + -0.5601607f, -0.0651806f, -1.9793440f, -0.3373334f, -0.1550518f, 0.8932216f, 0.7264332f, -0.0450735f, + 1.2373760f, -0.1236272f, 0.0680048f, -3.0446634f, -0.1533586f, -0.0127355f, -0.3326311f, -0.0225603f, + -0.2265739f, -2.3752897f, -0.3771705f, -0.0728938f, 0.1741305f, 0.1111639f, 0.4131119f, 0.2239323f}, + {-2.5691276f, -1.4011253f, -2.0640867f, -3.7236946f, 1.5542637f, -0.9456654f, -1.7575809f, 3.6794879f, + -0.4439790f, -0.1009826f, 3.6702275f, -0.1935008f, -0.4423219f, -0.3825364f, -0.4784791f, 0.5927492f, + -2.3482494f, 0.0801714f, -0.1567418f, -1.7934613f, -0.1706410f, -0.6326947f, 0.6260155f, 0.3631033f, + -0.9325932f, 1.9647995f, -1.3409088f, 1.3501998f, 0.0367797f, -0.1744210f, 1.8690013f, -1.0737898f}, + {-0.5934777f, 0.6232591f, -0.3391055f, 0.2640936f, -0.2824444f, 0.4815128f, 0.6625078f, -0.1103976f, + 0.9555223f, -0.0624896f, -0.6778919f, 0.1181502f, -0.5425385f, 0.7297349f, -1.7261271f, -0.2917557f, + 1.1873137f, -0.2725933f, 0.0975242f, 1.7756181f, -0.5735835f, -0.4453230f, 0.9800369f, 0.9344145f, + -1.8692539f, 0.0120440f, -0.7315661f, 0.6250805f, 0.3839143f, -0.0376306f, 0.3816243f, 0.6059195f}, + {0.5522162f, -1.8043815f, -10.9379101f, 0.5719097f, -0.2246755f, -1.4856353f, 0.4877502f, 0.7163438f, + -11.8135147f, -0.0180790f, -0.9928634f, 0.1107815f, -0.0005064f, -0.3824990f, -0.7453306f, -1.9909632f, + -7.4362645f, -0.0245507f, -0.1815712f, -3.5507584f, -0.0075889f, 
-11.0296011f, -1.1292133f, -0.0710276f, + 0.5675677f, 0.2017778f, -0.0684891f, -0.0367653f, -1.6674192f, 0.0281711f, -0.8356591f, -0.0447807f}, + {0.2537312f, -3.0178010f, -0.3493635f, 1.8573236f, 0.4017631f, 0.9912633f, -0.8625028f, -0.7783228f, + -1.7815375f, -0.1204695f, 1.8551122f, 0.3344182f, -0.2828701f, -1.3226960f, -1.4470471f, 0.2895959f, + 0.6780876f, -0.2010069f, 0.0425280f, -2.1786852f, -0.1274053f, -0.2549899f, -0.2233993f, -0.1561645f, + -0.4640818f, 0.6375850f, 0.7733670f, -0.2388286f, 1.0447853f, -0.1503223f, 0.3823584f, -13.8176088f}, + {0.2575197f, -2.2127593f, -0.0389457f, -0.0215759f, 0.1659477f, -0.0097748f, -0.1935415f, -0.9091369f, + -0.1453371f, 0.0442428f, -0.1206519f, 0.1435609f, -0.0186047f, -5.0154042f, 0.0538177f, 0.0403250f, + 0.0240955f, 0.0331080f, 0.0517951f, 0.7422639f, 0.0069818f, 0.0248351f, -0.2205741f, -0.0082387f, + 0.2043269f, 0.0459435f, 0.0876343f, 0.0140607f, 0.1056308f, 0.0062555f, 0.0184278f, -0.5539715f}, + {-0.0398742f, 0.1075264f, 0.1725024f, -0.0755192f, -0.0360048f, 0.1325573f, 0.0903103f, -0.0882263f, + 0.1207692f, 0.0032722f, 0.0048489f, -0.1257241f, 0.1450990f, -0.0713558f, 0.1116815f, 0.1107689f, + -0.1447252f, 0.1581838f, -0.0160124f, -0.0425587f, 0.1411217f, 0.0865060f, -0.0643460f, -0.0431262f, + -0.1452804f, -0.0195101f, 0.1234572f, 0.0520887f, 0.1117576f, -0.0751791f, 0.1511539f, 0.1224861f}, + {0.7728126f, 2.3075340f, -0.0385258f, -3.1270287f, 0.9414487f, 3.5251477f, -0.8043440f, 0.7212446f, + -7.6850162f, -0.1609414f, -3.7687578f, -1.0751100f, -0.2052089f, 5.0728245f, 2.2835267f, 0.5930225f, + 0.1303335f, -0.1428799f, -0.3715075f, 0.5136011f, -0.4755619f, -0.2192461f, -3.8696294f, -0.0062392f, + -1.3774812f, -0.0034140f, -1.5944362f, 0.9773729f, 3.2859125f, -0.1616932f, -1.2785367f, -13.5732412f}, + {0.5535743f, 0.1461481f, -0.2218016f, -0.2971808f, -0.2169309f, 0.1564545f, -0.0390397f, 1.1558976f, + -0.0119933f, -0.0774637f, 1.1907971f, -0.5127968f, -0.0066028f, -1.6794037f, -0.3650940f, 0.2555613f, + 
-0.9488379f, 0.0449603f, -0.1620417f, 0.1583214f, 0.0000908f, 0.0152763f, -1.0660053f, -0.0139402f, + -1.7440189f, 0.2515209f, 0.3333162f, 0.1904725f, 0.1116094f, -0.2287960f, -0.0007165f, -1.7047704f}, + {-5.9897852f, -0.1316296f, -0.0218074f, -0.4602887f, 0.3288545f, -0.0882939f, -0.5929499f, 0.4294790f, + -0.0383545f, 0.0556869f, 0.1975944f, 0.1341491f, 0.0629570f, -2.2742157f, 0.0175826f, -0.1439869f, + -24.8701649f, -0.1582915f, -0.2460304f, -3.9643264f, 0.0863483f, 0.0180861f, -0.2210452f, -0.0868723f, + -0.4175525f, -0.8231756f, 0.0247534f, -0.1473545f, -0.0021330f, -0.0410253f, -1.1944869f, -1.1523768f}, + {0.1031547f, -3.3402514f, -4.3636522f, -0.1534714f, -0.0622189f, 0.0374694f, -0.0870097f, -4.1865788f, + -0.0555377f, 0.0252329f, 0.1339467f, 0.0461691f, -0.0503090f, 0.0289890f, -0.0095674f, -0.3289992f, + -0.0279080f, 0.0274977f, -0.0903500f, 0.5610157f, -0.0478177f, 0.4346960f, 0.4822784f, -0.1058945f, + -0.2026870f, -0.0560638f, 0.0910069f, -0.0818529f, 0.0819198f, -0.0292193f, 0.3040628f, -0.1275230f}, + {-5.8789845f, -17.1114635f, -4.6755161f, 0.1016624f, -0.8685016f, -0.3898779f, -2.3363957f, 0.1413794f, + -2.4254086f, -0.2171030f, -0.0901150f, 0.7058705f, 0.4166250f, -0.0231085f, -0.1789686f, -9.4244318f, + -0.6418229f, -0.0857969f, 0.1683681f, -0.0310597f, -0.0247807f, -5.3748040f, -7.4730940f, 0.1019564f, + -1.2126822f, -0.3726285f, -1.0287101f, 0.1803891f, -0.2227769f, -0.0791530f, -0.0159770f, -1.4883354f}, + {-17.9394970f, -0.5228514f, -11.3547935f, -0.0672671f, -2.0371394f, -0.9076943f, 2.4331825f, -6.9409127f, + 0.8286008f, 0.0208618f, -0.8009814f, 1.2268484f, 0.1943726f, -1.7297083f, -0.7668949f, -6.5505466f, + -0.6495168f, -0.0404727f, -0.1260914f, -3.5029383f, -0.0852898f, -2.9679556f, 1.6404767f, -0.0251449f, + 1.1460075f, -0.7877688f, -0.0586593f, -0.4741839f, -1.7420560f, 0.0295600f, -2.3574052f, 0.0974777f}, + {0.4443443f, 0.6384261f, 1.3317494f, -1.0085982f, 0.9508762f, 1.3168396f, -0.1862490f, -0.1801148f, + 1.1106120f, 
-0.0654911f, 0.1186706f, -0.7198273f, 0.5449172f, -0.5886080f, 0.7504217f, 1.8046317f, + -0.1294390f, -0.1939137f, -0.2383934f, 0.4131435f, 0.6910310f, 1.2821866f, -0.1088722f, -0.5660405f, + -0.1188610f, 0.0364403f, 0.3597929f, -0.6409024f, 1.2114668f, -0.0212278f, 0.8423592f, 0.4848156f}, + {-0.8772649f, -13.5265112f, -4.5540547f, -0.2856667f, 0.7604876f, -0.6829260f, -0.8320626f, 0.6541347f, + 0.4020181f, 0.0009324f, -10.9660740f, -0.3540186f, -0.2316812f, 0.3576394f, 0.0998953f, -1.5738430f, + 1.2089975f, 0.0706465f, -0.2538019f, 0.7016497f, -0.0282650f, -3.1291001f, -0.4375663f, -0.3979468f, + -0.1588882f, 0.3978875f, 0.2038192f, -0.4281644f, -0.5787544f, -0.0922198f, 0.9595569f, 0.0212818f}, + {0.3392667f, 0.1170919f, -0.0705636f, -0.1025443f, -0.1192213f, -0.0495686f, 0.0284667f, -0.1226804f, + 0.0050191f, -0.0516545f, -1.0892097f, 0.0033689f, 0.0471462f, 1.4266804f, 0.0288870f, -0.0110408f, + -1.1283765f, -0.1299917f, -0.4318301f, -0.9854419f, -0.0190479f, -0.0269406f, 0.3697925f, -0.0757695f, + -0.3632923f, -0.1714077f, 0.0669245f, 0.0557428f, -0.1713906f, -0.4307863f, -0.1749060f, -2.1246362f}, + {0.8383662f, -3.8122442f, 0.1568939f, -2.2105119f, -0.7086993f, -0.4664145f, -0.3578597f, 0.5554636f, + 0.6965880f, -0.1506968f, 0.2646832f, 0.2874083f, 0.1901203f, -2.4997077f, -0.3519035f, -0.0518054f, + 1.0862818f, -0.2502540f, -0.3133347f, -0.7411230f, 0.1268138f, 0.1069811f, -0.8109779f, 0.0264679f, + 0.1604289f, -0.7534032f, -0.1419461f, 0.0688303f, -0.1570919f, -0.3055144f, -0.7415189f, 2.5547018f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_4[1] = {1.4616280f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_4[32][1] = { + {0.0609813f}, {0.0685224f}, {0.1655236f}, {-0.0599842f}, {0.0669006f}, {-0.1817371f}, {-0.0539167f}, + {-0.0737955f}, {0.0654664f}, {0.0302955f}, {-0.0586768f}, {0.0717433f}, {0.1472274f}, {-0.0610073f}, + {-0.0601061f}, {0.2086218f}, {-0.0545418f}, {-0.0388369f}, {-0.0613536f}, {-0.1141072f}, {-0.2289097f}, + {-0.3354485f}, 
{0.0831025f}, {0.1333673f}, {0.0490410f}, {0.0484894f}, {0.0436755f}, {-0.1479877f}, + {0.1540713f}, {0.0021261f}, {-0.0845848f}, {-0.0564973f}, + }; + + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h new file mode 100644 index 0000000000000..08feb0dfe3384 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -0,0 +1,818 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h +#define RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +#include "Hit.h" +#include "PixelTriplet.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + QuintupletsConst quintuplets, + PixelQuintuplets pixelQuintuplets, + unsigned int pixelIndex, + unsigned int t5Index, + unsigned int pixelQuintupletIndex, + float rzChiSquared, + float rPhiChiSquared, + float rPhiChiSquaredInwards, + float score, + float eta, + float phi, + float pixelRadius, + float quintupletRadius, + float centerX, + float centerY) { + pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex] = pixelIndex; + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex] = t5Index; + pixelQuintuplets.isDup()[pixelQuintupletIndex] = false; + pixelQuintuplets.score()[pixelQuintupletIndex] = __F2H(score); + pixelQuintuplets.eta()[pixelQuintupletIndex] = __F2H(eta); 
+ pixelQuintuplets.phi()[pixelQuintupletIndex] = __F2H(phi); + + pixelQuintuplets.pixelRadius()[pixelQuintupletIndex] = __F2H(pixelRadius); + pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex] = __F2H(quintupletRadius); + pixelQuintuplets.centerX()[pixelQuintupletIndex] = __F2H(centerX); + pixelQuintuplets.centerY()[pixelQuintupletIndex] = __F2H(centerY); + + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][0] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][1] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][2] = quintuplets.logicalLayers()[t5Index][0]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][3] = quintuplets.logicalLayers()[t5Index][1]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][4] = quintuplets.logicalLayers()[t5Index][2]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][5] = quintuplets.logicalLayers()[t5Index][3]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][6] = quintuplets.logicalLayers()[t5Index][4]; + + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][0] = segments.innerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][1] = segments.outerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][2] = quintuplets.lowerModuleIndices()[t5Index][0]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][3] = quintuplets.lowerModuleIndices()[t5Index][1]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][4] = quintuplets.lowerModuleIndices()[t5Index][2]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][5] = quintuplets.lowerModuleIndices()[t5Index][3]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][6] = quintuplets.lowerModuleIndices()[t5Index][4]; + + unsigned int pixelInnerMD = segments.mdIndices()[pixelIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelIndex][1]; + + 
pixelQuintuplets.hitIndices()[pixelQuintupletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][4] = quintuplets.hitIndices()[t5Index][0]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][5] = quintuplets.hitIndices()[t5Index][1]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][6] = quintuplets.hitIndices()[t5Index][2]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][7] = quintuplets.hitIndices()[t5Index][3]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][8] = quintuplets.hitIndices()[t5Index][4]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][9] = quintuplets.hitIndices()[t5Index][5]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][10] = quintuplets.hitIndices()[t5Index][6]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][11] = quintuplets.hitIndices()[t5Index][7]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][12] = quintuplets.hitIndices()[t5Index][8]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][13] = quintuplets.hitIndices()[t5Index][9]; + + pixelQuintuplets.rzChiSquared()[pixelQuintupletIndex] = rzChiSquared; + pixelQuintuplets.rPhiChiSquared()[pixelQuintupletIndex] = rPhiChiSquared; + pixelQuintuplets.rPhiChiSquaredInwards()[pixelQuintupletIndex] = rPhiChiSquaredInwards; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rzChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * 
(modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + const int layer4 = + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); + const int layer5 = + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rzChiSquared < 451.141f; + } else if (layer4 == 4 and layer5 == 12) { + return rzChiSquared < 392.654f; + } else if (layer4 == 4 and layer5 == 5) { + return rzChiSquared < 225.322f; + } else if (layer4 == 7 and layer5 == 13) { + return rzChiSquared < 595.546f; + } else if (layer4 == 7 and layer5 == 8) { + return rzChiSquared < 196.111f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rzChiSquared < 297.446f; + } else if (layer4 == 8 and layer5 == 14) { + return rzChiSquared < 451.141f; + } else if (layer4 == 8 and layer5 == 9) { + return rzChiSquared < 518.339f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rzChiSquared < 341.75f; + } else if (layer4 == 9 and layer5 == 15) { + return rzChiSquared < 341.75f; + } + } else if (layer1 == 2 and layer2 == 3 and 
layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rzChiSquared < 392.655f; + } else if (layer4 == 5 and layer5 == 12) { + return rzChiSquared < 341.75f; + } else if (layer4 == 5 and layer5 == 6) { + return rzChiSquared < 112.537f; + } + } else if (layer1 == 2 and layer2 == 3 and layer4 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rzChiSquared < 595.545f; + } else if (layer4 == 8 and layer5 == 14) { + return rzChiSquared < 74.198f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 14 and layer5 == 15) { + return rzChiSquared < 518.339f; + } else if (layer4 == 9 and layer5 == 10) { + return rzChiSquared < 8.046f; + } else if (layer4 == 9 and layer5 == 15) { + return rzChiSquared < 451.141f; + } + } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) { + return rzChiSquared < 56.207f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + if (layer4 == 10 and layer5 == 11) { + return rzChiSquared < 64.578f; + } else if (layer4 == 10 and layer5 == 16) { + return rzChiSquared < 85.250f; + } else if (layer4 == 15 and layer5 == 16) { + return rzChiSquared < 85.250f; + } + } + return true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rPhiChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + modules.layers()[lowerModuleIndex3] + 6 * 
(modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + const int layer4 = + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); + const int layer5 = + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 48.921f; + } else if (layer4 == 4 and layer5 == 12) { + return rPhiChiSquared < 97.948f; + } else if (layer4 == 4 and layer5 == 5) { + return rPhiChiSquared < 129.3f; + } else if (layer4 == 7 and layer5 == 13) { + return rPhiChiSquared < 56.21f; + } else if (layer4 == 7 and layer5 == 8) { + return rPhiChiSquared < 74.198f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 21.265f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 37.058f; + } else if (layer4 == 8 and layer5 == 9) { + return rPhiChiSquared < 42.578f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 32.253f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 37.058f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 97.947f; + } else if (layer4 == 5 and layer5 == 12) { + return rPhiChiSquared < 129.3f; + } else if (layer4 == 5 and layer5 == 6) { + return rPhiChiSquared < 170.68f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 48.92f; 
+ } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 74.2f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 42.58f; + } else if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 37.06f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 48.92f; + } + } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 85.25f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + if (layer4 == 10 and layer5 == 11) { + return rPhiChiSquared < 42.58f; + } else if (layer4 == 10 and layer5 == 16) { + return rPhiChiSquared < 37.06f; + } else if (layer4 == 15 and layer5 == 16) { + return rPhiChiSquared < 37.06f; + } + } + return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT5(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float g, + float f, + float radius) { + /* + Given values of (g, f, radius) and a set of points (and its uncertainties) compute chi squared + */ + float c = g * g + f * f - radius * radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma2; + for (size_t i = 0; i < nPoints; i++) { + absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ? 
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) + : kPi / 2.f); + if (xs[i] > 0 and ys[i] > 0) { + angleM = kPi / 2.f - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + kPi / 2.f; + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + kPi / 2.f); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(kPi / 2.f - absArctanSlope); + } else { + angleM = 0; + } + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / (sigma2); + } + return chiSquared; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc, + ModulesConst modules, + const uint16_t* lowerModuleIndices, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + unsigned int nPoints = 5, + bool anchorHits = true) { + /* + bool anchorHits required to deal with a weird edge case wherein + the hits ultimately used in the regression are anchor hits, but the + lower modules need not all be Pixel Modules (in case of PS). Similarly, + when we compute the chi squared for the non-anchor hits, the "partner module" + need not always be a PS strip module, but all non-anchor hits sit on strip + modules. 
+ */ + ModuleType moduleType; + short moduleSubdet, moduleSide; + float inv1 = kWidthPS / kWidth2S; + float inv2 = kPixelPSZpitch / kWidth2S; + float inv3 = kStripPSZpitch / kWidth2S; + for (size_t i = 0; i < nPoints; i++) { + moduleType = modules.moduleType()[lowerModuleIndices[i]]; + moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + moduleSide = modules.sides()[lowerModuleIndices[i]]; + const float& drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 1.f; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + + if (anchorHits) { + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } else { + delta2[i] = (inv3 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + /* + despite the type of the module layer of the lower module index, + all anchor hits are on the pixel side and all non-anchor hits are + on the strip side! + */ + if (anchorHits) { + delta2[i] = inv2; + } else { + delta2[i] = inv3; + } + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 500.f * inv1; + isFlat[i] = false; + } +#ifdef WARNINGS + else { + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! 
subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); + } +#endif + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, + ModulesConst modules, + uint16_t* lowerModuleIndices, + float g, + float f, + float radius, + float* xs, + float* ys) { + /* + Compute circle parameters from 3 pixel hits, and then use them to compute the chi squared for the outer hits + */ + + float delta1[5], delta2[5], slopes[5]; + bool isFlat[5]; + float chiSquared = 0; + + computeSigmasForRegression_pT5(acc, modules, lowerModuleIndices, delta1, delta2, slopes, isFlat); + chiSquared = computeChiSquaredpT5(acc, 5, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); + + return chiSquared; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquaredInwards( + float g, float f, float r, float* xPix, float* yPix) { + /* + Using the computed regression center and radius, compute the chi squared for the pixels + */ + + float chiSquared = 0; + for (size_t i = 0; i < 2; i++) { + float residual = (xPix[i] - g) * (xPix[i] - g) + (yPix[i] - f) * (yPix[i] - f) - r * r; + chiSquared += residual * residual; + } + chiSquared *= 0.5f; + return chiSquared; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rPhiChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + 
modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + const int layer4 = + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); + const int layer5 = + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 451.141f; + } else if (layer4 == 4 and layer5 == 12) { + return rPhiChiSquared < 786.173f; + } else if (layer4 == 4 and layer5 == 5) { + return rPhiChiSquared < 595.545f; + } else if (layer4 == 7 and layer5 == 13) { + return rPhiChiSquared < 581.339f; + } else if (layer4 == 7 and layer5 == 8) { + return rPhiChiSquared < 112.537f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 225.322f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 1192.402f; + } else if (layer4 == 8 and layer5 == 9) { + return rPhiChiSquared < 786.173f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 1037.817f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 1808.536f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 684.253f; + } else if (layer4 == 5 and layer5 == 12) { + return rPhiChiSquared < 684.253f; + } else if (layer4 == 5 and layer5 == 6) { + return rPhiChiSquared < 684.253f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 
== 13 and layer5 == 14) { + return rPhiChiSquared < 451.141f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 518.34f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 2077.92f; + } else if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 74.20f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 1808.536f; + } + } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) { + return rPhiChiSquared < 786.173f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + if (layer4 == 10 and layer5 == 11) { + return rPhiChiSquared < 1574.076f; + } else if (layer4 == 10 and layer5 == 16) { + return rPhiChiSquared < 5492.11f; + } else if (layer4 == 15 and layer5 == 16) { + return rPhiChiSquared < 2743.037f; + } + } + return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc, + ModulesConst modules, + uint16_t* lowerModuleIndices, + float* rtPix, + float* zPix, + float* rts, + float* zs) { + //use the two anchor hits of the pixel segment to compute the slope + //then compute the pseudo chi squared of the five outer hits + + float slope = (zPix[1] - zPix[0]) / (rtPix[1] - rtPix[0]); + float residual = 0; + float error2 = 0; + //hardcoded array indices!!! + float RMSE = 0; + for (size_t i = 0; i < Params_T5::kLayers; i++) { + uint16_t& lowerModuleIndex = lowerModuleIndices[i]; + const int moduleType = modules.moduleType()[lowerModuleIndex]; + const int moduleSide = modules.sides()[lowerModuleIndex]; + const int moduleSubdet = modules.subdets()[lowerModuleIndex]; + + residual = (moduleSubdet == Barrel) ? 
(zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0]) + : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope; + const float& drdz = modules.drdzs()[lowerModuleIndex]; + //PS Modules + if (moduleType == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //special dispensation to tilted PS modules! + if (moduleType == 0 and moduleSubdet == Barrel and moduleSide != Center) { + error2 /= (1.f + drdz * drdz); + } + RMSE += (residual * residual) / error2; + } + + RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. + return RMSE; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + TripletsConst triplets, + QuintupletsConst quintuplets, + unsigned int pixelSegmentIndex, + unsigned int quintupletIndex, + float& rzChiSquared, + float& rPhiChiSquared, + float& rPhiChiSquaredInwards, + float& pixelRadius, + float& quintupletRadius, + float& centerX, + float& centerY, + unsigned int pixelSegmentArrayIndex) { + unsigned int t5InnerT3Index = quintuplets.tripletIndices()[quintupletIndex][0]; + unsigned int t5OuterT3Index = quintuplets.tripletIndices()[quintupletIndex][1]; + + float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp, + centerYTemp; + + if (not runPixelTripletDefaultAlgo(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + triplets, + pixelSegmentIndex, + t5InnerT3Index, + pixelRadiusTemp, + tripletRadius, + centerXTemp, + centerYTemp, + rzChiSquaredTemp, + rPhiChiSquaredTemp, + rPhiChiSquaredInwardsTemp, + false)) + return false; + + unsigned int firstSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][0]; + unsigned int secondSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][1]; + unsigned int 
thirdSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][1]; + + unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; + + uint16_t lowerModuleIndex1 = quintuplets.lowerModuleIndices()[quintupletIndex][0]; + uint16_t lowerModuleIndex2 = quintuplets.lowerModuleIndices()[quintupletIndex][1]; + uint16_t lowerModuleIndex3 = quintuplets.lowerModuleIndices()[quintupletIndex][2]; + uint16_t lowerModuleIndex4 = quintuplets.lowerModuleIndices()[quintupletIndex][3]; + uint16_t lowerModuleIndex5 = quintuplets.lowerModuleIndices()[quintupletIndex][4]; + + uint16_t lowerModuleIndices[Params_T5::kLayers] = { + lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; + + float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; + float zs[Params_T5::kLayers] = {mds.anchorZ()[firstMDIndex], + mds.anchorZ()[secondMDIndex], + mds.anchorZ()[thirdMDIndex], + mds.anchorZ()[fourthMDIndex], + mds.anchorZ()[fifthMDIndex]}; + float rts[Params_T5::kLayers] = {mds.anchorRt()[firstMDIndex], + mds.anchorRt()[secondMDIndex], + mds.anchorRt()[thirdMDIndex], + mds.anchorRt()[fourthMDIndex], + mds.anchorRt()[fifthMDIndex]}; + + rzChiSquared = computePT5RZChiSquared(acc, modules, lowerModuleIndices, rtPix, zPix, rts, zs); + + if (/*pixelRadius*/ 0 < 5.0f 
* kR1GeVf) { // FIXME: pixelRadius is not defined yet + if (not passPT5RZChiSquaredCuts(modules, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rzChiSquared)) + return false; + } + + //outer T5 + float xs[Params_T5::kLayers] = {mds.anchorX()[firstMDIndex], + mds.anchorX()[secondMDIndex], + mds.anchorX()[thirdMDIndex], + mds.anchorX()[fourthMDIndex], + mds.anchorX()[fifthMDIndex]}; + float ys[Params_T5::kLayers] = {mds.anchorY()[firstMDIndex], + mds.anchorY()[secondMDIndex], + mds.anchorY()[thirdMDIndex], + mds.anchorY()[fourthMDIndex], + mds.anchorY()[fifthMDIndex]}; + + //get the appropriate radii and centers + centerX = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex]; + centerY = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex]; + pixelRadius = segmentsPixel.circleRadius()[pixelSegmentArrayIndex]; + + float T5CenterX = quintuplets.regressionG()[quintupletIndex]; + float T5CenterY = quintuplets.regressionF()[quintupletIndex]; + quintupletRadius = quintuplets.regressionRadius()[quintupletIndex]; + + rPhiChiSquared = computePT5RPhiChiSquared(acc, modules, lowerModuleIndices, centerX, centerY, pixelRadius, xs, ys); + + if (pixelRadius < 5.0f * kR1GeVf) { + if (not passPT5RPhiChiSquaredCuts(modules, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rPhiChiSquared)) + return false; + } + + float xPix[] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; + float yPix[] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; + rPhiChiSquaredInwards = computePT5RPhiChiSquaredInwards(T5CenterX, T5CenterY, quintupletRadius, xPix, yPix); + + if (quintuplets.regressionRadius()[quintupletIndex] < 5.0f * kR1GeVf) { + if (not passPT5RPhiChiSquaredInwardsCuts(modules, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rPhiChiSquaredInwards)) + return false; + } + 
//trusting the T5 regression center to also be a good estimate.. + centerX = (centerX + T5CenterX) / 2; + centerY = (centerY + T5CenterY) / 2; + + return true; + } + + struct CreatePixelQuintupletsFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ModulesPixelConst modulesPixel, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixel segmentsPixel, + Triplets triplets, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + PixelQuintuplets pixelQuintuplets, + unsigned int* connectedPixelSize, + unsigned int* connectedPixelIndex, + unsigned int nPixelSegments, + ObjectRangesConst ranges) const { + auto const globalBlockIdx = alpaka::getIdx(acc); + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridBlockExtent = alpaka::getWorkDiv(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int i_pLS = globalThreadIdx[1]; i_pLS < nPixelSegments; i_pLS += gridThreadExtent[1]) { + auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS]; + for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max; + iLSModule += gridBlockExtent[0]) { + //these are actual module indices + uint16_t quintupletLowerModuleIndex = modulesPixel.connectedPixels()[iLSModule]; + if (quintupletLowerModuleIndex >= modules.nLowerModules()) + continue; + if (modules.moduleType()[quintupletLowerModuleIndex] == TwoS) + continue; + uint16_t pixelModuleIndex = modules.nLowerModules(); + if (segmentsPixel.isDup()[i_pLS]) + continue; + unsigned int nOuterQuintuplets = quintupletsOccupancy.nQuintuplets()[quintupletLowerModuleIndex]; + + if (nOuterQuintuplets == 0) + continue; + + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; + + //fetch the quintuplet + for (unsigned int outerQuintupletArrayIndex = globalThreadIdx[2]; + outerQuintupletArrayIndex < nOuterQuintuplets; + 
outerQuintupletArrayIndex += gridThreadExtent[2]) { + unsigned int quintupletIndex = + ranges.quintupletModuleIndices()[quintupletLowerModuleIndex] + outerQuintupletArrayIndex; + + if (quintuplets.isDup()[quintupletIndex]) + continue; + + float rzChiSquared, rPhiChiSquared, rPhiChiSquaredInwards, pixelRadius, quintupletRadius, centerX, centerY; + + bool success = runPixelQuintupletDefaultAlgo(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + triplets, + quintuplets, + pixelSegmentIndex, + quintupletIndex, + rzChiSquared, + rPhiChiSquared, + rPhiChiSquaredInwards, + pixelRadius, + quintupletRadius, + centerX, + centerY, + static_cast(i_pLS)); + if (success) { + unsigned int totOccupancyPixelQuintuplets = alpaka::atomicAdd( + acc, &pixelQuintuplets.totOccupancyPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyPixelQuintuplets >= n_max_pixel_quintuplets) { +#ifdef WARNINGS + printf("Pixel Quintuplet excess alert!\n"); +#endif + } else { + unsigned int pixelQuintupletIndex = + alpaka::atomicAdd(acc, &pixelQuintuplets.nPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); + float eta = __H2F(quintuplets.eta()[quintupletIndex]); + float phi = __H2F(quintuplets.phi()[quintupletIndex]); + + addPixelQuintupletToMemory(modules, + mds, + segments, + quintuplets, + pixelQuintuplets, + pixelSegmentIndex, + quintupletIndex, + pixelQuintupletIndex, + rzChiSquared, + rPhiChiSquared, + rPhiChiSquaredInwards, + rPhiChiSquared, + eta, + phi, + pixelRadius, + quintupletRadius, + centerX, + centerY); + + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; + segmentsPixel.partOfPT5()[i_pLS] = true; + quintuplets.partOfPT5()[quintupletIndex] = true; + } // tot occupancy + } // end success + } // end T5 + } // end iLS + } // end i_pLS + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git 
a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h new file mode 100644 index 0000000000000..a8be90fff5227 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -0,0 +1,1587 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_PixelTriplet_h +#define RecoTracker_LSTCore_src_alpaka_PixelTriplet_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex); + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex); + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoubletsConst 
mds, + SegmentsConst segments, + TripletsConst triplets, + PixelTriplets pixelTriplets, + unsigned int pixelSegmentIndex, + unsigned int tripletIndex, + float pixelRadius, + float tripletRadius, + float centerX, + float centerY, + float rPhiChiSquared, + float rPhiChiSquaredInwards, + float rzChiSquared, + unsigned int pixelTripletIndex, + float pt, + float eta, + float phi, + float eta_pix, + float phi_pix, + float score) { + pixelTriplets.pixelSegmentIndices()[pixelTripletIndex] = pixelSegmentIndex; + pixelTriplets.tripletIndices()[pixelTripletIndex] = tripletIndex; + pixelTriplets.pixelRadius()[pixelTripletIndex] = __F2H(pixelRadius); + pixelTriplets.tripletRadius()[pixelTripletIndex] = __F2H(tripletRadius); + pixelTriplets.pt()[pixelTripletIndex] = __F2H(pt); + pixelTriplets.eta()[pixelTripletIndex] = __F2H(eta); + pixelTriplets.phi()[pixelTripletIndex] = __F2H(phi); + pixelTriplets.eta_pix()[pixelTripletIndex] = __F2H(eta_pix); + pixelTriplets.phi_pix()[pixelTripletIndex] = __F2H(phi_pix); + pixelTriplets.isDup()[pixelTripletIndex] = false; + pixelTriplets.score()[pixelTripletIndex] = __F2H(score); + + pixelTriplets.centerX()[pixelTripletIndex] = __F2H(centerX); + pixelTriplets.centerY()[pixelTripletIndex] = __F2H(centerY); + pixelTriplets.logicalLayers()[pixelTripletIndex][0] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][1] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][2] = triplets.logicalLayers()[tripletIndex][0]; + pixelTriplets.logicalLayers()[pixelTripletIndex][3] = triplets.logicalLayers()[tripletIndex][1]; + pixelTriplets.logicalLayers()[pixelTripletIndex][4] = triplets.logicalLayers()[tripletIndex][2]; + + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][0] = segments.innerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][1] = segments.outerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][2] = 
triplets.lowerModuleIndices()[tripletIndex][0]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][3] = triplets.lowerModuleIndices()[tripletIndex][1]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][4] = triplets.lowerModuleIndices()[tripletIndex][2]; + + unsigned int pixelInnerMD = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelSegmentIndex][1]; + + pixelTriplets.hitIndices()[pixelTripletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelTriplets.hitIndices()[pixelTripletIndex][4] = triplets.hitIndices()[tripletIndex][0]; + pixelTriplets.hitIndices()[pixelTripletIndex][5] = triplets.hitIndices()[tripletIndex][1]; + pixelTriplets.hitIndices()[pixelTripletIndex][6] = triplets.hitIndices()[tripletIndex][2]; + pixelTriplets.hitIndices()[pixelTripletIndex][7] = triplets.hitIndices()[tripletIndex][3]; + pixelTriplets.hitIndices()[pixelTripletIndex][8] = triplets.hitIndices()[tripletIndex][4]; + pixelTriplets.hitIndices()[pixelTripletIndex][9] = triplets.hitIndices()[tripletIndex][5]; + pixelTriplets.rPhiChiSquared()[pixelTripletIndex] = rPhiChiSquared; + pixelTriplets.rPhiChiSquaredInwards()[pixelTripletIndex] = rPhiChiSquaredInwards; + pixelTriplets.rzChiSquared()[pixelTripletIndex] = rzChiSquared; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex) { + short 
outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; + + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; + + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int fourthMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + + if (outerInnerLowerModuleSubdet == Barrel and + (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) { + return runTripletDefaultAlgoPPBB(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + pixelLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runTripletDefaultAlgoPPEE(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + pixelLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } + return false; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + float rzChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + 
modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + + if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return rzChiSquared < 13.6067f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) { + return rzChiSquared < 5.5953f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return rzChiSquared < 3.9263f; + } + /* + else if(layer1 == 7 and layer2 == 8 and layer3 == 14) + { + // PS+PS+2S in endcap layers 1+2+3, which is not really feasible in the current geometry, + // without skipping barrel layers 1 and 2 (not allowed by algorithm logic). + } + */ + else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return rzChiSquared < 9.4377f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return rzChiSquared < 9.9975f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return rzChiSquared < 8.6369f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return rzChiSquared < 37.945f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) { + return rzChiSquared < 43.0167f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return rzChiSquared < 8.6923f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return rzChiSquared < 11.9672f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 13) { + return rzChiSquared < 16.2133f; + } + + //default - category not found! 
+    return true;
+  }
+
+  // Weighted chi-squared of `nPoints` hits (xs, ys) against the circle of centre (g, f) and radius `radius`.
+  // Each term is the squared algebraic residual x^2 + y^2 - 2*g*x - 2*f*y + c, with c = g^2 + f^2 - radius^2,
+  // divided by sigma2 = 4 * ((x' * delta1)^2 + (y' * delta2)^2). For hits with isFlat[i] == false, (x', y') are
+  // the hit coordinates in axes rotated by angleM (built from the hit quadrant and |atan(slope)|); flat hits use
+  // (x, y) unchanged. There is no guard against sigma2 == 0.
+  // NOTE(review): the parameter list after `template` looks stripped in this copy - presumably <typename TAcc>; confirm.
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT3(TAcc const& acc,
+                                                            unsigned int nPoints,
+                                                            float* xs,
+                                                            float* ys,
+                                                            float* delta1,
+                                                            float* delta2,
+                                                            float* slopes,
+                                                            bool* isFlat,
+                                                            float g,
+                                                            float f,
+                                                            float radius) {
+    //given values of (g, f, radius) and a set of points (and its uncertainties)
+    //compute chi squared
+    float c = g * g + f * f - radius * radius;
+    float chiSquared = 0.f;
+    float absArctanSlope, angleM, xPrime, yPrime, sigma2;
+    for (size_t i = 0; i < nPoints; i++) {
+      // |atan(slope)|, or pi/2 when the slope carries the kVerticalModuleSlope sentinel
+      absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i]))
+                                                            : kPi / 2.f);
+      // pick the rotation angle according to the quadrant of the hit; hits on an axis get angleM = 0
+      if (xs[i] > 0 and ys[i] > 0) {
+        angleM = kPi / 2.f - absArctanSlope;
+      } else if (xs[i] < 0 and ys[i] > 0) {
+        angleM = absArctanSlope + kPi / 2.f;
+      } else if (xs[i] < 0 and ys[i] < 0) {
+        angleM = -(absArctanSlope + kPi / 2.f);
+      } else if (xs[i] > 0 and ys[i] < 0) {
+        angleM = -(kPi / 2.f - absArctanSlope);
+      } else {
+        angleM = 0;
+      }
+
+      if (not isFlat[i]) {
+        xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM);
+        yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM);
+      } else {
+        xPrime = xs[i];
+        yPrime = ys[i];
+      }
+      sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i]));
+      // squared algebraic distance to the circle, normalised by sigma2
+      chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) *
+                    (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2;
+    }
+    return chiSquared;
+  }
+
+  //TODO: merge this one and the pT5 function later into a single function
+  // r-phi chi-squared of the three triplet hits (xs, ys) against the circle (g, f, radius).
+  // For each of the three modules it fills delta1/delta2/isFlat (and slopes) from the module's subdetector, type
+  // and side, then delegates to computeChiSquaredpT3 with nPoints = 3. A module matching none of the five
+  // categories keeps the zero-initialised delta1/delta2/isFlat values.
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquared(TAcc const& acc,
+                                                                ModulesConst modules,
+                                                                uint16_t* lowerModuleIndices,
+                                                                float g,
+                                                                float f,
+                                                                float radius,
+                                                                float* xs,
+                                                                float* ys) {
+    float delta1[3]{}, delta2[3]{}, slopes[3]{};
+    bool isFlat[3]{};
+    float chiSquared = 0;
+    // both factors are ratios to kWidth2S, so 2S barrel modules get delta = 1 below
+    float inv1 = kWidthPS / kWidth2S;
+    float inv2 = kPixelPSZpitch / kWidth2S;
+    for (size_t i = 0; i < 3; i++) {
+      ModuleType moduleType = modules.moduleType()[lowerModuleIndices[i]];
+      short moduleSubdet = modules.subdets()[lowerModuleIndices[i]];
+      short moduleSide = modules.sides()[lowerModuleIndices[i]];
+      float drdz = modules.drdzs()[lowerModuleIndices[i]];
+      slopes[i] = modules.dxdys()[lowerModuleIndices[i]];
+      //category 1 - barrel PS flat
+      if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) {
+        delta1[i] = inv1;
+        delta2[i] = inv1;
+        // NOTE(review): -999 presumably equals kVerticalModuleSlope (defined elsewhere) - confirm
+        slopes[i] = -999;
+        isFlat[i] = true;
+      }
+      //category 2 - barrel 2S
+      else if (moduleSubdet == Barrel and moduleType == TwoS) {
+        delta1[i] = 1;
+        delta2[i] = 1;
+        slopes[i] = -999;
+        isFlat[i] = true;
+      }
+      //category 3 - barrel PS tilted
+      else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) {
+        delta1[i] = inv1;
+        isFlat[i] = false;
+        delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz));
+      }
+      //category 4 - endcap PS
+      else if (moduleSubdet == Endcap and moduleType == PS) {
+        delta1[i] = inv1;
+        isFlat[i] = false;
+
+        /*
+        despite the type of the module layer of the lower module index, all anchor
+        hits are on the pixel side and all non-anchor hits are on the strip side!
+        */
+        delta2[i] = inv2;
+      }
+      //category 5 - endcap 2S
+      else if (moduleSubdet == Endcap and moduleType == TwoS) {
+        delta1[i] = 1;
+        delta2[i] = 500 * inv1;
+        isFlat[i] = false;
+      }
+#ifdef WARNINGS
+      else {
+        printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! subdet = %d, type = %d, side = %d\n",
+               moduleSubdet,
+               moduleType,
+               moduleSide);
+      }
+#endif
+    }
+    chiSquared = computeChiSquaredpT3(acc, 3, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius);
+
+    return chiSquared;
+  }
+
+  // Chi-squared of the two pixel hits (xPix[0..1], yPix[0..1]) against the circle of centre (g, f) and radius r:
+  // the mean of the two squared residuals (x - g)^2 + (y - f)^2 - r^2. No per-hit uncertainty is applied.
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquaredInwards(
+      float g, float f, float r, float* xPix, float* yPix) {
+    float residual = (xPix[0] - g) * (xPix[0] - g) + (yPix[0] - f) * (yPix[0] - f) - r * r;
+    float chiSquared = residual * residual;
+    residual = (xPix[1] - g) * (xPix[1] - g) + (yPix[1] - f) * (yPix[1] - f) - r * r;
+    chiSquared += residual * residual;
+
+    chiSquared *= 0.5f;
+    return chiSquared;
+  }
+
+  //90pc threshold
+  // Upper cut on the r-phi chi-squared, chosen per combination of the three module "layer codes".
+  // A layer code is the module's layer number, +6 if the module is in the Endcap, and a further +5 if it is an
+  // Endcap TwoS module. Combinations that have no entry in the table are accepted (final `return true`).
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(ModulesConst modules,
+                                                                uint16_t lowerModuleIndex1,
+                                                                uint16_t lowerModuleIndex2,
+                                                                uint16_t lowerModuleIndex3,
+                                                                float chiSquared) {
+    const int layer1 =
+        modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) +
+        5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS);
+    const int layer2 =
+        modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) +
+        5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS);
+    const int layer3 =
+        modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) +
+        5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS);
+
+    if (layer1 == 8 and layer2 == 9 and layer3 == 10) {
+      return chiSquared < 7.003f;
+    } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) {
+      return chiSquared < 0.5f;
+    } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) {
+      return chiSquared < 8.046f;
+    } else if (layer1 == 7 and layer2 == 8 and layer3 == 14) {
+      return chiSquared < 0.575f;
+    } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) {
+      return chiSquared < 5.304f;
+    }
else if (layer1 == 1 and layer2 == 2 and layer3 == 3) {
+      return chiSquared < 10.6211f;
+    } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) {
+      return chiSquared < 4.617f;
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) {
+      return chiSquared < 8.046f;
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 13) {
+      return chiSquared < 0.435f;
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) {
+      return chiSquared < 9.244f;
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) {
+      return chiSquared < 0.287f;
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) {
+      return chiSquared < 18.509f;
+    }
+
+    // layer combination not in the table: no cut is applied
+    return true;
+  }
+
+  // Upper cut on the "inwards" r-phi chi-squared, chosen per combination of the three module layer codes.
+  // A layer code is the module's layer number, +6 if the module is in the Endcap, and a further +5 if it is an
+  // Endcap TwoS module. Combinations that have no entry in the table are accepted.
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(ModulesConst modules,
+                                                                       uint16_t lowerModuleIndex1,
+                                                                       uint16_t lowerModuleIndex2,
+                                                                       uint16_t lowerModuleIndex3,
+                                                                       float chiSquared) {
+    const int layer1 =
+        modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) +
+        5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS);
+    const int layer2 =
+        modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) +
+        5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS);
+    const int layer3 =
+        modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) +
+        5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS);
+
+    if (layer1 == 7 and layer2 == 8 and layer3 == 9)  // endcap layer 1,2,3, ps
+    {
+      return chiSquared < 22016.8055f;
+    } else if (layer1 == 7 and layer2 == 8 and layer3 == 14)  // endcap layer 1,2,3 layer3->2s
+    {
+      return chiSquared < 935179.56807f;
+    } else if (layer1 == 8 and layer2 == 9 and layer3 == 10)  // endcap layer 2,3,4
+    {
+      return chiSquared < 29064.12959f;
+    } else if (layer1 == 8 and layer2 == 9 and layer3 == 15)  // endcap layer 2,3,4, layer3->2s
+    {
+      return chiSquared < 935179.5681f;
+    } else if (layer1 == 1 and layer2 == 2 and layer3 == 3)  // barrel 1,2,3
+    {
+      return chiSquared < 1370.0113195101474f;
+    } else if (layer1 == 1 and layer2 == 2 and layer3 == 7)  // barrel 1,2 endcap 1
+    {
+      return chiSquared < 5492.110048314815f;
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 4)  // barrel 2,3,4
+    {
+      return chiSquared < 4160.410806470067f;
+    } else if (layer1 == 1 and layer2 == 7 and layer3 == 8)  // barrel 1, endcap 1,2
+    {
+      return chiSquared < 29064.129591225726f;
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 7)  // barrel 2,3 endcap 1
+    {
+      return chiSquared < 12634.215376250893f;
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 12)  // barrel 2,3, endcap 1->2s
+    {
+      return chiSquared < 353821.69361145404f;
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 8)  // barrel2, endcap 1,2
+    {
+      return chiSquared < 33393.26076341235f;
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 13)  //barrel 2, endcap 1, endcap2->2s
+    {
+      return chiSquared < 935179.5680742573f;
+    }
+
+    // layer combination not in the table: no cut is applied
+    return true;
+  }
+
+  // True when the two intervals overlap, tested as: secondMin lies in [firstMin, firstMax), or firstMin lies
+  // strictly between secondMin and secondMax.
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlappT3(float firstMin,
+                                                              float firstMax,
+                                                              float secondMin,
+                                                              float secondMax) {
+    return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax));
+  }
+
+  /*bounds for high Pt taken from : http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */
+  // Radius compatibility, applied by passRadiusCriterion when none of the three triplet modules is in the Endcap.
+  // Builds a 1/radius window for the triplet, (1 -/+ bound) / tripletRadius with the lower edge clamped at 0, and
+  // one for the pixel segment, the wider of (1 -/+ bound) / pixelRadius and 1 / (pixelRadius +/- pixelRadiusError),
+  // and accepts the pair when the two windows overlap. Looser bounds are used when pixelRadius > 2 * kR1GeVf.
+  // The lower edge of the pixel window is not clamped at 0 in this variant.
+  // NOTE(review): the parameter list after `template` looks stripped in this copy - presumably <typename TAcc>; confirm.
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBBB(TAcc const& acc,
+                                                             float pixelRadius,
+                                                             float pixelRadiusError,
+                                                             float tripletRadius) {
+    float tripletInvRadiusErrorBound = 0.15624f;
+    float pixelInvRadiusErrorBound = 0.17235f;
+
+    if (pixelRadius > 2.0f * kR1GeVf) {
+      pixelInvRadiusErrorBound = 0.6375f;
+      tripletInvRadiusErrorBound = 0.6588f;
+    }
+
+    float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius;
+    float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f);
+
+    float pixelRadiusInvMax =
+        alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError));
+    float pixelRadiusInvMin =
+        alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError));
+
+    return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax);
+  }
+
+  // Radius compatibility, applied by passRadiusCriterion when only the last triplet module is in the Endcap.
+  // Same window construction as passRadiusCriterionBBB, with different error bounds.
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBBE(TAcc const& acc,
+                                                             float pixelRadius,
+                                                             float pixelRadiusError,
+                                                             float tripletRadius) {
+    float tripletInvRadiusErrorBound = 0.45972f;
+    float pixelInvRadiusErrorBound = 0.19644f;
+
+    if (pixelRadius > 2.0f * kR1GeVf) {
+      pixelInvRadiusErrorBound = 0.6805f;
+      tripletInvRadiusErrorBound = 0.8557f;
+    }
+
+    float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius;
+    float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f);
+
+    float pixelRadiusInvMax =
+        alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError));
+    float pixelRadiusInvMin =
+        alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError));
+
+    return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax);
+  }
+
+  // Radius compatibility, applied by passRadiusCriterion when the middle triplet module is in the Endcap and the
+  // first one is not. Same window construction as passRadiusCriterionBBB with different error bounds; this
+  // variant additionally clamps the lower edge of the pixel window at 0.
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBEE(TAcc const& acc,
+                                                             float pixelRadius,
+                                                             float pixelRadiusError,
+                                                             float tripletRadius) {
+    float tripletInvRadiusErrorBound = 1.59294f;
+    float pixelInvRadiusErrorBound = 0.255181f;
+
+    if (pixelRadius > 2.0f * kR1GeVf)  //as good as not having selections
+    {
+      pixelInvRadiusErrorBound = 2.2091f;
+      tripletInvRadiusErrorBound = 2.3548f;
+    }
+
+    float tripletRadiusInvMax = (1 +
tripletInvRadiusErrorBound) / tripletRadius; + float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f); + + float pixelRadiusInvMax = + alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError)); + float pixelRadiusInvMin = + alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError)); + pixelRadiusInvMin = alpaka::math::max(acc, pixelRadiusInvMin, 0.0f); + + return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionEEE(TAcc const& acc, + float pixelRadius, + float pixelRadiusError, + float tripletRadius) { + float tripletInvRadiusErrorBound = 1.7006f; + float pixelInvRadiusErrorBound = 0.26367f; + + if (pixelRadius > 2.0f * kR1GeVf) //as good as not having selections + { + pixelInvRadiusErrorBound = 2.286f; + tripletInvRadiusErrorBound = 2.436f; + } + + float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius; + float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f); + + float pixelRadiusInvMax = + alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError)); + float pixelRadiusInvMin = + alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError)); + pixelRadiusInvMin = alpaka::math::max(acc, 0.0f, pixelRadiusInvMin); + + return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterion(TAcc const& acc, + ModulesConst modules, + float pixelRadius, + float pixelRadiusError, + float tripletRadius, + int16_t lowerModuleIndex, + uint16_t middleModuleIndex, + uint16_t upperModuleIndex) { + if 
(modules.subdets()[lowerModuleIndex] == Endcap) { + return passRadiusCriterionEEE(acc, pixelRadius, pixelRadiusError, tripletRadius); + } else if (modules.subdets()[middleModuleIndex] == Endcap) { + return passRadiusCriterionBEE(acc, pixelRadius, pixelRadiusError, tripletRadius); + } else if (modules.subdets()[upperModuleIndex] == Endcap) { + return passRadiusCriterionBBE(acc, pixelRadius, pixelRadiusError, tripletRadius); + } else { + return passRadiusCriterionBBB(acc, pixelRadius, pixelRadiusError, tripletRadius); + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RZChiSquared(TAcc const& acc, + ModulesConst modules, + const uint16_t* lowerModuleIndices, + const float* rtPix, + const float* xPix, + const float* yPix, + const float* zPix, + const float* rts, + const float* xs, + const float* ys, + const float* zs, + float pixelSegmentPt, + float pixelSegmentPx, + float pixelSegmentPy, + float pixelSegmentPz, + int pixelSegmentCharge) { + float residual = 0; + float error2 = 0; + float RMSE = 0; + + float Px = pixelSegmentPx, Py = pixelSegmentPy, Pz = pixelSegmentPz; + int charge = pixelSegmentCharge; + float x1 = xPix[1] / 100; + float y1 = yPix[1] / 100; + float z1 = zPix[1] / 100; + float r1 = rtPix[1] / 100; + + float a = -2.f * k2Rinv1GeVf * 100 * charge; // multiply by 100 to make the correct length units + + for (size_t i = 0; i < Params_T3::kLayers; i++) { + float zsi = zs[i] / 100; + float rtsi = rts[i] / 100; + uint16_t lowerModuleIndex = lowerModuleIndices[i]; + const int moduleType = modules.moduleType()[lowerModuleIndex]; + const int moduleSide = modules.sides()[lowerModuleIndex]; + const int moduleSubdet = modules.subdets()[lowerModuleIndex]; + + // calculation is detailed documented here https://indico.cern.ch/event/1185895/contributions/4982756/attachments/2526561/4345805/helix%20pT3%20summarize.pdf + float diffr, diffz; + float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz); + + float rou = a / p; + if (moduleSubdet == 
Endcap) {
+        // endcap: propagate the helix to the hit z and compare transverse radii
+        float s = (zsi - z1) * p / Pz;
+        float x = x1 + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s));
+        float y = y1 + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s));
+        diffr = alpaka::math::abs(acc, rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100;
+      }
+
+      if (moduleSubdet == Barrel) {
+        // barrel: solve the quadratic A*u^2 + B*u + C = 0 for the helix at the hit radius, convert both roots to a
+        // z position and keep the one closer to the hit
+        float paraA = r1 * r1 + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y1 * Px - x1 * Py) / a - rtsi * rtsi;
+        float paraB = 2 * (x1 * Px + y1 * Py) / a;
+        float paraC = 2 * (y1 * Px - x1 * Py) / a + 2 * (Px * Px + Py * Py) / (a * a);
+        float A = paraB * paraB + paraC * paraC;
+        float B = 2 * paraA * paraB;
+        float C = paraA * paraA - paraC * paraC;
+        float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A);
+        float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A);
+        float solz1 = alpaka::math::asin(acc, sol1) / rou * Pz / p + z1;
+        float solz2 = alpaka::math::asin(acc, sol2) / rou * Pz / p + z1;
+        float diffz1 = alpaka::math::abs(acc, solz1 - zsi) * 100;
+        float diffz2 = alpaka::math::abs(acc, solz2 - zsi) * 100;
+        diffz = alpaka::math::min(acc, diffz1, diffz2);
+      }
+
+      residual = moduleSubdet == Barrel ? diffz : diffr;
+
+      //PS Modules
+      if (moduleType == 0) {
+        error2 = kPixelPSZpitch * kPixelPSZpitch;
+      } else  //2S modules
+      {
+        error2 = kStrip2SZpitch * kStrip2SZpitch;
+      }
+
+      //special dispensation to tilted PS modules!
+      if (moduleType == 0 and moduleSubdet == Barrel and moduleSide != Center) {
+        float drdz = modules.drdzs()[lowerModuleIndex];
+        error2 /= (1 + drdz * drdz);
+      }
+      RMSE += (residual * residual) / error2;
+    }
+
+    RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE);  // Divided by the degree of freedom 5.
+
+    return RMSE;
+  }
+
+  // Decides whether a pixel segment and a triplet form a pixel-triplet (pT3) candidate.
+  // Steps, each returning false on failure:
+  //   1. runPixelTrackletDefaultAlgopT3 on the pixel segment with the inner, then the outer, triplet segment;
+  //   2. passRadiusCriterion on the pixel radius (ptIn * kR1GeVf) and the triplet radius;
+  //   3. when runChiSquaredCuts is set and the pixel pT is below 5, the r-z, r-phi and inwards r-phi chi-squared
+  //      cuts.
+  // Outputs are written through the reference parameters: rzChiSquared is -1 when step 3 is skipped, the two
+  // r-phi chi-squared values are always computed, and centerX / centerY are always set to 0. Outputs assigned
+  // before a failing step keep the values written so far.
+  // NOTE(review): the parameter list after `template` looks stripped in this copy - presumably <typename TAcc>; confirm.
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc,
+                                                                 ModulesConst modules,
+                                                                 ObjectRangesConst ranges,
+                                                                 MiniDoubletsConst mds,
+                                                                 SegmentsConst segments,
+                                                                 SegmentsPixelConst segmentsPixel,
+                                                                 TripletsConst triplets,
+                                                                 unsigned int pixelSegmentIndex,
+                                                                 unsigned int tripletIndex,
+                                                                 float& pixelRadius,
+                                                                 float& tripletRadius,
+                                                                 float& centerX,
+                                                                 float& centerY,
+                                                                 float& rzChiSquared,
+                                                                 float& rPhiChiSquared,
+                                                                 float& rPhiChiSquaredInwards,
+                                                                 bool runChiSquaredCuts = true) {
+    //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet
+    uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex];
+
+    uint16_t lowerModuleIndex = triplets.lowerModuleIndices()[tripletIndex][0];
+    uint16_t middleModuleIndex = triplets.lowerModuleIndices()[tripletIndex][1];
+    uint16_t upperModuleIndex = triplets.lowerModuleIndices()[tripletIndex][2];
+
+    {
+      // pixel segment vs inner segment of the triplet
+      if (not runPixelTrackletDefaultAlgopT3(acc,
+                                             modules,
+                                             ranges,
+                                             mds,
+                                             segments,
+                                             segmentsPixel,
+                                             pixelModuleIndex,
+                                             lowerModuleIndex,
+                                             middleModuleIndex,
+                                             pixelSegmentIndex,
+                                             triplets.segmentIndices()[tripletIndex][0]))
+        return false;
+
+      //pixel segment vs outer segment of triplet
+      if (not runPixelTrackletDefaultAlgopT3(acc,
+                                             modules,
+                                             ranges,
+                                             mds,
+                                             segments,
+                                             segmentsPixel,
+                                             pixelModuleIndex,
+                                             middleModuleIndex,
+                                             upperModuleIndex,
+                                             pixelSegmentIndex,
+                                             triplets.segmentIndices()[tripletIndex][1]))
+        return false;
+    }
+
+    //pt matching between the pixel ptin and the triplet circle pt
+    // segmentsPixel is indexed relative to the first segment of the pixel module
+    unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex];
+    float pixelSegmentPt = segmentsPixel.ptIn()[pixelSegmentArrayIndex];
+    float pixelSegmentPtError = segmentsPixel.ptErr()[pixelSegmentArrayIndex];
+    float pixelSegmentPx = segmentsPixel.px()[pixelSegmentArrayIndex];
+    float pixelSegmentPy = segmentsPixel.py()[pixelSegmentArrayIndex];
+    float pixelSegmentPz = segmentsPixel.pz()[pixelSegmentArrayIndex];
+    int pixelSegmentCharge = segmentsPixel.charge()[pixelSegmentArrayIndex];
+
+    float pixelG = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex];
+    float pixelF = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex];
+    float pixelRadiusPCA = segmentsPixel.circleRadius()[pixelSegmentArrayIndex];
+
+    unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0];
+    unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1];
+
+    pixelRadius = pixelSegmentPt * kR1GeVf;
+    float pixelRadiusError = pixelSegmentPtError * kR1GeVf;
+    unsigned int tripletInnerSegmentIndex = triplets.segmentIndices()[tripletIndex][0];
+    unsigned int tripletOuterSegmentIndex = triplets.segmentIndices()[tripletIndex][1];
+
+    // the three triplet mini-doublets: both ends of the inner segment and the outer end of the outer segment
+    unsigned int firstMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][0];
+    unsigned int secondMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][1];
+    unsigned int thirdMDIndex = segments.mdIndices()[tripletOuterSegmentIndex][1];
+
+    float xs[Params_T3::kLayers] = {
+        mds.anchorX()[firstMDIndex], mds.anchorX()[secondMDIndex], mds.anchorX()[thirdMDIndex]};
+    float ys[Params_T3::kLayers] = {
+        mds.anchorY()[firstMDIndex], mds.anchorY()[secondMDIndex], mds.anchorY()[thirdMDIndex]};
+
+    float g, f;
+    tripletRadius = triplets.radius()[tripletIndex];
+    g = triplets.centerX()[tripletIndex];
+    f = triplets.centerY()[tripletIndex];
+
+    if (not passRadiusCriterion(acc,
+                                modules,
+                                pixelRadius,
+                                pixelRadiusError,
+                                tripletRadius,
+                                lowerModuleIndex,
+                                middleModuleIndex,
+                                upperModuleIndex))
+      return false;
+
+    uint16_t lowerModuleIndices[Params_T3::kLayers] = {lowerModuleIndex, middleModuleIndex, upperModuleIndex};
+
+    if (runChiSquaredCuts and pixelSegmentPt < 5.0f) {
+      float rts[Params_T3::kLayers] = {
+          mds.anchorRt()[firstMDIndex], mds.anchorRt()[secondMDIndex], mds.anchorRt()[thirdMDIndex]};
+      float zs[Params_T3::kLayers] = {
+          mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex]};
+      float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]};
+      float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]};
+      float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]};
+      float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]};
+
+      rzChiSquared = computePT3RZChiSquared(acc,
+                                            modules,
+                                            lowerModuleIndices,
+                                            rtPix,
+                                            xPix,
+                                            yPix,
+                                            zPix,
+                                            rts,
+                                            xs,
+                                            ys,
+                                            zs,
+                                            pixelSegmentPt,
+                                            pixelSegmentPx,
+                                            pixelSegmentPy,
+                                            pixelSegmentPz,
+                                            pixelSegmentCharge);
+      if (not passPT3RZChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rzChiSquared))
+        return false;
+    } else {
+      // r-z chi-squared not evaluated
+      rzChiSquared = -1;
+    }
+
+    // triplet hits against the pixel segment's circle
+    rPhiChiSquared = computePT3RPhiChiSquared(acc, modules, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys);
+
+    if (runChiSquaredCuts and pixelSegmentPt < 5.0f) {
+      if (not passPT3RPhiChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquared))
+        return false;
+    }
+
+    // the xPix / yPix declared in the r-z branch above are local to that branch, hence the re-declaration
+    float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]};
+    float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]};
+    // pixel hits against the triplet's circle
+    rPhiChiSquaredInwards = computePT3RPhiChiSquaredInwards(g, f, tripletRadius, xPix, yPix);
+
+    if (runChiSquaredCuts and pixelSegmentPt < 5.0f) {
+      if (not passPT3RPhiChiSquaredInwardsCuts(
+              modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquaredInwards))
+        return false;
+    }
+    centerX = 0;
+    centerY = 0;
+    return true;
+  }
+
+  // Kernel functor that builds pixel triplets: for every pixel segment it walks the modules listed for it in
+  // connectedPixelIndex / connectedPixelSize, tests each triplet starting on such a module with
+  // runPixelTripletDefaultAlgo and stores the accepted combinations in pixelTriplets.
+  struct CreatePixelTripletsFromMap {
+    // NOTE(review): the parameter list after `template` looks stripped in this copy - presumably <typename TAcc>; confirm.
+    template
+    ALPAKA_FN_ACC void operator()(TAcc const& acc,
+                                  ModulesConst modules,
+                                  ModulesPixelConst modulesPixel,
+                                  ObjectRangesConst 
ranges,
+                                  MiniDoubletsConst mds,
+                                  SegmentsConst segments,
+                                  SegmentsPixelConst segmentsPixel,
+                                  Triplets triplets,
+                                  TripletsOccupancyConst tripletsOccupancy,
+                                  PixelTriplets pixelTriplets,
+                                  unsigned int* connectedPixelSize,
+                                  unsigned int* connectedPixelIndex,
+                                  unsigned int nPixelSegments) const {
+      // NOTE(review): getIdx / getWorkDiv carry no template arguments in this copy; going by the variable names
+      // they were presumably <alpaka::Grid, alpaka::Blocks> and <alpaka::Grid, alpaka::Threads> - confirm.
+      auto const globalBlockIdx = alpaka::getIdx(acc);
+      auto const globalThreadIdx = alpaka::getIdx(acc);
+      auto const gridBlockExtent = alpaka::getWorkDiv(acc);
+      auto const gridThreadExtent = alpaka::getWorkDiv(acc);
+
+      // work split: pixel segments stride over thread index [1], connected modules over block index [0],
+      // triplets of a module over thread index [2]
+      for (unsigned int i_pLS = globalThreadIdx[1]; i_pLS < nPixelSegments; i_pLS += gridThreadExtent[1]) {
+        auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS];
+
+        for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max;
+             iLSModule += gridBlockExtent[0]) {
+          uint16_t tripletLowerModuleIndex =
+              modulesPixel.connectedPixels()
+                  [iLSModule];  //connected pixels will have the appropriate lower module index by default!
+#ifdef WARNINGS
+          if (tripletLowerModuleIndex >= modules.nLowerModules()) {
+            printf("tripletLowerModuleIndex %d >= modules.nLowerModules %d \n",
+                   tripletLowerModuleIndex,
+                   modules.nLowerModules());
+            continue;  //sanity check
+          }
+#endif
+          //Removes 2S-2S :FIXME: filter these out in the pixel map
+          if (modules.moduleType()[tripletLowerModuleIndex] == TwoS)
+            continue;
+
+          // the pixel segments are addressed through the module index equal to nLowerModules
+          uint16_t pixelModuleIndex = modules.nLowerModules();
+          unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[tripletLowerModuleIndex];
+          if (nOuterTriplets == 0)
+            continue;
+
+          unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS;
+
+          if (segmentsPixel.isDup()[i_pLS])
+            continue;
+          if (segmentsPixel.partOfPT5()[i_pLS])
+            continue;  //don't make pT3s for those pixels that are part of pT5
+
+          // index (0 or 1) of the mini-doublet of the triplet's inner segment whose eta/phi are stored below;
+          // triplets starting on any layer other than 1 or 2 are skipped
+          short layer2_adjustment;
+          if (modules.layers()[tripletLowerModuleIndex] == 1) {
+            layer2_adjustment = 1;
+          }  //get upper segment to be in second layer
+          else if (modules.layers()[tripletLowerModuleIndex] == 2) {
+            layer2_adjustment = 0;
+          }  // get lower segment to be in second layer
+          else {
+            continue;
+          }
+
+          //fetch the triplet
+          for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets;
+               outerTripletArrayIndex += gridThreadExtent[2]) {
+            unsigned int outerTripletIndex =
+                ranges.tripletModuleIndices()[tripletLowerModuleIndex] + outerTripletArrayIndex;
+            if (modules.moduleType()[triplets.lowerModuleIndices()[outerTripletIndex][1]] == TwoS)
+              continue;  //REMOVES PS-2S
+
+            if (triplets.partOfPT5()[outerTripletIndex])
+              continue;  //don't create pT3s for T3s accounted in pT5s
+
+            float pixelRadius, tripletRadius, rPhiChiSquared, rzChiSquared, rPhiChiSquaredInwards, centerX, centerY;
+            bool success = runPixelTripletDefaultAlgo(acc,
+                                                      modules,
+                                                      ranges,
+                                                      mds,
+                                                      segments,
+                                                      segmentsPixel,
+                                                      triplets,
+                                                      pixelSegmentIndex,
+                                                      outerTripletIndex,
+                                                      pixelRadius,
+                                                      tripletRadius,
+                                                      centerX,
+                                                      centerY,
+                                                      rzChiSquared,
+                                                      rPhiChiSquared,
+                                                      rPhiChiSquaredInwards);
+
+            if (success) {
+              float phi =
+                  mds.anchorPhi()[segments
+                                      .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]];
+              float eta =
+                  mds.anchorEta()[segments
+                                      .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]];
+              float eta_pix = segmentsPixel.eta()[i_pLS];
+              float phi_pix = segmentsPixel.phi()[i_pLS];
+              float pt = segmentsPixel.ptIn()[i_pLS];
+              float score = rPhiChiSquared + rPhiChiSquaredInwards;
+              // totOccupancyPixelTriplets counts every accepted candidate, nPixelTriplets only the stored ones;
+              // candidates arriving once n_max_pixel_triplets is reached are dropped
+              unsigned int totOccupancyPixelTriplets =
+                  alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{});
+              if (totOccupancyPixelTriplets >= n_max_pixel_triplets) {
+#ifdef WARNINGS
+                printf("Pixel Triplet excess alert!\n");
+#endif
+              } else {
+                unsigned int pixelTripletIndex =
+                    alpaka::atomicAdd(acc, &pixelTriplets.nPixelTriplets(), 1u, alpaka::hierarchy::Threads{});
+                addPixelTripletToMemory(mds,
+                                        segments,
+                                        triplets,
+                                        pixelTriplets,
+                                        pixelSegmentIndex,
+                                        outerTripletIndex,
+                                        pixelRadius,
+                                        tripletRadius,
+                                        centerX,
+                                        centerY,
+                                        rPhiChiSquared,
+                                        rPhiChiSquaredInwards,
+                                        rzChiSquared,
+                                        pixelTripletIndex,
+                                        pt,
+                                        eta,
+                                        phi,
+                                        eta_pix,
+                                        phi_pix,
+                                        score);
+                // flag the triplet as used in a pT3
+                triplets.partOfPT3()[outerTripletIndex] = true;
+              }
+            }
+          }  // for outerTripletArrayIndex
+        }  // for iLSModule < iLSModule_max
+      }  // for i_pLS
+    }
+  };
+
+  // Iteratively corrects betaIn / betaOut (and pt_beta) in place.
+  //  - lIn == 0: only betaOut is shifted, by asin(min(sdOut_dr * k2Rinv1GeVf / |pt_beta|, kSinAlphaMax)) with
+  //    the sign of betaOut, and the function returns.
+  //  - betaIn and betaOut have the same sign and |pt_beta| is below 4 * kPt_betaMax (8 * kPt_betaMax when
+  //    lIn >= 11): two-step update in which each angle is shifted with its own sign.
+  //  - otherwise, lIn < 11, |betaOut| < 0.2 * |betaIn| and |pt_beta| < 12 * kPt_betaMax: two-step update in which
+  //    both angles are shifted with the sign of betaIn.
+  //  - in every other case nothing is modified.
+  // betaAv is taken by value and overwritten before it is read, so the caller's value is neither used nor changed.
+  // NOTE(review): the parameter list after `template` looks stripped in this copy - presumably <typename TAcc>; confirm.
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE void runDeltaBetaIterationspT3(TAcc const& acc,
+                                                                float& betaIn,
+                                                                float& betaOut,
+                                                                float betaAv,
+                                                                float& pt_beta,
+                                                                float sdIn_dr,
+                                                                float sdOut_dr,
+                                                                float dr,
+                                                                float lIn) {
+    if (lIn == 0) {
+      betaOut += alpaka::math::copysign(
+          acc,
+          alpaka::math::asin(
+              acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
+          betaOut);
+      return;
+    }
+
+    if (betaIn * betaOut > 0.f and
+        (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or
+         (lIn >= 11 and alpaka::math::abs(acc, pt_beta) <
+                            8.f * kPt_betaMax)))  //and the pt_beta is well-defined; less strict for endcap-endcap
+    {
+      const float betaInUpd =
+          betaIn +
+          alpaka::math::copysign(
+              acc,
+              alpaka::math::asin(
+                  acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
+              betaIn);  //FIXME: need a faster version
+      const float betaOutUpd =
+          betaOut +
+          alpaka::math::copysign(
+              acc,
+              alpaka::math::asin(
+                  acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
+              betaOut);  //FIXME: need a faster version
+      betaAv = 0.5f * (betaInUpd + betaOutUpd);
+
+      //1st update
+      const float pt_beta_inv =
+          1.f / alpaka::math::abs(acc, dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv));  //get a better pt estimate
+
+      betaIn += alpaka::math::copysign(
+          acc,
+          alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)),
+          betaIn);  //FIXME: need a faster version
+      betaOut += alpaka::math::copysign(
+          acc,
+          alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)),
+          betaOut);  //FIXME: need a faster version
+      //update the av and pt
+      betaAv = 0.5f * (betaIn + betaOut);
+      //2nd update
+      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
+    } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) &&
+               alpaka::math::abs(acc, pt_beta) < 12.f * kPt_betaMax)  //use betaIn sign as ref
+    {
+      const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn);
+
+      const float betaInUpd =
+          betaIn +
+          alpaka::math::copysign(
+              acc,
+              alpaka::math::asin(
+                  acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)),
+              betaIn);  //FIXME: need a faster version
+      const float betaOutUpd =
+          betaOut +
+          alpaka::math::copysign(
+              acc,
+              alpaka::math::asin(
+                  acc,
+                  alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)),
+              betaIn);  //FIXME: need a faster version
+      // NOTE(review): this branch is only entered when |betaOut| < 0.2 * |betaIn|, and neither value has been
+      // modified since, so the condition below is false here and betaAv always becomes betaInUpd.
+      betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn))
+                   ? (0.5f * (betaInUpd + betaOutUpd))
+                   : betaInUpd;
+
+      //1st update
+      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
+      betaIn += alpaka::math::copysign(
+          acc,
+          alpaka::math::asin(
+              acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
+          betaIn);  //FIXME: need a faster version
+      betaOut += alpaka::math::copysign(
+          acc,
+          alpaka::math::asin(
+              acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)),
+          betaIn);  //FIXME: need a faster version
+      //update the av and pt
+      betaAv = 0.5f * (betaIn + betaOut);
+      //2nd update
+      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
+    }
+  }
+
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc,
+                                                                ModulesConst modules,
+                                                                ObjectRangesConst ranges,
+                                                                MiniDoubletsConst mds,
+                                                                SegmentsConst segments,
+                                                                SegmentsPixelConst segmentsPixel,
+                                                                uint16_t pixelModuleIndex,
+                                                                uint16_t outerInnerLowerModuleIndex,
+                                                                uint16_t outerOuterLowerModuleIndex,
+                                                                unsigned int innerSegmentIndex,
+                                                                unsigned int outerSegmentIndex,
+                                                                unsigned int firstMDIndex,
+                                                                unsigned int secondMDIndex,
+                                                                unsigned int thirdMDIndex,
+                                                                unsigned int fourthMDIndex) {
+    float dPhi, betaIn, betaOut, pt_beta, zLo, zHi, zLoPointed, zHiPointed, dPhiCut, betaOutCut;
+
+    bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS);
+
+    float rt_InLo = mds.anchorRt()[firstMDIndex];
+    float rt_InUp = mds.anchorRt()[secondMDIndex];
+    float rt_OutLo = mds.anchorRt()[thirdMDIndex];
+    float rt_OutUp = mds.anchorRt()[fourthMDIndex];
+
+    float z_InUp = mds.anchorZ()[secondMDIndex];
+    float z_OutLo = mds.anchorZ()[thirdMDIndex];
+
+    float x_InLo = mds.anchorX()[firstMDIndex];
+    float x_InUp = mds.anchorX()[secondMDIndex];
+    float x_OutLo = 
mds.anchorX()[thirdMDIndex]; + float x_OutUp = mds.anchorX()[fourthMDIndex]; + + float y_InLo = mds.anchorY()[firstMDIndex]; + float y_InUp = mds.anchorY()[secondMDIndex]; + float y_OutLo = mds.anchorY()[thirdMDIndex]; + float y_OutUp = mds.anchorY()[fourthMDIndex]; + + float rt_InOut = rt_InUp; + + if (alpaka::math::abs(acc, deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > kPi / 2.f) + return false; + + unsigned int pixelSegmentArrayIndex = innerSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; + float ptSLo = ptIn; + float px = segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; + ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); + ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float rtRatio_OutLoInOut = + rt_OutLo / rt_InOut; // Outer segment beginning rt divided by inner segment beginning rt; + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + const float zpitch_InLo = 0.05f; + const float zpitch_InOut = 0.05f; + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + zHi = z_InUp + (z_InUp + kDeltaZLum) * (rtRatio_OutLoInOut - 1.f) * (z_InUp < 0.f ? 1.f : dzDrtScale) + + (zpitch_InOut + zpitch_OutLo); + zLo = z_InUp + (z_InUp - kDeltaZLum) * (rtRatio_OutLoInOut - 1.f) * (z_InUp > 0.f ? 
1.f : dzDrtScale) - + (zpitch_InOut + zpitch_OutLo); //slope-correction only on outer end + + if ((z_OutLo < zLo) || (z_OutLo > zHi)) + return false; + + const float cosh2Eta = 1.f + (pz * pz) / (ptIn * ptIn); + + const float drt_OutLo_InUp = (rt_OutLo - rt_InUp); + + const float r3_InUp = alpaka::math::sqrt(acc, z_InUp * z_InUp + rt_InUp * rt_InUp); + + float drt_InSeg = rt_InOut - rt_InLo; + + const float thetaMuls2 = + (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InUp) / 50.f) * (r3_InUp / rt_InUp); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float dzErr = (drt_OutLo_InUp * drt_OutLo_InUp) * (etaErr * etaErr) * cosh2Eta; + dzErr += 0.03f * 0.03f; // Approximately account for IT module size + dzErr *= 9.f; // 3 sigma + dzErr += muls2 * (drt_OutLo_InUp * drt_OutLo_InUp) / 3.f * cosh2Eta; + dzErr += zGeom * zGeom; + dzErr = alpaka::math::sqrt(acc, dzErr); + + const float dzDrIn = pz / ptIn; + const float zWindow = dzErr / drt_InSeg * drt_OutLo_InUp + zGeom; + const float dzMean = dzDrIn * drt_OutLo_InUp * + (1.f + drt_OutLo_InUp * drt_OutLo_InUp * 4 * k2Rinv1GeVf * k2Rinv1GeVf / ptIn / ptIn / + 24.f); // with curved path correction + // Constructing upper and lower bound + zLoPointed = z_InUp + dzMean - zWindow; + zHiPointed = z_InUp + dzMean + zWindow; + + if ((z_OutLo < zLoPointed) || (z_OutLo > zHiPointed)) + return false; + + const float pvOffset = 0.1f / rt_OutLo; + dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + //no dphipos cut + float midPointX = 0.5f * (x_InLo + x_OutLo); + float midPointY = 0.5f * (y_InLo + y_OutLo); + + float diffX = x_OutLo - x_InLo; + float diffY = y_OutLo - y_InLo; + + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + //lots of array accesses below this... 
+ + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = x_OutUp - x_InUp; + float tl_axis_y = y_OutUp - y_InUp; + + float tl_axis_highEdge_x = tl_axis_x; + float tl_axis_highEdge_y = tl_axis_y; + + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if (isEC_lastLayer) { + alpha_OutUp_highEdge = deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorHighEdgeY()[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorLowEdgeY()[fourthMDIndex] - y_OutLo); + + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - y_InUp; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - y_InUp; + + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + 
deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = + alpaka::math::sqrt(acc, (x_InUp - x_InLo) * (x_InUp - x_InLo) + (y_InUp - y_InLo) * (y_InUp - y_InLo)); + + //no betaIn cut for the pixels + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = ptIn; + + int lIn = 0; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = + alpaka::math::sqrt(acc, (x_OutUp - x_OutLo) * (x_OutUp - x_OutLo) + (y_OutUp - y_OutLo) * (y_OutUp - y_OutLo)); + float sdOut_d = rt_OutUp - rt_OutLo; + + runDeltaBetaIterationspT3(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_ptBetaMax = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float sinDPhi = alpaka::math::sin(acc, dPhi); + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + //FIXME: need faster version + betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if 
(alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + const float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg); + const float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + float dPhi, betaIn, betaOut, pt_beta, rtLo, rtHi, dPhiCut, betaOutCut; + + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); + + float z_InUp = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + if (z_InUp * z_OutLo <= 0) + return false; + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InUp = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + float rt_OutUp = mds.anchorRt()[fourthMDIndex]; + + float x_InLo = mds.anchorX()[firstMDIndex]; + float x_InUp = mds.anchorX()[secondMDIndex]; + float x_OutLo = mds.anchorX()[thirdMDIndex]; + float x_OutUp = mds.anchorX()[fourthMDIndex]; + + float y_InLo = mds.anchorY()[firstMDIndex]; + float y_InUp = mds.anchorY()[secondMDIndex]; + float y_OutLo = mds.anchorY()[thirdMDIndex]; + float y_OutUp = mds.anchorY()[fourthMDIndex]; + + unsigned int pixelSegmentArrayIndex = 
innerSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; + + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; + float ptSLo = ptIn; + float px = segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; + + ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); + ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); + + const float zpitch_InLo = 0.05f; + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + + const float slope = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float dzDrtScale = alpaka::math::tan(acc, slope) / slope; //FIXME: need approximate value + + const float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InUp); + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; + + const float rtGeom1 = isOutSgInnerMDPS + ? 
kPixelPSZpitch + : kStrip2SZpitch; //FIXME: make this chosen by configuration for lay11,12 full PS + const float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InUp); //used in B-E region + rtLo = rt_InUp * (1.f + (z_OutLo - z_InUp - zGeom1) / (z_InUp + zGeom1 + dLum) / dzDrtScale) - + rtGeom1; //slope correction only on the lower end + + float zInForHi = z_InUp - zGeom1 - dLum; + if (zInForHi * z_InUp < 0) + zInForHi = alpaka::math::copysign(acc, 0.1f, z_InUp); + rtHi = rt_InUp * (1.f + (z_OutLo - z_InUp + zGeom1) / zInForHi) + rtGeom1; + + // Cut #2: rt condition + if ((rt_OutLo < rtLo) || (rt_OutLo > rtHi)) + return false; + + const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InUp); + const float cosh2Eta = 1.f + (pz * pz) / (ptIn * ptIn); + const float multDzDr2 = (dzOutInAbs * dzOutInAbs) * cosh2Eta / ((cosh2Eta - 1.f) * (cosh2Eta - 1.f)); + const float r3_InUp = alpaka::math::sqrt(acc, z_InUp * z_InUp + rt_InUp * rt_InUp); + const float thetaMuls2 = + (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InUp) / 50.f) * (r3_InUp / rt_InUp); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float drtErr = (etaErr * etaErr) * multDzDr2; + drtErr += 0.03f * 0.03f; // Approximately account for IT module size + drtErr *= 9.f; // 3 sigma + drtErr += muls2 * multDzDr2 / 3.f * cosh2Eta; + drtErr = alpaka::math::sqrt(acc, drtErr); + const float drtDzIn = alpaka::math::abs(acc, ptIn / pz); + + const float drt_OutLo_InUp = (rt_OutLo - rt_InUp); // drOutIn + + const float rtWindow = drtErr + rtGeom1; + const float drtMean = drtDzIn * dzOutInAbs * + (1.f - drt_OutLo_InUp * drt_OutLo_InUp * 4 * k2Rinv1GeVf * k2Rinv1GeVf / ptIn / ptIn / + 24.f); // with curved path correction + const float rtLo_point = rt_InUp + drtMean - rtWindow; + const float rtHi_point = rt_InUp + drtMean + rtWindow; + + // Cut #3: rt-z pointed + if ((rt_OutLo < rtLo_point) || (rt_OutLo > rtHi_point)) + return false; + + const float alpha1GeV_OutLo = + 
alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float pvOffset = 0.1f / rt_OutLo; + dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float midPointX = 0.5f * (x_InLo + x_OutLo); + float midPointY = 0.5f * (y_InLo + y_OutLo); + + float diffX = x_OutLo - x_InLo; + float diffY = y_OutLo - y_InLo; + + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + // Cut #5: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = x_OutUp - x_InUp; + float tl_axis_y = y_OutUp - y_InUp; + + float tl_axis_highEdge_x = tl_axis_x; + float tl_axis_highEdge_y = tl_axis_y; + + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if (isEC_lastLayer) { + alpha_OutUp_highEdge = deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorHighEdgeY()[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex] - x_OutLo, + 
mds.anchorLowEdgeY()[fourthMDIndex] - y_OutLo); + + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - y_InUp; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - y_InUp; + + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + //no betaIn cut for the pixels + const float rt_InSeg = + alpaka::math::sqrt(acc, (x_InUp - x_InLo) * (x_InUp - x_InLo) + (y_InUp - y_InLo) * (y_InUp - y_InLo)); + + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = ptIn; + + int lIn = 0; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = + alpaka::math::sqrt(acc, (x_OutUp - x_OutLo) * (x_OutUp - x_OutLo) + (y_OutUp - y_OutLo) * (y_OutUp - y_OutLo)); + float sdOut_d = rt_OutUp - rt_OutLo; + + runDeltaBetaIterationspT3(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_ptBetaMax = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float sinDPhi = alpaka::math::sin(acc, dPhi); + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + betaOutCut = + alpaka::math::asin( + acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) //FIXME: need faster version + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if 
(alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + + float drt_InSeg = rt_InUp - rt_InLo; + + const float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg); + const float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + return dBeta * dBeta <= dBetaCut2; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h new file mode 100644 index 0000000000000..24ce2d1d53e22 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -0,0 +1,2592 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Quintuplet_h +#define RecoTracker_LSTCore_src_alpaka_Quintuplet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +#include "NeuralNetwork.h" +#include "Hit.h" +#include "Triplet.h" // FIXME: need to refactor common functions to a common place + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlap(float firstMin, + float firstMax, + float secondMin, + float secondMax) { + return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < 
firstMin) && (firstMin < secondMax));
+  }
+
+  // Writes one quintuplet into slot `quintupletIndex` of the `quintuplets` SoA.
+  //
+  // Stores the two constituent triplet indices, the five lower-module indices, the radii / regression
+  // results / chi-squared values, and copies logical layers and hit indices from the two triplets:
+  //   - logicalLayers[0..2] come from the inner triplet [0..2], logicalLayers[3..4] from the outer triplet [1..2]
+  //   - hitIndices[0..5] come from the inner triplet [0..5], hitIndices[6..9] from the outer triplet [2..5]
+  // innerRadius, outerRadius, pt, eta, phi and scores are stored through __F2H; isDup is reset to 0.
+  // NOTE: the `layer` argument is not used in this function body.
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(TripletsConst triplets,
+                                                            Quintuplets quintuplets,
+                                                            unsigned int innerTripletIndex,
+                                                            unsigned int outerTripletIndex,
+                                                            uint16_t lowerModule1,
+                                                            uint16_t lowerModule2,
+                                                            uint16_t lowerModule3,
+                                                            uint16_t lowerModule4,
+                                                            uint16_t lowerModule5,
+                                                            float innerRadius,
+                                                            float bridgeRadius,
+                                                            float outerRadius,
+                                                            float regressionG,
+                                                            float regressionF,
+                                                            float regressionRadius,
+                                                            float rzChiSquared,
+                                                            float rPhiChiSquared,
+                                                            float nonAnchorChiSquared,
+                                                            float pt,
+                                                            float eta,
+                                                            float phi,
+                                                            float scores,
+                                                            uint8_t layer,
+                                                            unsigned int quintupletIndex,
+                                                            bool tightCutFlag) {
+    quintuplets.tripletIndices()[quintupletIndex][0] = innerTripletIndex;
+    quintuplets.tripletIndices()[quintupletIndex][1] = outerTripletIndex;
+
+    quintuplets.lowerModuleIndices()[quintupletIndex][0] = lowerModule1;
+    quintuplets.lowerModuleIndices()[quintupletIndex][1] = lowerModule2;
+    quintuplets.lowerModuleIndices()[quintupletIndex][2] = lowerModule3;
+    quintuplets.lowerModuleIndices()[quintupletIndex][3] = lowerModule4;
+    quintuplets.lowerModuleIndices()[quintupletIndex][4] = lowerModule5;
+    quintuplets.innerRadius()[quintupletIndex] = __F2H(innerRadius);
+    quintuplets.outerRadius()[quintupletIndex] = __F2H(outerRadius);
+    quintuplets.pt()[quintupletIndex] = __F2H(pt);
+    quintuplets.eta()[quintupletIndex] = __F2H(eta);
+    quintuplets.phi()[quintupletIndex] = __F2H(phi);
+    quintuplets.score_rphisum()[quintupletIndex] = __F2H(scores);
+    quintuplets.isDup()[quintupletIndex] = 0;
+    quintuplets.tightCutFlag()[quintupletIndex] = tightCutFlag;
+    quintuplets.regressionRadius()[quintupletIndex] = regressionRadius;
+    quintuplets.regressionG()[quintupletIndex] = regressionG;
+    quintuplets.regressionF()[quintupletIndex] = regressionF;
+    quintuplets.logicalLayers()[quintupletIndex][0] = triplets.logicalLayers()[innerTripletIndex][0];
+    quintuplets.logicalLayers()[quintupletIndex][1] = triplets.logicalLayers()[innerTripletIndex][1];
+    quintuplets.logicalLayers()[quintupletIndex][2] = triplets.logicalLayers()[innerTripletIndex][2];
+    quintuplets.logicalLayers()[quintupletIndex][3] = triplets.logicalLayers()[outerTripletIndex][1];
+    quintuplets.logicalLayers()[quintupletIndex][4] = triplets.logicalLayers()[outerTripletIndex][2];
+
+    quintuplets.hitIndices()[quintupletIndex][0] = triplets.hitIndices()[innerTripletIndex][0];
+    quintuplets.hitIndices()[quintupletIndex][1] = triplets.hitIndices()[innerTripletIndex][1];
+    quintuplets.hitIndices()[quintupletIndex][2] = triplets.hitIndices()[innerTripletIndex][2];
+    quintuplets.hitIndices()[quintupletIndex][3] = triplets.hitIndices()[innerTripletIndex][3];
+    quintuplets.hitIndices()[quintupletIndex][4] = triplets.hitIndices()[innerTripletIndex][4];
+    quintuplets.hitIndices()[quintupletIndex][5] = triplets.hitIndices()[innerTripletIndex][5];
+    quintuplets.hitIndices()[quintupletIndex][6] = triplets.hitIndices()[outerTripletIndex][2];
+    quintuplets.hitIndices()[quintupletIndex][7] = triplets.hitIndices()[outerTripletIndex][3];
+    quintuplets.hitIndices()[quintupletIndex][8] = triplets.hitIndices()[outerTripletIndex][4];
+    quintuplets.hitIndices()[quintupletIndex][9] = triplets.hitIndices()[outerTripletIndex][5];
+    quintuplets.bridgeRadius()[quintupletIndex] = bridgeRadius;
+    quintuplets.rzChiSquared()[quintupletIndex] = rzChiSquared;
+    quintuplets.chiSquared()[quintupletIndex] = rPhiChiSquared;
+    quintuplets.nonAnchorChiSquared()[quintupletIndex] = nonAnchorChiSquared;
+  }
+
+  //90% constraint
+  // Returns whether `chiSquared` is below the threshold tabulated for the five-layer pattern of the
+  // quintuplet (layers looked up through modules.lstLayers()). Layer patterns without a tabulated
+  // threshold are accepted (returns true).
+  //
+  // NOTE(review): the (1,2,3,4,5) and (1,2,3,4,12) thresholds used to sit in a separate
+  // `layer1 == 1 and layer2 == 2 and layer3 == 3 and layer4 == 4` branch that could never be reached,
+  // because the preceding (1,2,3) branch always matched first; those patterns were therefore always
+  // accepted. They are now handled inside the (1,2,3) branch so the thresholds are actually applied.
+  // This tightens the selection for those patterns - validate efficiency/fake rate before merging.
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(ModulesConst modules,
+                                                               uint16_t lowerModuleIndex1,
+                                                               uint16_t lowerModuleIndex2,
+                                                               uint16_t lowerModuleIndex3,
+                                                               uint16_t lowerModuleIndex4,
+                                                               uint16_t lowerModuleIndex5,
+                                                               float chiSquared) {
+    // Using lstLayer numbering convention defined in ModuleMethods.h
+    const int layer1 = modules.lstLayers()[lowerModuleIndex1];
+    const int layer2 = modules.lstLayers()[lowerModuleIndex2];
+    const int layer3 = modules.lstLayers()[lowerModuleIndex3];
+    const int layer4 = modules.lstLayers()[lowerModuleIndex4];
+    const int layer5 = modules.lstLayers()[lowerModuleIndex5];
+
+    if (layer1 == 7 and layer2 == 8 and layer3 == 9) {
+      if (layer4 == 10 and layer5 == 11) {
+        return chiSquared < 0.01788f;
+      } else if (layer4 == 10 and layer5 == 16) {
+        return chiSquared < 0.04725f;
+      } else if (layer4 == 15 and layer5 == 16) {
+        return chiSquared < 0.04725f;
+      }
+    } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) {
+      if (layer4 == 9 and layer5 == 10) {
+        return chiSquared < 0.01788f;
+      } else if (layer4 == 9 and layer5 == 15) {
+        return chiSquared < 0.08234f;
+      }
+    } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) {
+      if (layer4 == 8 and layer5 == 9) {
+        return chiSquared < 0.02360f;
+      } else if (layer4 == 8 and layer5 == 14) {
+        return chiSquared < 0.07167f;
+      } else if (layer4 == 13 and layer5 == 14) {
+        return chiSquared < 0.08234f;
+      }
+    } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) {
+      if (layer4 == 7 and layer5 == 8) {
+        return chiSquared < 0.01026f;
+      } else if (layer4 == 7 and layer5 == 13) {
+        return chiSquared < 0.06238f;
+      } else if (layer4 == 12 and layer5 == 13) {
+        return chiSquared < 0.06238f;
+      } else if (layer4 == 4 and layer5 == 5) {
+        // previously unreachable, see NOTE(review) above
+        return chiSquared < 0.04725f;
+      } else if (layer4 == 4 and layer5 == 12) {
+        // previously unreachable, see NOTE(review) above
+        return chiSquared < 0.09461f;
+      }
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) {
+      if (layer4 == 9 and layer5 == 10) {
+        return chiSquared < 0.00512f;
+      } else if (layer4 == 9 and layer5 == 15) {
+        return chiSquared < 0.04112f;
+      } else if (layer4 == 14 and layer5 == 15) {
+        return chiSquared < 0.06238f;
+      }
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) {
+      if (layer4 == 8 and layer5 == 14) {
+        return chiSquared < 0.07167f;
+      } else if (layer4 == 13 and layer5 == 14) {
+        return chiSquared < 0.06238f;
+      }
+    } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) {
+      if (layer4 == 5 and layer5 == 6) {
+        return chiSquared < 0.08234f;
+      } else if (layer4 == 5 and layer5 == 12) {
+        return chiSquared < 0.10870f;
+      } else if (layer4 == 12 and layer5 == 13) {
+        return chiSquared < 0.10870f;
+      }
+    } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) {
+      return chiSquared < 0.09461f;
+    } else if (layer1 == 3 and layer2 == 4 and layer3 == 5 and layer4 == 12 and layer5 == 13) {
+      return chiSquared < 0.09461f;
+    }
+
+    // No threshold tabulated for this layer pattern: accept.
+    return true;
+  }
+
+  //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passT5RZConstraint(TAcc const& acc,
+                                                         ModulesConst modules,
+                                                         MiniDoubletsConst mds,
+                                                         unsigned int firstMDIndex,
+                                                         unsigned int secondMDIndex,
+                                                         unsigned int thirdMDIndex,
+                                                         unsigned int fourthMDIndex,
+                                                         unsigned int fifthMDIndex,
+                                                         uint16_t lowerModuleIndex1,
+                                                         uint16_t lowerModuleIndex2,
+                                                         uint16_t lowerModuleIndex3,
+                                                         uint16_t lowerModuleIndex4,
+                                                         uint16_t lowerModuleIndex5,
+                                                         float& rzChiSquared,
+                                                         float inner_pt,
+                                                         float innerRadius,
+                                                         float g,
+                                                         float f,
+                                                         bool& tightCutFlag) {
+    //(g,f) is the center of the circle fitted by the innermost 3 points on x,y coordinates
+    const float rt1 = mds.anchorRt()[firstMDIndex] / 100;  //in the unit of m instead of cm
+    const float rt2 = mds.anchorRt()[secondMDIndex] / 100;
+    const float rt3 = mds.anchorRt()[thirdMDIndex] / 100;
+    const float rt4 = mds.anchorRt()[fourthMDIndex] / 100;
+    const float rt5 = mds.anchorRt()[fifthMDIndex] / 100;
+
+    const float z1 = mds.anchorZ()[firstMDIndex] / 100;
+    const float z2 = mds.anchorZ()[secondMDIndex] / 100;
+    const float z3 = mds.anchorZ()[thirdMDIndex] / 100;
+    const float z4 = mds.anchorZ()[fourthMDIndex] / 100;
+    const float z5 = mds.anchorZ()[fifthMDIndex] / 100;
+
+    // Using lst_layer numbering convention defined in ModuleMethods.h
+    const int layer1 =
modules.lstLayers()[lowerModuleIndex1]; + const int layer2 = modules.lstLayers()[lowerModuleIndex2]; + const int layer3 = modules.lstLayers()[lowerModuleIndex3]; + const int layer4 = modules.lstLayers()[lowerModuleIndex4]; + const int layer5 = modules.lstLayers()[lowerModuleIndex5]; + + //slope computed using the internal T3s + const int moduleType1 = modules.moduleType()[lowerModuleIndex1]; //0 is ps, 1 is 2s + const int moduleType2 = modules.moduleType()[lowerModuleIndex2]; + const int moduleType3 = modules.moduleType()[lowerModuleIndex3]; + const int moduleType4 = modules.moduleType()[lowerModuleIndex4]; + const int moduleType5 = modules.moduleType()[lowerModuleIndex5]; + + const float x1 = mds.anchorX()[firstMDIndex] / 100; + const float x2 = mds.anchorX()[secondMDIndex] / 100; + const float x3 = mds.anchorX()[thirdMDIndex] / 100; + const float x4 = mds.anchorX()[fourthMDIndex] / 100; + const float y1 = mds.anchorY()[firstMDIndex] / 100; + const float y2 = mds.anchorY()[secondMDIndex] / 100; + const float y3 = mds.anchorY()[thirdMDIndex] / 100; + const float y4 = mds.anchorY()[fourthMDIndex] / 100; + + float residual = 0; + float error2 = 0; + float x_center = g / 100, y_center = f / 100; + float x_init = mds.anchorX()[thirdMDIndex] / 100; + float y_init = mds.anchorY()[thirdMDIndex] / 100; + float z_init = mds.anchorZ()[thirdMDIndex] / 100; + float rt_init = mds.anchorRt()[thirdMDIndex] / 100; //use the second MD as initial point + + if (moduleType3 == 1) // 1: if MD3 is in 2s layer + { + x_init = mds.anchorX()[secondMDIndex] / 100; + y_init = mds.anchorY()[secondMDIndex] / 100; + z_init = mds.anchorZ()[secondMDIndex] / 100; + rt_init = mds.anchorRt()[secondMDIndex] / 100; + } + + // start from a circle of inner T3. + // to determine the charge + int charge = 0; + float slope3c = (y3 - y_center) / (x3 - x_center); + float slope1c = (y1 - y_center) / (x1 - x_center); + // these 4 "if"s basically separate the x-y plane into 4 quarters. 
It determines geometrically how a circle and line slope goes and their positions, and we can get the charges correspondingly. + if ((y3 - y_center) > 0 && (y1 - y_center) > 0) { + if (slope1c > 0 && slope3c < 0) + charge = -1; // on x axis of a quarter, 3 hits go anti-clockwise + else if (slope1c < 0 && slope3c > 0) + charge = 1; // on x axis of a quarter, 3 hits go clockwise + else if (slope3c > slope1c) + charge = -1; + else if (slope3c < slope1c) + charge = 1; + } else if ((y3 - y_center) < 0 && (y1 - y_center) < 0) { + if (slope1c < 0 && slope3c > 0) + charge = 1; + else if (slope1c > 0 && slope3c < 0) + charge = -1; + else if (slope3c > slope1c) + charge = -1; + else if (slope3c < slope1c) + charge = 1; + } else if ((y3 - y_center) < 0 && (y1 - y_center) > 0) { + if ((x3 - x_center) > 0 && (x1 - x_center) > 0) + charge = 1; + else if ((x3 - x_center) < 0 && (x1 - x_center) < 0) + charge = -1; + } else if ((y3 - y_center) > 0 && (y1 - y_center) < 0) { + if ((x3 - x_center) > 0 && (x1 - x_center) > 0) + charge = -1; + else if ((x3 - x_center) < 0 && (x1 - x_center) < 0) + charge = 1; + } + + float pseudo_phi = alpaka::math::atan( + acc, (y_init - y_center) / (x_init - x_center)); //actually represent pi/2-phi, wrt helix axis z + float Pt = inner_pt, Px = Pt * alpaka::math::abs(acc, alpaka::math::sin(acc, pseudo_phi)), + Py = Pt * alpaka::math::abs(acc, cos(pseudo_phi)); + + // Above line only gives you the correct value of Px and Py, but signs of Px and Py calculated below. + // We look at if the circle is clockwise or anti-clock wise, to make it simpler, we separate the x-y plane into 4 quarters. 
+ if (x_init > x_center && y_init > y_center) //1st quad + { + if (charge == 1) + Py = -Py; + if (charge == -1) + Px = -Px; + } + if (x_init < x_center && y_init > y_center) //2nd quad + { + if (charge == -1) { + Px = -Px; + Py = -Py; + } + } + if (x_init < x_center && y_init < y_center) //3rd quad + { + if (charge == 1) + Px = -Px; + if (charge == -1) + Py = -Py; + } + if (x_init > x_center && y_init < y_center) //4th quad + { + if (charge == 1) { + Px = -Px; + Py = -Py; + } + } + + // But if the initial T5 curve goes across quarters(i.e. cross axis to separate the quarters), need special redeclaration of Px,Py signs on these to avoid errors + if (moduleType3 == 0) { // 0 is ps + if (x4 < x3 && x3 < x2) + Px = -alpaka::math::abs(acc, Px); + else if (x4 > x3 && x3 > x2) + Px = alpaka::math::abs(acc, Px); + if (y4 < y3 && y3 < y2) + Py = -alpaka::math::abs(acc, Py); + else if (y4 > y3 && y3 > y2) + Py = alpaka::math::abs(acc, Py); + } else if (moduleType3 == 1) // 1 is 2s + { + if (x3 < x2 && x2 < x1) + Px = -alpaka::math::abs(acc, Px); + else if (x3 > x2 && x2 > x1) + Px = alpaka::math::abs(acc, Px); + if (y3 < y2 && y2 < y1) + Py = -alpaka::math::abs(acc, Py); + else if (y3 > y2 && y2 > y1) + Py = alpaka::math::abs(acc, Py); + } + + //to get Pz, we use pt/pz=ds/dz, ds is the arclength between MD1 and MD3. 
+ float AO = alpaka::math::sqrt(acc, (x1 - x_center) * (x1 - x_center) + (y1 - y_center) * (y1 - y_center)); + float BO = + alpaka::math::sqrt(acc, (x_init - x_center) * (x_init - x_center) + (y_init - y_center) * (y_init - y_center)); + float AB2 = (x1 - x_init) * (x1 - x_init) + (y1 - y_init) * (y1 - y_init); + float dPhi = alpaka::math::acos(acc, (AO * AO + BO * BO - AB2) / (2 * AO * BO)); + float ds = innerRadius / 100 * dPhi; + + float Pz = (z_init - z1) / ds * Pt; + float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz); + + float a = -2.f * k2Rinv1GeVf * 100 * charge; // multiply by 100 to make the correct length units + + float zsi, rtsi; + int layeri, moduleTypei; + rzChiSquared = 0; + for (size_t i = 2; i < 6; i++) { + if (i == 2) { + zsi = z2; + rtsi = rt2; + layeri = layer2; + moduleTypei = moduleType2; + } else if (i == 3) { + zsi = z3; + rtsi = rt3; + layeri = layer3; + moduleTypei = moduleType3; + } else if (i == 4) { + zsi = z4; + rtsi = rt4; + layeri = layer4; + moduleTypei = moduleType4; + } else if (i == 5) { + zsi = z5; + rtsi = rt5; + layeri = layer5; + moduleTypei = moduleType5; + } + + if (moduleType3 == 0) { //0: ps + if (i == 3) + continue; + } else { + if (i == 2) + continue; + } + + // calculation is copied from PixelTriplet.h computePT3RZChiSquared + float diffr = 0, diffz = 0; + + float rou = a / p; + // for endcap + float s = (zsi - z_init) * p / Pz; + float x = x_init + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s)); + float y = y_init + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s)); + diffr = (rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100; + + // for barrel + if (layeri <= 6) { + float paraA = + rt_init * rt_init + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y_init * Px - x_init * Py) / a - rtsi * rtsi; + float paraB = 2 * (x_init * Px + y_init * Py) / a; + float paraC = 2 * (y_init * Px - x_init * Py) / a + 2 * (Px * Px + Py * Py) / (a 
* a); + float A = paraB * paraB + paraC * paraC; + float B = 2 * paraA * paraB; + float C = paraA * paraA - paraC * paraC; + float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float solz1 = alpaka::math::asin(acc, sol1) / rou * Pz / p + z_init; + float solz2 = alpaka::math::asin(acc, sol2) / rou * Pz / p + z_init; + float diffz1 = (solz1 - zsi) * 100; + float diffz2 = (solz2 - zsi) * 100; + if (alpaka::math::isnan(acc, diffz1)) + diffz = diffz2; + else if (alpaka::math::isnan(acc, diffz2)) + diffz = diffz1; + else { + diffz = (alpaka::math::abs(acc, diffz1) < alpaka::math::abs(acc, diffz2)) ? diffz1 : diffz2; + } + } + residual = (layeri > 6) ? diffr : diffz; + + //PS Modules + if (moduleTypei == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //check the tilted module, side: PosZ, NegZ, Center(for not tilted) + float drdz; + short side, subdets; + if (i == 2) { + drdz = alpaka::math::abs(acc, modules.drdzs()[lowerModuleIndex2]); + side = modules.sides()[lowerModuleIndex2]; + subdets = modules.subdets()[lowerModuleIndex2]; + } + if (i == 3) { + drdz = alpaka::math::abs(acc, modules.drdzs()[lowerModuleIndex3]); + side = modules.sides()[lowerModuleIndex3]; + subdets = modules.subdets()[lowerModuleIndex3]; + } + if (i == 2 || i == 3) { + residual = (layeri <= 6 && ((side == Center) or (drdz < 1))) ? diffz : diffr; + float projection_missing2 = 1.f; + if (drdz < 1) + projection_missing2 = + ((subdets == Endcap) or (side == Center)) ? 1.f : 1.f / (1 + drdz * drdz); // cos(atan(drdz)), if dr/dz<1 + if (drdz > 1) + projection_missing2 = ((subdets == Endcap) or (side == Center)) + ? 
1.f + : (drdz * drdz) / (1 + drdz * drdz); //sin(atan(drdz)), if dr/dz>1 + error2 = error2 * projection_missing2; + } + rzChiSquared += 12 * (residual * residual) / error2; + } + // for set rzchi2 cut + // if the 5 points are linear, helix calculation gives nan + if (inner_pt > 100 || alpaka::math::isnan(acc, rzChiSquared)) { + float slope; + if (moduleType1 == 0 and moduleType2 == 0 and moduleType3 == 1) //PSPS2S + { + slope = (z2 - z1) / (rt2 - rt1); + } else { + slope = (z3 - z1) / (rt3 - rt1); + } + float residual4_linear = (layer4 <= 6) ? ((z4 - z1) - slope * (rt4 - rt1)) : ((rt4 - rt1) - (z4 - z1) / slope); + float residual5_linear = (layer4 <= 6) ? ((z5 - z1) - slope * (rt5 - rt1)) : ((rt5 - rt1) - (z5 - z1) / slope); + + // creating a chi squared type quantity + // 0-> PS, 1->2S + residual4_linear = (moduleType4 == 0) ? residual4_linear / kPixelPSZpitch : residual4_linear / kStrip2SZpitch; + residual5_linear = (moduleType5 == 0) ? residual5_linear / kPixelPSZpitch : residual5_linear / kStrip2SZpitch; + residual4_linear = residual4_linear * 100; + residual5_linear = residual5_linear * 100; + + rzChiSquared = 12 * (residual4_linear * residual4_linear + residual5_linear * residual5_linear); + return rzChiSquared < 4.677f; + } + + // when building T5, apply 99% chi2 cuts as default, and add to pT5 collection. But when adding T5 to TC collections, apply 95% cut to reduce the fake rate + tightCutFlag = false; + // The category numbers are related to module regions and layers, decoding of the region numbers can be found here in slide 2 table. https://github.com/SegmentLinking/TrackLooper/files/11420927/part.2.pdf + // The commented numbers after each case is the region code, and can look it up from the table to see which category it belongs to. 
For example, //0 means T5 built with Endcap 1,2,3,4,5 ps modules + if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 11) //0 + { + if (rzChiSquared < 94.470f) + tightCutFlag = true; + return true; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 16) //1 + { + if (rzChiSquared < 22.099f) + tightCutFlag = true; + return rzChiSquared < 37.956f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 15 and layer5 == 16) //2 + { + if (rzChiSquared < 7.992f) + tightCutFlag = true; + return rzChiSquared < 11.622f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8 and layer4 == 9) { + if (layer5 == 10) //3 + { + if (rzChiSquared < 111.390f) + tightCutFlag = true; + return true; + } + if (layer5 == 15) //4 + { + if (rzChiSquared < 18.351f) + tightCutFlag = true; + return rzChiSquared < 37.941f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 8 and layer5 == 9) //5 + { + if (rzChiSquared < 116.148f) + tightCutFlag = true; + return true; + } + if (layer4 == 8 and layer5 == 14) //6 + { + if (rzChiSquared < 19.352f) + tightCutFlag = true; + return rzChiSquared < 52.561f; + } else if (layer4 == 13 and layer5 == 14) //7 + { + if (rzChiSquared < 10.392f) + tightCutFlag = true; + return rzChiSquared < 13.76f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 7 and layer5 == 8) //8 + { + if (rzChiSquared < 27.824f) + tightCutFlag = true; + return rzChiSquared < 44.247f; + } else if (layer4 == 7 and layer5 == 13) //9 + { + if (rzChiSquared < 18.145f) + tightCutFlag = true; + return rzChiSquared < 33.752f; + } else if (layer4 == 12 and layer5 == 13) //10 + { + if (rzChiSquared < 13.308f) + tightCutFlag = true; + return rzChiSquared < 21.213f; + } else if (layer4 == 4 and layer5 == 5) //11 + { + if (rzChiSquared < 15.627f) + tightCutFlag = true; + return rzChiSquared < 29.035f; + } else if (layer4 == 4 and layer5 == 12) //12 + { + if 
(rzChiSquared < 14.64f) + tightCutFlag = true; + return rzChiSquared < 23.037f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 15) //14 + { + if (rzChiSquared < 24.662f) + tightCutFlag = true; + return rzChiSquared < 41.036f; + } else if (layer4 == 14 and layer5 == 15) //15 + { + if (rzChiSquared < 8.866f) + tightCutFlag = true; + return rzChiSquared < 14.092f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 == 8 and layer5 == 14) //16 + { + if (rzChiSquared < 23.730f) + tightCutFlag = true; + return rzChiSquared < 23.748f; + } + if (layer4 == 13 and layer5 == 14) //17 + { + if (rzChiSquared < 10.772f) + tightCutFlag = true; + return rzChiSquared < 17.945f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 5 and layer5 == 6) //18 + { + if (rzChiSquared < 6.065f) + tightCutFlag = true; + return rzChiSquared < 8.803f; + } else if (layer4 == 5 and layer5 == 12) //19 + { + if (rzChiSquared < 5.693f) + tightCutFlag = true; + return rzChiSquared < 7.930f; + } + + else if (layer4 == 12 and layer5 == 13) //20 + { + if (rzChiSquared < 5.473f) + tightCutFlag = true; + return rzChiSquared < 7.626f; + } + } + return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(TripletsConst triplets, + SegmentsConst segments, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex) { + unsigned int innerOuterSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int outerInnerSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; + unsigned int innerOuterOuterMiniDoubletIndex = + segments.mdIndices()[innerOuterSegmentIndex][1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = + segments.mdIndices()[outerInnerSegmentIndex][0]; //outer triplet inner segment inner MD index + + return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); + } + + template 
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeErrorInRadius(TAcc const& acc, + float* x1Vec, + float* y1Vec, + float* x2Vec, + float* y2Vec, + float* x3Vec, + float* y3Vec, + float& minimumRadius, + float& maximumRadius) { + //brute force + float candidateRadius; + float g, f; + minimumRadius = kVerticalModuleSlope; + maximumRadius = 0.f; + for (size_t i = 0; i < 3; i++) { + float x1 = x1Vec[i]; + float y1 = y1Vec[i]; + for (size_t j = 0; j < 3; j++) { + float x2 = x2Vec[j]; + float y2 = y2Vec[j]; + for (size_t k = 0; k < 3; k++) { + float x3 = x3Vec[k]; + float y3 = y3Vec[k]; + candidateRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, g, f); + maximumRadius = alpaka::math::max(acc, candidateRadius, maximumRadius); + minimumRadius = alpaka::math::min(acc, candidateRadius, minimumRadius); + } + } + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE12378(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.178f; + float bridgeInvRadiusErrorBound = 0.507f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + /*bounds for high Pt taken from : 
http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBB(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.1512f; + float bridgeInvRadiusErrorBound = 0.1781f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 0.4449f; + bridgeInvRadiusErrorBound = 0.4033f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.1781f; + float bridgeInvRadiusErrorBound = 0.2167f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 0.4750f; + bridgeInvRadiusErrorBound = 0.3903f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, 
bridgeInvRadiusMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE23478(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.2097f; + float bridgeInvRadiusErrorBound = 0.8557f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE34578(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.066f; + float bridgeInvRadiusErrorBound = 0.617f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool 
matchRadiiBBEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.6376f; + float bridgeInvRadiusErrorBound = 2.1381f; + + if (innerRadius * k2Rinv1GeVf > 1.f) //as good as no selections! + { + innerInvRadiusErrorBound = 12.9173f; + bridgeInvRadiusErrorBound = 5.1700f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBEEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float innerRadiusMin2S, + float innerRadiusMax2S, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 3.7280f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 23.2713f; + bridgeInvRadiusErrorBound = 21.7980f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return 
checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0f / innerRadiusMax2S), + alpaka::math::max(acc, innerInvRadiusMax, 1.0f / innerRadiusMin2S), + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiEEEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float innerRadiusMin2S, + float innerRadiusMax2S, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 2.2091f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 22.5226f; + bridgeInvRadiusErrorBound = 21.0966f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0f / innerRadiusMax2S), + alpaka::math::max(acc, innerInvRadiusMax, 1.0f / innerRadiusMin2S), + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, + ModulesConst modules, + const uint16_t* lowerModuleIndices, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + unsigned int nPoints = 5, + bool anchorHits = true) { + /* + Bool anchorHits required to deal with a weird edge case wherein + the hits ultimately used in the regression are anchor hits, but the + lower modules need not all be Pixel 
Modules (in case of PS). Similarly, + when we compute the chi squared for the non-anchor hits, the "partner module" + need not always be a PS strip module, but all non-anchor hits sit on strip + modules. + */ + + ModuleType moduleType; + short moduleSubdet, moduleSide; + float inv1 = kWidthPS / kWidth2S; + float inv2 = kPixelPSZpitch / kWidth2S; + float inv3 = kStripPSZpitch / kWidth2S; + for (size_t i = 0; i < nPoints; i++) { + moduleType = modules.moduleType()[lowerModuleIndices[i]]; + moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + moduleSide = modules.sides()[lowerModuleIndices[i]]; + const float& drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 1.f; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + + if (anchorHits) { + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } else { + delta2[i] = (inv3 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + + /* + despite the type of the module layer of the lower module index, + all anchor hits are on the pixel side and all non-anchor hits are + on the strip side! 
+ */ + if (anchorHits) { + delta2[i] = inv2; + } else { + delta2[i] = inv3; + } + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 500.f * inv1; + isFlat[i] = false; + } else { +#ifdef WARNINGS + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); +#endif + } + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusUsingRegression(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float& g, + float& f, + float* sigmas2, + float& chiSquared) { + float radius = 0.f; + + // Some extra variables + // the two variables will be called x1 and x2, and y (which is x^2 + y^2) + + float sigmaX1Squared = 0.f; + float sigmaX2Squared = 0.f; + float sigmaX1X2 = 0.f; + float sigmaX1y = 0.f; + float sigmaX2y = 0.f; + float sigmaY = 0.f; + float sigmaX1 = 0.f; + float sigmaX2 = 0.f; + float sigmaOne = 0.f; + + float xPrime, yPrime, absArctanSlope, angleM; + for (size_t i = 0; i < nPoints; i++) { + // Computing sigmas is a very tricky affair + // if the module is tilted or endcap, we need to use the slopes properly! + + absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ? 
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) + : kPi / 2.f); + + if (xs[i] > 0 and ys[i] > 0) { + angleM = kPi / 2.f - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + kPi / 2.f; + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + kPi / 2.f); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(kPi / 2.f - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigmas2[i] = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + + sigmaX1Squared += (xs[i] * xs[i]) / sigmas2[i]; + sigmaX2Squared += (ys[i] * ys[i]) / sigmas2[i]; + sigmaX1X2 += (xs[i] * ys[i]) / sigmas2[i]; + sigmaX1y += (xs[i] * (xs[i] * xs[i] + ys[i] * ys[i])) / sigmas2[i]; + sigmaX2y += (ys[i] * (xs[i] * xs[i] + ys[i] * ys[i])) / sigmas2[i]; + sigmaY += (xs[i] * xs[i] + ys[i] * ys[i]) / sigmas2[i]; + sigmaX1 += xs[i] / sigmas2[i]; + sigmaX2 += ys[i] / sigmas2[i]; + sigmaOne += 1.0f / sigmas2[i]; + } + float denominator = (sigmaX1X2 - sigmaX1 * sigmaX2) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX1Squared - sigmaX1 * sigmaX1) * (sigmaX2Squared - sigmaX2 * sigmaX2); + + float twoG = ((sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX1y - sigmaX1 * sigmaY) * (sigmaX2Squared - sigmaX2 * sigmaX2)) / + denominator; + float twoF = ((sigmaX1y - sigmaX1 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1Squared - sigmaX1 * sigmaX1)) / + denominator; + + float c = -(sigmaY - twoG * sigmaX1 - twoF * sigmaX2) / sigmaOne; + g = 0.5f * twoG; + f = 0.5f * twoF; + if (g * g + f * f - c < 0) { +#ifdef WARNINGS + printf("FATAL! 
r^2 < 0!\n"); +#endif + chiSquared = -1; + return -1; + } + + radius = alpaka::math::sqrt(acc, g * g + f * f - c); + // compute chi squared + chiSquared = 0.f; + for (size_t i = 0; i < nPoints; i++) { + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) / sigmas2[i]; + } + return radius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquared(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float g, + float f, + float radius) { + // given values of (g, f, radius) and a set of points (and its uncertainties) + // compute chi squared + float c = g * g + f * f - radius * radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma2; + for (size_t i = 0; i < nPoints; i++) { + absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) + : kPi / 2.f); + if (xs[i] > 0 and ys[i] > 0) { + angleM = kPi / 2.f - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + kPi / 2.f; + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + kPi / 2.f); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(kPi / 2.f - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2; + } + return chiSquared; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void 
runDeltaBetaIterationsT5(TAcc const& acc, + float& betaIn, + float& betaOut, + float betaAv, + float& pt_beta, + float sdIn_dr, + float sdOut_dr, + float dr, + float lIn) { + if (lIn == 0) { + betaOut += alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); + return; + } + + if (betaIn * betaOut > 0.f and + (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or + (lIn >= 11 and alpaka::math::abs(acc, pt_beta) < + 8.f * kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap + { + const float betaInUpd = + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + const float betaOutUpd = + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); //FIXME: need a faster version + betaAv = 0.5f * (betaInUpd + betaOutUpd); + + //1st update + const float pt_beta_inv = + 1.f / alpaka::math::abs(acc, dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv)); //get a better pt estimate + + betaIn += alpaka::math::copysign( + acc, + alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), + betaIn); //FIXME: need a faster version + betaOut += alpaka::math::copysign( + acc, + alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), + betaOut); //FIXME: need a faster version + //update the av and pt + betaAv = 0.5f * (betaIn + betaOut); + //2nd update + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) && + alpaka::math::abs(acc, pt_beta) < 12.f * 
kPt_betaMax) //use betaIn sign as ref + { + const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn); + + const float betaInUpd = + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + const float betaOutUpd = + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) + ? (0.5f * (betaInUpd + betaOutUpd)) + : betaInUpd; + + //1st update + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + betaIn += alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + betaOut += alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + //update the av and pt + betaAv = 0.5f * (betaIn + betaOut); + //2nd update + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + bool isPS_InLo = 
(modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + + float zHi = z_InLo + (z_InLo + kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + + (zpitch_InLo + zpitch_OutLo); + float zLo = z_InLo + (z_InLo - kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ? 
1.f : dzDrtScale) - + (zpitch_InLo + zpitch_OutLo); + + //Cut 1 - z compatibility + if ((z_OutLo < zLo) || (z_OutLo > zHi)) + return false; + + float drt_OutLo_InLo = (rt_OutLo - rt_InLo); + float r3_InLo = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo); + float drt_InSeg = rt_InOut - rt_InLo; + float dz_InSeg = z_InOut - z_InLo; + float dr3_InSeg = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - + alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + + float coshEta = dr3_InSeg / drt_InSeg; + float dzErr = (zpitch_InLo + zpitch_OutLo) * (zpitch_InLo + zpitch_OutLo) * 2.f; + + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (r3_InLo / rt_InLo); + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + dzErr += muls2 * drt_OutLo_InLo * drt_OutLo_InLo / 3.f * coshEta * coshEta; + dzErr = alpaka::math::sqrt(acc, dzErr); + + // Constructing upper and lower bound + const float dzMean = dz_InSeg / drt_InSeg * drt_OutLo_InLo; + const float zWindow = + dzErr / drt_InSeg * drt_OutLo_InLo + + (zpitch_InLo + zpitch_OutLo); //FIXME for ptCut lower than ~0.8 need to add curv path correction + float zLoPointed = z_InLo + dzMean * (z_InLo > 0.f ? 1.f : dzDrtScale) - zWindow; + float zHiPointed = z_InLo + dzMean * (z_InLo < 0.f ? 
1.f : dzDrtScale) + zWindow; + + // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) + if ((z_OutLo < zLoPointed) || (z_OutLo > zHiPointed)) + return false; + + float pvOffset = 0.1f / rt_OutLo; + float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float deltaPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); + // Cut #3: FIXME:deltaPhiPos can be tighter + if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) + return false; + + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + // Cut #4: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + + alpha_OutUp = phi_mpi_pi(acc, + phi(acc, + mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorPhi()[fourthMDIndex]); + + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + float tl_axis_highEdge_x = tl_axis_x; + 
float tl_axis_highEdge_y = tl_axis_y; + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + float betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + float betaOut = -alpha_OutUp + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[fourthMDIndex]); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if (isEC_lastLayer) { + alpha_OutUp_highEdge = phi_mpi_pi(acc, + phi(acc, + mds.anchorHighEdgeX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorHighEdgePhi()[fourthMDIndex]); + alpha_OutUp_lowEdge = phi_mpi_pi(acc, + phi(acc, + mds.anchorLowEdgeX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorLowEdgePhi()[fourthMDIndex]); + + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + + betaOutRHmin = + -alpha_OutUp_highEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - mds.anchorHighEdgePhi()[fourthMDIndex]); + betaOutRHmax = + -alpha_OutUp_lowEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - mds.anchorLowEdgePhi()[fourthMDIndex]); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + float corrF = 1.f; + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + 
(mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float betaInCut = + alpaka::math::asin( + acc, alpaka::math::min(acc, (-rt_InSeg * corrF + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / drt_InSeg); + + //Cut #5: first beta cut + if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut) + return false; + + float betaAv = 0.5f * (betaIn + betaOut); + float pt_beta = drt_tl_axis * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + int lIn = 5; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; + + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.f; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.f; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_maxPtBeta = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confimm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi = alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + float betaOutCut = + alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if (alpaka::math::abs(acc, 
betaOut) >= betaOutCut) + return false; + + float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg); + float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + + float dBeta = betaIn - betaOut; + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + bool isPS_InLo = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? 
kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + + // Cut #0: Preliminary (Only here in endcap case) + if (z_InLo * z_OutLo <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; + float rtGeom1 = isOutSgInnerMDPS ? kPixelPSZpitch : kStrip2SZpitch; + float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InLo); + float rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) - + rtGeom1; //slope correction only on the lower end + float rtOut = rt_OutLo; + + //Cut #1: rt condition + if (rtOut < rtLo) + return false; + + float zInForHi = z_InLo - zGeom1 - dLum; + if (zInForHi * z_InLo < 0) { + zInForHi = alpaka::math::copysign(acc, 0.1f, z_InLo); + } + float rtHi = rt_InLo * (1.f + (z_OutLo - z_InLo + zGeom1) / zInForHi) + rtGeom1; + + //Cut #2: rt condition + if ((rt_OutLo < rtLo) || (rt_OutLo > rtHi)) + return false; + + float rIn = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo); + const float drtSDIn = rt_InOut - rt_InLo; + const float dzSDIn = z_InOut - z_InLo; + const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - + alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + + const float coshEta = dr3SDIn / drtSDIn; //direction estimate + const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); + const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + const float zGeom1_another = kPixelPSZpitch; + float kZ = (z_OutLo - z_InLo) / dzSDIn; + float drtErr = + zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); + const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (rIn / rt_InLo); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; 
+ drtErr = alpaka::math::sqrt(acc, drtErr); + + //Cut #3: rt-z pointed + if ((kZ < 0) || (rtOut < rtLo) || (rtOut > rtHi)) + return false; + + const float pvOffset = 0.1f / rt_OutLo; + float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float deltaPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); + + //Cut #4: deltaPhiPos can be tighter + if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) + return false; + + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + // Cut #5: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float sdIn_alpha_min = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alpha_max = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + float sdOut_alpha = sdIn_alpha; + + float sdOut_alphaOut = phi_mpi_pi(acc, + phi(acc, + mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorPhi()[fourthMDIndex]); + + float sdOut_alphaOut_min = phi_mpi_pi( + acc, __H2F(segments.dPhiChangeMins()[outerSegmentIndex]) - __H2F(segments.dPhiMins()[outerSegmentIndex])); + float sdOut_alphaOut_max = phi_mpi_pi( + acc, __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]) - __H2F(segments.dPhiMaxs()[outerSegmentIndex])); + + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - 
mds.anchorPhi()[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + float betaOut = -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[fourthMDIndex]); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + bool isEC_secondLayer = (modules.subdets()[innerOuterLowerModuleIndex] == Endcap) and + (modules.moduleType()[innerOuterLowerModuleIndex] == TwoS); + + if (isEC_secondLayer) { + betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha; + betaInRHmax = betaIn - sdIn_alpha_max + sdIn_alpha; + } + + betaOutRHmin = betaOut - sdOut_alphaOut_min + sdOut_alphaOut; + betaOutRHmax = betaOut - sdOut_alphaOut_max + sdOut_alphaOut; + + float swapTemp; + if (alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) { + swapTemp = betaOutRHmin; + betaOutRHmin = betaOutRHmax; + betaOutRHmax = swapTemp; + } + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + const float corrF = 1.f; + float betaInCut = + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #6: first beta cut + if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut) + return false; + + float betaAv = 0.5f * (betaIn + betaOut); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + + float lIn = 5; + float lOut = 11; + + float sdOut_dr = alpaka::math::sqrt(acc, + 
(mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; + + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? (2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_maxPtBeta = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 
0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi = alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + float dBetaROut = 0; + if (modules.moduleType()[outerOuterLowerModuleIndex] == TwoS) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / dr; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if (alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + + float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, sdIn_d); + float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + //Cut #7: Cut on dBet + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int 
firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + + // Cut #0: Preliminary (Only here in endcap case) + if ((z_InLo * z_OutLo) <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; + bool isInSgInnerMDPS = modules.moduleType()[innerInnerLowerModuleIndex] == PS; + + float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgInnerMDPS) ? 
kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; + + float dz = z_OutLo - z_InLo; + float rtLo = rt_InLo * (1.f + dz / (z_InLo + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end + + float rtOut = rt_OutLo; + + //Cut #1: rt condition + + float rtHi = rt_InLo * (1.f + dz / (z_InLo - dLum)) + rtGeom; + + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + bool isInSgOuterMDPS = modules.moduleType()[innerOuterLowerModuleIndex] == PS; + + const float drtSDIn = rt_InOut - rt_InLo; + const float dzSDIn = z_InOut - z_InLo; + const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - + alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + float coshEta = dr3SDIn / drtSDIn; //direction estimate + float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); + float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + + float kZ = (z_OutLo - z_InLo) / dzSDIn; + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f); + + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + + float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); + float rtWindow = drtErr + rtGeom; + float rtLo_point = rt_InLo + drtMean / dzDrtScale - rtWindow; + float rtHi_point = rt_InLo + drtMean + rtWindow; + + // Cut #3: rt-z pointed + // https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765 + + if (isInSgInnerMDPS and isInSgOuterMDPS) // If both PS then we can point + { + if (kZ < 0 || rtOut < rtLo_point || rtOut > rtHi_point) + return false; + } + + float pvOffset = 0.1f / rtOut; + float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float deltaPhiPos = phi_mpi_pi(acc, 
mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); + + if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) + return false; + + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + // Cut #5: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float sdOut_alpha = sdIn_alpha; //weird + float sdOut_dPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[thirdMDIndex]); + + float sdOut_dPhiChange = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + float sdOut_dPhiChange_min = __H2F(segments.dPhiChangeMins()[outerSegmentIndex]); + float sdOut_dPhiChange_max = __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]); + + float sdOut_alphaOutRHmin = phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); + float sdOut_alphaOutRHmax = phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); + float sdOut_alphaOut = phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos); + + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; + float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; + + float betaOut = -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - 
mds.anchorPhi()[fourthMDIndex]); + + float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut; + float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut; + + float swapTemp; + if (alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) { + swapTemp = betaOutRHmin; + betaOutRHmin = betaOutRHmax; + betaOutRHmax = swapTemp; + } + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + const float corrF = 1.f; + float betaInCut = + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #6: first beta cut + if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut) + return false; + + float betaAv = 0.5f * (betaIn + betaOut); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + + int lIn = 11; //endcap + int lOut = 13; //endcap + + float sdOut_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; + + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? 
(2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? (2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_maxPtBeta = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 
0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut2 = 0; //TODO-RH + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if (alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + + float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, sdIn_d); + float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + //Cut #7: Cut on dBeta + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + short innerOuterLowerModuleSubdet = modules.subdets()[innerOuterLowerModuleIndex]; + short outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; + + if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == 
Barrel and outerOuterLowerModuleSubdet == Barrel) { + return runQuintupletDefaultAlgoBBBB(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBEE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBBB(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Endcap and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBEE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Endcap and innerOuterLowerModuleSubdet == Endcap and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return 
runQuintupletDefaultAlgoEEEE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } + + return false; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + TripletsConst triplets, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float& innerRadius, + float& outerRadius, + float& bridgeRadius, + float& regressionG, + float& regressionF, + float& regressionRadius, + float& rzChiSquared, + float& chiSquared, + float& nonAnchorChiSquared, + bool& tightCutFlag) { + unsigned int firstSegmentIndex = triplets.segmentIndices()[innerTripletIndex][0]; + unsigned int secondSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int thirdSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[outerTripletIndex][1]; + + unsigned int innerOuterOuterMiniDoubletIndex = + segments.mdIndices()[secondSegmentIndex][1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = + segments.mdIndices()[thirdSegmentIndex][0]; //outer triplet inner segment inner MD index + + //this cut reduces the number of candidates by a factor of 3, i.e., 2 out of 3 warps can end right here! 
+ if (innerOuterOuterMiniDoubletIndex != outerInnerInnerMiniDoubletIndex) + return false; + + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; + + if (not runQuintupletAlgoSelector(acc, + modules, + mds, + segments, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + firstSegmentIndex, + thirdSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex)) + return false; + + if (not runQuintupletAlgoSelector(acc, + modules, + mds, + segments, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex4, + lowerModuleIndex5, + firstSegmentIndex, + fourthSegmentIndex, + firstMDIndex, + secondMDIndex, + fourthMDIndex, + fifthMDIndex)) + return false; + + float x1 = mds.anchorX()[firstMDIndex]; + float x2 = mds.anchorX()[secondMDIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float x4 = mds.anchorX()[fourthMDIndex]; + float x5 = mds.anchorX()[fifthMDIndex]; + + float y1 = mds.anchorY()[firstMDIndex]; + float y2 = mds.anchorY()[secondMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + float y4 = mds.anchorY()[fourthMDIndex]; + float y5 = mds.anchorY()[fifthMDIndex]; + + //construct the arrays + float x1Vec[] = {x1, x1, x1}; + float y1Vec[] = {y1, y1, y1}; + float x2Vec[] = {x2, x2, x2}; + float y2Vec[] = {y2, y2, y2}; + float x3Vec[] = {x3, x3, x3}; + float y3Vec[] = {y3, y3, y3}; + + if (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS) { + x1Vec[1] = mds.anchorLowEdgeX()[firstMDIndex]; + x1Vec[2] = mds.anchorHighEdgeX()[firstMDIndex]; + + y1Vec[1] = mds.anchorLowEdgeY()[firstMDIndex]; + y1Vec[2] = mds.anchorHighEdgeY()[firstMDIndex]; + } + if 
(modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS) { + x2Vec[1] = mds.anchorLowEdgeX()[secondMDIndex]; + x2Vec[2] = mds.anchorHighEdgeX()[secondMDIndex]; + + y2Vec[1] = mds.anchorLowEdgeY()[secondMDIndex]; + y2Vec[2] = mds.anchorHighEdgeY()[secondMDIndex]; + } + if (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS) { + x3Vec[1] = mds.anchorLowEdgeX()[thirdMDIndex]; + x3Vec[2] = mds.anchorHighEdgeX()[thirdMDIndex]; + + y3Vec[1] = mds.anchorLowEdgeY()[thirdMDIndex]; + y3Vec[2] = mds.anchorHighEdgeY()[thirdMDIndex]; + } + + float innerRadiusMin2S, innerRadiusMax2S; + computeErrorInRadius(acc, x1Vec, y1Vec, x2Vec, y2Vec, x3Vec, y3Vec, innerRadiusMin2S, innerRadiusMax2S); + + for (int i = 0; i < 3; i++) { + x1Vec[i] = x4; + y1Vec[i] = y4; + } + if (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS) { + x1Vec[1] = mds.anchorLowEdgeX()[fourthMDIndex]; + x1Vec[2] = mds.anchorHighEdgeX()[fourthMDIndex]; + + y1Vec[1] = mds.anchorLowEdgeY()[fourthMDIndex]; + y1Vec[2] = mds.anchorHighEdgeY()[fourthMDIndex]; + } + + float bridgeRadiusMin2S, bridgeRadiusMax2S; + computeErrorInRadius(acc, x2Vec, y2Vec, x3Vec, y3Vec, x1Vec, y1Vec, bridgeRadiusMin2S, bridgeRadiusMax2S); + + for (int i = 0; i < 3; i++) { + x2Vec[i] = x5; + y2Vec[i] = y5; + } + if (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS) { + x2Vec[1] = mds.anchorLowEdgeX()[fifthMDIndex]; + x2Vec[2] = mds.anchorHighEdgeX()[fifthMDIndex]; + + y2Vec[1] = mds.anchorLowEdgeY()[fifthMDIndex]; + y2Vec[2] = mds.anchorHighEdgeY()[fifthMDIndex]; + } + + float outerRadiusMin2S, outerRadiusMax2S; + computeErrorInRadius(acc, x3Vec, y3Vec, x1Vec, y1Vec, x2Vec, y2Vec, outerRadiusMin2S, outerRadiusMax2S); + + float g, f; + outerRadius = triplets.radius()[outerTripletIndex]; + bridgeRadius = computeRadiusFromThreeAnchorHits(acc, x2, y2, x3, y3, 
x4, y4, g, f); + innerRadius = triplets.radius()[innerTripletIndex]; + g = triplets.centerX()[innerTripletIndex]; + f = triplets.centerY()[innerTripletIndex]; + + float inner_pt = 2 * k2Rinv1GeVf * innerRadius; + + if (not passT5RZConstraint(acc, + modules, + mds, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex, + fifthMDIndex, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rzChiSquared, + inner_pt, + innerRadius, + g, + f, + tightCutFlag)) + return false; + + if (innerRadius < 0.95f * ptCut / (2.f * k2Rinv1GeVf)) + return false; + + //split by category + bool matchedRadii; + if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Barrel and + modules.subdets()[lowerModuleIndex5] == Barrel) { + matchedRadii = matchRadiiBBBBB(acc, innerRadius, bridgeRadius, outerRadius); + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Barrel and + modules.subdets()[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBBBBE(acc, innerRadius, bridgeRadius, outerRadius); + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { + if (modules.layers()[lowerModuleIndex1] == 1) { + matchedRadii = + matchRadiiBBBEE12378(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else if (modules.layers()[lowerModuleIndex1] == 2) { + matchedRadii = + matchRadiiBBBEE23478(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else { + 
matchedRadii = + matchRadiiBBBEE34578(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } + } + + else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Endcap and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBBEEE(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Endcap and + modules.subdets()[lowerModuleIndex3] == Endcap and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBEEEE(acc, + innerRadius, + bridgeRadius, + outerRadius, + innerRadiusMin2S, + innerRadiusMax2S, + bridgeRadiusMin2S, + bridgeRadiusMax2S); + } else { + matchedRadii = matchRadiiEEEEE(acc, + innerRadius, + bridgeRadius, + outerRadius, + innerRadiusMin2S, + innerRadiusMax2S, + bridgeRadiusMin2S, + bridgeRadiusMax2S); + } + + //compute regression radius right here - this computation is expensive!!! 
+ if (not matchedRadii) + return false; + + float xVec[] = {x1, x2, x3, x4, x5}; + float yVec[] = {y1, y2, y3, y4, y5}; + const uint16_t lowerModuleIndices[] = { + lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; + + // 5 categories for sigmas + float sigmas2[5], delta1[5], delta2[5], slopes[5]; + bool isFlat[5]; + + computeSigmasForRegression(acc, modules, lowerModuleIndices, delta1, delta2, slopes, isFlat); + regressionRadius = computeRadiusUsingRegression(acc, + Params_T5::kLayers, + xVec, + yVec, + delta1, + delta2, + slopes, + isFlat, + regressionG, + regressionF, + sigmas2, + chiSquared); + + unsigned int mdIndices[] = {firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex}; + float inference = t5dnn::runInference(acc, + modules, + mds, + segments, + triplets, + xVec, + yVec, + mdIndices, + lowerModuleIndices, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius); + tightCutFlag = tightCutFlag and (inference > t5dnn::kLSTWp2); // T5-in-TC cut + if (inference <= t5dnn::kLSTWp2) // T5-building cut + return false; + + //compute the other chisquared + //non anchor is always shifted for tilted and endcap! 
+ float nonAnchorDelta1[Params_T5::kLayers], nonAnchorDelta2[Params_T5::kLayers], nonAnchorSlopes[Params_T5::kLayers]; + float nonAnchorxs[] = {mds.outerX()[firstMDIndex], + mds.outerX()[secondMDIndex], + mds.outerX()[thirdMDIndex], + mds.outerX()[fourthMDIndex], + mds.outerX()[fifthMDIndex]}; + float nonAnchorys[] = {mds.outerY()[firstMDIndex], + mds.outerY()[secondMDIndex], + mds.outerY()[thirdMDIndex], + mds.outerY()[fourthMDIndex], + mds.outerY()[fifthMDIndex]}; + + computeSigmasForRegression(acc, + modules, + lowerModuleIndices, + nonAnchorDelta1, + nonAnchorDelta2, + nonAnchorSlopes, + isFlat, + Params_T5::kLayers, + false); + nonAnchorChiSquared = computeChiSquared(acc, + Params_T5::kLayers, + nonAnchorxs, + nonAnchorys, + nonAnchorDelta1, + nonAnchorDelta2, + nonAnchorSlopes, + isFlat, + regressionG, + regressionF, + regressionRadius); + return true; + } + + struct CreateQuintuplets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + Triplets triplets, + TripletsOccupancyConst tripletsOccupancy, + Quintuplets quintuplets, + QuintupletsOccupancy quintupletsOccupancy, + ObjectRangesConst ranges, + uint16_t nEligibleT5Modules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int iter = globalThreadIdx[0]; iter < nEligibleT5Modules; iter += gridThreadExtent[0]) { + uint16_t lowerModule1 = ranges.indicesOfEligibleT5Modules()[iter]; + short layer2_adjustment; + int layer = modules.layers()[lowerModule1]; + if (layer == 1) { + layer2_adjustment = 1; + } // get upper segment to be in second layer + else if (layer == 2) { + layer2_adjustment = 0; + } // get lower segment to be in second layer + else { + continue; + } + unsigned int nInnerTriplets = tripletsOccupancy.nTriplets()[lowerModule1]; + for (unsigned int innerTripletArrayIndex = globalThreadIdx[1]; innerTripletArrayIndex < nInnerTriplets; + 
innerTripletArrayIndex += gridThreadExtent[1]) { + unsigned int innerTripletIndex = ranges.tripletModuleIndices()[lowerModule1] + innerTripletArrayIndex; + uint16_t lowerModule2 = triplets.lowerModuleIndices()[innerTripletIndex][1]; + uint16_t lowerModule3 = triplets.lowerModuleIndices()[innerTripletIndex][2]; + unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[lowerModule3]; + for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; + outerTripletArrayIndex += gridThreadExtent[2]) { + unsigned int outerTripletIndex = ranges.tripletModuleIndices()[lowerModule3] + outerTripletArrayIndex; + uint16_t lowerModule4 = triplets.lowerModuleIndices()[outerTripletIndex][1]; + uint16_t lowerModule5 = triplets.lowerModuleIndices()[outerTripletIndex][2]; + + float innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, + chiSquared, nonAnchorChiSquared; //required for making distributions + + bool tightCutFlag = false; + bool success = runQuintupletDefaultAlgo(acc, + modules, + mds, + segments, + triplets, + lowerModule1, + lowerModule2, + lowerModule3, + lowerModule4, + lowerModule5, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius, + regressionG, + regressionF, + regressionRadius, + rzChiSquared, + chiSquared, + nonAnchorChiSquared, + tightCutFlag); + + if (success) { + int totOccupancyQuintuplets = alpaka::atomicAdd( + acc, &quintupletsOccupancy.totOccupancyQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyQuintuplets >= ranges.quintupletModuleOccupancy()[lowerModule1]) { +#ifdef WARNINGS + printf("Quintuplet excess alert! Module index = %d\n", lowerModule1); +#endif + } else { + int quintupletModuleIndex = alpaka::atomicAdd( + acc, &quintupletsOccupancy.nQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + //this if statement should never get executed! 
+ if (ranges.quintupletModuleIndices()[lowerModule1] == -1) { +#ifdef WARNINGS + printf("Quintuplets : no memory for module at module index = %d\n", lowerModule1); +#endif + } else { + unsigned int quintupletIndex = ranges.quintupletModuleIndices()[lowerModule1] + quintupletModuleIndex; + float phi = mds.anchorPhi()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] + [layer2_adjustment]]; + float eta = mds.anchorEta()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] + [layer2_adjustment]]; + float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; + float scores = chiSquared + nonAnchorChiSquared; + addQuintupletToMemory(triplets, + quintuplets, + innerTripletIndex, + outerTripletIndex, + lowerModule1, + lowerModule2, + lowerModule3, + lowerModule4, + lowerModule5, + innerRadius, + bridgeRadius, + outerRadius, + regressionG, + regressionF, + regressionRadius, + rzChiSquared, + chiSquared, + nonAnchorChiSquared, + pt, + eta, + phi, + scores, + layer, + quintupletIndex, + tightCutFlag); + + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; + } + } + } + } + } + } + } + }; + + struct CreateEligibleModulesListForQuintuplets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + TripletsOccupancyConst tripletsOccupancy, + ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nEligibleT5Modulesx = alpaka::declareSharedVar(acc); + int& nTotalQuintupletsx = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalQuintupletsx = 0; + nEligibleT5Modulesx = 0; + } + 
alpaka::syncBlockThreads(acc); + + for (int i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + // Condition for a quintuple to exist for a module + // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + if (tripletsOccupancy.nTriplets()[i] == 0) + continue; + if (module_subdets == Barrel and module_layers >= 3) + continue; + if (module_subdets == Endcap and module_layers > 1) + continue; + + int nEligibleT5Modules = alpaka::atomicAdd(acc, &nEligibleT5Modulesx, 1, alpaka::hierarchy::Threads{}); + + int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 336; + else if (category_number == 0 && eta_number == 1) + occupancy = 414; + else if (category_number == 0 && eta_number == 2) + occupancy = 231; + else if (category_number == 0 && eta_number == 3) + occupancy = 146; + else if (category_number == 3 && eta_number == 1) + occupancy = 0; + else if (category_number == 3 && eta_number == 
2) + occupancy = 191; + else if (category_number == 3 && eta_number == 3) + occupancy = 106; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createEligibleModulesListForQuintupletsGPU! Module index = %i\n", i); +#endif + } + + int nTotQ = alpaka::atomicAdd(acc, &nTotalQuintupletsx, occupancy, alpaka::hierarchy::Threads{}); + ranges.quintupletModuleIndices()[i] = nTotQ; + ranges.indicesOfEligibleT5Modules()[nEligibleT5Modules] = i; + ranges.quintupletModuleOccupancy()[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + ranges.nEligibleT5Modules() = static_cast(nEligibleT5Modulesx); + ranges.nTotalQuints() = static_cast(nTotalQuintupletsx); + } + } + }; + + struct AddQuintupletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (quintupletsOccupancy.nQuintuplets()[i] == 0 or ranges.quintupletModuleIndices()[i] == -1) { + ranges.quintupletRanges()[i][0] = -1; + ranges.quintupletRanges()[i][1] = -1; + } else { + ranges.quintupletRanges()[i][0] = ranges.quintupletModuleIndices()[i]; + ranges.quintupletRanges()[i][1] = + ranges.quintupletModuleIndices()[i] + quintupletsOccupancy.nQuintuplets()[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h new file mode 100644 index 0000000000000..fc885e9d66afe --- 
/dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -0,0 +1,853 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Segment_h +#define RecoTracker_LSTCore_src_alpaka_Segment_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +#include "MiniDoublet.h" +#include "Hit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules_seg(ModulesConst modules, unsigned int moduleIndex) { + // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + short subdet = modules.subdets()[moduleIndex]; + short layer = modules.layers()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + short rod = modules.rods()[moduleIndex]; + + return (subdet == Barrel) && (((side != Center) && (layer == 3)) || + ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules_seg(short subdet, short layer, short side, short rod) { + // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + return (subdet == Barrel) && (((side != Center) && (layer == 3)) || + ((side == NegZ) && (((layer == 2) && (rod 
> 5)) || ((layer == 1) && (rod > 9)))) || + ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(short layer, short ring, short subdet, short side, short rod) { + static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + static constexpr float miniDeltaEndcap[5][15] = { + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}}; + + unsigned int iL = layer - 1; + unsigned int iR = ring - 1; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules_seg(subdet, layer, side, rod)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(ModulesConst modules, unsigned int moduleIndex) { + static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + static constexpr float miniDeltaEndcap[5][15] = { + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 
0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}}; + + unsigned int iL = modules.layers()[moduleIndex] - 1; + unsigned int iR = modules.rings()[moduleIndex] - 1; + short subdet = modules.subdets()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules_seg(modules, moduleIndex)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, + float* dAlphaThresholdValues, + ModulesConst modules, + MiniDoubletsConst mds, + float xIn, + float yIn, + float zIn, + float rtIn, + float xOut, + float yOut, + float zOut, + float rtOut, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex) { + float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) + ? 
kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; + + //more accurate than (outer rt - inner rt): x-y distance between the inner and outer points passed in + float segmentDr = alpaka::math::sqrt(acc, (yOut - yIn) * (yOut - yIn) + (xOut - xIn) * (xOut - xIn)); + + const float dAlpha_Bfield = + alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax));  //asin argument is capped at kSinAlphaMax + + bool isInnerTilted = + modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] != Center;  //tilted: barrel module whose side is not Center + bool isOuterTilted = + modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] != Center; + + float drdzInner = modules.drdzs()[innerLowerModuleIndex]; + float drdzOuter = modules.drdzs()[outerLowerModuleIndex]; + float innerModuleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex); + float outerModuleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex); + const float innerminiTilt2 = isInnerTilted + ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / + (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) + : 0;  //tilt term is zero when the inner module is not tilted + + const float outerminiTilt2 = isOuterTilted + ? 
((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzOuter * drdzOuter) / + (1.f + drdzOuter * drdzOuter) / (outerModuleGapSize * outerModuleGapSize)) + : 0; + + float miniDelta = innerModuleGapSize; + + float sdLumForInnerMini2; + float sdLumForOuterMini2; + + if (modules.subdets()[innerLowerModuleIndex] == Barrel) { + sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + } else { + sdLumForInnerMini2 = (mds.dphis()[innerMDIndex] * mds.dphis()[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mds.dzs()[innerMDIndex] * mds.dzs()[innerMDIndex]); + } + + if (modules.subdets()[outerLowerModuleIndex] == Barrel) { + sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + } else { + sdLumForOuterMini2 = (mds.dphis()[outerMDIndex] * mds.dphis()[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mds.dzs()[outerMDIndex] * mds.dzs()[outerMDIndex]); + } + + // Unique stuff for the segment dudes alone + float dAlpha_res_inner = + 0.02f / miniDelta * + (modules.subdets()[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + float dAlpha_res_outer = + 0.02f / miniDelta * + (modules.subdets()[outerLowerModuleIndex] == Barrel ? 
1.0f : alpaka::math::abs(acc, zOut) / rtOut); + + float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; + + if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] == Center) { + dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } else { + dAlphaThresholdValues[0] = + dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); + } + + if (modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] == Center) { + dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } else { + dAlphaThresholdValues[1] = + dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForOuterMini2); + } + + //Inner to outer + dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments segments, + unsigned int lowerMDIndex, + unsigned int upperMDIndex, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDAnchorHitIndex, + unsigned int outerMDAnchorHitIndex, + float dPhi, + float dPhiMin, + float dPhiMax, + float dPhiChange, + float dPhiChangeMin, + float dPhiChangeMax, + unsigned int idx) { + segments.mdIndices()[idx][0] = lowerMDIndex; + segments.mdIndices()[idx][1] = upperMDIndex; + segments.innerLowerModuleIndices()[idx] = innerLowerModuleIndex; + segments.outerLowerModuleIndices()[idx] = outerLowerModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerMDAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerMDAnchorHitIndex; + + segments.dPhis()[idx] = __F2H(dPhi); + segments.dPhiMins()[idx] = __F2H(dPhiMin); + segments.dPhiMaxs()[idx] = __F2H(dPhiMax); + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + 
segments.dPhiChangeMins()[idx] = __F2H(dPhiChangeMin); + segments.dPhiChangeMaxs()[idx] = __F2H(dPhiChangeMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelSegmentToMemory(TAcc const& acc, + Segments segments, + SegmentsPixel segmentsPixel, + MiniDoubletsConst mds, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + uint16_t pixelModuleIndex, + unsigned int hitIdxs[4], + unsigned int innerAnchorHitIndex, + unsigned int outerAnchorHitIndex, + float dPhiChange, + unsigned int idx, + unsigned int pixelSegmentArrayIndex, + float score) {  //stores one pixel segment: shared segment fields at idx, pixel-only fields at pixelSegmentArrayIndex + segments.mdIndices()[idx][0] = innerMDIndex; + segments.mdIndices()[idx][1] = outerMDIndex; + segments.innerLowerModuleIndices()[idx] = pixelModuleIndex;  //inner and outer module are both the pixel module + segments.outerLowerModuleIndices()[idx] = pixelModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerAnchorHitIndex; + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + + segmentsPixel.isDup()[pixelSegmentArrayIndex] = false; + segmentsPixel.partOfPT5()[pixelSegmentArrayIndex] = false; + segmentsPixel.score()[pixelSegmentArrayIndex] = score; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].x = hitIdxs[0]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].y = hitIdxs[1]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].z = hitIdxs[2]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].w = hitIdxs[3]; + + //computing circle parameters: two candidate centers are built from the inner MD, the one closer to consistent with the outer MD is kept + /* + The two anchor hits are r3PCA and r3LH. 
p3PCA pt, eta, phi is hitIndex1 x, y, z (NOTE(review): so outerX/outerZ of the inner pixel MD read below presumably hold pt and phi rather than coordinates; confirm against the code that fills them) + */ + float circleRadius = mds.outerX()[innerMDIndex] / (2 * k2Rinv1GeVf); + float circlePhi = mds.outerZ()[innerMDIndex]; + float candidateCenterXs[] = {mds.anchorX()[innerMDIndex] + circleRadius * alpaka::math::sin(acc, circlePhi), + mds.anchorX()[innerMDIndex] - circleRadius * alpaka::math::sin(acc, circlePhi)}; + float candidateCenterYs[] = {mds.anchorY()[innerMDIndex] - circleRadius * alpaka::math::cos(acc, circlePhi), + mds.anchorY()[innerMDIndex] + circleRadius * alpaka::math::cos(acc, circlePhi)}; + + //check which of the circles can accommodate r3LH better (we won't get perfect agreement); the figure of merit is |distance from the outer anchor hit to the candidate center - circleRadius| + float bestChiSquared = kVerticalModuleSlope; + float chiSquared; + size_t bestIndex = 0;  //fall back to the first candidate when neither comparison below succeeds (e.g. NaN residual), so the reads after the loop never use an indeterminate index + for (size_t i = 0; i < 2; i++) { + chiSquared = alpaka::math::abs(acc, + alpaka::math::sqrt(acc, + (mds.anchorX()[outerMDIndex] - candidateCenterXs[i]) * + (mds.anchorX()[outerMDIndex] - candidateCenterXs[i]) + + (mds.anchorY()[outerMDIndex] - candidateCenterYs[i]) * + (mds.anchorY()[outerMDIndex] - candidateCenterYs[i])) - + circleRadius); + if (chiSquared < bestChiSquared) { + bestChiSquared = chiSquared; + bestIndex = i; + } + } + segmentsPixel.circleCenterX()[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; + segmentsPixel.circleCenterY()[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; + segmentsPixel.circleRadius()[pixelSegmentArrayIndex] = circleRadius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + float& dPhi, + float& dPhiMin, + float& dPhiMax, + float& dPhiChange, + float& dPhiChangeMin, + float& dPhiChangeMax) { + float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) + ? 
kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; + + float xIn, yIn, zIn, rtIn, xOut, yOut, zOut, rtOut; + + xIn = mds.anchorX()[innerMDIndex]; + yIn = mds.anchorY()[innerMDIndex]; + zIn = mds.anchorZ()[innerMDIndex]; + rtIn = mds.anchorRt()[innerMDIndex]; + + xOut = mds.anchorX()[outerMDIndex]; + yOut = mds.anchorY()[outerMDIndex]; + zOut = mds.anchorZ()[outerMDIndex]; + rtOut = mds.anchorRt()[outerMDIndex]; + + float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + float sdPVoff = 0.1f / rtOut; + float dzDrtScale = alpaka::math::tan(acc, sdSlope) / sdSlope; //FIXME: need appropriate value + + const float zGeom = modules.layers()[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + + float zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - + zGeom; //slope-correction only on outer end + float zHi = zIn + (zIn + kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn < 0.f ? 
1.f : dzDrtScale) + zGeom;
+
+    if ((zOut < zLo) || (zOut > zHi))
+      return false;
+
+    float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff);
+
+    dPhi = phi_mpi_pi(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]);
+
+    if (alpaka::math::abs(acc, dPhi) > sdCut)
+      return false;
+
+    dPhiChange = phi_mpi_pi(acc, phi(acc, xOut - xIn, yOut - yIn) - mds.anchorPhi()[innerMDIndex]);
+
+    if (alpaka::math::abs(acc, dPhiChange) > sdCut)
+      return false;
+
+    float dAlphaThresholdValues[3];
+    dAlphaThreshold(acc,
+                    dAlphaThresholdValues,
+                    modules,
+                    mds,
+                    xIn,
+                    yIn,
+                    zIn,
+                    rtIn,
+                    xOut,
+                    yOut,
+                    zOut,
+                    rtOut,
+                    innerLowerModuleIndex,
+                    outerLowerModuleIndex,
+                    innerMDIndex,
+                    outerMDIndex);
+
+    float innerMDAlpha = mds.dphichanges()[innerMDIndex];
+    float outerMDAlpha = mds.dphichanges()[outerMDIndex];
+    float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange;
+    float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange;
+    float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha;
+
+    float dAlphaInnerMDSegmentThreshold = dAlphaThresholdValues[0];
+    float dAlphaOuterMDSegmentThreshold = dAlphaThresholdValues[1];
+    float dAlphaInnerMDOuterMDThreshold = dAlphaThresholdValues[2];
+
+    if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold)
+      return false;
+    if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold)
+      return false;
+    return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold;
+  }
+  /* runSegmentDefaultAlgoEndcap: decide whether the mini-doublet pair (innerMDIndex, outerMDIndex)
+   * forms a valid segment. It is the branch runSegmentDefaultAlgo takes whenever the two lower
+   * modules are not both Barrel.
+   *
+   * Cuts are applied in order, returning false at the first failure:
+   *   0. zIn and zOut must not have opposite signs;
+   *   1. rtOut must lie inside [rtLo, rtHi];
+   *   2. |dPhi| <= sdCut, with sdCut = sdSlope;
+   *   3. |dPhiChange| <= sdCut;
+   *   4. the three dAlpha differences must each be strictly below the thresholds that
+   *      dAlphaThreshold() writes into dAlphaThresholdValues.
+   *
+   * Outputs (by reference): dPhi, dPhiMin, dPhiMax, dPhiChange, dPhiChangeMin, dPhiChangeMax.
+   * On an early return only the outputs assigned before that point have been written.
+   *
+   * NOTE(review): the template parameter list after `template` is missing in this text,
+   * presumably stripped during extraction; confirm against the original header.
+   */
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc,
+                                                                  ModulesConst modules,
+                                                                  MiniDoubletsConst mds,
+                                                                  uint16_t innerLowerModuleIndex,
+                                                                  uint16_t outerLowerModuleIndex,
+                                                                  unsigned int innerMDIndex,
+                                                                  unsigned int outerMDIndex,
+                                                                  float& dPhi,
+                                                                  float& dPhiMin,
+                                                                  float& dPhiMax,
+                                                                  float& dPhiChange,
+                                                                  float& dPhiChangeMin,
+                                                                  float& dPhiChangeMax) {
+    float xIn, yIn, zIn, rtIn, xOut, yOut, zOut, rtOut;
+
+    xIn = mds.anchorX()[innerMDIndex];
+    yIn = mds.anchorY()[innerMDIndex];
+    zIn = mds.anchorZ()[innerMDIndex];
+    rtIn = mds.anchorRt()[innerMDIndex];
+
+    xOut = mds.anchorX()[outerMDIndex];
+    yOut = mds.anchorY()[outerMDIndex];
+    zOut = mds.anchorZ()[outerMDIndex];
+    rtOut = mds.anchorRt()[outerMDIndex];
+
+    bool outerLayerEndcapTwoS =
+        (modules.subdets()[outerLowerModuleIndex] == Endcap) && (modules.moduleType()[outerLowerModuleIndex] == TwoS);
+
+    float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax));
+    float disks2SMinRadius = 60.f;  // radius separating the two pitch regimes below; units presumably cm -- TODO confirm
+
+    float rtGeom = ((rtIn < disks2SMinRadius && rtOut < disks2SMinRadius)
+                        ? (2.f * kPixelPSZpitch)
+                        : ((rtIn < disks2SMinRadius || rtOut < disks2SMinRadius) ? (kPixelPSZpitch + kStrip2SZpitch)
+                                                                                 : (2.f * kStrip2SZpitch)));
+
+    //cut 0 - z compatibility
+    if (zIn * zOut < 0)
+      return false;
+
+    float dz = zOut - zIn;
+    float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn);  // kDeltaZLum carrying the sign of zIn
+    float drtDzScale = sdSlope / alpaka::math::tan(acc, sdSlope);
+
+    float rtLo = alpaka::math::max(
+        acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom);  //rt should increase
+    float rtHi = rtIn * (zOut - dLum) / (zIn - dLum) +
+                 rtGeom;  //dLum for luminous; rGeom for measurement size; no tanTheta_loc(pt) correction
+
+    // Completeness
+    if ((rtOut < rtLo) || (rtOut > rtHi))
+      return false;
+
+    dPhi = phi_mpi_pi(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]);
+
+    float sdCut = sdSlope;
+    if (outerLayerEndcapTwoS) {  // outer module is an endcap 2S module: take min/max from its two edge phis
+      float dPhiPos_high = phi_mpi_pi(acc, mds.anchorHighEdgePhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]);
+      float dPhiPos_low = phi_mpi_pi(acc, mds.anchorLowEdgePhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]);
+
+      dPhiMax = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_high : dPhiPos_low;
+      dPhiMin = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_low : dPhiPos_high;
+    } else {
+      dPhiMax = dPhi;
+      dPhiMin = dPhi;
+    }
+    if (alpaka::math::abs(acc, dPhi) > sdCut)
+      return false;
+
+    float dzFrac = dz / zIn;  // NOTE(review): no guard for zIn == 0 or dz == 0; dzFrac is a divisor below
+    dPhiChange = dPhi / dzFrac * (1.f + dzFrac);
+    dPhiChangeMin = dPhiMin / dzFrac * (1.f + dzFrac);
+    dPhiChangeMax = dPhiMax / dzFrac * (1.f + dzFrac);
+
+    if (alpaka::math::abs(acc, dPhiChange) > sdCut)  // only dPhiChange is tested; the Min/Max values are outputs
+      return false;
+
+    float dAlphaThresholdValues[3];
+    dAlphaThreshold(acc,
+                    dAlphaThresholdValues,
+                    modules,
+                    mds,
+                    xIn,
+                    yIn,
+                    zIn,
+                    rtIn,
+                    xOut,
+                    yOut,
+                    zOut,
+                    rtOut,
+                    innerLowerModuleIndex,
+                    outerLowerModuleIndex,
+                    innerMDIndex,
+                    outerMDIndex);
+
+    float innerMDAlpha = mds.dphichanges()[innerMDIndex];
+    float outerMDAlpha = mds.dphichanges()[outerMDIndex];
+    float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange;
+    float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange;
+    float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha;
+
+    float dAlphaInnerMDSegmentThreshold = dAlphaThresholdValues[0];
+    float dAlphaOuterMDSegmentThreshold = dAlphaThresholdValues[1];
+    float dAlphaInnerMDOuterMDThreshold = dAlphaThresholdValues[2];
+
+    if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold)
+      return false;
+    if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold)
+      return false;
+    return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold;
+  }
+  /* runSegmentDefaultAlgo: forward all arguments to runSegmentDefaultAlgoBarrel when both the
+   * inner and the outer lower module are Barrel, otherwise to runSegmentDefaultAlgoEndcap. */
+  template
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc,
+                                                            ModulesConst modules,
+                                                            MiniDoubletsConst mds,
+                                                            uint16_t innerLowerModuleIndex,
+                                                            uint16_t outerLowerModuleIndex,
+                                                            unsigned int innerMDIndex,
+                                                            unsigned int outerMDIndex,
+                                                            float& dPhi,
+                                                            float& dPhiMin,
+                                                            float& dPhiMax,
+                                                            float& dPhiChange,
+                                                            float& dPhiChangeMin,
+                                                            float& dPhiChangeMax) {
+    if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.subdets()[outerLowerModuleIndex] == Barrel) {
+      return runSegmentDefaultAlgoBarrel(acc,
+                                         modules,
+                                         mds,
+
innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax); + } else { + return runSegmentDefaultAlgoEndcap(acc, + modules, + mds, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax); + } + } + + struct CreateSegments { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + MiniDoubletsOccupancyConst mdsOccupancy, + Segments segments, + SegmentsOccupancy segmentsOccupancy, + ObjectRangesConst ranges) const { + auto const globalBlockIdx = alpaka::getIdx(acc); + auto const blockThreadIdx = alpaka::getIdx(acc); + auto const gridBlockExtent = alpaka::getWorkDiv(acc); + auto const blockThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t innerLowerModuleIndex = globalBlockIdx[2]; innerLowerModuleIndex < modules.nLowerModules(); + innerLowerModuleIndex += gridBlockExtent[2]) { + unsigned int nInnerMDs = mdsOccupancy.nMDs()[innerLowerModuleIndex]; + if (nInnerMDs == 0) + continue; + + unsigned int nConnectedModules = modules.nConnectedModules()[innerLowerModuleIndex]; + + for (uint16_t outerLowerModuleArrayIdx = blockThreadIdx[1]; outerLowerModuleArrayIdx < nConnectedModules; + outerLowerModuleArrayIdx += blockThreadExtent[1]) { + uint16_t outerLowerModuleIndex = modules.moduleMap()[innerLowerModuleIndex][outerLowerModuleArrayIdx]; + + unsigned int nOuterMDs = mdsOccupancy.nMDs()[outerLowerModuleIndex]; + + unsigned int limit = nInnerMDs * nOuterMDs; + + if (limit == 0) + continue; + for (unsigned int hitIndex = blockThreadIdx[2]; hitIndex < limit; hitIndex += blockThreadExtent[2]) { + unsigned int innerMDArrayIdx = hitIndex / nOuterMDs; + unsigned int outerMDArrayIdx = hitIndex % nOuterMDs; + if (outerMDArrayIdx >= nOuterMDs) + continue; + + unsigned int innerMDIndex = 
ranges.mdRanges()[innerLowerModuleIndex][0] + innerMDArrayIdx; + unsigned int outerMDIndex = ranges.mdRanges()[outerLowerModuleIndex][0] + outerMDArrayIdx; + + float dPhi, dPhiMin, dPhiMax, dPhiChange, dPhiChangeMin, dPhiChangeMax; + + unsigned int innerMiniDoubletAnchorHitIndex = mds.anchorHitIndices()[innerMDIndex]; + unsigned int outerMiniDoubletAnchorHitIndex = mds.anchorHitIndices()[outerMDIndex]; + dPhiMin = 0; + dPhiMax = 0; + dPhiChangeMin = 0; + dPhiChangeMax = 0; + if (runSegmentDefaultAlgo(acc, + modules, + mds, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax)) { + unsigned int totOccupancySegments = + alpaka::atomicAdd(acc, + &segmentsOccupancy.totOccupancySegments()[innerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); + if (static_cast(totOccupancySegments) >= ranges.segmentModuleOccupancy()[innerLowerModuleIndex]) { +#ifdef WARNINGS + printf("Segment excess alert! 
Module index = %d\n", innerLowerModuleIndex); +#endif + } else { + unsigned int segmentModuleIdx = alpaka::atomicAdd( + acc, &segmentsOccupancy.nSegments()[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int segmentIdx = ranges.segmentModuleIndices()[innerLowerModuleIndex] + segmentModuleIdx; + + addSegmentToMemory(segments, + innerMDIndex, + outerMDIndex, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMiniDoubletAnchorHitIndex, + outerMiniDoubletAnchorHitIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax, + segmentIdx); + } + } + } + } + } + } + }; + + struct CreateSegmentArrayRanges { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ObjectRanges ranges, + MiniDoubletsConst mds) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nTotalSegments = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalSegments = 0; + } + alpaka::syncBlockThreads(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (modules.nConnectedModules()[i] == 0) { + ranges.segmentModuleIndices()[i] = nTotalSegments; + ranges.segmentModuleOccupancy()[i] = 0; + continue; + } + + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + 
category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 572; + else if (category_number == 0 && eta_number == 1) + occupancy = 300; + else if (category_number == 0 && eta_number == 2) + occupancy = 183; + else if (category_number == 0 && eta_number == 3) + occupancy = 62; + else if (category_number == 1 && eta_number == 0) + occupancy = 191; + else if (category_number == 1 && eta_number == 1) + occupancy = 128; + else if (category_number == 2 && eta_number == 1) + occupancy = 107; + else if (category_number == 2 && eta_number == 2) + occupancy = 102; + else if (category_number == 3 && eta_number == 1) + occupancy = 64; + else if (category_number == 3 && eta_number == 2) + occupancy = 79; + else if (category_number == 3 && eta_number == 3) + occupancy = 85; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createSegmentArrayRanges! 
Module index = %i\n", i);
+#endif
+        }
+
+        int nTotSegs = alpaka::atomicAdd(acc, &nTotalSegments, occupancy, alpaka::hierarchy::Threads{});
+        ranges.segmentModuleIndices()[i] = nTotSegs;  // atomicAdd result is used as this module's start offset
+        ranges.segmentModuleOccupancy()[i] = occupancy;
+      }
+
+      // Wait for all threads to finish before reporting final values
+      alpaka::syncBlockThreads(acc);
+      if (cms::alpakatools::once_per_block(acc)) {
+        ranges.segmentModuleIndices()[modules.nLowerModules()] = nTotalSegments;
+        ranges.nTotalSegs() = nTotalSegments;
+      }
+    }
+  };
+  /* AddSegmentRangesToEventExplicit: kernel functor that fills ranges.segmentRanges() for every
+   * lower module i. A module with nSegments()[i] == 0 gets {-1, -1}; otherwise it gets the
+   * inclusive pair {segmentModuleIndices()[i], segmentModuleIndices()[i] + nSegments()[i] - 1}.
+   * The assertion at the top of operator() checks that element [0] of the queried work
+   * division is 1.
+   *
+   * NOTE(review): the template argument lists (after `template`, inside std::is_same_v and on
+   * getIdx/getWorkDiv) are missing in this text, presumably stripped during extraction.
+   */
+  struct AddSegmentRangesToEventExplicit {
+    template
+    ALPAKA_FN_ACC void operator()(TAcc const& acc,
+                                  ModulesConst modules,
+                                  SegmentsOccupancyConst segmentsOccupancy,
+                                  ObjectRanges ranges) const {
+      // implementation is 1D with a single block
+      static_assert(std::is_same_v, "Should be Acc1D");
+      ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1));
+
+      auto const globalThreadIdx = alpaka::getIdx(acc);
+      auto const gridThreadExtent = alpaka::getWorkDiv(acc);
+
+      for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) {
+        if (segmentsOccupancy.nSegments()[i] == 0) {
+          ranges.segmentRanges()[i][0] = -1;  // -1 marks a module without segments
+          ranges.segmentRanges()[i][1] = -1;
+        } else {
+          ranges.segmentRanges()[i][0] = ranges.segmentModuleIndices()[i];
+          ranges.segmentRanges()[i][1] = ranges.segmentModuleIndices()[i] + segmentsOccupancy.nSegments()[i] - 1;
+        }
+      }
+    }
+  };
+  /* AddPixelSegmentToEventKernel: kernel functor that, for each tid in [0, size), stores two
+   * mini-doublets built from hitIndices0/1 and hitIndices2/3 at consecutive slots of the pixel
+   * module's mini-doublet range, and then stores one pixel segment made from that pair. */
+  struct AddPixelSegmentToEventKernel {
+    template
+    ALPAKA_FN_ACC void operator()(TAcc const& acc,
+                                  ModulesConst modules,
+                                  ObjectRangesConst ranges,
+                                  HitsConst hits,
+                                  MiniDoublets mds,
+                                  Segments segments,
+                                  SegmentsPixel segmentsPixel,
+                                  unsigned int* hitIndices0,
+                                  unsigned int* hitIndices1,
+                                  unsigned int* hitIndices2,
+                                  unsigned int* hitIndices3,
+                                  float* dPhiChange,
+                                  uint16_t pixelModuleIndex,
+                                  int size) const {
+      auto const globalThreadIdx = alpaka::getIdx(acc);
+      auto const gridThreadExtent = alpaka::getWorkDiv(acc);
+
+      for (int tid = globalThreadIdx[2]; tid < size; tid
+= gridThreadExtent[2]) { + unsigned int innerMDIndex = ranges.miniDoubletModuleIndices()[pixelModuleIndex] + 2 * (tid); + unsigned int outerMDIndex = ranges.miniDoubletModuleIndices()[pixelModuleIndex] + 2 * (tid) + 1; + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + tid; + + addMDToMemory(acc, + mds, + hits, + modules, + hitIndices0[tid], + hitIndices1[tid], + pixelModuleIndex, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + innerMDIndex); + addMDToMemory(acc, + mds, + hits, + modules, + hitIndices2[tid], + hitIndices3[tid], + pixelModuleIndex, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + outerMDIndex); + + //in outer hits - pt, eta, phi + float slope = alpaka::math::sinh(acc, hits.ys()[mds.outerHitIndices()[innerMDIndex]]); + float intercept = + hits.zs()[mds.anchorHitIndices()[innerMDIndex]] - slope * hits.rts()[mds.anchorHitIndices()[innerMDIndex]]; + float score_lsq = (hits.rts()[mds.anchorHitIndices()[outerMDIndex]] * slope + intercept) - + (hits.zs()[mds.anchorHitIndices()[outerMDIndex]]); + score_lsq = score_lsq * score_lsq; + + unsigned int hits1[Params_pLS::kHits]; + hits1[0] = hits.idxs()[mds.anchorHitIndices()[innerMDIndex]]; + hits1[1] = hits.idxs()[mds.anchorHitIndices()[outerMDIndex]]; + hits1[2] = hits.idxs()[mds.outerHitIndices()[innerMDIndex]]; + hits1[3] = hits.idxs()[mds.outerHitIndices()[outerMDIndex]]; + addPixelSegmentToMemory(acc, + segments, + segmentsPixel, + mds, + innerMDIndex, + outerMDIndex, + pixelModuleIndex, + hits1, + hitIndices0[tid], + hitIndices2[tid], + dPhiChange[tid], + pixelSegmentIndex, + tid, + score_lsq); + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h new file mode 100644 index 0000000000000..1863f262ffd7d --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -0,0 +1,493 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_TrackCandidate_h +#define 
RecoTracker_LSTCore_src_alpaka_TrackCandidate_h
+
+#include "RecoTracker/LSTCore/interface/alpaka/Common.h"
+#include "RecoTracker/LSTCore/interface/ModulesSoA.h"
+#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h"
+#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h"
+#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h"
+#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h"
+#include "RecoTracker/LSTCore/interface/SegmentsSoA.h"
+#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h"
+#include "RecoTracker/LSTCore/interface/TripletsSoA.h"
+
+#include "Hit.h"
+
+namespace ALPAKA_ACCELERATOR_NAMESPACE::lst {  /*
+   * addpLSTrackCandidateToMemory: store a pixel line segment (pLS) as track candidate number
+   * trackCandidateIndex in cands. The candidate type is set to LSTObjType::pLS, trackletIndex
+   * is written to directObjectIndices and to both objectIndices entries, pixelSeedIndex is
+   * copied, and the four hit indices are stored in the order x, z, y, w.
+   */
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& cands,
+                                                                   unsigned int trackletIndex,
+                                                                   unsigned int trackCandidateIndex,
+                                                                   uint4 hitIndices,
+                                                                   int pixelSeedIndex) {
+    cands.trackCandidateType()[trackCandidateIndex] = LSTObjType::pLS;
+    cands.directObjectIndices()[trackCandidateIndex] = trackletIndex;
+    cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex;
+
+    cands.objectIndices()[trackCandidateIndex][0] = trackletIndex;  // a pLS has one constituent: both slots get it
+    cands.objectIndices()[trackCandidateIndex][1] = trackletIndex;
+
+    cands.hitIndices()[trackCandidateIndex][0] =
+        hitIndices.x;  // Order explanation in https://github.com/SegmentLinking/TrackLooper/issues/267
+    cands.hitIndices()[trackCandidateIndex][1] = hitIndices.z;  // z before y is deliberate, see the link above
+    cands.hitIndices()[trackCandidateIndex][2] = hitIndices.y;
+    cands.hitIndices()[trackCandidateIndex][3] = hitIndices.w;
+  }
+  /* addTrackCandidateToMemory: store a track candidate of the given type at trackCandidateIndex.
+   * The number of layers copied is Params_pT5::kLayers when the type is LSTObjType::pT5 and
+   * Params_pT3::kLayers for every other type; twice that many hit indices are copied. centerX,
+   * centerY and radius are converted with __F2H before being stored. */
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& cands,
+                                                                short trackCandidateType,
+                                                                unsigned int innerTrackletIndex,
+                                                                unsigned int outerTrackletIndex,
+                                                                const uint8_t* logicalLayerIndices,
+                                                                const uint16_t* lowerModuleIndices,
+                                                                const unsigned int* hitIndices,
+                                                                int pixelSeedIndex,
+                                                                float centerX,
+                                                                float centerY,
+                                                                float radius,
+                                                                unsigned int trackCandidateIndex,
+                                                                unsigned int directObjectIndex) {
+
cands.trackCandidateType()[trackCandidateIndex] = trackCandidateType; + cands.directObjectIndices()[trackCandidateIndex] = directObjectIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; + + cands.objectIndices()[trackCandidateIndex][0] = innerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = outerTrackletIndex; + + size_t limits = trackCandidateType == LSTObjType::pT5 ? Params_pT5::kLayers : Params_pT3::kLayers; + + //send the starting pointer to the logicalLayer and hitIndices + for (size_t i = 0; i < limits; i++) { + cands.logicalLayers()[trackCandidateIndex][i] = logicalLayerIndices[i]; + cands.lowerModuleIndices()[trackCandidateIndex][i] = lowerModuleIndices[i]; + } + for (size_t i = 0; i < 2 * limits; i++) { + cands.hitIndices()[trackCandidateIndex][i] = hitIndices[i]; + } + cands.centerX()[trackCandidateIndex] = __F2H(centerX); + cands.centerY()[trackCandidateIndex] = __F2H(centerY); + cands.radius()[trackCandidateIndex] = __F2H(radius); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits( + unsigned int ix, unsigned int jx, MiniDoubletsConst mds, SegmentsConst segments, HitsConst hits) { + int phits1[Params_pLS::kHits]; + int phits2[Params_pLS::kHits]; + + phits1[0] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[1] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[ix][1]]]; + phits1[2] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[3] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[ix][1]]]; + + phits2[0] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[1] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[jx][1]]]; + phits2[2] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[3] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[jx][1]]]; + + int npMatched = 0; + + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + if (phits1[i] == -1) + continue; + + for 
(int j = 0; j < Params_pLS::kHits; j++) {
+        if (phits2[j] == -1)
+          continue;
+
+        if (phits1[i] == phits2[j]) {
+          pmatched = true;
+          break;
+        }
+      }
+      if (pmatched)
+        npMatched++;
+    }
+    return npMatched;
+  }
+  /* CrossCleanpT3: kernel functor that flags pixel triplets (pT3) duplicating a pixel
+   * quintuplet (pT5). Each pixel triplet not already flagged is compared with the pixel
+   * segment of every pixel quintuplet; isDup is set when dEta^2 + dPhi^2 < 1e-5.
+   * Pixel triplets are strided over index [2] and pixel quintuplets over index [1] of the
+   * queried thread index/extent.
+   *
+   * NOTE(review): the template argument lists (after `template` and on getIdx/getWorkDiv) are
+   * missing in this text, presumably stripped during extraction.
+   */
+  struct CrossCleanpT3 {
+    template
+    ALPAKA_FN_ACC void operator()(TAcc const& acc,
+                                  ModulesConst modules,
+                                  ObjectRangesConst ranges,
+                                  PixelTriplets pixelTriplets,
+                                  SegmentsPixelConst segmentsPixel,
+                                  PixelQuintupletsConst pixelQuintuplets) const {
+      auto const globalThreadIdx = alpaka::getIdx(acc);
+      auto const gridThreadExtent = alpaka::getWorkDiv(acc);
+
+      unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets();
+      for (unsigned int pixelTripletIndex = globalThreadIdx[2]; pixelTripletIndex < nPixelTriplets;
+           pixelTripletIndex += gridThreadExtent[2]) {
+        if (pixelTriplets.isDup()[pixelTripletIndex])
+          continue;
+
+        // Cross cleaning step
+        float eta1 = __H2F(pixelTriplets.eta_pix()[pixelTripletIndex]);
+        float phi1 = __H2F(pixelTriplets.phi_pix()[pixelTripletIndex]);
+
+        int pixelModuleIndex = modules.nLowerModules();  // the pixel module is indexed right after the lower modules
+        unsigned int prefix = ranges.segmentModuleIndices()[pixelModuleIndex];  // first segment index of the pixel module
+
+        unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets();
+        for (unsigned int pixelQuintupletIndex = globalThreadIdx[1]; pixelQuintupletIndex < nPixelQuintuplets;
+             pixelQuintupletIndex += gridThreadExtent[1]) {
+          unsigned int pLS_jx = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex];
+          float eta2 = segmentsPixel.eta()[pLS_jx - prefix];  // segmentsPixel is indexed relative to prefix
+          float phi2 = segmentsPixel.phi()[pLS_jx - prefix];
+          float dEta = alpaka::math::abs(acc, (eta1 - eta2));
+          float dPhi = calculate_dPhi(phi1, phi2);
+
+          float dR2 = dEta * dEta + dPhi * dPhi;
+          if (dR2 < 1e-5f)
+            pixelTriplets.isDup()[pixelTripletIndex] = true;
+        }
+      }
+    }
+  };
+  /* CrossCleanT5: kernel functor that flags quintuplets (T5) lying within dR^2 < 1e-3 in
+   * (eta, phi) of any pixel quintuplet or pixel triplet. Quintuplets already flagged as
+   * duplicates or as part of a pT5 are skipped. */
+  struct CrossCleanT5 {
+    template
+    ALPAKA_FN_ACC void operator()(TAcc const& acc,
+                                  ModulesConst modules,
+                                  Quintuplets quintuplets,
+                                  QuintupletsOccupancyConst quintupletsOccupancy,
+                                  PixelQuintupletsConst pixelQuintuplets,
+ PixelTripletsConst pixelTriplets, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int innerInnerInnerLowerModuleArrayIndex = globalThreadIdx[0]; + innerInnerInnerLowerModuleArrayIndex < modules.nLowerModules(); + innerInnerInnerLowerModuleArrayIndex += gridThreadExtent[0]) { + if (ranges.quintupletModuleIndices()[innerInnerInnerLowerModuleArrayIndex] == -1) + continue; + + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[innerInnerInnerLowerModuleArrayIndex]; + for (unsigned int innerObjectArrayIndex = globalThreadIdx[1]; innerObjectArrayIndex < nQuints; + innerObjectArrayIndex += gridThreadExtent[1]) { + unsigned int quintupletIndex = + ranges.quintupletModuleIndices()[innerInnerInnerLowerModuleArrayIndex] + innerObjectArrayIndex; + + // Don't add duplicate T5s or T5s that are accounted in pT5s + if (quintuplets.isDup()[quintupletIndex] or quintuplets.partOfPT5()[quintupletIndex]) + continue; + unsigned int loop_bound = pixelQuintuplets.nPixelQuintuplets() + pixelTriplets.nPixelTriplets(); + // Cross cleaning step + float eta1 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi1 = __H2F(quintuplets.phi()[quintupletIndex]); + + for (unsigned int jx = globalThreadIdx[2]; jx < loop_bound; jx += gridThreadExtent[2]) { + float eta2, phi2; + if (jx < pixelQuintuplets.nPixelQuintuplets()) { + eta2 = __H2F(pixelQuintuplets.eta()[jx]); + phi2 = __H2F(pixelQuintuplets.phi()[jx]); + } else { + eta2 = __H2F(pixelTriplets.eta()[jx - pixelQuintuplets.nPixelQuintuplets()]); + phi2 = __H2F(pixelTriplets.phi()[jx - pixelQuintuplets.nPixelQuintuplets()]); + } + + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-3f) + quintuplets.isDup()[quintupletIndex] = true; + } + } + } + } + }; + + struct CrossCleanpLS { + template + ALPAKA_FN_ACC void operator()(TAcc 
const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + PixelTripletsConst pixelTriplets, + TrackCandidates cands, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, + MiniDoubletsConst mds, + HitsConst hits, + QuintupletsConst quintuplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int pixelModuleIndex = modules.nLowerModules(); + unsigned int nPixels = segmentsOccupancy.nSegments()[pixelModuleIndex]; + for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; + pixelArrayIndex += gridThreadExtent[2]) { + if (!segmentsPixel.isQuad()[pixelArrayIndex] || segmentsPixel.isDup()[pixelArrayIndex]) + continue; + + float eta1 = segmentsPixel.eta()[pixelArrayIndex]; + float phi1 = segmentsPixel.phi()[pixelArrayIndex]; + unsigned int prefix = ranges.segmentModuleIndices()[pixelModuleIndex]; + + unsigned int nTrackCandidates = cands.nTrackCandidates(); + for (unsigned int trackCandidateIndex = globalThreadIdx[1]; trackCandidateIndex < nTrackCandidates; + trackCandidateIndex += gridThreadExtent[1]) { + short type = cands.trackCandidateType()[trackCandidateIndex]; + unsigned int innerTrackletIdx = cands.objectIndices()[trackCandidateIndex][0]; + if (type == LSTObjType::T5) { + unsigned int quintupletIndex = innerTrackletIdx; // T5 index + float eta2 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi2 = __H2F(quintuplets.phi()[quintupletIndex]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-3f) + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + if (type == LSTObjType::pT3) { + int pLSIndex = pixelTriplets.pixelSegmentIndices()[innerTrackletIdx]; + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hits); + if (npMatched > 0) + segmentsPixel.isDup()[pixelArrayIndex] = true; 
+ + int pT3Index = innerTrackletIdx; + float eta2 = __H2F(pixelTriplets.eta_pix()[pT3Index]); + float phi2 = __H2F(pixelTriplets.phi_pix()[pT3Index]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 0.000001f) + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + if (type == LSTObjType::pT5) { + unsigned int pLSIndex = innerTrackletIdx; + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hits); + if (npMatched > 0) { + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + + float eta2 = segmentsPixel.eta()[pLSIndex - prefix]; + float phi2 = segmentsPixel.phi()[pLSIndex - prefix]; + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 0.000001f) + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + } + } + } + }; + + struct AddpT3asTrackCandidates { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + PixelTripletsConst pixelTriplets, + TrackCandidates cands, + SegmentsPixelConst segmentsPixel, + ObjectRangesConst ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); + unsigned int pLS_offset = ranges.segmentModuleIndices()[nLowerModules]; + for (unsigned int pixelTripletIndex = globalThreadIdx[0]; pixelTripletIndex < nPixelTriplets; + pixelTripletIndex += gridThreadExtent[0]) { + if ((pixelTriplets.isDup()[pixelTripletIndex])) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx >= n_max_pixel_track_candidates) 
// This is done before any non-pixel TCs are added + { +#ifdef WARNINGS + printf("Track Candidate excess alert! Type = pT3"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT3(), 1u, alpaka::hierarchy::Threads{}); + + float radius = 0.5f * (__H2F(pixelTriplets.pixelRadius()[pixelTripletIndex]) + + __H2F(pixelTriplets.tripletRadius()[pixelTripletIndex])); + unsigned int pT3PixelIndex = pixelTriplets.pixelSegmentIndices()[pixelTripletIndex]; + addTrackCandidateToMemory(cands, + LSTObjType::pT3, + pixelTripletIndex, + pixelTripletIndex, + pixelTriplets.logicalLayers()[pixelTripletIndex].data(), + pixelTriplets.lowerModuleIndices()[pixelTripletIndex].data(), + pixelTriplets.hitIndices()[pixelTripletIndex].data(), + segmentsPixel.seedIdx()[pT3PixelIndex - pLS_offset], + __H2F(pixelTriplets.centerX()[pixelTripletIndex]), + __H2F(pixelTriplets.centerY()[pixelTripletIndex]), + radius, + trackCandidateIdx, + pixelTripletIndex); + } + } + } + }; + + struct AddT5asTrackCandidate { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + QuintupletsConst quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + TrackCandidates cands, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int idx = globalThreadIdx[1]; idx < nLowerModules; idx += gridThreadExtent[1]) { + if (ranges.quintupletModuleIndices()[idx] == -1) + continue; + + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[idx]; + for (unsigned int jdx = globalThreadIdx[2]; jdx < nQuints; jdx += gridThreadExtent[2]) { + unsigned int quintupletIndex = ranges.quintupletModuleIndices()[idx] + jdx; + if (quintuplets.isDup()[quintupletIndex] or quintuplets.partOfPT5()[quintupletIndex]) + continue; + if (!(quintuplets.tightCutFlag()[quintupletIndex])) + continue; 
+ + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatespT5() - cands.nTrackCandidatespT3() >= + n_max_nonpixel_track_candidates) // pT5 and pT3 TCs have been added, but not pLS TCs + { +#ifdef WARNINGS + printf("Track Candidate excess alert! Type = T5"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatesT5(), 1u, alpaka::hierarchy::Threads{}); + addTrackCandidateToMemory(cands, + LSTObjType::T5, + quintupletIndex, + quintupletIndex, + quintuplets.logicalLayers()[quintupletIndex].data(), + quintuplets.lowerModuleIndices()[quintupletIndex].data(), + quintuplets.hitIndices()[quintupletIndex].data(), + -1 /*no pixel seed index for T5s*/, + quintuplets.regressionG()[quintupletIndex], + quintuplets.regressionF()[quintupletIndex], + quintuplets.regressionRadius()[quintupletIndex], + trackCandidateIdx, + quintupletIndex); + } + } + } + } + }; + + struct AddpLSasTrackCandidate { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + TrackCandidates cands, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixelConst segmentsPixel, + bool tc_pls_triplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixels = segmentsOccupancy.nSegments()[nLowerModules]; + for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; + pixelArrayIndex += gridThreadExtent[2]) { + if ((tc_pls_triplets ? 
0 : !segmentsPixel.isQuad()[pixelArrayIndex]) || + (segmentsPixel.isDup()[pixelArrayIndex])) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatesT5() >= + n_max_pixel_track_candidates) // T5 TCs have already been added + { +#ifdef WARNINGS + printf("Track Candidate excess alert! Type = pLS"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatespLS(), 1u, alpaka::hierarchy::Threads{}); + addpLSTrackCandidateToMemory(cands, + pixelArrayIndex, + trackCandidateIdx, + segmentsPixel.pLSHitsIdxs()[pixelArrayIndex], + segmentsPixel.seedIdx()[pixelArrayIndex]); + } + } + } + }; + + struct AddpT5asTrackCandidate { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + PixelQuintupletsConst pixelQuintuplets, + TrackCandidates cands, + SegmentsPixelConst segmentsPixel, + ObjectRangesConst ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); + unsigned int pLS_offset = ranges.segmentModuleIndices()[nLowerModules]; + for (int pixelQuintupletIndex = globalThreadIdx[0]; pixelQuintupletIndex < nPixelQuintuplets; + pixelQuintupletIndex += gridThreadExtent[0]) { + if (pixelQuintuplets.isDup()[pixelQuintupletIndex]) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx >= n_max_pixel_track_candidates) // No other TCs have been added yet + { +#ifdef WARNINGS + printf("Track Candidate excess alert! 
Type = pT5"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT5(), 1u, alpaka::hierarchy::Threads{}); + + float radius = 0.5f * (__H2F(pixelQuintuplets.pixelRadius()[pixelQuintupletIndex]) + + __H2F(pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex])); + unsigned int pT5PixelIndex = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex]; + addTrackCandidateToMemory(cands, + LSTObjType::pT5, + pT5PixelIndex, + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex], + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex].data(), + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex].data(), + pixelQuintuplets.hitIndices()[pixelQuintupletIndex].data(), + segmentsPixel.seedIdx()[pT5PixelIndex - pLS_offset], + __H2F(pixelQuintuplets.centerX()[pixelQuintupletIndex]), + __H2F(pixelQuintuplets.centerY()[pixelQuintupletIndex]), + radius, + trackCandidateIdx, + pixelQuintupletIndex); + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(lst::TrackCandidatesDeviceCollection, lst::TrackCandidatesHostCollection); + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h new file mode 100644 index 0000000000000..9192edbd9a186 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -0,0 +1,895 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Triplet_h +#define RecoTracker_LSTCore_src_alpaka_Triplet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +#include "Segment.h" +#include "MiniDoublet.h" +#include "Hit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + ALPAKA_FN_ACC 
ALPAKA_FN_INLINE void addTripletToMemory(ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + Triplets& triplets, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG + float zOut, + float rtOut, +#endif + float betaIn, + float betaInCut, + float circleRadius, + float circleCenterX, + float circleCenterY, + unsigned int tripletIndex) { + triplets.segmentIndices()[tripletIndex][0] = innerSegmentIndex; + triplets.segmentIndices()[tripletIndex][1] = outerSegmentIndex; + triplets.lowerModuleIndices()[tripletIndex][0] = innerInnerLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][1] = middleLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][2] = outerOuterLowerModuleIndex; + + triplets.betaIn()[tripletIndex] = __F2H(betaIn); + triplets.radius()[tripletIndex] = circleRadius; + triplets.centerX()[tripletIndex] = circleCenterX; + triplets.centerY()[tripletIndex] = circleCenterY; + triplets.logicalLayers()[tripletIndex][0] = + modules.layers()[innerInnerLowerModuleIndex] + (modules.subdets()[innerInnerLowerModuleIndex] == 4) * 6; + triplets.logicalLayers()[tripletIndex][1] = + modules.layers()[middleLowerModuleIndex] + (modules.subdets()[middleLowerModuleIndex] == 4) * 6; + triplets.logicalLayers()[tripletIndex][2] = + modules.layers()[outerOuterLowerModuleIndex] + (modules.subdets()[outerOuterLowerModuleIndex] == 4) * 6; + //get the hits + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + + triplets.hitIndices()[tripletIndex][0] = mds.anchorHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][1] = mds.outerHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][2] = 
mds.anchorHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][3] = mds.outerHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][4] = mds.anchorHitIndices()[thirdMDIndex]; + triplets.hitIndices()[tripletIndex][5] = mds.outerHitIndices()[thirdMDIndex]; +#ifdef CUT_VALUE_DEBUG + triplets.zOut()[tripletIndex] = zOut; + triplets.rtOut()[tripletIndex] = rtOut; + triplets.betaInCut()[tripletIndex] = betaInCut; +#endif + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex) { + //get the rt and z + const float& r1 = mds.anchorRt()[firstMDIndex]; + const float& r2 = mds.anchorRt()[secondMDIndex]; + const float& r3 = mds.anchorRt()[thirdMDIndex]; + + const float& z1 = mds.anchorZ()[firstMDIndex]; + const float& z2 = mds.anchorZ()[secondMDIndex]; + const float& z3 = mds.anchorZ()[thirdMDIndex]; + + // Using lst_layer numbering convention defined in ModuleMethods.h + const int layer1 = modules.lstLayers()[innerInnerLowerModuleIndex]; + const int layer2 = modules.lstLayers()[middleLowerModuleIndex]; + const int layer3 = modules.lstLayers()[outerOuterLowerModuleIndex]; + + const float residual = z2 - ((z3 - z1) / (r3 - r1) * (r2 - r1) + z1); + + if (layer1 == 12 and layer2 == 13 and layer3 == 14) { + return false; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return alpaka::math::abs(acc, residual) < 0.53f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 13 and layer2 == 14 and layer3 == 15) { + return false; + } else if (layer1 == 14 and layer2 == 15 and layer3 == 16) { + return false; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return 
alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return alpaka::math::abs(acc, residual) < 1.21f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return alpaka::math::abs(acc, residual) < 1.f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return alpaka::math::abs(acc, residual) < 1.f; + } else if (layer1 == 3 and layer2 == 4 and layer3 == 5) { + return alpaka::math::abs(acc, residual) < 2.7f; + } else if (layer1 == 4 and layer2 == 5 and layer3 == 6) { + return alpaka::math::abs(acc, residual) < 3.06f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 9 and layer2 == 10 and layer3 == 11) { + return alpaka::math::abs(acc, residual) < 1; + } else { + return alpaka::math::abs(acc, residual) < 5; + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + unsigned int innerSegmentIndex, + float& betaIn, + float& betaInCut) { + bool isPSIn = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPSOut = (modules.moduleType()[outerOuterLowerModuleIndex] == PS); + + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; + + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeVOut = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float 
rtRatio_OutIn = rtOut / rtIn; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = alpaka::math::tan(acc, alpha1GeVOut) / alpha1GeVOut; // The track can bend in r-z plane slightly + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); + + const float zHi = + zIn + (zIn + kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + (zpitchIn + zpitchOut); + const float zLo = zIn + (zIn - kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - + (zpitchIn + zpitchOut); //slope-correction only on outer end + + //Cut 1 - z compatibility + if ((zOut < zLo) || (zOut > zHi)) + return false; + + float drt_OutIn = (rtOut - rtIn); + + float r3In = alpaka::math::sqrt(acc, zIn * zIn + rtIn * rtIn); + float drt_InSeg = rtMid - rtIn; + float dz_InSeg = zMid - zIn; + float dr3_InSeg = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + float coshEta = dr3_InSeg / drt_InSeg; + float dzErr = (zpitchIn + zpitchOut) * (zpitchIn + zpitchOut) * 2.f; + + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f) * (r3In / rtIn); + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + dzErr += muls2 * drt_OutIn * drt_OutIn / 3.f * coshEta * coshEta; + dzErr = alpaka::math::sqrt(acc, dzErr); + + // Constructing upper and lower bound + const float dzMean = dz_InSeg / drt_InSeg * drt_OutIn; + const float zWindow = dzErr / drt_InSeg * drt_OutIn + + (zpitchIn + zpitchOut); //FIXME for ptCut lower than ~0.8 need to add curv path correction + const float zLoPointed = zIn + dzMean * (zIn > 0.f ? 1.f : dzDrtScale) - zWindow; + const float zHiPointed = zIn + dzMean * (zIn < 0.f ? 
1.f : dzDrtScale) + zWindow; + + // Constructing upper and lower bound + + // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) + if ((zOut < zLoPointed) || (zOut > zHiPointed)) + return false; + + // raw betaIn value without any correction, based on the mini-doublet hit positions + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + betaInCut = + alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / drt_InSeg); + + //Cut #3: first beta cut + return alpaka::math::abs(acc, betaIn) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + uint16_t innerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + bool isPSIn = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPSOut = 
(modules.moduleType()[outerOuterLowerModuleIndex] == PS); + + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; + + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitchIn + zpitchOut; + + // Cut #0: Preliminary (Only here in endcap case) + if (zIn * zOut <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgInnerMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; + float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch : kStrip2SZpitch; + float zGeom1 = alpaka::math::copysign(acc, zGeom, zIn); + float rtLo = rtIn * (1.f + (zOut - zIn - zGeom1) / (zIn + zGeom1 + dLum) / dzDrtScale) - + rtGeom1; //slope correction only on the lower end + + //Cut #1: rt condition + float zInForHi = zIn - zGeom1 - dLum; + if (zInForHi * zIn < 0) { + zInForHi = alpaka::math::copysign(acc, 0.1f, zIn); + } + float rtHi = rtIn * (1.f + (zOut - zIn + zGeom1) / zInForHi) + rtGeom1; + + //Cut #2: rt condition + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + float rIn = alpaka::math::sqrt(acc, zIn * zIn + rtIn * rtIn); + + const float drtSDIn = rtMid - rtIn; + const float dzSDIn = zMid - zIn; + const float dr3SDIn = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + const float coshEta = dr3SDIn / drtSDIn; //direction estimate + const float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); + const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + const float zGeom1_another = kPixelPSZpitch; + const float kZ = (zOut - zIn) / dzSDIn; + float drtErr = + zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); + const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2 * (rtOut - rtIn) / 50.f) * (rIn / rtIn); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; + drtErr = alpaka::math::sqrt(acc, drtErr); + + //Cut #3: rt-z pointed + + if ((kZ < 0) || (rtOut < rtLo) || (rtOut > rtHi)) + return false; + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, 
tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + float swapTemp; + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #4: first beta cut + return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; + + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_Out = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_Out) / alpha1GeV_Out; // The track can bend in r-z plane slightly + + // Cut #0: 
Preliminary (Only here in endcap case) + if (zIn * zOut <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgOuterMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; + bool isInSgInnerMDPS = modules.moduleType()[innerInnerLowerModuleIndex] == PS; + + float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgOuterMDPS) ? kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; + + float dz = zOut - zIn; + const float rtLo = rtIn * (1.f + dz / (zIn + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end + const float rtHi = rtIn * (1.f + dz / (zIn - dLum)) + rtGeom; + + //Cut #1: rt condition + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + bool isInSgOuterMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; + + float drtSDIn = rtMid - rtIn; + float dzSDIn = zMid - zIn; + float dr3SDIn = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + float coshEta = dr3SDIn / drtSDIn; //direction estimate + float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); + float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + + float kZ = (zOut - zIn) / dzSDIn; + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f); + + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + + float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); + float rtWindow = drtErr + rtGeom; + float rtLo_point = rtIn + drtMean / dzDrtScale - rtWindow; + float rtHi_point = rtIn + drtMean + rtWindow; + + // Cut #3: rt-z pointed + // https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765 + + if (isInSgInnerMDPS and 
isInSgOuterMDPS) // If both PS then we can point + { + if ((kZ < 0) || (rtOut < rtLo_point) || (rtOut > rtHi_point)) + return false; + } + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; + float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; + + float swapTemp; + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #4: first beta cut + return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int 
firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + uint16_t innerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + short middleLowerModuleSubdet = modules.subdets()[middleLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; + + if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and + outerOuterLowerModuleSubdet == Barrel) { + return passPointingConstraintBBB(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerSegmentIndex, + betaIn, + betaInCut); + } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintBBE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Endcap and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintBBE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + + } + + else if (innerInnerLowerModuleSubdet == Endcap and middleLowerModuleSubdet == Endcap and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintEEE(acc, + modules, + 
mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + } + return false; // failsafe + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusFromThreeAnchorHits( + TAcc const& acc, float x1, float y1, float x2, float y2, float x3, float y3, float& g, float& f) { + float radius = 0.f; + + //(g,f) -> center + //first anchor hit - (x1,y1), second anchor hit - (x2,y2), third anchor hit - (x3, y3) + + float denomInv = 1.0f / ((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3)); + + float xy1sqr = x1 * x1 + y1 * y1; + + float xy2sqr = x2 * x2 + y2 * y2; + + float xy3sqr = x3 * x3 + y3 * y3; + + g = 0.5f * ((y3 - y2) * xy1sqr + (y1 - y3) * xy2sqr + (y2 - y1) * xy3sqr) * denomInv; + + f = 0.5f * ((x2 - x3) * xy1sqr + (x3 - x1) * xy2sqr + (x1 - x2) * xy3sqr) * denomInv; + + float c = ((x2 * y3 - x3 * y2) * xy1sqr + (x3 * y1 - x1 * y3) * xy2sqr + (x1 * y2 - x2 * y1) * xy3sqr) * denomInv; + + if (((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) || (g * g + f * f - c < 0)) { +#ifdef WARNINGS + printf("three collinear points or FATAL! r^2 < 0!\n"); +#endif + radius = -1.f; + } else + radius = alpaka::math::sqrt(acc, g * g + f * f - c); + + return radius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& zOut, + float& rtOut, + float& betaIn, + float& betaInCut, + float& circleRadius, + float& circleCenterX, + float& circleCenterY) { + //this cut reduces the number of candidates by a factor of 4, i.e., 3 out of 4 warps can end right here! 
+ if (segments.mdIndices()[innerSegmentIndex][1] != segments.mdIndices()[outerSegmentIndex][0]) + return false; + + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + + if (not passRZConstraint(acc, + modules, + mds, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex)) + return false; + if (not passPointingConstraint(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + middleLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut)) + return false; + + float x1 = mds.anchorX()[firstMDIndex]; + float x2 = mds.anchorX()[secondMDIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float y1 = mds.anchorY()[firstMDIndex]; + float y2 = mds.anchorY()[secondMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + + circleRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, circleCenterX, circleCenterY); + return true; + } + + struct CreateTriplets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, + Triplets triplets, + TripletsOccupancy tripletsOccupancy, + ObjectRangesConst ranges, + uint16_t* index_gpu, + uint16_t nonZeroModules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t innerLowerModuleArrayIdx = globalThreadIdx[0]; innerLowerModuleArrayIdx < nonZeroModules; + innerLowerModuleArrayIdx += gridThreadExtent[0]) { + uint16_t innerInnerLowerModuleIndex = index_gpu[innerLowerModuleArrayIdx]; + if (innerInnerLowerModuleIndex >= 
modules.nLowerModules()) + continue; + + uint16_t nConnectedModules = modules.nConnectedModules()[innerInnerLowerModuleIndex]; + if (nConnectedModules == 0) + continue; + + unsigned int nInnerSegments = segmentsOccupancy.nSegments()[innerInnerLowerModuleIndex]; + for (unsigned int innerSegmentArrayIndex = globalThreadIdx[1]; innerSegmentArrayIndex < nInnerSegments; + innerSegmentArrayIndex += gridThreadExtent[1]) { + unsigned int innerSegmentIndex = + ranges.segmentRanges()[innerInnerLowerModuleIndex][0] + innerSegmentArrayIndex; + + // middle lower module - outer lower module of inner segment + uint16_t middleLowerModuleIndex = segments.outerLowerModuleIndices()[innerSegmentIndex]; + + unsigned int nOuterSegments = segmentsOccupancy.nSegments()[middleLowerModuleIndex]; + for (unsigned int outerSegmentArrayIndex = globalThreadIdx[2]; outerSegmentArrayIndex < nOuterSegments; + outerSegmentArrayIndex += gridThreadExtent[2]) { + unsigned int outerSegmentIndex = ranges.segmentRanges()[middleLowerModuleIndex][0] + outerSegmentArrayIndex; + + uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex]; + + float zOut, rtOut, betaIn, betaInCut, circleRadius, circleCenterX, circleCenterY; + + bool success = runTripletConstraintsAndAlgo(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + zOut, + rtOut, + betaIn, + betaInCut, + circleRadius, + circleCenterX, + circleCenterY); + + if (success) { + unsigned int totOccupancyTriplets = + alpaka::atomicAdd(acc, + &tripletsOccupancy.totOccupancyTriplets()[innerInnerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); + if (static_cast(totOccupancyTriplets) >= + ranges.tripletModuleOccupancy()[innerInnerLowerModuleIndex]) { +#ifdef WARNINGS + printf("Triplet excess alert! 
Module index = %d\n", innerInnerLowerModuleIndex); +#endif + } else { + unsigned int tripletModuleIndex = alpaka::atomicAdd( + acc, &tripletsOccupancy.nTriplets()[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int tripletIndex = + ranges.tripletModuleIndices()[innerInnerLowerModuleIndex] + tripletModuleIndex; + addTripletToMemory(modules, + mds, + segments, + triplets, + innerSegmentIndex, + outerSegmentIndex, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG + zOut, + rtOut, +#endif + betaIn, + betaInCut, + circleRadius, + circleCenterX, + circleCenterY, + tripletIndex); + } + } + } + } + } + } + }; + + struct CreateTripletArrayRanges { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ObjectRanges ranges, + SegmentsOccupancyConst segmentsOccupancy) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nTotalTriplets = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalTriplets = 0; + } + alpaka::syncBlockThreads(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (segmentsOccupancy.nSegments()[i] == 0) { + ranges.tripletModuleIndices()[i] = nTotalTriplets; + ranges.tripletModuleOccupancy()[i] = 0; + continue; + } + + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + 
category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 543; + else if (category_number == 0 && eta_number == 1) + occupancy = 235; + else if (category_number == 0 && eta_number == 2) + occupancy = 88; + else if (category_number == 0 && eta_number == 3) + occupancy = 46; + else if (category_number == 1 && eta_number == 0) + occupancy = 755; + else if (category_number == 1 && eta_number == 1) + occupancy = 347; + else if (category_number == 2 && eta_number == 1) + occupancy = 0; + else if (category_number == 2 && eta_number == 2) + occupancy = 0; + else if (category_number == 3 && eta_number == 1) + occupancy = 38; + else if (category_number == 3 && eta_number == 2) + occupancy = 46; + else if (category_number == 3 && eta_number == 3) + occupancy = 39; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createTripletArrayRanges! 
Module index = %i\n", i); +#endif + } + + ranges.tripletModuleOccupancy()[i] = occupancy; + unsigned int nTotT = alpaka::atomicAdd(acc, &nTotalTriplets, occupancy, alpaka::hierarchy::Threads{}); + ranges.tripletModuleIndices()[i] = nTotT; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + ranges.nTotalTrips() = nTotalTriplets; + } + } + }; + + struct AddTripletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + TripletsOccupancyConst tripletsOccupancy, + ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (tripletsOccupancy.nTriplets()[i] == 0) { + ranges.tripletRanges()[i][0] = -1; + ranges.tripletRanges()[i][1] = -1; + } else { + ranges.tripletRanges()[i][0] = ranges.tripletModuleIndices()[i]; + ranges.tripletRanges()[i][1] = ranges.tripletModuleIndices()[i] + tripletsOccupancy.nTriplets()[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/standalone/.gitignore b/RecoTracker/LSTCore/standalone/.gitignore new file mode 100644 index 0000000000000..29e86cb6b932a --- /dev/null +++ b/RecoTracker/LSTCore/standalone/.gitignore @@ -0,0 +1,43 @@ +mtv +*~ +results/ +*.o +debug.root +*.pdf +plots/ +plots_*/ +scripts/moduleconnection*.txt +*.root +.make.log* +bin/doAnalysis +bin/lst +bin/lst_cuda +bin/lst_cpu +bin/lst_rocm +code/rooutil/librooutil.so +code/rooutil/rooutil.so +.gitversion.txt +efficiency/doAnalysis +.jobs.txt +efficiency/results* +efficiencies/ +efficiency/bin/createEffNumDenPlots 
+efficiency/bin/createPerfNumDenHists +efficiency/compare +efficiency/summary +*.txt +*.pyc +output* +movetoweb.sh +*.nvvp +*.ipynb +*.log +*.nsys-rep +*.sqlite +*.ncu-rep +*.swp + +*.nfs* +.directoryhash +performance/ +notebooks/ diff --git a/RecoTracker/LSTCore/standalone/LST/.gitignore b/RecoTracker/LSTCore/standalone/LST/.gitignore new file mode 100644 index 0000000000000..32429d8358fb5 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/LST/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +.vscode/ diff --git a/RecoTracker/LSTCore/standalone/LST/Makefile b/RecoTracker/LSTCore/standalone/LST/Makefile new file mode 100644 index 0000000000000..ee6f82ecccde1 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/LST/Makefile @@ -0,0 +1,151 @@ +# +# stuff to make +# + +CCSOURCES=$(wildcard ../../src/*.cc) +ALPAKACCSOURCES=$(wildcard ../../src/alpaka/*.dev.cc) +CCOBJECTS_CPU=$(patsubst ../../src/alpaka/%.dev.cc, %_cpu.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_cpu.o, $(CCSOURCES)) +CCOBJECTS_CUDA=$(patsubst ../../src/alpaka/%.dev.cc, %_cuda.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_cuda.o, $(CCSOURCES)) +CCOBJECTS_ROCM=$(patsubst ../../src/alpaka/%.dev.cc, %_rocm.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_rocm.o, $(CCSOURCES)) + +LSTSOURCES=../../src/alpaka/LST.cc +LSTOBJECTS_CPU=$(patsubst ../../src/alpaka/%.cc, %_cpu.o, $(LSTSOURCES)) +LSTOBJECTS_CUDA=$(patsubst ../../src/alpaka/%.cc, %_cuda.o, $(LSTSOURCES)) +LSTOBJECTS_ROCM=$(patsubst ../../src/alpaka/%.cc, %_rocm.o, $(LSTSOURCES)) + +# Default to CPU and CUDA backends +ifeq ($(BACKEND),) + LIB_CPU=liblst_cpu.so + LIB_CUDA=liblst_cuda.so +endif + +ifneq ($(findstring cpu,$(BACKEND)),) + LIB_CPU=liblst_cpu.so +endif +ifneq ($(findstring cuda,$(BACKEND)),) + LIB_CUDA=liblst_cuda.so +endif +ifneq ($(findstring rocm,$(BACKEND)),) + LIB_ROCM=liblst_rocm.so +endif +ifneq ($(findstring all,$(BACKEND)),) + LIB_CPU=liblst_cpu.so + LIB_CUDA=liblst_cuda.so + LIB_ROCM=liblst_rocm.so +endif + +LIBS=$(LIB_CPU) 
$(LIB_CUDA) $(LIB_ROCM) + +# +# flags to keep track of +# + +# Different architectures to optimize for +GENCODE_CUDA := -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_89,code=[sm_89,compute_89] + +CXX = g++ +CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Woverloaded-virtual -fPIC -fopenmp -I.. +CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_CUDA) --use_fast_math --default-stream per-thread -I.. +CXXFLAGS_ROCM = -O3 -g -Wall -Woverloaded-virtual -fPIC -I${ROCM_ROOT}/include -I.. +CMSSWINCLUDE := -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src +ifdef CMSSW_RELEASE_BASE +CMSSWINCLUDE := ${CMSSWINCLUDE} -I${CMSSW_RELEASE_BASE}/src +endif +ALPAKAINCLUDE = -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include -std=c++20 ${CMSSWINCLUDE} +ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 --expt-relaxed-constexpr +ALPAKAROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_ACC_GPU_HIP_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 -DGNU_GCC -D_GNU_SOURCE --target=x86_64-redhat-linux-gnu --gcc-toolchain=$(patsubst %/bin/gcc,%,$(shell command -v gcc)) +ROOTINCLUDE = -I$(ROOT_ROOT)/include +ROOTCFLAGS = -pthread -m64 $(ROOTINCLUDE) +PTCUTFLAG = +LSTWARNINGSFLAG = +CMSSW_WERRORS_CPU = -Werror=pointer-arith -Werror=overlength-strings -Werror=return-type -Werror=missing-braces -Werror=unused-value -Werror=unused-label \ + -Werror=address -Werror=format -Werror=sign-compare -Werror=write-strings -Werror=delete-non-virtual-dtor -Werror=strict-aliasing -Werror=narrowing \ + -Werror=unused-but-set-variable -Werror=reorder 
-Werror=unused-variable -Werror=conversion-null -Werror=return-local-addr -Wnon-virtual-dtor -Werror=switch \ + -Werror=main -Werror=overflow -Werror=format-contains-nul -Werror=type-limits -Wreturn-type -Wextra -Wpessimizing-move -Wclass-memaccess -Wunused \ + -Wparentheses -Wno-vla -Wno-non-template-friend -Wno-long-long -Wno-cast-function-type -Wno-unused-but-set-parameter -Wno-ignored-qualifiers \ + -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-attributes +CMSSW_WERRORS_CUDA = $(patsubst %,-Xcompiler %,$(CMSSW_WERRORS_CPU)) +CMSSW_WERRORS_ROCM = $(CMSSW_WERRORS_CPU) +T5CUTFLAGS = $(T5DNNFLAG) $(T5RZCHI2FLAG) $(T5RPHICHI2FLAG) + +LD_CPU = g++ +SOFLAGS_CPU = -g -shared -fPIC +ALPAKABACKEND_CPU = $(ALPAKASERIAL) +COMPILE_CMD_CPU = $(LD_CPU) -c + +LD_CUDA = nvcc +SOFLAGS_CUDA = -g -shared --compiler-options -fPIC --cudart shared $(GENCODE_CUDA) +ALPAKABACKEND_CUDA = $(ALPAKACUDA) +COMPILE_CMD_CUDA = $(LD_CUDA) -x cu + +LD_ROCM = hipcc +SOFLAGS_ROCM = -g -shared -fPIC +ALPAKABACKEND_ROCM = $(ALPAKAROCM) +COMPILE_CMD_ROCM = $(LD_ROCM) -c + +CUTVALUEFLAG = +CUTVALUEFLAG_FLAGS = -DCUT_VALUE_DEBUG + +%_cpu.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< -o $@ + +%_cuda.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +%_cpu.o: ../../src/alpaka/%.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< 
-o $@ + +%_cuda.o: ../../src/alpaka/%.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/alpaka/%.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +%_cpu.o: ../../src/%.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< -o $@ + +%_cuda.o: ../../src/%.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/%.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +$(LIB_CPU): $(CCOBJECTS_CPU) $(LSTOBJECTS_CPU) + $(LD_CPU) $(SOFLAGS_CPU) $^ -o $@ + +$(LIB_CUDA): $(CCOBJECTS_CUDA) $(LSTOBJECTS_CUDA) + $(LD_CUDA) $(SOFLAGS_CUDA) $^ -o $@ + +$(LIB_ROCM): $(CCOBJECTS_ROCM) $(LSTOBJECTS_ROCM) + $(LD_ROCM) $(SOFLAGS_ROCM) $^ -o $@ + +explicit: $(LIBS) + +explicit_cutvalue: CUTVALUEFLAG = $(CUTVALUEFLAG_FLAGS) +explicit_cutvalue: $(LIBS) + +clean: + rm -f *.opp + rm -f *.o + rm -f *.d + rm -f *.so + +.PHONY: clean explicit explicit_cutvalue format check check-fix + +format: + clang-format --style=file:../.clang-format -i *.cc *.h + +# Collect all the include paths from the compiler. +# The .../gcc/x86_64-redhat-linux-gnu/*/include path is excluded since .../gcc/x86_64-redhat-linux-gnu/*/include-fixed should be used instead. 
+TIDYINCLUDEFLAGS := $(shell g++ -E -x c++ - -v < /dev/null 2>&1 | awk '/#include <...>/,/^End of search/{if (/^ / && !/x86_64-redhat-linux-gnu\/[0-9.]+\/include$$/) print "-I"$$1}' | tr '\n' ' ') +TIDYFLAGS := --language=c++ $(CXXFLAGS_CPU) $(ALPAKAINCLUDE) $(ALPAKASERIAL) $(ROOTCFLAGS) $(DUPLICATED) $(TIDYINCLUDEFLAGS) + +check: + clang-tidy --config-file=../.clang-tidy *.cc *.h -- $(TIDYFLAGS) + +check-fix: + clang-tidy --config-file=../.clang-tidy --format-style=file:../.clang-format --fix --fix-errors --fix-notes *.cc *.h -- $(TIDYFLAGS) diff --git a/RecoTracker/LSTCore/standalone/Makefile b/RecoTracker/LSTCore/standalone/Makefile new file mode 100644 index 0000000000000..b98df31df1b5e --- /dev/null +++ b/RecoTracker/LSTCore/standalone/Makefile @@ -0,0 +1,78 @@ +# Simple makefile + +EXES := bin/lst_cpu bin/lst_cuda + +SOURCES=$(wildcard code/core/*.cc) +OBJECTS_CPU=$(SOURCES:.cc=_cpu.o) +OBJECTS_CUDA=$(SOURCES:.cc=_cuda.o) +OBJECTS_ROCM=$(SOURCES:.cc=_rocm.o) +OBJECTS=$(OBJECTS_CPU) $(OBJECTS_CUDA) $(OBJECTS_ROCM) + +CXX = g++ +CXXFLAGS = -g -O2 -Wall -fPIC -Woverloaded-virtual -Wno-unused-function -fno-var-tracking -std=c++20 +INCLUDEFLAGS= -ILST -I$(shell pwd) -Icode -Icode/core -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include $(shell rooutil-config --include) -I$(shell root-config --incdir) -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src -I../interface/ -I../interface/alpaka/ -I../src/ -I../src/alpaka/ +ifdef CMSSW_RELEASE_BASE +INCLUDEFLAGS:= ${INCLUDEFLAGS} -I${CMSSW_RELEASE_BASE}/src +endif +LDFLAGS = -g -O2 $(LSTLIB) -L${TRACKLOOPERDIR}/LST $(shell rooutil-config --libs) $(shell root-config --libs) +LDFLAGS_CUDA= -L${CUDA_HOME}/lib64 -lcudart +LDFLAGS_ROCM= -L${ROCM_ROOT}/lib -lamdhip64 +ALPAKAFLAGS = -DALPAKA_DEBUG=0 +CUDAINCLUDE = -I${CUDA_HOME}/include +ROCMINCLUDE = -I${ROCM_ROOT}/include +ALPAKA_CPU = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_CUDA = 
-DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_ROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +EXTRAFLAGS = -ITMultiDrawTreePlayer -Wunused-variable -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -fopenmp +DOQUINTUPLET = +PTCUTFLAG = +CUTVALUEFLAG = +CUTVALUEFLAG_FLAGS = -DCUT_VALUE_DEBUG + +PRIMITIVEFLAG = +PRIMITIVEFLAG_FLAGS = -DPRIMITIVE_STUDY + +all: rooutil efficiency $(EXES) + +cutvalue: CUTVALUEFLAG = ${CUTVALUEFLAG_FLAGS} +cutvalue: rooutil efficiency $(EXES) + +primitive: PRIMITIVEFLAG = ${PRIMITIVEFLAG_FLAGS} +primitive: rooutil efficiency $(EXES) + +cutvalue_primitive: CUTVALUEFLAG = ${CUTVALUEFLAG_FLAGS} +cutvalue_primitive: PRIMITIVEFLAG = ${PRIMITIVEFLAG_FLAGS} +cutvalue_primitive: rooutil efficiency $(EXES) + + +bin/lst_cpu: LSTLIB=-llst_cpu +bin/lst_cpu: bin/lst_cpu.o $(OBJECTS_CPU) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CPU) -o $@ +bin/lst_cuda: LSTLIB=-llst_cuda +bin/lst_cuda: bin/lst_cuda.o $(OBJECTS_CUDA) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CUDA) $(LDFLAGS_CUDA) -o $@ +bin/lst_rocm: LSTLIB=-llst_rocm +bin/lst_rocm: bin/lst_rocm.o $(OBJECTS_ROCM) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_ROCM) $(LDFLAGS_ROCM) -o $@ + +%_cpu.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CPU) $< -c -o $@ +%_cuda.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) 
$(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CUDA) $(CUDAINCLUDE) $< -c -o $@ +%_rocm.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_ROCM) $(ROCMINCLUDE) $< -c -o $@ + +rooutil: + $(MAKE) -C code/rooutil/ + +efficiency: rooutil + $(MAKE) -C efficiency/ + +clean: + rm -f $(OBJECTS) bin/*.o $(EXES) bin/lst + rm -f code/rooutil/*.so code/rooutil/*.o + rm -f bin/lst.o + rm -f LST/*.o + cd efficiency/ && make clean + +.PHONY: rooutil efficiency diff --git a/RecoTracker/LSTCore/standalone/README.md b/RecoTracker/LSTCore/standalone/README.md new file mode 100644 index 0000000000000..02fbef943f697 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/README.md @@ -0,0 +1,291 @@ +# TrackLooper + + +## Quick Start + + +### Setting up LSTPerformanceWeb (only for lnx7188 and lnx4555) + +For lnx7188 and lnx4555 this needs to be done once + + cd /cdat/tem/${USER}/ + git clone git@github.com:SegmentLinking/LSTPerformanceWeb.git + +### Setting up container (only for lnx7188) + +For lnx7188 this needs to be done before compiling or running the code: + + singularity shell --nv --bind /mnt/data1:/data --bind /data2/segmentlinking/ --bind /opt --bind /nfs --bind /mnt --bind /usr/local/cuda/bin/ --bind /cvmfs /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cmssw/el8:x86_64 + +### Setting up the code + + git clone git@github.com:SegmentLinking/TrackLooper.git + cd TrackLooper/ + # Source one of the commands below, depending on the site + source setup.sh # if on UCSD or Cornell + source setup_hpg.sh # if on Florida + +### Running the code + + sdl_make_tracklooper -mc + sdl_ -i PU200 -o LSTNtuple.root + createPerfNumDenHists -i LSTNtuple.root -o LSTNumDen.root + lst_plot_performance.py LSTNumDen.root -t "myTag" + # python3 efficiency/python/lst_plot_performance.py LSTNumDen.root -t "myTag" # if you are on cgpu-1 or Cornell + +The above can be even simplified + + sdl_run -f -mc -s PU200 -n 
-1 -t myTag + +The `-f` flag can be omitted when the code has already been compiled. If multiple backends were compiled, then the `-b` flag can be used to specify a backend. For example + + sdl_run -b cpu -s PU200 -n -1 -t myTag + +## Command explanations + +Compile the code with option flags. If none of `C,G,R,A` are used, then it defaults to compiling for CUDA and CPU. + + sdl_make_tracklooper -mc + -m: make clean binaries + -c: run with the cmssw caching allocator + -C: compile CPU backend + -G: compile CUDA backend + -R: compile ROCm backend + -A: compile all backends + -h: show help screen with all options + +Run the code + + sdl_ -n -v -w -s -i -o + + -i: PU200; muonGun, etc + -n: number of events; default: all + -v: 0-no printout; 1- timing printout only; 2- multiplicity printout; default: 0 + -s: number of streams/events in flight; default: 1 + -w: 0- no writeout; 1- minimum writeout; default: 1 + -o: provide an output root file name (e.g. LSTNtuple.root); default: debug.root + -l: add lower level object (pT3, pT5, T5, etc.) branches to the output + +Plotting numerators and denominators of performance plots + + createPerfNumDenHists -i -o [-g -n ] + + -i: Path to LSTNtuple.root + -o: provide an output root file name (e.g. num_den_hist.root) + -n: (optional) number of events + -g: (optional) comma separated pdgids to add more efficiency plots with different sim particle slices + +Plotting performance plots + + lst_plot_performance.py num_den_hist.root -t "mywork" + +There are several options you can provide to restrict the number of plots being produced. +And by default, it creates a certain set of objects. +One can specify the type, range, metric, etc. +To see the full information type + + lst_plot_performance.py --help + +To give an example of plotting efficiency, object type of lower level T5, for |eta| < 2.5 only.
+ + lst_plot_performance.py num_den_hist.root -t "mywork" -m eff -o T5_lower -s loweta + +NOTE: in order to plot lower level objects, the ```-l``` option must have been used during the ```sdl``` step! + +When running on ```cgpu-1``` remember to specify python3 as there is no python. +The shebang on the ```lst_plot_performance.py``` is not updated as ```lnx7188``` works with python2... + + python3 efficiency/python/lst_plot_performance.py num_den_hist.root -t "mywork" # If running on cgpu-1 + +Comparing two different runs + + lst_plot_performance.py \ + num_den_hist_1.root \ # Reference + num_den_hist_2.root \ # New work + -L BaseLine,MyNewWork \ # Labeling + -t "mywork" \ + --compare + +## CMSSW Integration +This is a complete set of instructions on how the TrackLooper code +can be linked as an external tool in CMSSW: + +### Build TrackLooper +```bash +git clone git@github.com:SegmentLinking/TrackLooper.git +cd TrackLooper/ +# Source one of the commands below, depending on the site +source setup.sh # if on UCSD or Cornell +source setup_hpg.sh # if on Florida +sdl_make_tracklooper -mc +cd .. +``` + +### Set up `TrackLooper` as an external +```bash +mkdir workingFolder # Create the folder you will be working in +cd workingFolder +cmsrel CMSSW_14_1_0_pre3 +cd CMSSW_14_1_0_pre3/src +cmsenv +git cms-init +git remote add SegLink git@github.com:SegmentLinking/cmssw.git +git fetch SegLink CMSSW_14_1_0_pre3_LST_X +git cms-addpkg RecoTracker Configuration +git checkout CMSSW_14_1_0_pre3_LST_X +#To include both the CPU library and GPU library into CMSSW, create 3 xml files (headers file has no library). +#Before writing the following xml file, check that libsdl_cpu.so and libsdl_gpu.so can be found under the ../../../TrackLooper/SDL/ folder.
+cat <lst_headers.xml + + + + + + + +EOF +cat <lst_cpu.xml + + + + + + + + + +EOF +cat <lst_cuda.xml + + + + + + + + + +EOF +scram setup lst_headers.xml +scram setup lst_cpu.xml +scram setup lst_cuda.xml +cmsenv +git cms-checkdeps -a -A +scram b -j 12 +``` + +### Run the LST reconstruction in CMSSW +A simple test configuration of the LST reconstruction can be run with the command: +```bash +cmsRun RecoTracker/LST/test/LSTAlpakaTester.py +``` + +For a more complete workflow, one can run a modified version of the 21034.1 workflow. +To get the commands of this workflow, one can run: +```bash +runTheMatrix.py -w upgrade -n -e -l 21034.1 +``` + +For convenience, the workflow has been run for 100 events and the output is stored here: +```bash +/data2/segmentlinking/CMSSW_14_1_0_pre0/step2_21034.1_100Events.root +``` + +For enabling the LST reconstruction in the CMSSW tracking workflow, a modified step3 needs to be run. +This is based on the step3 command of the 21034.1 workflow with the following changes: + - Remove the `--pileup_input` and `--pileup` flags. + - The number of threads and streams for the job can be optionally controlled by the `--nThreads` and `--nStreams` command line options respectively (`1` ends up being the actual default value for both, and more info can be found by running `cmsDriver.py --help`). + - Add at the end of the command: `--procModifiers gpu,trackingLST,trackingIters01 --no_exec` + +Run the command and modify the output configuration file with the following: + - If want to run a cpu version, remove the ```gpu``` in the line defining the `process` object: + ```python + process = cms.Process('RECO',...,gpu,...) 
+ ``` + - Add the following lines below the part where the import of the standard configurations happens: + ```python + process.load('Configuration.StandardSequences.Accelerators_cff') + process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi") + ``` + - Modify the input and output file names accordingly, as well as the number of events. + +Then, run the configuration file with `cmsRun`. + +To get the DQM files, one would have to run step4 of the 21034.1 workflow with the following modifications: + - Add `--no_exec` to the end of the command and then run it. + - Modify the output configuration file by changing the input file (the one containing `inDQM` from the previous step) and number of events accordingly. + +Running the configuration file with `cmsRun`, the output file will have a name starting with `DQM`. The name is the same every time this step runs, +so it is good practice to rename the file, e.g. to `tracking_Iters01LST.root`. +The MTV plots can be produced with the command: +```bash +makeTrackValidationPlots.py --extended tracking_Iters01LST.root +``` +Comparison plots can be made by including multiple ROOT files as arguments. + +**Note:** In case one wants to run step2 as well, similar modifications as in step4 (`--no_exec` flag and input file/number of events) need to be applied. Moreover, the PU files should be modified to point to local ones.
This can be done by inserting a dummy file when running the command (set the argument of the `--pileup_input` flag to `file:file.root`), and then change the PU input files in the configuration to the following line (by means of replacing the corresponding line in the configuration): +```python +process.mix.input.fileNames = cms.untracked.vstring(['file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/066fc95d-1cef-4469-9e08-3913973cd4ce.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/07928a25-231b-450d-9d17-e20e751323a1.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/26bd8fb0-575e-4201-b657-94cdcb633045.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/4206a9c5-44c2-45a5-aab2-1a8a6043a08a.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/55a372bf-a234-4111-8ce0-ead6157a1810.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/59ad346c-f405-4288-96d7-795f81c43fe8.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/7280f5ec-b71d-4579-a730-7ce2de0ff906.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/b93adc85-715f-477a-afc9-65f3241933ee.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/c7a0aa46-f55c-4b01-977f-34a397b71fba.root', 
'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/e77fa467-97cb-4943-884f-6965b4eb0390.root']) +``` + +### Inclusion of LST in other CMSSW packages +Including the line +``` + +``` +in the relevant package `BuildFile.xml` allows for +including our headers in the code of that package. + +## Running LST in a CVMFS-less setup + +The setup scripts included in this repository assume that the [CernVM File System (CVMFS)](https://cernvm.cern.ch/fs/) is installed. This provides a convenient way to fetch the required dependencies, but it is not necessary to run LST in standalone mode. Here, we briefly describe how to build and run it when CVMFS is not available. + +The necessary dependencies are CUDA, ROOT, the Boost libraries, Alpaka, and some CMSSW headers. CUDA, ROOT, and Boost are fairly standard libraries and are available from multiple package managers. For the remaining necessary headers you will need to clone the [Alpaka](https://github.com/alpaka-group/alpaka) and [CMSSW](https://github.com/cms-sw/cmssw) repositories. The Alpaka repository is reasonably sized, but the CMSSW one is extremely large, especially considering that we only need a tiny fraction of its files to build LST. We can get only the Alpaka interface headers from CMSSW by running the following commands. + +``` bash +git clone --filter=blob:none --no-checkout --depth 1 --sparse --branch CMSSW_14_1_X https://github.com/cms-sw/cmssw.git +cd cmssw +git sparse-checkout add HeterogeneousCore/AlpakaInterface +git checkout +``` + +Then all that is left to do is set some environment variables. We give an example of how to do this in lnx7188/cgpu-1. + +```bash +# These two lines are only needed to set the right version of gcc and nvcc. They are not needed for standard installations.
+export PATH=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/bin:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/external/el8_amd64_gcc12/bin:$PATH +export LD_LIBRARY_PATH=/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/biglib/el8_amd64_gcc12:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/lib/el8_amd64_gcc12:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/external/el8_amd64_gcc12/lib:/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/lib64:/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/lib:$LD_LIBRARY_PATH + +# These are the lines that you need to manually change for a CVMFS-less setup. +# In this example we use cvmfs paths since that is where the dependencies are in lnx7188/cgpu1, but they can point to local directories. +export BOOST_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/boost/1.80.0-60a217837b5db1cff00c7d88ec42f53a +export ALPAKA_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/alpaka/1.1.0-7d0324257db47fde2d27987e7ff98fb4 +export CUDA_HOME=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/cuda/12.4.1-06cde0cd9f95a73a1ea05c8535f60bde +export ROOT_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/lcg/root/6.30.07-21947a33e64ceb827a089697ad72e468 +export CMSSW_BASE=/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3 + +# These lines are needed to account for some extra environment variables that are exported in the setup script. +export LD_LIBRARY_PATH=$PWD/SDL/cuda:$PWD/SDL/cpu:$PWD:$LD_LIBRARY_PATH +export PATH=$PWD/bin:$PATH +export PATH=$PWD/efficiency/bin:$PATH +export PATH=$PWD/efficiency/python:$PATH +export TRACKLOOPERDIR=$PWD +export TRACKINGNTUPLEDIR=/data2/segmentlinking/CMSSW_12_2_0_pre2/ +export LSTOUTPUTDIR=. +source $PWD/code/rooutil/thisrooutil.sh + +# After this, you can compile and run LST as usual. 
+sdl_run -f -mc -s PU200 -n -1 -t myTag +``` + +## Code formatting and checking + +The makefile in the `SDL` directory includes phony targets to run `clang-format` and `clang-tidy` on the code using the formatting and checks used in CMSSW. The following are the available commands. + +- `make format` + Formats the code in the `SDL` directory using `clang-format` following the rules specified in `.clang-format`. +- `make check` + Runs `clang-tidy` on the code in the `SDL` directory to perform the checks specified in `.clang-tidy`. +- `make check-fix` + Same as `make check`, but fixes the issues that it knows how to fix. + \ No newline at end of file diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc new file mode 100644 index 0000000000000..369680bc4309e --- /dev/null +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -0,0 +1,527 @@ +#include "lst.h" + +#include + +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::LSTEvent; +using namespace ::lst; + +//___________________________________________________________________________________________________________________________________________________________________________________________ +int main(int argc, char **argv) { + //******************************************************************************** + // + // 0. Preliminary operations + // + //******************************************************************************** + + // Checking the TRACKLOOPERDIR is set + ana.track_looper_dir_path = gSystem->Getenv("TRACKLOOPERDIR"); + if (ana.track_looper_dir_path.IsNull()) { + RooUtil::error( + "TRACKLOOPERDIR is not set! Did you run $ source setup.sh from TrackLooper/ main repository directory?"); + } + RooUtil::print(TString::Format("TRACKLOOPERDIR=%s", ana.track_looper_dir_path.Data())); + + // Write the command line used to run it + // N.B. 
This needs to be before the argument parsing as it will change some values + std::vector allArgs(argv, argv + argc); + ana.full_cmd_line = ""; + for (auto &str : allArgs) { + ana.full_cmd_line += TString::Format(" %s", str.c_str()); + } + + //******************************************************************************** + // + // 1. Parsing options + // + //******************************************************************************** + + // cxxopts is just a tool to parse argc, and argv easily + + // Grand option setting + cxxopts::Options options("\n $ lst", + "\n **********************\n * *\n * " + "Looper *\n * *\n **********************\n"); + + // Read the options + options.add_options()("m,mode", "Run mode (NOT DEFINED)", cxxopts::value()->default_value("5"))( + "i,input", + "Comma separated input file list OR if just a directory is provided it will glob all in the directory BUT must " + "end with '/' for the path", + cxxopts::value()->default_value("muonGun"))( + "t,tree", + "Name of the tree in the root file to open and loop over", + cxxopts::value()->default_value("trackingNtuple/tree"))( + "o,output", "Output file name", cxxopts::value())( + "N,nmatch", "N match for MTV-like matching", cxxopts::value()->default_value("9"))( + "n,nevents", "N events to loop over", cxxopts::value()->default_value("-1"))( + "x,event_index", "specific event index to process", cxxopts::value()->default_value("-1"))( + "g,pdg_id", "The simhit pdgId match option", cxxopts::value()->default_value("0"))( + "v,verbose", + "Verbose mode (0: no print, 1: only final timing, 2: object multiplitcity", + cxxopts::value()->default_value("0"))( + "w,write_ntuple", "Write Ntuple", cxxopts::value()->default_value("1"))( + "s,streams", "Set number of streams", cxxopts::value()->default_value("1"))( + "d,debug", "Run debug job. i.e. 
overrides output option to 'debug.root' and 'recreate's the file.")( + "l,lower_level", "write lower level objects ntuple results")("G,gnn_ntuple", "write gnn input variable ntuple")( + "j,nsplit_jobs", "Enable splitting jobs by N blocks (--job_index must be set)", cxxopts::value())( + "I,job_index", + "job_index of split jobs (--nsplit_jobs must be set. index starts from 0. i.e. 0, 1, 2, 3, etc...)", + cxxopts::value())("3,tc_pls_triplets", "Allow triplet pLSs in TC collection")( + "2,no_pls_dupclean", "Disable pLS duplicate cleaning (both steps)")("h,help", "Print help"); + + auto result = options.parse(argc, argv); + + // NOTE: When an option was provided (e.g. -i or --input), then the result.count("