Added LSTCore

Co-authored-by: Tres Reid <[email protected]> Co-authored-by: Philip Chang <[email protected]> Co-authored-by: Gavin Niendorf <[email protected]> Co-authored-by: YonsiG <[email protected]> Co-authored-by: Balaji Sathia Narayanan <[email protected]> Co-authored-by: Manos Vourliotis <[email protected]> Co-authored-by: Slava Krutelyov <[email protected]> Co-authored-by: Jonathan Guiang <[email protected]> Co-authored-by: Bei Wang <[email protected]>
SegmentLinking · May 29, 2024 · 4ab60f2 · 4ab60f2
1 parent 7f1c904
commit 4ab60f2
Show file tree

Hide file tree

Showing 85 changed files with 43,874 additions and 0 deletions.
diff --git a/RecoTracker/LSTCore/BuildFile.xml b/RecoTracker/LSTCore/BuildFile.xml
@@ -0,0 +1,9 @@
+<use name="alpaka"/>
+<use name="boost_header"/>
+<use name="root"/>
+<use name="HeterogeneousCore/AlpakaInterface"/>
+<flags CXXFLAGS="-DLST_IS_CMSSW_PACKAGE -DCACHE_ALLOC -DT4FromT3 -DUSE_RZCHI2 -DUSE_T5_DNN -DPT_CUT=0.8 -DDUP_pLS -DDUP_T5 -DDUP_pT5 -DDUP_pT3 -DCrossclean_T5 -DCrossclean_pT3 -Wshadow"/>
+<flags ALPAKA_BACKENDS="1"/>
+<export>
+  <lib name="1"/>
+</export>
diff --git a/RecoTracker/LSTCore/README.md b/RecoTracker/LSTCore/README.md
@@ -0,0 +1,42 @@
+# LSTCore proof of concept
+
+**This is a proof of concept for how I think we could continue working towards the CMSSW integration while keeping the standalone version alive.**
+
+This branch of CMSSW contains all of the relevant LST code and can be built entirely within CMSSW. The setup process is what you would expect.
+
+```bash
+export CMSSW_VERSION=CMSSW_14_1_0_pre3
+export CMSSW_BRANCH=${CMSSW_VERSION}_LST_X_LSTCore
+source /cvmfs/cms.cern.ch/cmsset_default.sh
+cmsrel $CMSSW_VERSION
+cd $CMSSW_VERSION/src
+cmsenv
+git cms-init
+git remote add SegLink https://github.com/SegmentLinking/cmssw.git
+git fetch SegLink ${CMSSW_BRANCH}:SegLink_cmssw
+git checkout SegLink_cmssw
+git cms-addpkg RecoTracker/LST RecoTracker/LSTCore Configuration/ProcessModifiers RecoTracker/ConversionSeedGenerators RecoTracker/FinalTrackSelectors RecoTracker/IterativeTracking
+git submodule update --init --recursive
+scram b -j 8
+```
+
+## How it works
+
+The [TrackLooper repository](https://github.com/SegmentLinking/TrackLooper) is included as a git submodule in `RecoTracker/LSTCore` and the rest of the structure is set up using symlinks. Since we have made a lot of progress getting the code ready for CMSSW, it was just a matter of writing a simple `BuildFile.xml` file.
+
+## Benefits
+
+- It would make it easier to work towards the full integration if we have a self-contained thing. It would probably be easier to slowly adapt more of the "proper" CMSSW conventions instead of having to switch them all at once.
+- We can keep the standalone version alive for as long as needed.
+- Our CI can start running the checks that are done by the `cms-bot` for CMSSW PRs.
+
+## Disadvantages
+
+- I might be better to work towards having a single CMSSW package instead of having them separated in `LST` and `LSTCore`. However, I think we could use a similar approach in that case.
+- I couldn't think of anything else, but there's likely other disadvantages.
+
+## Things to do
+
+- There are a few minor changes that need to be made to the current LST package to get it to work with LSTCore.
+- At some point we'll have to figure out how to properly integrate the `data` directory.
+
diff --git a/RecoTracker/LSTCore/interface/alpaka/Constants.h b/RecoTracker/LSTCore/interface/alpaka/Constants.h
@@ -0,0 +1,157 @@
+#ifndef Constants_cuh
+#define Constants_cuh
+
+#include <alpaka/alpaka.hpp>
+
+#include "HeterogeneousCore/AlpakaInterface/interface/config.h"
+
+#ifdef CACHE_ALLOC
+#include "HeterogeneousCore/AlpakaInterface/interface/CachedBufAlloc.h"
+#endif
+
+#ifdef ALPAKA_ACC_GPU_CUDA_ENABLED
+#include <cuda_fp16.h>
+#endif
+
+namespace SDL {
+// Half precision wrapper functions.
+#if defined(FP16_Base)
+#define __F2H __float2half
+#define __H2F __half2float
+  typedef __half float FPX;
+#else
+#define __F2H
+#define __H2F
+  typedef float FPX;
+#endif
+
+  using Idx = alpaka_common::Idx;
+  using Dim = alpaka_common::Dim3D;
+  using Dim1d = alpaka_common::Dim1D;
+  using Vec = alpaka_common::Vec3D;
+  using Vec1d = alpaka_common::Vec1D;
+  using WorkDiv = alpaka_common::WorkDiv3D;
+
+  using Acc = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D;
+  using Dev = ALPAKA_ACCELERATOR_NAMESPACE::Device;
+  using DevHost = ALPAKA_ACCELERATOR_NAMESPACE::DevHost;
+  using QueueAcc = ALPAKA_ACCELERATOR_NAMESPACE::Queue;
+
+  Vec const elementsPerThread(Vec::all(static_cast<Idx>(1)));
+
+// Needed for files that are compiled by g++ to not throw an error.
+// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends.
+#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED)
+  struct uint4 {
+    unsigned int x;
+    unsigned int y;
+    unsigned int z;
+    unsigned int w;
+  };
+#endif
+
+  // Buffer type for allocations where auto type can't be used.
+  template <typename TDev, typename TData>
+  using Buf = alpaka::Buf<TDev, TData, Dim1d, Idx>;
+
+  // Allocation wrapper function to make integration of the caching allocator easier and reduce code boilerplate.
+  template <typename T, typename TAcc, typename TSize, typename TQueue>
+  ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf<alpaka::Dev<TAcc>, T> allocBufWrapper(TAcc const& devAccIn,
+                                                                            TSize nElements,
+                                                                            TQueue queue) {
+#ifdef CACHE_ALLOC
+    return cms::alpakatools::allocCachedBuf<T, Idx>(devAccIn, queue, Vec1d(static_cast<Idx>(nElements)));
+#else
+    return alpaka::allocBuf<T, Idx>(devAccIn, Vec1d(static_cast<Idx>(nElements)));
+#endif
+  }
+
+  // Second allocation wrapper function when queue is not given. Reduces code boilerplate.
+  template <typename T, typename TAcc, typename TSize>
+  ALPAKA_FN_HOST ALPAKA_FN_INLINE Buf<alpaka::Dev<TAcc>, T> allocBufWrapper(TAcc const& devAccIn, TSize nElements) {
+    return alpaka::allocBuf<T, Idx>(devAccIn, Vec1d(static_cast<Idx>(nElements)));
+  }
+
+  // Wrapper function to reduce code boilerplate for defining grid/block sizes.
+  ALPAKA_FN_HOST ALPAKA_FN_INLINE Vec createVec(int x, int y, int z) {
+    return Vec(static_cast<Idx>(x), static_cast<Idx>(y), static_cast<Idx>(z));
+  }
+
+  // Adjust grid and block sizes based on backend configuration
+  template <typename Vec>
+  ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid,
+                                                        const Vec& threadsPerBlock,
+                                                        const Vec& elementsPerThreadArg) {
+    Vec adjustedBlocks = blocksPerGrid;
+    Vec adjustedThreads = threadsPerBlock;
+
+    // Serial execution, so all launch parameters set to 1.
+#if defined(ALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED)
+    adjustedBlocks = Vec::all(static_cast<Idx>(1));
+    adjustedThreads = Vec::all(static_cast<Idx>(1));
+#endif
+
+    // Threads enabled, set number of blocks to 1.
+#if defined(ALPAKA_ACC_CPU_B_SEQ_T_THREADS_ENABLED)
+    adjustedBlocks = Vec::all(static_cast<Idx>(1));
+#endif
+
+    return WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg);
+  }
+
+// If a compile time flag does not define PT_CUT, default to 0.8 (GeV)
+#ifndef PT_CUT
+  constexpr float PT_CUT = 0.8f;
+#endif
+
+  const unsigned int MAX_BLOCKS = 80;
+  const unsigned int MAX_CONNECTED_MODULES = 40;
+
+  const unsigned int N_MAX_PIXEL_SEGMENTS_PER_MODULE = 50000;
+
+  const unsigned int N_MAX_PIXEL_MD_PER_MODULES = 2 * N_MAX_PIXEL_SEGMENTS_PER_MODULE;
+
+  const unsigned int N_MAX_PIXEL_TRIPLETS = 5000;
+  const unsigned int N_MAX_PIXEL_QUINTUPLETS = 15000;
+
+  const unsigned int N_MAX_PIXEL_TRACK_CANDIDATES = 30000;
+  const unsigned int N_MAX_NONPIXEL_TRACK_CANDIDATES = 1000;
+
+  const unsigned int size_superbins = 45000;
+
+  //defining the constant host device variables right up here
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float miniMulsPtScaleBarrel[6] = {0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034};
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float miniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006};
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float miniRminMeanBarrel[6] = {
+      25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384};
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float miniRminMeanEndcap[5] = {
+      130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742};
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2;
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float kR1GeVf = 1. / (2.99792458e-3 * 3.8);
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float sinAlphaMax = 0.95;
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float ptCut = PT_CUT;
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float deltaZLum = 15.0;
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float pixelPSZpitch = 0.15;
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float strip2SZpitch = 5.0;
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float pt_betaMax = 7.0;
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float magnetic_field = 3.8112;
+  // Since C++ can't represent infinity, SDL_INF = 123456789 was used to represent infinity in the data table
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float SDL_INF = 123456789;
+}  //namespace SDL
+
+namespace T5DNN {
+  // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%)
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float LSTWP1 = 0.3418833f;  // 94.0% TPR, 43.9% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float LSTWP2 = 0.6177366f;  // 82.0% TPR, 20.0% FPR
+  // Other working points
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP70 = 0.7776195f;    // 70.0% TPR, 10.0% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP75 = 0.7181118f;    // 75.0% TPR, 13.5% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP80 = 0.6492643f;    // 80.0% TPR, 17.9% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP85 = 0.5655319f;    // 85.0% TPR, 23.8% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP90 = 0.4592205f;    // 90.0% TPR, 32.6% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP95 = 0.3073708f;    // 95.0% TPR, 47.7% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP97p5 = 0.2001348f;  // 97.5% TPR, 61.2% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP99 = 0.1120605f;    // 99.0% TPR, 75.9% FPR
+  ALPAKA_STATIC_ACC_MEM_GLOBAL const float WP99p9 = 0.0218196f;  // 99.9% TPR, 95.4% FPR
+}  // namespace T5DNN
+#endif
diff --git a/RecoTracker/LSTCore/interface/alpaka/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h
@@ -0,0 +1,115 @@
+#ifndef LST_H
+#define LST_H
+
+#ifdef LST_IS_CMSSW_PACKAGE
+#include "RecoTracker/LSTCore/interface/alpaka/Constants.h"
+#include "RecoTracker/LSTCore/interface/alpaka/LSTESData.h"
+#else
+#include "Constants.h"
+#include "LSTESData.h"
+#endif
+
+#include <cstdlib>
+#include <numeric>
+#include <mutex>
+#include <alpaka/alpaka.hpp>
+
+namespace SDL {
+  template <typename>
+  class Event;
+
+  template <typename>
+  class LST;
+
+  template <>
+  class LST<SDL::Acc> {
+  public:
+    LST() = default;
+
+    void run(QueueAcc& queue,
+             bool verbose,
+             const LSTESDeviceData<Dev>* deviceESData,
+             const std::vector<float> see_px,
+             const std::vector<float> see_py,
+             const std::vector<float> see_pz,
+             const std::vector<float> see_dxy,
+             const std::vector<float> see_dz,
+             const std::vector<float> see_ptErr,
+             const std::vector<float> see_etaErr,
+             const std::vector<float> see_stateTrajGlbX,
+             const std::vector<float> see_stateTrajGlbY,
+             const std::vector<float> see_stateTrajGlbZ,
+             const std::vector<float> see_stateTrajGlbPx,
+             const std::vector<float> see_stateTrajGlbPy,
+             const std::vector<float> see_stateTrajGlbPz,
+             const std::vector<int> see_q,
+             const std::vector<std::vector<int>> see_hitIdx,
+             const std::vector<unsigned int> ph2_detId,
+             const std::vector<float> ph2_x,
+             const std::vector<float> ph2_y,
+             const std::vector<float> ph2_z);
+    std::vector<std::vector<unsigned int>> hits() { return out_tc_hitIdxs_; }
+    std::vector<unsigned int> len() { return out_tc_len_; }
+    std::vector<int> seedIdx() { return out_tc_seedIdx_; }
+    std::vector<short> trackCandidateType() { return out_tc_trackCandidateType_; }
+
+  private:
+    void prepareInput(const std::vector<float> see_px,
+                      const std::vector<float> see_py,
+                      const std::vector<float> see_pz,
+                      const std::vector<float> see_dxy,
+                      const std::vector<float> see_dz,
+                      const std::vector<float> see_ptErr,
+                      const std::vector<float> see_etaErr,
+                      const std::vector<float> see_stateTrajGlbX,
+                      const std::vector<float> see_stateTrajGlbY,
+                      const std::vector<float> see_stateTrajGlbZ,
+                      const std::vector<float> see_stateTrajGlbPx,
+                      const std::vector<float> see_stateTrajGlbPy,
+                      const std::vector<float> see_stateTrajGlbPz,
+                      const std::vector<int> see_q,
+                      const std::vector<std::vector<int>> see_hitIdx,
+                      const std::vector<unsigned int> ph2_detId,
+                      const std::vector<float> ph2_x,
+                      const std::vector<float> ph2_y,
+                      const std::vector<float> ph2_z);
+
+    void getOutput(SDL::Event<Acc>& event);
+    std::vector<unsigned int> getHitIdxs(const short trackCandidateType,
+                                         const unsigned int TCIdx,
+                                         const unsigned int* TCHitIndices,
+                                         const unsigned int* hitIndices);
+
+    // Input and output vectors
+    std::vector<float> in_trkX_;
+    std::vector<float> in_trkY_;
+    std::vector<float> in_trkZ_;
+    std::vector<unsigned int> in_hitId_;
+    std::vector<unsigned int> in_hitIdxs_;
+    std::vector<unsigned int> in_hitIndices_vec0_;
+    std::vector<unsigned int> in_hitIndices_vec1_;
+    std::vector<unsigned int> in_hitIndices_vec2_;
+    std::vector<unsigned int> in_hitIndices_vec3_;
+    std::vector<float> in_deltaPhi_vec_;
+    std::vector<float> in_ptIn_vec_;
+    std::vector<float> in_ptErr_vec_;
+    std::vector<float> in_px_vec_;
+    std::vector<float> in_py_vec_;
+    std::vector<float> in_pz_vec_;
+    std::vector<float> in_eta_vec_;
+    std::vector<float> in_etaErr_vec_;
+    std::vector<float> in_phi_vec_;
+    std::vector<int> in_charge_vec_;
+    std::vector<unsigned int> in_seedIdx_vec_;
+    std::vector<int> in_superbin_vec_;
+    std::vector<int8_t> in_pixelType_vec_;
+    std::vector<char> in_isQuad_vec_;
+    std::vector<std::vector<unsigned int>> out_tc_hitIdxs_;
+    std::vector<unsigned int> out_tc_len_;
+    std::vector<int> out_tc_seedIdx_;
+    std::vector<short> out_tc_trackCandidateType_;
+  };
+
+}  // namespace SDL
+
+#endif