From 1c51f23e8b94ece65cd9238aab231ad87dd0f034 Mon Sep 17 00:00:00 2001 From: Andres Rios Tascon Date: Wed, 13 Nov 2024 10:27:40 -0800 Subject: [PATCH 1/2] CMSSW Integration of LST Squashed commit of the following: commit c21a4819f73910911f8aac1cba4b70681a4e6192 Author: Slava Krutelyov Date: Tue Nov 12 11:04:49 2024 -0800 batch9 of updates for LST integration in CMSSW (#121) * FP16_Base compilation fixes * Delete obsolete comment * Remove duplicate enum * Remove unused arrays and functions * correct return type * Removed unnecessary CopyToDevice * Small fixes to address review comments * Simplify moving to common enum --------- Co-authored-by: Slava Krutelyov Co-authored-by: Manos Vourliotis Co-authored-by: Andres Rios Tascon commit 1a0ab7e4952d8fd1309d35f79c424195fa6b4a3a Merge: 8928adfc111 036340d2aeb Author: Andres Rios Tascon Date: Tue Nov 12 10:21:39 2024 -0800 Merge tag 'CMSSW_14_2_X_2024-11-11-2300' into CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles commit 8928adfc1112b6e5b18ad7661b51f0eec7320bab Author: Manos Vourliotis Date: Thu Nov 7 05:30:25 2024 -0800 Update LST workflows to use Run4 instead of 2026 commit bedeb453f4c1650571bac972b802a167b1559bd5 Merge: 12ead85fa72 476d3aa87bf Author: Manos Vourliotis Date: Wed Nov 6 21:30:25 2024 -0800 Merge remote-tracking branch 'SegLink/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles' into CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_142X-11-06-1100 commit 476d3aa87bfcb43915df1cc5485a54c35a49420a Author: Slava Krutelyov Date: Mon Nov 4 06:02:24 2024 -0800 batch8 of updates for LST integration in CMSSW (#113) * Removed allocBufWrapper and cache toggle * Removed references to temporaries * Fixed headers * Removed devAcc_ * Simplified view creation * File and class renaming * Update workflow, enums, compilation flags * Move LSTOutputConverter to edm::stream::EDProducer * Merge standalone and CMSSW functions to get hits and TCs * Updates to remove syncs when creating or destroying the LSTEvent in CMSSW * Rename lst_INF and finalize 
simplification of compilation flags * Create lstProducerTask, move to C++20 pi and use C++20 in standalone compilation * Include numbers * Limit number of make threads --------- Co-authored-by: Andres Rios Tascon Co-authored-by: Manos Vourliotis commit 4d603caa608fa09717852763b4329670ad894b10 Merge: 04d02b480a8 b96fd024cd7 Author: Andres Rios Tascon Date: Tue Oct 29 11:57:22 2024 -0700 Merge tag 'CMSSW_14_2_0_pre3' into CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles commit 04d02b480a8ed0a9b2328ae07cf93d27659036d8 Merge: 83897c845c9 cb1557e4180 Author: Slava Krutelyov Date: Tue Oct 29 05:49:09 2024 -0700 Merge pull request #109 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch7 migrate to SoATemplate commit cb1557e41801d7baa20770a097ad62cd24a70862 Merge: 83897c845c9 f8e78842f65 Author: Slava Krutelyov Date: Mon Oct 28 15:06:13 2024 -0700 Merge branch 'CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch7' into CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles commit f8e78842f65be132086665ad62a0430d67ab2ee9 Merge: ccc4bf40c25 48d01f3ca2b Author: Slava Krutelyov Date: Mon Oct 28 14:35:23 2024 -0700 Merge pull request #106 from SegmentLinking/more_soa_migrations continuing migration to SoA: update LS, migrate ranges, hits, endcap, modules commit 48d01f3ca2b28eb23fea6e96fbad01f7f212f44c Author: Andres Rios Tascon Date: Mon Oct 28 11:37:42 2024 -0700 Review fixes commit d08032ad9a090a089eeb2233e29c024aaa9785c5 Author: Andres Rios Tascon Date: Fri Oct 25 13:14:50 2024 -0700 Consistency fixes commit 891a97daaf88dad70576d8b8e613d3a264bc3fe3 Merge: 043dfe04a60 ccc4bf40c25 Author: Andres Rios Tascon Date: Fri Oct 25 12:34:18 2024 -0700 Merge branch 'CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch7' into more_soa_migrations commit 043dfe04a6049ef40315bf5736931ed839d498cc Author: Andres Rios Tascon Date: Fri Oct 25 11:35:59 2024 -0700 migrate modules to SoATemplate commit ccc4bf40c2548311ab430860e74530e258c8b109 Merge: cb84b9c25e6 00013ddd202 Author: Andres Rios Tascon Date: 
Fri Oct 25 14:29:22 2024 -0400 Merge pull request #104 from slava77/CMSSW_14_2_0_pre1/LSTb7-SoA-TC-T5 continuing migration to SoA: upd TC, migrate T3s, T5s, pT3s and pT5s ... commit 00013ddd202dcd2f50761b903430f137d2b7fa19 Author: Slava Krutelyov Date: Thu Oct 24 12:02:14 2024 -0700 migrate pT3 and pT5 to SoATemplate commit 5e2bf7b81838a4d4b85c72a50782a725875c44e8 Author: Andres Rios Tascon Date: Thu Oct 24 11:22:00 2024 -0700 migrate EndcapGeometry to SoATemplate commit 018204d3cc159492222cdb74335beb2b74362c29 Author: Andres Rios Tascon Date: Thu Oct 24 11:18:59 2024 -0700 migrate hits to SoATemplate commit 534ac1b287856251cbb1fefd2f3254ce437f70d2 Author: Andres Rios Tascon Date: Thu Oct 24 11:00:55 2024 -0700 migrate ranges to SoATemplate commit 9b8d26df4fe0de3c9332d07733bc074cc42133bd Author: Slava Krutelyov Date: Tue Oct 22 16:11:02 2024 -0700 migrate quintuplets to SoATemplate commit fd99b50be8ac61172596e7b846f483555c345b25 Author: Andres Rios Tascon Date: Tue Oct 22 08:35:15 2024 -0700 Made view sizes clearer commit 1bbe66374e0b720a6fb04dff563051dc9f21d27c Author: Slava Krutelyov Date: Tue Oct 22 06:52:37 2024 -0700 migrate triplets to SoATemplate commit 6f1ea3fd2d28bb476b38f36a68330c7b27287d4d Author: Slava Krutelyov Date: Thu Oct 10 10:55:17 2024 -0700 simplify a bit TC SoAs commit cb84b9c25e65576046df7a14ebb0c7a6e4212c2b Merge: fc3fc6ac8a5 0e0b5e1c2e6 Author: Slava Krutelyov Date: Fri Oct 18 07:26:37 2024 -0700 Merge pull request #93 from SegmentLinking/segments_soa Migrate segments to SoA+PortableCollection commit 0e0b5e1c2e6c938757bae174b6ff876787120b1d Author: Andres Rios Tascon Date: Wed Oct 16 08:22:33 2024 -0700 Match conventions from previous PRs commit 6b37c0730cbc00b220b476e91451097a0d99bd18 Merge: efef777788a fc3fc6ac8a5 Author: Andres Rios Tascon Date: Wed Oct 16 08:04:42 2024 -0700 Merge branch 'CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch7' into segments_soa commit fc3fc6ac8a5b29d328ac0b473c2cbffa7a6a392d Merge: 8338254f698 d3156d1ce8e 
Author: Slava Krutelyov Date: Wed Oct 16 05:04:48 2024 -0700 Merge pull request #101 from SegmentLinking/mds_soa Migrate MDs to SoA+PortableCollection commit d3156d1ce8e3dd616f02a3fc31570fc6b12c5678 Author: Andres Rios Tascon Date: Tue Oct 15 20:11:11 2024 -0700 Fixed formatting commit 36a8c436166930b6aeaa8eaa3db6d1c8520750ee Author: Andres Rios Tascon Date: Mon Oct 14 13:51:19 2024 -0700 Moved MiniDoubletsDeviceCollection definition commit 3b4fa9ee78aa68fc4afaf0a34f90363809a4d217 Author: Andres Rios Tascon Date: Thu Oct 10 13:22:05 2024 -0700 Made view sizes clearer commit 66612e839e0c1631735e367809f601819d54f00c Author: Andres Rios Tascon Date: Thu Oct 10 12:48:15 2024 -0700 Moved MD SoA definition to its own header commit 129a2d484060de210d54862e73bf9704de477f37 Author: Andres Rios Tascon Date: Thu Oct 10 07:02:06 2024 -0700 Adjusted naming convention commit 8fba6cbc6f14cd2828367a44fad1ef7182267a71 Merge: f430d1b9528 8338254f698 Author: Andres Rios Tascon Date: Thu Oct 10 06:48:51 2024 -0700 Merge branch 'CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch7' into mds_soa commit 8338254f6989ee6fa07bc4b7e8a0ebbe528aa5e4 Merge: 3858cf3f060 69804f049b3 Author: Andres Rios Tascon Date: Tue Oct 8 14:11:10 2024 -0400 Merge pull request #90 from slava77/CMSSW_14_1_0_pre5/LSTb6-SoA-TCs migrate TrackCandidate to use SoATemplate and PortableCollection commit 69804f049b30839427a81989052e36e825b08cd3 Author: Slava Krutelyov Date: Fri Oct 4 16:04:19 2024 -0700 move TrackCandidatesDeviceCollection and related aliases to interface/alpaka commit f430d1b952808116a4026b7f0296d575f6380d43 Author: Andres Rios Tascon Date: Fri Oct 4 06:37:25 2024 -0700 Formatted and removed unnecessary CopyToHost commit efef777788aa9d0f112ba868aa41b8d6cc5cd99f Author: Andres Rios Tascon Date: Fri Oct 4 06:21:52 2024 -0700 Removed unnecessary CopyToHost commit a296da7f2b7112941fe05eb185c44ce37df9f87e Author: Slava Krutelyov Date: Tue Oct 1 15:46:02 2024 -0700 get rid of TrackCandidates* trackCandidatesD_ 
and simplify syntax commit 52377d108572c634bb02e8f0d45d90ac686bd180 Author: Andres Rios Tascon Date: Tue Oct 1 08:33:37 2024 -0400 Fixed makefile commit 20f6f9c67a21abdb5d18807f35b5059ef8ca441f Author: Andres Rios Tascon Date: Mon Sep 30 17:11:03 2024 -0400 Migrated MDs to SoA+PortableCollection commit 155fc82f071f792393fd1f75a28b9b391ac29dd1 Author: Slava Krutelyov Date: Thu Aug 29 09:50:08 2024 -0700 switch to TrackCandidates = ::lst::TrackCandidatesSoA::View commit af1f68ff3bfde38596b06fad16ff0e5bb4e71d20 Author: Slava Krutelyov Date: Thu Aug 29 07:46:09 2024 -0700 code checks commit 8864fd13b58c8dc33c8ab6402d0f937679531990 Author: Slava Krutelyov Date: Thu Aug 29 07:42:03 2024 -0700 sync DALPAKA flags in standalone builds with cmsdist scram-tools.file/tools/alpaka/alpaka.xml commit a5822d0c626a43f7292c6000f2a547e5a77f9a3c Author: Slava Krutelyov Date: Thu Aug 29 07:41:06 2024 -0700 add a check that device and host match for host=device commit 3e98eb4e71fa9573ccff222ba0b9f18ebc44f809 Author: Slava Krutelyov Date: Wed Aug 28 20:49:20 2024 -0700 code checks commit 18a1f6e1b476d1a1af2dd61f9cf818247d1a064e Author: Slava Krutelyov Date: Wed Aug 28 17:18:14 2024 -0700 drop -Wshadow in standalone builds: DataFormats/SoATemplate and related generates around 30MB of warnings with only partial TrackCandidate SoA use commit 94df11b62e4b17d84c2e8efb6b934bddeb069534 Author: Slava Krutelyov Date: Wed Aug 28 17:16:08 2024 -0700 migrate TrackCandidate to SoA from DataFormats/SoATemplate: kernels internally are still using the POD TrackCandidates SoA (to be migrated later) commit 96be4f729e1750923f3fbd5903dd819bbe5fbb73 Author: Andres Rios Tascon Date: Wed Sep 25 10:29:07 2024 -0700 Initialize only required columns commit 3858cf3f06071b5d1bbb0322e931028024e3e51b Author: Andres Rios Tascon Date: Mon Sep 23 12:26:35 2024 -0700 Fixed cxxopts compilation commit 9e2a402dd393a9dacc591512b8997345083bd707 Author: Andres Rios Tascon Date: Mon Sep 23 12:01:31 2024 -0700 Format code commit 
77ff0e56b8328e8c93d1651bb10403a5edd50b29 Author: Andres Rios Tascon Date: Mon Sep 23 11:59:41 2024 -0700 Fixed memset commit 37fe122b9c6460b2ed1c8de6638a21857066cbf4 Author: Andres Rios Tascon Date: Mon Sep 23 11:44:35 2024 -0700 Fixed indexing issues commit acff43f8e726cf2395b257bfce777268d40504b3 Author: Andres Rios Tascon Date: Fri Sep 20 08:30:54 2024 -0700 Switched to only using views commit a7e3ec324b85abd49a4c4e19625aec1f5d622066 Author: Andres Rios Tascon Date: Wed Sep 18 13:43:33 2024 -0700 Fixed standalone compilation commit d5da7562ebbfc8af5d605c573cc28719773299a5 Author: Andres Rios Tascon Date: Wed Sep 18 12:44:07 2024 -0700 Moved segments to SoA+PortableCollection commit 83897c845c9e4a69adcd3350e55cfcec00a7cfc1 Author: Andres Rios Tascon Date: Mon Sep 16 07:04:19 2024 -0700 Updated setup script to CMSSW_14_2_0_pre1 commit 135e18b033a2ecc5d75aa8bc17ef233392352850 Merge: 5f9c2f6b176 c312b85e047 Author: Andres Rios Tascon Date: Mon Sep 16 07:01:32 2024 -0700 Merge tag 'CMSSW_14_2_0_pre1' into CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles commit f68981e85418b59d650e7f087c0dbfa0ec038c19 Author: Andres Rios Tascon Date: Mon Sep 16 06:57:59 2024 -0700 Updated setup script to CMSSW_14_2_0_pre1 commit d76431238ca6cf599c5a304603d33a06037103e2 Merge: 5f9c2f6b176 c312b85e047 Author: Andres Rios Tascon Date: Mon Sep 16 06:52:36 2024 -0700 Merge tag 'CMSSW_14_2_0_pre1' into CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch7 commit 5f9c2f6b176fcf13fb8ff0972be81b5493642805 Author: Slava Krutelyov Date: Fri Sep 13 10:46:41 2024 -0700 new style code-checks from cms-bot https://cmssdt.cern.ch/SDT/code-checks/cms-sw-PR-45117/41778/code-format.patch; the result looks less readable, but we have to comply commit 2bc333b03c80a726ba00cc1626e5600e77c1ac82 Merge: ff27cf3a741 46f7f22952c Author: Slava Krutelyov Date: Fri Sep 13 09:13:42 2024 -0700 Merge pull request #88 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch6 Batch 6 for updates to LST integration in 
cms-sw commit 46f7f22952c51b2a3150812c7120f272c2194c74 Author: Andres Rios Tascon Date: Tue Sep 3 08:30:40 2024 -0700 Finished cleaning namespaces commit d2b9e89fdf231ae63be1a2e10a09ee9cd580d6cf Merge: f245be099c8 f0e560b5189 Author: Slava Krutelyov Date: Fri Aug 30 15:18:33 2024 -0700 Merge pull request #89 from SegmentLinking/more_review_fixes More review fixes: smart pointers, cleaner namespaces, naming fixes commit f0e560b51891bacc74af14cbf4e2bcc9c4eddb86 Author: Andres Rios Tascon Date: Fri Aug 30 11:35:38 2024 -0700 Renamed private data members commit 3eb7a5645e7b27c33792a7fe56241a0de601b837 Author: Andres Rios Tascon Date: Fri Aug 30 11:03:29 2024 -0700 Addressed review comments commit 034022106e1499c0e7264b919b0bf396946ffc5a Author: Andres Rios Tascon Date: Fri Aug 30 10:42:33 2024 -0700 Cleaned up namespaces commit 1bff72d3d782a796490d905cfc2733ddf586cdfa Author: Andres Rios Tascon Date: Tue Aug 27 10:48:36 2024 -0700 Started moving to smart pointers commit f245be099c8c78b02b48e85e6ef1cc0303cd6e33 Merge: 971349945a2 24f587e451f Author: Slava Krutelyov Date: Wed Aug 28 10:25:07 2024 -0700 Merge pull request #85 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_smallFixesPostBatch5 Small fixes post batch 5 commit 24f587e451f5643f99b63337690d200db7bb54d3 Author: Manos Vourliotis Date: Wed Aug 28 08:11:54 2024 -0700 Invert DeltaPhi arguments in CMSSW to harmonize with standalone commit b5a138320eb426265b4d081cfc6eb226f0dc3cba Author: Manos Vourliotis Date: Sun Aug 25 11:19:49 2024 -0700 Harmonize pixelType determination in CMSSW with the standalone commit bb8402d7001fa177141d6a79c0ec42fe896798e2 Author: Manos Vourliotis Date: Sun Aug 25 10:40:56 2024 -0700 Addressing review comments on PixelType commit 1b6a0e11a1352bb707b2c785c6a98bdeb8f7d297 Author: Manos Vourliotis Date: Thu Aug 22 16:09:54 2024 -0700 Apply review comments commit 971349945a2532ee3897b5c32e5641d72a2a4c42 Merge: ff27cf3a741 3864e821513 Author: Manos Vourliotis Date: Wed Aug 21 
18:52:16 2024 +0200 Merge pull request #87 from slava77/CMSSW_14_1_0_pre5/LSTb6-alpaka-wait-bugfix bugfixes to incomplete alpaka::wait cleanup (for batch6) commit c4d12fc7e0a6c95f8e4819599faa77007d9477a1 Author: Manos Vourliotis Date: Wed Aug 21 03:04:50 2024 -0700 Simplify consumes(), esConsumes(), produces() and code checks/format commit 03da84598bbfd7c606bb4ecbc6639b63ed95690d Author: Manos Vourliotis Date: Wed Aug 21 02:45:29 2024 -0700 Make a data member private commit 3864e82151365f15b9034f8d5625bfed066e4deb Author: Slava Krutelyov Date: Tue Aug 20 17:34:59 2024 -0700 bugfixes to incomplete alpaka::wait cleanup: need to wait to get counters; need to write to CPU buffer asynchronously or directly only after a sync commit fcb5e324613185fe0954574bbc91ded2e9dbe12b Author: Manos Vourliotis Date: Tue Aug 20 11:45:11 2024 -0700 Work on pixelType and removal of deprecated functions in standalone commit 9544a58f04085d8afd6fc4804f5db2858afbef7c Author: Manos Vourliotis Date: Mon Aug 19 11:22:56 2024 -0700 Revise some includes commit ff27cf3a74126ec211afe219415f242d7797228f Merge: 77854d99c5f fe0d0acfec0 Author: Slava Krutelyov Date: Sat Aug 17 10:08:17 2024 -0700 Merge pull request #79 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch5 Batch 5 for updates to LST integration in cms-sw commit fe0d0acfec04c29c094c5f6a1c958652bd5b0893 Merge: fb5c0ea46ed 79ea879d884 Author: Slava Krutelyov Date: Sat Aug 17 10:02:33 2024 -0700 Merge pull request #84 from SegmentLinking/use_namespaces_instead_of_templates Moved LST and Event classes to ALPAKA_ACCELERATOR_NAMESPACE commit 79ea879d884fba79175bb0748c5e92debf1af55a Author: Andres Rios Tascon Date: Sat Aug 17 07:31:46 2024 -0700 Moved all alpaka code to ALPAKA_ACCELERATOR_NAMESPACE commit 1a5e2c430b1810d7a91125dfaaa676cf154361b7 Merge: 9f6d61262e3 fb5c0ea46ed Author: Andres Rios Tascon Date: Sat Aug 17 05:27:27 2024 -0700 Merge branch 'CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch5' into 
use_namespaces_instead_of_templates commit fb5c0ea46ede9da7c019d30261a648836a41eb49 Merge: 4ff4aafe447 5cce69a3d81 Author: Slava Krutelyov Date: Fri Aug 16 15:08:14 2024 -0700 Merge pull request #83 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_destructorsMovesKernelNames Remove user-defined destructors, make vector moves useful, rename kernels commit 9f6d61262e352d4c05b71824c68ba5849ca22dec Author: Andres Rios Tascon Date: Fri Aug 16 12:55:26 2024 -0700 Moved LST and Event classes to ALPAKA_ACCELERATOR_NAMESPACE commit 5cce69a3d8140f161e16ac3b2cbbb8706fc26ab7 Author: Manos Vourliotis Date: Fri Aug 16 11:04:46 2024 -0700 Code format and checks commit 787dfe1faca472b4dd067388964ca229c24452f0 Author: Manos Vourliotis Date: Fri Aug 16 10:38:02 2024 -0700 Renaming of kernels commit d5e443a15b4f267fbf18084abbc0fc044a2e79b1 Author: Manos Vourliotis Date: Fri Aug 16 09:33:38 2024 -0700 Remove user-defined destructors commit 8d2366bea51a0c8778c1256c0d6a9f27ac2edd94 Author: Manos Vourliotis Date: Fri Aug 16 09:15:04 2024 -0700 Remove pass by const reference when std::move-ing commit 4ff4aafe447a8af320fe2dace9708730c103c5da Merge: c1767faf889 7889093ac6b Author: Andres Rios Tascon Date: Fri Aug 16 13:13:52 2024 -0400 Merge pull request #77 from slava77/CMSSW_14_1_0_pre5/LSTb5-alpaka-calls-review batch5: alpaka::wait and use views to local host data instead of buffers commit 7889093ac6bd4e1f2e01ee5452fca01e776d6f0c Author: Slava Krutelyov Date: Thu Aug 15 16:43:14 2024 -0700 add synchronizations in callers of the event methods where it matters; make synchronization more explicit/flexible in names or function arguments commit c1767faf889bfa6f840f09de1973262880c43e22 Merge: d3549cba529 22192ec1932 Author: Slava Krutelyov Date: Wed Aug 14 15:49:57 2024 -0700 Merge pull request #82 from SegmentLinking/more_review_fixes More review fixes commit 43ce20eee979ffc8b41d38629e91605d7cce3c54 Author: Slava Krutelyov Date: Mon Aug 12 15:26:08 2024 -0700 explicitly require 
1D single block kernels to use Acc1D and have one block with asserts commit 22192ec1932fc2cb8886152bbb4b2d94d6e1c0c3 Author: Andres Rios Tascon Date: Mon Aug 12 12:19:53 2024 -0700 Add include for fp16 on HIP commit a0432cce254d354bd744b8ddea3676cab87e3b4b Author: Andres Rios Tascon Date: Mon Aug 12 12:09:26 2024 -0700 Reduce number of moduleConnections_ lookups commit 80ffdc3cdc26110a999dd883d3596b7eba55677b Author: Andres Rios Tascon Date: Mon Aug 12 11:51:40 2024 -0700 Changed syntax of atomic operations commit c6a246874122bc22055440db295f3bc370a5f053 Author: Slava Krutelyov Date: Thu Aug 8 15:35:06 2024 -0700 lst::createWorkDiv now depends on Acc to avoid ODR; allocBufWrapper correct parameter is TDev commit a7d73c09a9c7a7ecbf25c45d56f70ebcadddb285 Author: Slava Krutelyov Date: Thu Aug 8 06:37:00 2024 -0700 replace alpaka::getPtrNative(buf) with buf.data() commit 4ef678d1c691665c37506f39c2274afb0bfef00a Author: Slava Krutelyov Date: Thu Aug 8 06:21:53 2024 -0700 switch to using alpaka::exec commit 73431560c541ac40e180f4b179182bb1d336860c Author: Slava Krutelyov Date: Thu Aug 8 05:31:06 2024 -0700 cleanup unnecessary alpaka::wait; add comments justifying other alpaka::wait calls; switch to cms::alpakatools::make_host_buffer for a few local buffers commit e8cc7a8789a6345818a9a400bd56ee10a5d39817 Author: Slava Krutelyov Date: Fri Aug 2 16:20:25 2024 -0700 cleanup unnecessary alpaka::wait commit 85576445ee6b89c8675978d738d170895ce1c25a Author: Slava Krutelyov Date: Fri Aug 2 08:16:42 2024 -0700 remove spurious semicolons at the end of method implementations commit d3549cba529a729f26b10d69fe73ad10aa0b037b Merge: 77854d99c5f 8f720ad0a85 Author: Slava Krutelyov Date: Mon Aug 5 14:33:06 2024 -0700 Merge pull request #76 from SegmentLinking/LSTESData_fixes LSTESData fixes commit 8f720ad0a855687648cad43780c15adb542e2b9e Author: Andres Rios Tascon Date: Mon Aug 5 07:57:46 2024 -0700 Fixed memcpy commit 77854d99c5fde98a09240bad892294081fbd904d Author: Slava Krutelyov Date: 
Fri Aug 2 16:13:09 2024 -0700 name=1 -> name=RecoTracker/LSTCore commit 4cd59747c63441d939e3e8a30eafaa6f843246d4 Merge: 1d9b3967669 ecd8e3ee4eb Author: Slava Krutelyov Date: Fri Aug 2 13:42:50 2024 -0700 Merge pull request #66 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch4 Batch 4 for updates to LST integration in cms-sw commit 14b8bd918bddab91e3f0f85f3f06dfdd5ffe6198 Author: Andres Rios Tascon Date: Fri Aug 2 08:36:24 2024 -0700 Minor cleanup commit 0bcb06cc65252f0fad82bec228c72051d78d9209 Author: Andres Rios Tascon Date: Wed Jul 31 14:00:24 2024 -0700 Removed alpaka functions from host ES code commit ecd8e3ee4ebe5d0be7055f1a9b7d2ec9df8b2d23 Merge: d7689bba113 a076afb78f8 Author: Slava Krutelyov Date: Wed Jul 31 11:11:06 2024 -0700 Merge pull request #72 from SegmentLinking/removalOfSDL Total removal of sdl/SDL commit d7689bba113548ce7ec83c24c6acaf6d5b46ac40 Merge: 24aa3a29d36 5b1e21ce596 Author: Slava Krutelyov Date: Wed Jul 31 07:09:00 2024 -0700 Merge pull request #73 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_fixWorkflowConflict2 Resolving latest workflow conflict commit 5b1e21ce59698054e5317e9bc87723efc9f4ec80 Author: Manos Vourliotis Date: Wed Jul 31 02:50:16 2024 -0700 Fix workflow conflict commit a076afb78f8a32cc130a1e046ac9b7bee1af287c Author: Manos Vourliotis Date: Wed Jul 31 01:30:09 2024 -0700 Remove forgotten flag commit c3f41a7710988de11a6cf10011fc5d202766d085 Author: Manos Vourliotis Date: Tue Jul 30 11:26:35 2024 -0700 Code checks and format commit 67c23760c695d454ea07c0b26526d035d85caf01 Author: Manos Vourliotis Date: Tue Jul 30 10:57:10 2024 -0700 Simplification of variable naming commit d378a5955e4e94199cbba415aea4c6248857c5b1 Author: Manos Vourliotis Date: Tue Jul 30 10:33:43 2024 -0700 Removal of sdl/SDL from all standalone code commit 24aa3a29d3622a3c0bd653a8b0d9b15c71748a88 Merge: 3fc9904d053 944e1b4ef39 Author: Slava Krutelyov Date: Tue Jul 30 08:34:03 2024 -0700 Merge pull request #71 from 
SegmentLinking/renameClassesRemoveCopies More work on Constants and removal of vector copies commit 961d01e517934edda8f92d67583a4668d04cd91f Author: Manos Vourliotis Date: Tue Jul 30 07:53:28 2024 -0700 Remove sdl from variable names commit 944e1b4ef398fa959e0f4c2647b8d88340b4a78c Author: Manos Vourliotis Date: Tue Jul 30 07:27:10 2024 -0700 Code checks and format commit 5d5bd29c33581a0c0da2caf9cfecfdb6873754bc Author: Manos Vourliotis Date: Tue Jul 30 05:37:34 2024 -0700 Split PixelTriplets to PixelQuintuplets commit ae851e05b2e75dae44be8e9b74930153f6e1400d Author: Manos Vourliotis Date: Tue Jul 30 05:27:15 2024 -0700 Fourth batch of fixing function arguments commit 3fc9904d053c8ee98f4130e5299d1882660d0eaf Merge: fac29589568 d5e0b699a4f Author: Manos Vourliotis Date: Tue Jul 30 13:46:03 2024 +0300 Merge pull request #70 from slava77/CMSSW_14_1_0_pre5/LSTb4-buf-lite alpaka modernise/improve for batch4 commit 954a22ab377638ddbaf3f2922d7d95c1e8fa0dba Author: Manos Vourliotis Date: Tue Jul 30 03:31:08 2024 -0700 Third batch of fixing function arguments commit 2af068537c26492b1ff57b7d70f5a5b081d22334 Author: Manos Vourliotis Date: Mon Jul 29 08:01:16 2024 -0700 Second batch of fixing function arguments commit ce79bc1f6d478a6173b82baf3182c390f0226c1b Author: Manos Vourliotis Date: Mon Jul 29 06:03:10 2024 -0700 First batch of fixing function arguments commit 96aa7c0a97661d078ed9e03c891e1ebc8d3717ca Author: Manos Vourliotis Date: Sat Jul 27 17:11:01 2024 -0700 Revert constness on arguments passed by value commit e076050e099e1e1af41a9c8c25add98e14ca0976 Author: Manos Vourliotis Date: Sat Jul 27 14:51:16 2024 -0700 Revert moving general purpose functions to common CMSSW file commit 9268f4998ffe10e32366223032c966b259bb83ed Author: Manos Vourliotis Date: Fri Jul 26 06:27:32 2024 -0700 More constness, removal of vector copies, and movement of general-purpose alpaka functions commit d5e0b699a4fe8cf0aa72b462d623f26018f37d2e Author: Slava Krutelyov Date: Wed Jul 24 16:12:31 2024 
-0700 remove Vec3D createVec commit 07e0f4013fa4169aa33b88cf55c1b36933bce154 Author: Slava Krutelyov Date: Wed Jul 24 15:55:20 2024 -0700 geometry ::get single search commit 77e6b9a450853457ea6e351da5ac39ae94350ff3 Author: Manos Vourliotis Date: Wed Jul 24 09:44:52 2024 -0700 Removal of vector copies, as per comments commit 375a06c135e011cd1a5bc0db2bbd23ebd3a66086 Author: Manos Vourliotis Date: Wed Jul 24 06:45:27 2024 -0700 Renaming of constants and removal of custom functions now implemented in alpaka commit 69debc2bac724a1baf6a9aadfd60e396c862c975 Author: Slava Krutelyov Date: Tue Jul 23 15:58:55 2024 -0700 use cms::alpakatools::once_per_block commit fac2958956823939866762ca8b1a58baff84ce79 Merge: ff12237bfd6 7fdcabd5030 Author: Slava Krutelyov Date: Mon Jul 22 14:27:41 2024 -0700 Merge pull request #68 from SegmentLinking/renameClassesRemoveCopies Renaming of namespaces/classes/macros and removal of vector, etc. copies commit 7fdcabd5030b7d35d8fa2e3674650ec5fe9e18dc Author: Manos Vourliotis Date: Mon Jul 22 10:06:11 2024 -0700 Renaming of classes/structs/macros/constants commit a22faeb741008fedb497f25350284e24a341b02d Author: Manos Vourliotis Date: Sun Jul 21 14:34:26 2024 -0700 Revert change in TrackListMerger commit 304929e4ce29475e1f7ddc1b2c12904a02679509 Author: Manos Vourliotis Date: Sun Jul 21 14:25:50 2024 -0700 Fixes after rebase commit e54c6f1b16096990de48d7006a69ff8f262e0d68 Author: Manos Vourliotis Date: Sat Jul 20 08:04:57 2024 -0700 Apply comments namespace and type renaming commit 1a6e27b25632c354875192bf79fb55188d9434df Author: Manos Vourliotis Date: Sat Jul 20 07:42:27 2024 -0700 Fix for wrong loop range in pT3 kernel commit 5d0ef1ea6041adeba4bf782d5228f25e067fcb81 Author: Manos Vourliotis Date: Fri Jul 19 09:44:07 2024 -0700 Namespace renaming SDL->lst and inclusion of the T5DNN->t5dnn namespace under it commit ff12237bfd679a7ff0fdbee1f9ab0a7342bba94b Merge: 1250a8ce39d d0e320a22b2 Author: Andres Rios Tascon Date: Fri Jul 19 19:50:11 2024 -0400 
Merge pull request #67 from slava77/CMSSW_14_1_0_pre5/LSTb4-buf-base remove inheritance of buffers on underlying SoAs commit d0e320a22b274cb9e531ab6196dba796035522d2 Author: Slava Krutelyov Date: Fri Jul 19 08:56:04 2024 -0700 remove inheritance of buffers on underlying SoAs commit 1fb0489e012102c3ca544316859d3a3b1f9a20ac Author: Slava Krutelyov Date: Fri Jul 19 08:55:13 2024 -0700 split ranges from modules commit 1250a8ce39d4c15d2d8eb8ec9c45dda4bdaf6976 Merge: 1d9b3967669 fb12af04e4f Author: Slava Krutelyov Date: Thu Jul 18 14:34:11 2024 -0700 Merge pull request #65 from SegmentLinking/use_common_library Move some things out into a common library commit fb12af04e4f9df57e6a8be760da1d8ce807b1fd0 Author: Andres Rios Tascon Date: Thu Jul 18 07:44:57 2024 -0700 Format code commit d33ac3204e4db76e080894042510adbbf1ab87ec Author: Andres Rios Tascon Date: Thu Jul 18 07:28:52 2024 -0700 A bit more cleanup commit ccd29f4e497ce905a2b99a385d67c8146e561684 Author: Andres Rios Tascon Date: Wed Jul 17 11:17:35 2024 -0700 Format and cleanup commit c4ac510a038f0107a917099ea0d08de58d03a0c2 Author: Andres Rios Tascon Date: Wed Jul 17 11:10:51 2024 -0700 Fixed standalone compilation commit 64a7bf13bcb0c0a1cf2ea57c30386061d5de088d Author: Andres Rios Tascon Date: Wed Jul 17 10:41:54 2024 -0700 Moved more things out of the alpaka directory commit 9fd33554c447b83df09a09ac90864632e8f99ecd Author: Andres Rios-Tascon Date: Mon Jul 15 11:28:07 2024 -0400 Use pre-defined alpaka types commit 9c87a88664662dc53db14cb35a673859b8f19c38 Author: Andres Rios-Tascon Date: Thu Jul 11 13:04:39 2024 -0400 Moved some things out into a common library commit 1d9b396766940883638fdcab96123b772accc4a1 Author: Andres Rios Tascon Date: Tue Jul 16 11:25:54 2024 -0700 Updated setup scripts to use CMSSW 14_1_0_pre5 commit 23ec786d5386c862ea5896be4e02bc2cb54bc502 Merge: 71879273552 b20c7aa0a75 Author: Andres Rios Tascon Date: Tue Jul 16 11:17:08 2024 -0700 Merge tag 'CMSSW_14_1_0_pre5' into 
CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles commit 71879273552005e37a273f7ff852dcb2c86f9cfa Merge: 4cd89243ee6 34c56c6eefe Author: Slava Krutelyov Date: Wed Jul 10 15:55:04 2024 -0700 Merge pull request #50 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch3 Batch 3 for updates to LST integration in cms-sw commit 34c56c6eefe351cd143a5f6182f50c05abb7b6d1 Merge: ae67290cb97 86503a29d74 Author: Slava Krutelyov Date: Tue Jul 9 16:29:56 2024 -0700 Merge pull request #49 from SegmentLinking/ESProducer_fixes ESProducer fixes commit ae67290cb97aed9e53efc9e340c7bd030b2f7748 Author: Manos Vourliotis Date: Tue Jul 9 06:07:31 2024 -0700 Simplify LST workflows commit 7cfe3a5bccce359c4f7a9cb073cf4533883ca481 Author: Manos Vourliotis Date: Tue Jul 9 06:06:39 2024 -0700 Code format and checks commit a3a8054945636973191c9c4cc76e2ed36e271e1e Author: Manos Vourliotis Date: Tue Jul 9 05:19:42 2024 -0700 Even further work on naming values and sqrt, loop, etc. simplifications commit 86503a29d741a5654caf588fedd7240addc7dff6 Author: Andres Rios Tascon Date: Mon Jul 8 07:41:35 2024 -0700 Fixed include guards for headers in interface directory commit f47212f13ca208b20aa0fcd5f30ec5b411a647e1 Author: Andres Rios Tascon Date: Mon Jul 8 07:32:46 2024 -0700 Renamed new endcap geometry class commit d3e599935a82334eb5086023ea35dae63d507b07 Merge: f46b6cb13c9 c563eb04a71 Author: Andres Rios Tascon Date: Mon Jul 8 08:07:27 2024 -0700 Merge remote-tracking branch 'SegLink/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch3' into ESProducer_fixes commit c563eb04a71057bd169d0997e406e3d8b57417e9 Author: Manos Vourliotis Date: Mon Jul 8 07:39:49 2024 -0700 Further work on naming values and sqrt, loop, etc. 
simplifications commit 4d7e54a369f7040cb559e5f51aa09fe812382355 Merge: a97714a574a dc13ac526b3 Author: Slava Krutelyov Date: Fri Jul 5 09:15:23 2024 -0700 Merge pull request #58 from SegmentLinking/remove_LST_IS_CMSSW_PACKAGE combo of: Remove LST_IS_CMSSW_PACKAGE (#47), configurable dup cleaning (#37), typo in T3 code (#44) commit a97714a574a35371b982d6d80ff4deff8593c9bd Author: Manos Vourliotis Date: Wed Jul 3 07:16:19 2024 -0700 Apply reco comments: Naming values and sqrt, loop, etc. simplifications commit 236ad8c9f19a9376f42f540779a4e6a4bb1b24ce Author: Manos Vourliotis Date: Sat Jun 29 02:43:48 2024 -0700 Simplification of sqrt operations for pT3s commit 149c628fc13ab77dbcde0cbde9e7880c38c560ee Author: Manos Vourliotis Date: Fri Jun 28 21:23:31 2024 +0200 Resolve conflicts regarding workflows commit f46b6cb13c9c14df422fc16e7ed3cf7e0755a6bc Author: Andres Rios Tascon Date: Mon Jun 24 12:52:12 2024 -0700 Removed -Wshadow flag commit eb5de05acde834fecafba919b609f6d416476c25 Author: Andres Rios Tascon Date: Mon Jun 24 11:30:45 2024 -0700 Minor cleanup commit a3f65dacbb686313574fb36be5f4d421e3333284 Author: Andres Rios Tascon Date: Mon Jun 24 11:00:13 2024 -0700 Separated EndcapGeometry into host-only and buffers struct commit ef48f7427d3d08b8c92f6554883a5b5f4b72f220 Author: Manos Vourliotis Date: Mon Jun 24 02:21:11 2024 -0700 Removal of gpu modifier from LST workflows commit e0acfa56fd37bb85b32ae1973720be32806af0d0 Author: Andres Rios Tascon Date: Tue Jun 18 10:40:38 2024 -0700 Addressed ESProducer comments commit 4cd89243ee6f437cd408e59f601b686b4402facc Author: Slava Krutelyov Date: Fri Jun 14 18:00:59 2024 -0700 code-format/checks commit e64c2cc7d8b98169ed128502f1f63e6d5e745b77 Merge: 314e95d4162 3552ad35bf2 Author: Slava Krutelyov Date: Fri Jun 14 17:45:17 2024 -0700 Merge pull request #38 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_batch2 Batch 2 for updates to LST integration in cms-sw commit dc13ac526b32b4c3ad1b391fb7b97511ce7802ea Author: 
Andres Rios Tascon Date: Fri Jun 14 16:53:31 2024 -0400 Removed LST_IS_CMSSW_PACKAGE flag commit 14af98162c821fc69026e1b11acdb6f9859a42c6 Merge: f5554858b35 f4bd7029805 Author: Slava Krutelyov Date: Thu Jun 13 14:18:35 2024 -0700 Merge pull request #44 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_TrackLooperPR407 Fix typo in T3 code commit 3552ad35bf23796b767d0b6c068492d305d183d4 Author: Manos Vourliotis Date: Thu Jun 13 09:42:00 2024 -0700 Fixing CMS coding rule 1 commit f4bd702980596133915a66f4373b6567bc9fb94e Author: Manos Vourliotis Date: Wed Jun 12 06:28:40 2024 -0700 Fix typo in T3 code commit 944652f36985a42d150edde23a4b47b970103a19 Author: Manos Vourliotis Date: Tue Jun 11 01:17:16 2024 +0200 Keep comment closer to the added task object Co-authored-by: Slava Krutelyov commit f5554858b3538e056909dc98fa7c6930e9a0d704 Merge: 314e95d4162 0829926db96 Author: Slava Krutelyov Date: Mon Jun 10 14:41:22 2024 -0700 Merge pull request #37 from SegmentLinking/CMSSW_14_1_0_pre3_LST_X_LSTCore_realfiles_TrackLooperPR408 Make compilation flags for duplicate cleaning and triplet pLSs into runtime flags (TrackLooper PR#408 + cmssw PR#31) commit 85682e83cb3b0ae36f7199d98da705419c7be18f Author: Manos Vourliotis Date: Mon Jun 10 09:08:42 2024 -0700 Resolving PR comments LST folder commit 0829926db96e0d74c17a57c1e5f6e2299138dc48 Author: Manos Vourliotis Date: Fri Jun 7 18:36:46 2024 -0700 Code checks and format commit 4747b417dae2c7ff210bf56c86687f65de8c5800 Author: Manos Vourliotis Date: Fri Jun 7 16:53:38 2024 -0700 Make compilation flags for duplicate cleaning and triplet pLSs into runtime flags (TrackLooper PR#408 + cmssw PR#31) commit 314e95d4162a0ece6a047a6d04d1731221c66e9a Author: Slava Krutelyov Date: Thu Jun 6 16:51:30 2024 -0700 code-format commit 9f697fd3199a4dba3aaae09385c1924ae147784d Author: Slava Krutelyov Date: Wed Jun 5 16:39:31 2024 -0700 put lstModulesDevESProducer on a dummy visible task to bypass the module creation commit 
e9733d479832d29332d127f7edc6b71a1ae3fb5b Author: Manos Vourliotis Date: Thu Jun 6 14:58:08 2024 -0700 Remove outdated files commit f1dee274c6e265c8cdbcda39ca828be775c965bd Author: Andres Rios Tascon Date: Wed Jun 5 09:33:05 2024 -0400 Truncate git logs to 100 commits commit ee1e94e01523b57ea69c23ada152c6afd2487359 Author: Andres Rios Tascon Date: Wed Jun 5 09:32:32 2024 -0400 Fixed standalone build commit 0edc1162c4118e8d93c498b79d1478eee993f3bb Author: Manos Vourliotis Date: Wed Jun 5 15:00:16 2024 -0700 Update directory for LST data files commit 7c6bcfa967ac7a671b39b12087610eb2d55dcc9f Author: Manos Vourliotis Date: Mon Jun 3 10:16:56 2024 -0700 Add two-iteration,tracking-only LST workflows, both on CPU and GPU commit 7bc53be7b6f236e24b65863c6194a4bbb7fb9da7 Author: Andres Rios Tascon Date: Fri May 31 14:21:58 2024 -0400 Added RecoTracker/LST package Co-authored-by: Manos Vourliotis Co-authored-by: Slava Krutelyov commit 0cf5c0354d76e268ec9cf061a977da13397a3561 Author: Andres Rios Tascon Date: Fri May 31 14:19:46 2024 -0400 Added RecoTracker/LSTCore package Co-authored-by: Tres Reid Co-authored-by: Philip Chang Co-authored-by: Gavin Niendorf Co-authored-by: YonsiG Co-authored-by: Balaji Sathia Narayanan Co-authored-by: Manos Vourliotis Co-authored-by: Slava Krutelyov Co-authored-by: Jonathan Guiang Co-authored-by: Bei Wang Co-authored-by: Tres Reid Co-authored-by: Philip Chang Co-authored-by: Gavin Niendorf Co-authored-by: YonsiG Co-authored-by: Balaji Sathia Narayanan Co-authored-by: Manos Vourliotis Co-authored-by: Slava Krutelyov Co-authored-by: Jonathan Guiang Co-authored-by: Bei Wang --- .../python/trackingIters01_cff.py | 4 + .../python/trackingLST_cff.py | 5 + Configuration/PyReleaseValidation/README.md | 2 + .../PyReleaseValidation/python/relval_Run4.py | 3 + .../python/upgradeWorkflowComponents.py | 50 + .../python/ConversionStep_cff.py | 10 + .../python/MergeTrackCollections_cff.py | 6 + .../python/earlyGeneralTracks_cfi.py | 20 + 
.../python/HighPtTripletStep_cff.py | 56 + .../python/LowPtQuadStep_cff.py | 3 + .../python/iterativeTkConfig.py | 17 +- RecoTracker/LST/BuildFile.xml | 9 + RecoTracker/LST/interface/LSTOutput.h | 40 + .../LST/interface/LSTPhase2OTHitsInput.h | 33 + RecoTracker/LST/interface/LSTPixelSeedInput.h | 75 + RecoTracker/LST/plugins/BuildFile.xml | 41 + RecoTracker/LST/plugins/LSTOutputConverter.cc | 273 + .../plugins/LSTPhase2OTHitsInputProducer.cc | 67 + .../LST/plugins/LSTPixelSeedInputProducer.cc | 171 + .../plugins/alpaka/LSTModulesDevESProducer.cc | 31 + RecoTracker/LST/plugins/alpaka/LSTProducer.cc | 99 + RecoTracker/LST/python/lstProducerTask_cff.py | 7 + RecoTracker/LST/python/lstSeedTracks_cff.py | 15 + RecoTracker/LST/python/lst_cff.py | 6 + RecoTracker/LST/src/ES_ModulesDev.cc | 5 + RecoTracker/LST/src/alpaka/ES_ModulesDev.cc | 4 + RecoTracker/LST/src/classes.h | 9 + RecoTracker/LST/src/classes_def.xml | 10 + RecoTracker/LSTCore/BuildFile.xml | 10 + RecoTracker/LSTCore/interface/Common.h | 107 + .../LSTCore/interface/EndcapGeometry.h | 29 + .../EndcapGeometryDevHostCollection.h | 10 + .../LSTCore/interface/EndcapGeometryDevSoA.h | 18 + .../LSTCore/interface/HitsHostCollection.h | 10 + RecoTracker/LSTCore/interface/HitsSoA.h | 43 + RecoTracker/LSTCore/interface/LSTESData.h | 80 + .../interface/MiniDoubletsHostCollection.h | 10 + .../LSTCore/interface/MiniDoubletsSoA.h | 58 + .../LSTCore/interface/ModuleConnectionMap.h | 29 + .../LSTCore/interface/ModulesHostCollection.h | 10 + RecoTracker/LSTCore/interface/ModulesSoA.h | 57 + .../interface/ObjectRangesHostCollection.h | 10 + .../LSTCore/interface/ObjectRangesSoA.h | 38 + RecoTracker/LSTCore/interface/PixelMap.h | 31 + .../PixelQuintupletsHostCollection.h | 10 + .../LSTCore/interface/PixelQuintupletsSoA.h | 35 + .../interface/PixelTripletsHostCollection.h | 10 + .../LSTCore/interface/PixelTripletsSoA.h | 39 + .../interface/QuintupletsHostCollection.h | 10 + .../LSTCore/interface/QuintupletsSoA.h | 46 + 
.../interface/SegmentsHostCollection.h | 10 + RecoTracker/LSTCore/interface/SegmentsSoA.h | 63 + .../LSTCore/interface/TiltedGeometry.h | 26 + .../interface/TrackCandidatesHostCollection.h | 10 + .../LSTCore/interface/TrackCandidatesSoA.h | 32 + .../interface/TripletsHostCollection.h | 10 + RecoTracker/LSTCore/interface/TripletsSoA.h | 42 + RecoTracker/LSTCore/interface/alpaka/Common.h | 82 + .../EndcapGeometryDevDeviceCollection.h | 13 + .../interface/alpaka/HitsDeviceCollection.h | 13 + RecoTracker/LSTCore/interface/alpaka/LST.h | 102 + .../alpaka/MiniDoubletsDeviceCollection.h | 13 + .../alpaka/ModulesDeviceCollection.h | 13 + .../alpaka/ObjectRangesDeviceCollection.h | 13 + .../alpaka/PixelQuintupletsDeviceCollection.h | 12 + .../alpaka/PixelTripletsDeviceCollection.h | 12 + .../alpaka/QuintupletsDeviceCollection.h | 12 + .../alpaka/SegmentsDeviceCollection.h | 13 + .../alpaka/TrackCandidatesDeviceCollection.h | 12 + .../alpaka/TripletsDeviceCollection.h | 12 + RecoTracker/LSTCore/src/EndcapGeometry.cc | 59 + RecoTracker/LSTCore/src/LSTESData.cc | 120 + .../LSTCore/src/ModuleConnectionMap.cc | 108 + RecoTracker/LSTCore/src/ModuleMethods.h | 395 + RecoTracker/LSTCore/src/TiltedGeometry.cc | 48 + RecoTracker/LSTCore/src/alpaka/Hit.h | 164 + RecoTracker/LSTCore/src/alpaka/Kernels.h | 421 + RecoTracker/LSTCore/src/alpaka/LST.cc | 414 + .../LSTCore/src/alpaka/LSTEvent.dev.cc | 1680 ++++ RecoTracker/LSTCore/src/alpaka/LSTEvent.h | 195 + RecoTracker/LSTCore/src/alpaka/MiniDoublet.h | 914 +++ .../LSTCore/src/alpaka/NeuralNetwork.h | 165 + .../LSTCore/src/alpaka/NeuralNetworkWeights.h | 315 + .../LSTCore/src/alpaka/PixelQuintuplet.h | 818 ++ RecoTracker/LSTCore/src/alpaka/PixelTriplet.h | 1587 ++++ RecoTracker/LSTCore/src/alpaka/Quintuplet.h | 2592 ++++++ RecoTracker/LSTCore/src/alpaka/Segment.h | 853 ++ .../LSTCore/src/alpaka/TrackCandidate.h | 493 ++ RecoTracker/LSTCore/src/alpaka/Triplet.h | 895 +++ RecoTracker/LSTCore/standalone/.gitignore | 43 + 
RecoTracker/LSTCore/standalone/LST/.gitignore | 3 + RecoTracker/LSTCore/standalone/LST/Makefile | 151 + RecoTracker/LSTCore/standalone/Makefile | 78 + RecoTracker/LSTCore/standalone/README.md | 291 + RecoTracker/LSTCore/standalone/bin/lst.cc | 527 ++ RecoTracker/LSTCore/standalone/bin/lst.h | 30 + .../standalone/bin/lst_make_tracklooper | 256 + RecoTracker/LSTCore/standalone/bin/lst_run | 174 + .../standalone/code/core/AccessHelper.cc | 455 ++ .../standalone/code/core/AccessHelper.h | 85 + .../standalone/code/core/AnalysisConfig.cc | 3 + .../standalone/code/core/AnalysisConfig.h | 135 + .../LSTCore/standalone/code/core/Trktree.cc | 7009 +++++++++++++++++ .../LSTCore/standalone/code/core/Trktree.h | 1550 ++++ .../LSTCore/standalone/code/core/lst_math.h | 213 + .../LSTCore/standalone/code/core/trkCore.cc | 1140 +++ .../LSTCore/standalone/code/core/trkCore.h | 115 + .../standalone/code/core/write_lst_ntuple.cc | 1113 +++ .../standalone/code/core/write_lst_ntuple.h | 55 + .../LSTCore/standalone/code/rooutil/Makefile | 27 + .../standalone/code/rooutil/Makefile.arch | 582 ++ .../standalone/code/rooutil/anautil.cc | 1227 +++ .../LSTCore/standalone/code/rooutil/anautil.h | 271 + .../standalone/code/rooutil/cutflowutil.cc | 129 + .../standalone/code/rooutil/cutflowutil.h | 635 ++ .../standalone/code/rooutil/cxxopts.cc | 1 + .../LSTCore/standalone/code/rooutil/cxxopts.h | 1360 ++++ .../standalone/code/rooutil/eventindexmap.cc | 47 + .../standalone/code/rooutil/eventindexmap.h | 27 + .../standalone/code/rooutil/fileutil.cc | 127 + .../standalone/code/rooutil/fileutil.h | 34 + .../LSTCore/standalone/code/rooutil/looper.cc | 5 + .../LSTCore/standalone/code/rooutil/looper.h | 933 +++ .../standalone/code/rooutil/printutil.cc | 184 + .../standalone/code/rooutil/printutil.h | 64 + .../standalone/code/rooutil/rooutil-config | 14 + .../LSTCore/standalone/code/rooutil/rooutil.h | 9 + .../standalone/code/rooutil/stringutil.cc | 216 + .../standalone/code/rooutil/stringutil.h | 54 + 
.../standalone/code/rooutil/thisrooutil.sh | 11 + .../LSTCore/standalone/code/rooutil/ttreex.cc | 1100 +++ .../LSTCore/standalone/code/rooutil/ttreex.h | 438 + .../LSTCore/standalone/code/rooutil/xargs.sh | 58 + .../LSTCore/standalone/efficiency/Makefile | 44 + .../standalone/efficiency/bin/lst_timing | 153 + .../standalone/efficiency/python/dupObjEff.py | 425 + .../efficiency/python/lst_plot_performance.py | 787 ++ .../efficiency/python/make_classfiles.py | 492 ++ .../standalone/efficiency/src/LSTEff.cc | 3393 ++++++++ .../standalone/efficiency/src/LSTEff.h | 765 ++ .../standalone/efficiency/src/helper.cc | 280 + .../standalone/efficiency/src/helper.h | 102 + .../standalone/efficiency/src/performance.cc | 738 ++ .../standalone/efficiency/src/performance.h | 23 + RecoTracker/LSTCore/standalone/setup.sh | 57 + 145 files changed, 42755 insertions(+), 5 deletions(-) create mode 100644 Configuration/ProcessModifiers/python/trackingIters01_cff.py create mode 100644 Configuration/ProcessModifiers/python/trackingLST_cff.py create mode 100644 RecoTracker/LST/BuildFile.xml create mode 100644 RecoTracker/LST/interface/LSTOutput.h create mode 100644 RecoTracker/LST/interface/LSTPhase2OTHitsInput.h create mode 100644 RecoTracker/LST/interface/LSTPixelSeedInput.h create mode 100644 RecoTracker/LST/plugins/BuildFile.xml create mode 100644 RecoTracker/LST/plugins/LSTOutputConverter.cc create mode 100644 RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc create mode 100644 RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc create mode 100644 RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc create mode 100644 RecoTracker/LST/plugins/alpaka/LSTProducer.cc create mode 100644 RecoTracker/LST/python/lstProducerTask_cff.py create mode 100644 RecoTracker/LST/python/lstSeedTracks_cff.py create mode 100644 RecoTracker/LST/python/lst_cff.py create mode 100644 RecoTracker/LST/src/ES_ModulesDev.cc create mode 100644 RecoTracker/LST/src/alpaka/ES_ModulesDev.cc create mode 
100644 RecoTracker/LST/src/classes.h create mode 100644 RecoTracker/LST/src/classes_def.xml create mode 100644 RecoTracker/LSTCore/BuildFile.xml create mode 100644 RecoTracker/LSTCore/interface/Common.h create mode 100644 RecoTracker/LSTCore/interface/EndcapGeometry.h create mode 100644 RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h create mode 100644 RecoTracker/LSTCore/interface/HitsHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/HitsSoA.h create mode 100644 RecoTracker/LSTCore/interface/LSTESData.h create mode 100644 RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/MiniDoubletsSoA.h create mode 100644 RecoTracker/LSTCore/interface/ModuleConnectionMap.h create mode 100644 RecoTracker/LSTCore/interface/ModulesHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/ModulesSoA.h create mode 100644 RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/ObjectRangesSoA.h create mode 100644 RecoTracker/LSTCore/interface/PixelMap.h create mode 100644 RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h create mode 100644 RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/PixelTripletsSoA.h create mode 100644 RecoTracker/LSTCore/interface/QuintupletsHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/QuintupletsSoA.h create mode 100644 RecoTracker/LSTCore/interface/SegmentsHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/SegmentsSoA.h create mode 100644 RecoTracker/LSTCore/interface/TiltedGeometry.h create mode 100644 RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/TrackCandidatesSoA.h create mode 100644 
RecoTracker/LSTCore/interface/TripletsHostCollection.h create mode 100644 RecoTracker/LSTCore/interface/TripletsSoA.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/Common.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/LST.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h create mode 100644 RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h create mode 100644 RecoTracker/LSTCore/src/EndcapGeometry.cc create mode 100644 RecoTracker/LSTCore/src/LSTESData.cc create mode 100644 RecoTracker/LSTCore/src/ModuleConnectionMap.cc create mode 100644 RecoTracker/LSTCore/src/ModuleMethods.h create mode 100644 RecoTracker/LSTCore/src/TiltedGeometry.cc create mode 100644 RecoTracker/LSTCore/src/alpaka/Hit.h create mode 100644 RecoTracker/LSTCore/src/alpaka/Kernels.h create mode 100644 RecoTracker/LSTCore/src/alpaka/LST.cc create mode 100644 RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc create mode 100644 RecoTracker/LSTCore/src/alpaka/LSTEvent.h create mode 100644 RecoTracker/LSTCore/src/alpaka/MiniDoublet.h create mode 100644 RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h create mode 100644 RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h 
create mode 100644 RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h create mode 100644 RecoTracker/LSTCore/src/alpaka/PixelTriplet.h create mode 100644 RecoTracker/LSTCore/src/alpaka/Quintuplet.h create mode 100644 RecoTracker/LSTCore/src/alpaka/Segment.h create mode 100644 RecoTracker/LSTCore/src/alpaka/TrackCandidate.h create mode 100644 RecoTracker/LSTCore/src/alpaka/Triplet.h create mode 100644 RecoTracker/LSTCore/standalone/.gitignore create mode 100644 RecoTracker/LSTCore/standalone/LST/.gitignore create mode 100644 RecoTracker/LSTCore/standalone/LST/Makefile create mode 100644 RecoTracker/LSTCore/standalone/Makefile create mode 100644 RecoTracker/LSTCore/standalone/README.md create mode 100644 RecoTracker/LSTCore/standalone/bin/lst.cc create mode 100644 RecoTracker/LSTCore/standalone/bin/lst.h create mode 100755 RecoTracker/LSTCore/standalone/bin/lst_make_tracklooper create mode 100755 RecoTracker/LSTCore/standalone/bin/lst_run create mode 100644 RecoTracker/LSTCore/standalone/code/core/AccessHelper.cc create mode 100644 RecoTracker/LSTCore/standalone/code/core/AccessHelper.h create mode 100644 RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.cc create mode 100644 RecoTracker/LSTCore/standalone/code/core/AnalysisConfig.h create mode 100644 RecoTracker/LSTCore/standalone/code/core/Trktree.cc create mode 100644 RecoTracker/LSTCore/standalone/code/core/Trktree.h create mode 100644 RecoTracker/LSTCore/standalone/code/core/lst_math.h create mode 100644 RecoTracker/LSTCore/standalone/code/core/trkCore.cc create mode 100644 RecoTracker/LSTCore/standalone/code/core/trkCore.h create mode 100644 RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.cc create mode 100644 RecoTracker/LSTCore/standalone/code/core/write_lst_ntuple.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/Makefile create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/Makefile.arch create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/anautil.cc create 
mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/anautil.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/cutflowutil.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/cutflowutil.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/cxxopts.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/cxxopts.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/eventindexmap.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/eventindexmap.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/fileutil.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/fileutil.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/looper.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/looper.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/printutil.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/printutil.h create mode 100755 RecoTracker/LSTCore/standalone/code/rooutil/rooutil-config create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/rooutil.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/stringutil.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/stringutil.h create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/thisrooutil.sh create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/ttreex.cc create mode 100644 RecoTracker/LSTCore/standalone/code/rooutil/ttreex.h create mode 100755 RecoTracker/LSTCore/standalone/code/rooutil/xargs.sh create mode 100644 RecoTracker/LSTCore/standalone/efficiency/Makefile create mode 100755 RecoTracker/LSTCore/standalone/efficiency/bin/lst_timing create mode 100644 RecoTracker/LSTCore/standalone/efficiency/python/dupObjEff.py create mode 100755 RecoTracker/LSTCore/standalone/efficiency/python/lst_plot_performance.py create mode 100644 RecoTracker/LSTCore/standalone/efficiency/python/make_classfiles.py create mode 100644 
RecoTracker/LSTCore/standalone/efficiency/src/LSTEff.cc create mode 100644 RecoTracker/LSTCore/standalone/efficiency/src/LSTEff.h create mode 100644 RecoTracker/LSTCore/standalone/efficiency/src/helper.cc create mode 100644 RecoTracker/LSTCore/standalone/efficiency/src/helper.h create mode 100644 RecoTracker/LSTCore/standalone/efficiency/src/performance.cc create mode 100644 RecoTracker/LSTCore/standalone/efficiency/src/performance.h create mode 100644 RecoTracker/LSTCore/standalone/setup.sh diff --git a/Configuration/ProcessModifiers/python/trackingIters01_cff.py b/Configuration/ProcessModifiers/python/trackingIters01_cff.py new file mode 100644 index 0000000000000..9f7506d27b51c --- /dev/null +++ b/Configuration/ProcessModifiers/python/trackingIters01_cff.py @@ -0,0 +1,4 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier sets the iterative tracking to use a minimal set of iterations, first two +trackingIters01 = cms.Modifier() diff --git a/Configuration/ProcessModifiers/python/trackingLST_cff.py b/Configuration/ProcessModifiers/python/trackingLST_cff.py new file mode 100644 index 0000000000000..ae1dd83e20b0b --- /dev/null +++ b/Configuration/ProcessModifiers/python/trackingLST_cff.py @@ -0,0 +1,5 @@ +import FWCore.ParameterSet.Config as cms + +# This modifier sets the LST (Phase-2 line segment tracking) used for track building +trackingLST = cms.Modifier() + diff --git a/Configuration/PyReleaseValidation/README.md b/Configuration/PyReleaseValidation/README.md index a3c4177c0fcb8..e5b127010e5c6 100644 --- a/Configuration/PyReleaseValidation/README.md +++ b/Configuration/PyReleaseValidation/README.md @@ -65,6 +65,8 @@ The offsets currently in use are: * 0.7: trackingMkFit modifier * 0.701: DisplacedRegionalStep tracking iteration for Run-3 * 0.702: trackingMkFit modifier for Phase-2 (initialStep only) +* 0.703: LST tracking, initialStep+HighPtTripletStep only, on CPU +* 0.704: LST tracking, initialStep+HighPtTripletStep only, on GPU * 0.75: HLT phase-2 
timing menu * 0.751: HLT phase-2 timing menu Alpaka variant * 0.752: HLT phase-2 timing menu ticl_v5 variant diff --git a/Configuration/PyReleaseValidation/python/relval_Run4.py b/Configuration/PyReleaseValidation/python/relval_Run4.py index 3b866ca55c000..530145fe0c0b7 100644 --- a/Configuration/PyReleaseValidation/python/relval_Run4.py +++ b/Configuration/PyReleaseValidation/python/relval_Run4.py @@ -36,6 +36,9 @@ numWFIB.extend([31234.0]) #Run4D114 numWFIB.extend([32034.0]) #Run4D115 +# Temporary placement for LST workflow to workaround PR conflicts - to be formatted and placed in an upcoming PR +numWFIB.extend([24834.703,24834.704]) #2026D98 LST tracking (initialStep+HighPtTripletStep only): CPU, GPU + #Additional sample for short matrix and IB #Default Phase-2 Det NoPU numWFIB.extend([prefixDet+34.911]) #DD4hep XML diff --git a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py index fbfd6b28801aa..31543ebd3b380 100644 --- a/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py +++ b/Configuration/PyReleaseValidation/python/upgradeWorkflowComponents.py @@ -490,6 +490,56 @@ def condition_(self, fragment, stepList, key, hasHarvest): '--procModifiers': 'trackingMkFitCommon,trackingMkFitInitialStep' } +# LST on CPU, initialStep+highPtTripletStep-only tracking-only +class UpgradeWorkflow_lstOnCPUIters01TrackingOnly(UpgradeWorkflowTracking): + def setup__(self, step, stepName, stepDict, k, properties): + if 'Reco' in step: stepDict[stepName][k] = merge([self.step3, stepDict[step][k]]) + elif 'HARVEST' in step: stepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@trackingOnlyDQM'}, stepDict[step][k]]) + elif 'ALCA' in step: stepDict[stepName][k] = None + def condition_(self, fragment, stepList, key, hasHarvest): + return ('Run4' in key) +upgradeWFs['lstOnCPUIters01TrackingOnly'] = UpgradeWorkflow_lstOnCPUIters01TrackingOnly( + steps = [ + 
'RecoGlobal', + 'HARVESTGlobal', + # Add ALCA steps explicitly, so that they can be properly removed + 'ALCA', + 'ALCAPhase2' + ], + PU = [], + suffix = '_lstOnCPUIters01TrackingOnly', + offset = 0.703, +) +upgradeWFs['lstOnCPUIters01TrackingOnly'].step3 = upgradeWFs['trackingOnly'].step3 | { + '--procModifiers': 'trackingIters01,trackingLST', + '--accelerators' : 'cpu' +} + +# LST on GPU, initialStep+highPtTripletStep-only tracking-only +class UpgradeWorkflow_lstOnGPUIters01TrackingOnly(UpgradeWorkflowTracking): + def setup__(self, step, stepName, stepDict, k, properties): + if 'Reco' in step: stepDict[stepName][k] = merge([self.step3, stepDict[step][k]]) + elif 'HARVEST' in step: stepDict[stepName][k] = merge([{'-s': 'HARVESTING:@trackingOnlyValidation+@trackingOnlyDQM'}, stepDict[step][k]]) + elif 'ALCA' in step: stepDict[stepName][k] = None + def condition_(self, fragment, stepList, key, hasHarvest): + return ('Run4' in key) +upgradeWFs['lstOnGPUIters01TrackingOnly'] = UpgradeWorkflow_lstOnGPUIters01TrackingOnly( + steps = [ + 'RecoGlobal', + 'HARVESTGlobal', + # Add ALCA steps explicitly, so that they can be properly removed + 'ALCA', + 'ALCAPhase2' + ], + PU = [], + suffix = '_lstOnGPUIters01TrackingOnly', + offset = 0.704, +) +upgradeWFs['lstOnGPUIters01TrackingOnly'].step3 = upgradeWFs['trackingOnly'].step3 | { + '--procModifiers': 'trackingIters01,trackingLST', + '--accelerators' : 'gpu-*' +} + #DeepCore seeding for JetCore iteration workflow class UpgradeWorkflow_seedingDeepCore(UpgradeWorkflow): def setup_(self, step, stepName, stepDict, k, properties): diff --git a/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py b/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py index 256432c1180c8..6d44990855324 100644 --- a/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py +++ b/RecoTracker/ConversionSeedGenerators/python/ConversionStep_cff.py @@ -33,6 +33,16 @@ oldClusterRemovalInfo = 'detachedQuadStepClusters', 
overrideTrkQuals = 'detachedQuadStepSelector:detachedQuadStepTrk' )) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(convClusters, + trajectories = "highPtTripletStepTracks", + oldClusterRemovalInfo = "highPtTripletStepClusters", + overrideTrkQuals = "highPtTripletStepSelector:highPtTripletStep" +) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +(trackingIters01 & trackingPhase2PU140 & trackingLST).toModify(convClusters, + overrideTrkQuals = "" +) _convLayerPairsStripOnlyLayers = ['TIB1+TID1_pos', 'TIB1+TID1_neg', diff --git a/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py b/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py index 907e3126a5cd7..d5256c19a1756 100644 --- a/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py +++ b/RecoTracker/FinalTrackSelectors/python/MergeTrackCollections_cff.py @@ -17,6 +17,8 @@ ttrhBuilderName = "WithAngleAndTemplate", chi2EstimatorName = "duplicateTrackCandidatesChi2Est" ) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(duplicateTrackCandidates, source = "earlyGeneralTracks") import RecoTracker.TrackProducer.TrackProducer_cfi mergedDuplicateTracks = RecoTracker.TrackProducer.TrackProducer_cfi.TrackProducer.clone( @@ -44,6 +46,10 @@ candidateSource = "duplicateTrackCandidates:candidates", candidateComponents = "duplicateTrackCandidates:candidateMap" ) +trackingIters01.toModify(generalTracks, + originalSource = "earlyGeneralTracks", + originalMVAVals = "earlyGeneralTracks:MVAValues" +) generalTracksTask = cms.Task( duplicateTrackCandidates, diff --git a/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py b/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py index 525640861f3ea..d03744c8bdfe5 100644 --- a/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py +++ 
b/RecoTracker/FinalTrackSelectors/python/earlyGeneralTracks_cfi.py @@ -109,6 +109,16 @@ def _extend_displacedGeneral(x): makeReKeyedSeeds = cms.untracked.bool(False) ) ) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(earlyGeneralTracks, + TrackProducers = ['initialStepTracks', 'highPtTripletStepTracks'], + hasSelector = [1,1], + indivShareFrac = [1,0.16], + selectedTrackQuals = ['initialStepSelector:initialStep', + 'highPtTripletStepSelector:highPtTripletStep' + ], + setsToMerge = {0: dict(tLists = [0,1])} +) from Configuration.ProcessModifiers.vectorHits_cff import vectorHits def _extend_pixelLess(x): x.TrackProducers += ['pixelLessStepTracks'] @@ -118,3 +128,13 @@ def _extend_pixelLess(x): x.setsToMerge[0].tLists += [6] (trackingPhase2PU140 & vectorHits).toModify(earlyGeneralTracks, _extend_pixelLess) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +(trackingPhase2PU140 & trackingLST).toModify(earlyGeneralTracks, + TrackProducers = ['highPtTripletStepLSTpTracks', 'highPtTripletStepLSTT5Tracks'], + hasSelector = [1,0], + indivShareFrac = [0.1,0.1], + selectedTrackQuals = ['highPtTripletStepSelector:highPtTripletStep', + 'highPtTripletStepSelectorLSTT5:highPtTripletStepLSTT5' + ], + setsToMerge = {0: dict(tLists = [0,1])} +) diff --git a/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py b/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py index 8f35832cc75d5..7c1dab22afd58 100644 --- a/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py +++ b/RecoTracker/IterativeTracking/python/HighPtTripletStep_cff.py @@ -259,6 +259,10 @@ phase2clustersToSkip = 'highPtTripletStepClusters' ) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +from RecoTracker.LST.lstOutputConverter_cfi import lstOutputConverter as _lstOutputConverter +(trackingPhase2PU140 & trackingLST).toReplaceWith(highPtTripletStepTrackCandidates, _lstOutputConverter.clone()) 
+ #For FastSim phase1 tracking import FastSimulation.Tracking.TrackCandidateProducer_cfi _fastSim_highPtTripletStepTrackCandidates = FastSimulation.Tracking.TrackCandidateProducer_cfi.trackCandidateProducer.clone( @@ -280,6 +284,25 @@ from Configuration.Eras.Modifier_phase2_timing_layer_cff import phase2_timing_layer phase2_timing_layer.toModify(highPtTripletStepTracks, TrajectoryInEvent = True) +highPtTripletStepLSTpTracks = highPtTripletStepTracks.clone( + src = 'highPtTripletStepTrackCandidates:pTCsLST' +) +highPtTripletStepLSTT5Tracks = highPtTripletStepTracks.clone( + src = 'highPtTripletStepTrackCandidates:t5TCsLST' +) +_highPtTripletStepTracks_LST = RecoTracker.FinalTrackSelectors.trackListMerger_cfi.trackListMerger.clone( + TrackProducers = ['highPtTripletStepLSTpTracks', + 'highPtTripletStepLSTT5Tracks'], + hasSelector = [1,0], + indivShareFrac = [0.1,0.1], + selectedTrackQuals = ['highPtTripletStepSelector:highPtTripletStep', + 'highPtTripletStepSelectorLSTT5:highPtTripletStepLSTT5'], + copyExtras = True, + copyMVA = False, + setsToMerge = [cms.PSet( tLists=cms.vint32(0,1), pQual=cms.bool(True) )] +) +(trackingPhase2PU140 & trackingLST).toReplaceWith(highPtTripletStepTracks, _highPtTripletStepTracks_LST) + # Final selection from RecoTracker.FinalTrackSelectors.TrackMVAClassifierPrompt_cfi import * highPtTripletStep = TrackMVAClassifierPrompt.clone( @@ -357,6 +380,28 @@ from Configuration.ProcessModifiers.vectorHits_cff import vectorHits vectorHits.toModify(highPtTripletStepSelector.trackSelectors[2], minNumberLayers = 3, minNumber3DLayers = 3, d0_par1 = ( 0.5, 4.0 ), dz_par1 = ( 0.6, 4.0 )) +(trackingPhase2PU140 & trackingLST).toModify(highPtTripletStepSelector, src = 'highPtTripletStepLSTpTracks') +# Passthrough selector to satisfy the TrackListMerger requirement for selector values +highPtTripletStepSelectorLSTT5 = RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.multiTrackSelector.clone( + src = 'highPtTripletStepLSTT5Tracks', + trackSelectors = 
[ + RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.looseMTS.clone( + name = 'highPtTripletStepLSTT5Loose', + minHitsToBypassChecks = 0 + ), #end of pset + RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.tightMTS.clone( + name = 'highPtTripletStepLSTT5Tight', + preFilterName = 'highPtTripletStepLSTT5Loose', + minHitsToBypassChecks = 0 + ), + RecoTracker.FinalTrackSelectors.multiTrackSelector_cfi.highpurityMTS.clone( + name = 'highPtTripletStepLSTT5', + preFilterName = 'highPtTripletStepLSTT5Tight', + minHitsToBypassChecks = 0 + ), + ] #end of vpset +) #end of clone + # Final sequence HighPtTripletStepTask = cms.Task(highPtTripletStepClusters, highPtTripletStepSeedLayers, @@ -378,6 +423,17 @@ _HighPtTripletStep_Phase2PU140 = cms.Sequence(_HighPtTripletStepTask_Phase2PU140) trackingPhase2PU140.toReplaceWith(HighPtTripletStepTask, _HighPtTripletStepTask_Phase2PU140) +_HighPtTripletStepTask_LST = HighPtTripletStepTask.copy() +from RecoLocalTracker.Phase2TrackerRecHits.Phase2TrackerRecHits_cfi import siPhase2RecHits +from RecoTracker.LST.lstSeedTracks_cff import lstInitialStepSeedTracks,lstHighPtTripletStepSeedTracks +from RecoTracker.LST.lstPixelSeedInputProducer_cfi import lstPixelSeedInputProducer +from RecoTracker.LST.lstPhase2OTHitsInputProducer_cfi import lstPhase2OTHitsInputProducer +from RecoTracker.LST.lstProducerTask_cff import * + +_HighPtTripletStepTask_LST.add(siPhase2RecHits, lstInitialStepSeedTracks, lstHighPtTripletStepSeedTracks, lstPixelSeedInputProducer, lstPhase2OTHitsInputProducer, + lstProducerTask, highPtTripletStepLSTpTracks, highPtTripletStepLSTT5Tracks, highPtTripletStepSelectorLSTT5) +(trackingPhase2PU140 & trackingLST).toReplaceWith(HighPtTripletStepTask, _HighPtTripletStepTask_LST) + # fast tracking mask producer from FastSimulation.Tracking.FastTrackerRecHitMaskProducer_cfi import maskProducerFromClusterRemover highPtTripletStepMasks = maskProducerFromClusterRemover(highPtTripletStepClusters) diff --git 
a/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py b/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py index 84d87c18c883a..40a1161b15f13 100644 --- a/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py +++ b/RecoTracker/IterativeTracking/python/LowPtQuadStep_cff.py @@ -14,6 +14,9 @@ for _eraName, _postfix, _era in _cfg.nonDefaultEras(): _era.toReplaceWith(lowPtQuadStepClusters, _cfg.clusterRemoverForIter('LowPtQuadStep', _eraName, _postfix)) +from Configuration.ProcessModifiers.trackingLST_cff import trackingLST +# with LST, this is the first iteration with proper cluster masking +trackingLST.toModify(lowPtQuadStepClusters, oldClusterRemovalInfo = "") # SEEDING LAYERS import RecoTracker.TkSeedingLayers.PixelLayerQuadruplets_cfi diff --git a/RecoTracker/IterativeTracking/python/iterativeTkConfig.py b/RecoTracker/IterativeTracking/python/iterativeTkConfig.py index d409cae8d3340..8d9fd5fb45824 100644 --- a/RecoTracker/IterativeTracking/python/iterativeTkConfig.py +++ b/RecoTracker/IterativeTracking/python/iterativeTkConfig.py @@ -53,16 +53,20 @@ _iterations_trackingPhase1.append('JetCoreRegionalStep') -_iterations_trackingPhase2PU140 = [ +_iterations_trackingPhase2PU140_VS = cms.PSet(names = cms.vstring( "InitialStep", "HighPtTripletStep", "LowPtQuadStep", "LowPtTripletStep", "DetachedQuadStep", "PixelPairStep", -] +)) from Configuration.ProcessModifiers.vectorHits_cff import vectorHits -vectorHits.toModify(_iterations_trackingPhase2PU140, func=lambda x: x.append('PixelLessStep')) +vectorHits.toModify(_iterations_trackingPhase2PU140_VS.names, func=lambda x: x.append('PixelLessStep')) +from Configuration.ProcessModifiers.trackingIters01_cff import trackingIters01 +trackingIters01.toModify(_iterations_trackingPhase2PU140_VS, names = ["InitialStep", "HighPtTripletStep"]) +# apply all procModifiers before this +_iterations_trackingPhase2PU140 = _iterations_trackingPhase2PU140_VS.names.value() from Configuration.ProcessModifiers.jetCoreInPhase2_cff 
import jetCoreInPhase2 jetCoreInPhase2.toModify(_iterations_trackingPhase2PU140, func=lambda x: x.append('JetCoreRegionalStep')) @@ -76,10 +80,13 @@ "MuonSeededStepOutIn", ] #Phase2 -_iterations_muonSeeded_trackingPhase2PU140 = [ +_iterations_muonSeeded_trackingPhase2PU140_VS = cms.PSet(names = cms.vstring( "MuonSeededStepInOut", "MuonSeededStepOutIn", -] +)) +trackingIters01.toModify(_iterations_muonSeeded_trackingPhase2PU140_VS, names = []) +_iterations_muonSeeded_trackingPhase2PU140 = _iterations_muonSeeded_trackingPhase2PU140_VS.names.value() + _multipleSeedProducers = { "MixedTripletStep": ["A", "B"], "TobTecStep": ["Pair", "Tripl"], diff --git a/RecoTracker/LST/BuildFile.xml b/RecoTracker/LST/BuildFile.xml new file mode 100644 index 0000000000000..07a6ae1d26eaf --- /dev/null +++ b/RecoTracker/LST/BuildFile.xml @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/RecoTracker/LST/interface/LSTOutput.h b/RecoTracker/LST/interface/LSTOutput.h new file mode 100644 index 0000000000000..f50000b83cf21 --- /dev/null +++ b/RecoTracker/LST/interface/LSTOutput.h @@ -0,0 +1,40 @@ +#ifndef RecoTracker_LST_interface_LSTOutput_h +#define RecoTracker_LST_interface_LSTOutput_h + +#include +#include + +#include "RecoTracker/LSTCore/interface/Common.h" + +class LSTOutput { +public: + LSTOutput() = default; + LSTOutput(std::vector> const hitIdx, + std::vector const len, + std::vector const seedIdx, + std::vector const trackCandidateType) + : hitIdx_(std::move(hitIdx)), + len_(std::move(len)), + seedIdx_(std::move(seedIdx)), + trackCandidateType_(std::move(trackCandidateType)) {} + + using LSTTCType = lst::LSTObjType; + + // Hit indices of each of the LST track candidates. + std::vector> const& hitIdx() const { return hitIdx_; } + // Number of hits of each of the LST track candidates. + std::vector const& len() const { return len_; } + // Index of the pixel track associated to each of the LST track candidates. 
+ // If not associated to a pixel track, which is the case for T5s, it defaults to -1. + std::vector const& seedIdx() const { return seedIdx_; } + // LSTTCType from RecoTracker/LSTCore/interface/Common.h + std::vector const& trackCandidateType() const { return trackCandidateType_; } + +private: + std::vector> hitIdx_; + std::vector len_; + std::vector seedIdx_; + std::vector trackCandidateType_; +}; + +#endif diff --git a/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h new file mode 100644 index 0000000000000..00fd77846c4c3 --- /dev/null +++ b/RecoTracker/LST/interface/LSTPhase2OTHitsInput.h @@ -0,0 +1,33 @@ +#ifndef RecoTracker_LST_interface_LSTPhase2OTHitsInput_h +#define RecoTracker_LST_interface_LSTPhase2OTHitsInput_h + +#include +#include + +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" + +class LSTPhase2OTHitsInput { +public: + LSTPhase2OTHitsInput() = default; + LSTPhase2OTHitsInput(std::vector const detId, + std::vector const x, + std::vector const y, + std::vector const z, + std::vector const hits) + : detId_(std::move(detId)), x_(std::move(x)), y_(std::move(y)), z_(std::move(z)), hits_(std::move(hits)) {} + + std::vector const& detId() const { return detId_; } + std::vector const& x() const { return x_; } + std::vector const& y() const { return y_; } + std::vector const& z() const { return z_; } + std::vector const& hits() const { return hits_; } + +private: + std::vector detId_; + std::vector x_; + std::vector y_; + std::vector z_; + std::vector hits_; +}; + +#endif diff --git a/RecoTracker/LST/interface/LSTPixelSeedInput.h b/RecoTracker/LST/interface/LSTPixelSeedInput.h new file mode 100644 index 0000000000000..18d3768b2e0fc --- /dev/null +++ b/RecoTracker/LST/interface/LSTPixelSeedInput.h @@ -0,0 +1,75 @@ +#ifndef RecoTracker_LST_interface_LSTPixelSeedInput_h +#define RecoTracker_LST_interface_LSTPixelSeedInput_h + +#include +#include + +class LSTPixelSeedInput { +public: + 
LSTPixelSeedInput() = default; + LSTPixelSeedInput(std::vector const px, + std::vector const py, + std::vector const pz, + std::vector const dxy, + std::vector const dz, + std::vector const ptErr, + std::vector const etaErr, + std::vector const stateTrajGlbX, + std::vector const stateTrajGlbY, + std::vector const stateTrajGlbZ, + std::vector const stateTrajGlbPx, + std::vector const stateTrajGlbPy, + std::vector const stateTrajGlbPz, + std::vector const q, + std::vector> const hitIdx) + : px_(std::move(px)), + py_(std::move(py)), + pz_(std::move(pz)), + dxy_(std::move(dxy)), + dz_(std::move(dz)), + ptErr_(std::move(ptErr)), + etaErr_(std::move(etaErr)), + stateTrajGlbX_(std::move(stateTrajGlbX)), + stateTrajGlbY_(std::move(stateTrajGlbY)), + stateTrajGlbZ_(std::move(stateTrajGlbZ)), + stateTrajGlbPx_(std::move(stateTrajGlbPx)), + stateTrajGlbPy_(std::move(stateTrajGlbPy)), + stateTrajGlbPz_(std::move(stateTrajGlbPz)), + q_(std::move(q)), + hitIdx_(std::move(hitIdx)) {} + + std::vector const& px() const { return px_; } + std::vector const& py() const { return py_; } + std::vector const& pz() const { return pz_; } + std::vector const& dxy() const { return dxy_; } + std::vector const& dz() const { return dz_; } + std::vector const& ptErr() const { return ptErr_; } + std::vector const& etaErr() const { return etaErr_; } + std::vector const& stateTrajGlbX() const { return stateTrajGlbX_; } + std::vector const& stateTrajGlbY() const { return stateTrajGlbY_; } + std::vector const& stateTrajGlbZ() const { return stateTrajGlbZ_; } + std::vector const& stateTrajGlbPx() const { return stateTrajGlbPx_; } + std::vector const& stateTrajGlbPy() const { return stateTrajGlbPy_; } + std::vector const& stateTrajGlbPz() const { return stateTrajGlbPz_; } + std::vector const& q() const { return q_; } + std::vector> const& hitIdx() const { return hitIdx_; } + +private: + std::vector px_; + std::vector py_; + std::vector pz_; + std::vector dxy_; + std::vector dz_; + std::vector ptErr_; + 
std::vector etaErr_; + std::vector stateTrajGlbX_; + std::vector stateTrajGlbY_; + std::vector stateTrajGlbZ_; + std::vector stateTrajGlbPx_; + std::vector stateTrajGlbPy_; + std::vector stateTrajGlbPz_; + std::vector q_; + std::vector> hitIdx_; +}; + +#endif diff --git a/RecoTracker/LST/plugins/BuildFile.xml b/RecoTracker/LST/plugins/BuildFile.xml new file mode 100644 index 0000000000000..49e9ee77f5a3b --- /dev/null +++ b/RecoTracker/LST/plugins/BuildFile.xml @@ -0,0 +1,41 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/RecoTracker/LST/plugins/LSTOutputConverter.cc b/RecoTracker/LST/plugins/LSTOutputConverter.cc new file mode 100644 index 0000000000000..0bbdd68051b87 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTOutputConverter.cc @@ -0,0 +1,273 @@ +#include "DataFormats/TrackerRecHit2D/interface/Phase2TrackerRecHit1D.h" +#include "DataFormats/TrackerRecHit2D/interface/SiPixelRecHitCollection.h" +#include "DataFormats/TrackCandidate/interface/TrackCandidateCollection.h" +#include "DataFormats/TrackReco/interface/SeedStopInfo.h" +#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" +#include "FWCore/Framework/interface/stream/EDProducer.h" +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/Utilities/interface/Exception.h" +#include "Geometry/CommonDetUnit/interface/GeomDet.h" +#include "Geometry/TrackerGeometryBuilder/interface/TrackerGeometry.h" +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTOutput.h" +#include "RecoTracker/TkSeedingLayers/interface/SeedingHitSet.h" + +#include "RecoTracker/TkSeedGenerator/interface/SeedCreator.h" 
+#include "RecoTracker/TkSeedGenerator/interface/SeedCreatorFactory.h" + +#include "RecoTracker/TkTrackingRegions/interface/GlobalTrackingRegion.h" +#include "TrackingTools/GeomPropagators/interface/Propagator.h" +#include "TrackingTools/Records/interface/TrackingComponentsRecord.h" +#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" + +class LSTOutputConverter : public edm::stream::EDProducer<> { +public: + explicit LSTOutputConverter(edm::ParameterSet const& iConfig); + ~LSTOutputConverter() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::Event& iEvent, const edm::EventSetup& iSetup) override; + + const edm::EDGetTokenT lstOutputToken_; + const edm::EDGetTokenT lstPhase2OTHitsInputToken_; + const edm::EDGetTokenT lstPixelSeedToken_; + const bool includeT5s_; + const bool includeNonpLSTSs_; + const edm::ESGetToken mfToken_; + const edm::ESGetToken propagatorAlongToken_; + const edm::ESGetToken propagatorOppositeToken_; + const edm::ESGetToken tGeomToken_; + std::unique_ptr seedCreator_; + const edm::EDPutTokenT trajectorySeedPutToken_; + const edm::EDPutTokenT trajectorySeedpLSPutToken_; + const edm::EDPutTokenT trackCandidatePutToken_; + const edm::EDPutTokenT trackCandidatepTCPutToken_; + const edm::EDPutTokenT trackCandidateT5TCPutToken_; + const edm::EDPutTokenT trackCandidateNopLSTCPutToken_; + const edm::EDPutTokenT trackCandidatepTTCPutToken_; + const edm::EDPutTokenT trackCandidatepLSTCPutToken_; + const edm::EDPutTokenT> seedStopInfoPutToken_; +}; + +LSTOutputConverter::LSTOutputConverter(edm::ParameterSet const& iConfig) + : lstOutputToken_(consumes(iConfig.getParameter("lstOutput"))), + lstPhase2OTHitsInputToken_{consumes(iConfig.getParameter("phase2OTHits"))}, + lstPixelSeedToken_{consumes(iConfig.getParameter("lstPixelSeeds"))}, + includeT5s_(iConfig.getParameter("includeT5s")), + includeNonpLSTSs_(iConfig.getParameter("includeNonpLSTSs")), + 
mfToken_(esConsumes()), + propagatorAlongToken_{esConsumes(iConfig.getParameter("propagatorAlong"))}, + propagatorOppositeToken_{esConsumes(iConfig.getParameter("propagatorOpposite"))}, + tGeomToken_(esConsumes()), + seedCreator_(SeedCreatorFactory::get()->create("SeedFromConsecutiveHitsCreator", + iConfig.getParameter("SeedCreatorPSet"), + consumesCollector())), + // FIXME: need to make creation configurable: + // - A toggle to not produce TSs at all could be useful to save memory; + // it won't affect speed though + // - The minimal set for TCs is t5TCsLST, pTTCsLST and pLSTCsLST. + // That would complicate the handling of collections though, + // so it is deferred to when we have a clearer picture of what's needed. + trajectorySeedPutToken_(produces("")), + trajectorySeedpLSPutToken_(produces("pLSTSsLST")), + trackCandidatePutToken_(produces("")), + trackCandidatepTCPutToken_(produces("pTCsLST")), + trackCandidateT5TCPutToken_(produces("t5TCsLST")), + trackCandidateNopLSTCPutToken_(produces("nopLSTCsLST")), + trackCandidatepTTCPutToken_(produces("pTTCsLST")), + trackCandidatepLSTCPutToken_(produces("pLSTCsLST")), + seedStopInfoPutToken_(produces()) {} + +void LSTOutputConverter::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("lstOutput", edm::InputTag("lstProducer")); + desc.add("phase2OTHits", edm::InputTag("lstPhase2OTHitsInputProducer")); + desc.add("lstPixelSeeds", edm::InputTag("lstPixelSeedInputProducer")); + desc.add("includeT5s", true); + desc.add("includeNonpLSTSs", false); + desc.add("propagatorAlong", edm::ESInputTag{"", "PropagatorWithMaterial"}); + desc.add("propagatorOpposite", edm::ESInputTag{"", "PropagatorWithMaterialOpposite"}); + + edm::ParameterSetDescription psd0; + psd0.add("ComponentName", std::string("SeedFromConsecutiveHitsCreator")); + psd0.add("propagator", std::string("PropagatorWithMaterial")); + psd0.add("SeedMomentumForBOFF", 5.0); + 
psd0.add("OriginTransverseErrorMultiplier", 1.0); + psd0.add("MinOneOverPtError", 1.0); + psd0.add("magneticField", std::string("")); + psd0.add("TTRHBuilder", std::string("WithTrackAngle")); + psd0.add("forceKinematicWithRegionDirection", false); + desc.add("SeedCreatorPSet", psd0); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTOutputConverter::produce(edm::Event& iEvent, const edm::EventSetup& iSetup) { + // Setup + auto const& lstOutput = iEvent.get(lstOutputToken_); + auto const& phase2OTRecHits = iEvent.get(lstPhase2OTHitsInputToken_); + auto const& pixelSeeds = iEvent.get(lstPixelSeedToken_); + auto const& mf = iSetup.getData(mfToken_); + auto const& propAlo = iSetup.getData(propagatorAlongToken_); + auto const& propOppo = iSetup.getData(propagatorOppositeToken_); + auto const& tracker = iSetup.getData(tGeomToken_); + + // Vector definitions + std::vector> const& lstTC_hitIdx = lstOutput.hitIdx(); + std::vector const& lstTC_len = lstOutput.len(); + std::vector const& lstTC_seedIdx = lstOutput.seedIdx(); + std::vector const& lstTC_trackCandidateType = lstOutput.trackCandidateType(); + + TrajectorySeedCollection outputTS, outputpLSTS; + outputTS.reserve(lstTC_len.size()); + outputpLSTS.reserve(lstTC_len.size()); + TrackCandidateCollection outputTC, outputpTC, outputT5TC, outputNopLSTC, outputpTTC, outputpLSTC; + outputTC.reserve(lstTC_len.size()); + outputpTC.reserve(lstTC_len.size()); + outputT5TC.reserve(lstTC_len.size()); + outputNopLSTC.reserve(lstTC_len.size()); + outputpTTC.reserve(lstTC_len.size()); + outputpLSTC.reserve(lstTC_len.size()); + + auto const& OTHits = phase2OTRecHits.hits(); + + LogDebug("LSTOutputConverter") << "lstTC size " << lstTC_len.size(); + for (unsigned int i = 0; i < lstTC_len.size(); i++) { + LogDebug("LSTOutputConverter") << " cand " << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i]; + TrajectorySeed seed; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::T5) + seed = pixelSeeds[lstTC_seedIdx[i]]; + + 
edm::OwnVector recHits; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::T5) { + for (auto const& hit : seed.recHits()) + recHits.push_back(hit.clone()); + } + + unsigned int const nPixelHits = lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5 ? 0 : recHits.size(); + for (unsigned int j = nPixelHits; j < lstTC_hitIdx[i].size(); j++) + recHits.push_back(OTHits[lstTC_hitIdx[i][j]]->clone()); + + recHits.sort([](const auto& a, const auto& b) { + const auto asub = a.det()->subDetector(); + const auto bsub = b.det()->subDetector(); + if (GeomDetEnumerators::isInnerTracker(asub) && GeomDetEnumerators::isOuterTracker(bsub)) { + return true; + } else if (GeomDetEnumerators::isOuterTracker(asub) && GeomDetEnumerators::isInnerTracker(bsub)) { + return false; + } else if (asub != bsub) { + return asub < bsub; + } else { + const auto& apos = a.surface(); + const auto& bpos = b.surface(); + if (GeomDetEnumerators::isBarrel(asub)) { + return apos->rSpan().first < bpos->rSpan().first; + } else { + return std::abs(apos->zSpan().first) < std::abs(bpos->zSpan().first); + } + } + }); + + TrajectorySeedCollection seeds; + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::pLS) { + // Construct a full-length TrajectorySeed always for T5s, + // only when required by a flag for other pT objects. 
+ if (includeNonpLSTSs_ || lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + using Hit = SeedingHitSet::ConstRecHitPointer; + std::vector hitsForSeed; + hitsForSeed.reserve(lstTC_len[i]); + int nHits = 0; + for (auto const& hit : recHits) { + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + auto hType = tracker.getDetectorType(hit.geographicalId()); + if (hType != TrackerGeometry::ModuleType::Ph2PSP && nHits < 2) + continue; // the first two should be P + } + hitsForSeed.emplace_back(dynamic_cast(&hit)); + nHits++; + } + + seedCreator_->init(GlobalTrackingRegion(), iSetup, nullptr); + seedCreator_->makeSeed(seeds, hitsForSeed); + if (seeds.empty()) { + edm::LogInfo("LSTOutputConverter") + << "failed to convert a LST object to a seed" << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i]; + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) + continue; + } + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) + seed = seeds[0]; + + auto trajectorySeed = (seeds.empty() ? 
seed : seeds[0]); + outputTS.emplace_back(trajectorySeed); + auto const& ss = trajectorySeed.startingState(); + LogDebug("LSTOutputConverter") << "Created a seed with " << seed.nHits() << " " << ss.detId() << " " << ss.pt() + << " " << ss.parameters().vector() << " " << ss.error(0); + } + } else { + outputTS.emplace_back(seed); + outputpLSTS.emplace_back(seed); + } + + TrajectoryStateOnSurface tsos = + trajectoryStateTransform::transientState(seed.startingState(), (seed.recHits().end() - 1)->surface(), &mf); + tsos.rescaleError(100.); + auto tsosPair = propOppo.propagateWithPath(tsos, *recHits[0].surface()); + if (!tsosPair.first.isValid()) { + LogDebug("LSTOutputConverter") << "Propagating to startingState opposite to momentum failed, trying along next"; + tsosPair = propAlo.propagateWithPath(tsos, *recHits[0].surface()); + } + if (tsosPair.first.isValid()) { + PTrajectoryStateOnDet st = + trajectoryStateTransform::persistentState(tsosPair.first, recHits[0].det()->geographicalId().rawId()); + + if (lstTC_trackCandidateType[i] == LSTOutput::LSTTCType::T5) { + if (!includeT5s_) { + continue; + } else { + auto tc = TrackCandidate(recHits, seed, st); + outputTC.emplace_back(tc); + outputT5TC.emplace_back(tc); + outputNopLSTC.emplace_back(tc); + } + } else { + auto tc = TrackCandidate(recHits, seed, st); + outputTC.emplace_back(tc); + outputpTC.emplace_back(tc); + if (lstTC_trackCandidateType[i] != LSTOutput::LSTTCType::pLS) { + outputNopLSTC.emplace_back(tc); + outputpTTC.emplace_back(tc); + } else { + outputpLSTC.emplace_back(tc); + } + } + } else { + edm::LogInfo("LSTOutputConverter") << "Failed to make a candidate initial state. 
Seed state is " << tsos + << " TC cand " << i << " " << lstTC_len[i] << " " << lstTC_seedIdx[i] + << " first hit " << recHits.front().globalPosition() << " last hit " + << recHits.back().globalPosition(); + } + } + + LogDebug("LSTOutputConverter") << "done with conversion: Track candidate output size = " << outputpTC.size() + << " (p* objects) + " << outputT5TC.size() << " (T5 objects)"; + iEvent.emplace(trajectorySeedPutToken_, std::move(outputTS)); + iEvent.emplace(trajectorySeedpLSPutToken_, std::move(outputpLSTS)); + iEvent.emplace(trackCandidatePutToken_, std::move(outputTC)); + iEvent.emplace(trackCandidatepTCPutToken_, std::move(outputpTC)); + iEvent.emplace(trackCandidateT5TCPutToken_, std::move(outputT5TC)); + iEvent.emplace(trackCandidateNopLSTCPutToken_, std::move(outputNopLSTC)); + iEvent.emplace(trackCandidatepTTCPutToken_, std::move(outputpTTC)); + iEvent.emplace(trackCandidatepLSTCPutToken_, std::move(outputpLSTC)); + iEvent.emplace(seedStopInfoPutToken_, 0U); //dummy stop info +} + +DEFINE_FWK_MODULE(LSTOutputConverter); diff --git a/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc b/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc new file mode 100644 index 0000000000000..a0fcc72f598b6 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTPhase2OTHitsInputProducer.cc @@ -0,0 +1,67 @@ +#include "FWCore/Framework/interface/global/EDProducer.h" + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" + +class LSTPhase2OTHitsInputProducer : public edm::global::EDProducer<> { +public: + explicit LSTPhase2OTHitsInputProducer(edm::ParameterSet const& iConfig); + ~LSTPhase2OTHitsInputProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) 
const override; + + const edm::EDGetTokenT phase2OTRecHitToken_; + const edm::EDPutTokenT lstPhase2OTHitsInputPutToken_; +}; + +LSTPhase2OTHitsInputProducer::LSTPhase2OTHitsInputProducer(edm::ParameterSet const& iConfig) + : phase2OTRecHitToken_(consumes(iConfig.getParameter("phase2OTRecHits"))), + lstPhase2OTHitsInputPutToken_(produces()) {} + +void LSTPhase2OTHitsInputProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("phase2OTRecHits", edm::InputTag("siPhase2RecHits")); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTPhase2OTHitsInputProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // Setup + auto const& phase2OTHits = iEvent.get(phase2OTRecHitToken_); + + // Vector definitions + std::vector ph2_detId; + ph2_detId.reserve(phase2OTHits.dataSize()); + std::vector ph2_x; + ph2_x.reserve(phase2OTHits.dataSize()); + std::vector ph2_y; + ph2_y.reserve(phase2OTHits.dataSize()); + std::vector ph2_z; + ph2_z.reserve(phase2OTHits.dataSize()); + std::vector ph2_hits; + ph2_hits.reserve(phase2OTHits.dataSize()); + + for (auto const& it : phase2OTHits) { + const DetId hitId = it.detId(); + for (auto const& hit : it) { + ph2_detId.push_back(hitId.rawId()); + ph2_x.push_back(hit.globalPosition().x()); + ph2_y.push_back(hit.globalPosition().y()); + ph2_z.push_back(hit.globalPosition().z()); + ph2_hits.push_back(&hit); + } + } + + LSTPhase2OTHitsInput phase2OTHitsInput( + std::move(ph2_detId), std::move(ph2_x), std::move(ph2_y), std::move(ph2_z), std::move(ph2_hits)); + iEvent.emplace(lstPhase2OTHitsInputPutToken_, std::move(phase2OTHitsInput)); +} + +DEFINE_FWK_MODULE(LSTPhase2OTHitsInputProducer); diff --git a/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc b/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc new file mode 100644 index 0000000000000..819baf78c6aa4 --- /dev/null +++ b/RecoTracker/LST/plugins/LSTPixelSeedInputProducer.cc @@ 
-0,0 +1,171 @@ +#include "FWCore/Framework/interface/global/EDProducer.h" + +#include "FWCore/Framework/interface/Event.h" +#include "FWCore/Framework/interface/MakerMacros.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "FWCore/Utilities/interface/transform.h" + +#include "MagneticField/Engine/interface/MagneticField.h" +#include "MagneticField/Records/interface/IdealMagneticFieldRecord.h" + +#include "DataFormats/TrackerRecHit2D/interface/SiStripMatchedRecHit2DCollection.h" +#include "DataFormats/TrajectorySeed/interface/TrajectorySeedCollection.h" + +#include "Validation/RecoTrack/interface/trackFromSeedFitFailed.h" + +#include "TrackingTools/Records/interface/TransientRecHitRecord.h" +#include "TrackingTools/TrajectoryState/interface/TrajectoryStateTransform.h" +#include "TrackingTools/TransientTrackingRecHit/interface/TransientTrackingRecHitBuilder.h" + +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" + +class LSTPixelSeedInputProducer : public edm::global::EDProducer<> { +public: + explicit LSTPixelSeedInputProducer(edm::ParameterSet const& iConfig); + ~LSTPixelSeedInputProducer() override = default; + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions); + +private: + void produce(edm::StreamID, edm::Event& iEvent, const edm::EventSetup& iSetup) const override; + + const edm::ESGetToken mfToken_; + const edm::EDGetTokenT beamSpotToken_; + std::vector>> seedTokens_; + const edm::EDPutTokenT lstPixelSeedInputPutToken_; + const edm::EDPutTokenT lstPixelSeedsPutToken_; +}; + +LSTPixelSeedInputProducer::LSTPixelSeedInputProducer(edm::ParameterSet const& iConfig) + : mfToken_(esConsumes()), + beamSpotToken_(consumes(iConfig.getParameter("beamSpot"))), + lstPixelSeedInputPutToken_(produces()), + lstPixelSeedsPutToken_(produces()) { + seedTokens_ = edm::vector_transform(iConfig.getParameter>("seedTracks"), + [&](const edm::InputTag& tag) { return consumes>(tag); }); +} + +void 
LSTPixelSeedInputProducer::fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + + desc.add("beamSpot", edm::InputTag("offlineBeamSpot")); + + desc.add>("seedTracks", + std::vector{edm::InputTag("lstInitialStepSeedTracks"), + edm::InputTag("lstHighPtTripletStepSeedTracks")}); + + descriptions.addWithDefaultLabel(desc); +} + +void LSTPixelSeedInputProducer::produce(edm::StreamID iID, edm::Event& iEvent, const edm::EventSetup& iSetup) const { + // Setup + auto const& mf = iSetup.getData(mfToken_); + auto const& bs = iEvent.get(beamSpotToken_); + + // Vector definitions + std::vector see_px; + std::vector see_py; + std::vector see_pz; + std::vector see_dxy; + std::vector see_dz; + std::vector see_ptErr; + std::vector see_etaErr; + std::vector see_stateTrajGlbX; + std::vector see_stateTrajGlbY; + std::vector see_stateTrajGlbZ; + std::vector see_stateTrajGlbPx; + std::vector see_stateTrajGlbPy; + std::vector see_stateTrajGlbPz; + std::vector see_q; + std::vector> see_hitIdx; + TrajectorySeedCollection see_seeds; + + for (size_t iColl = 0; iColl < seedTokens_.size(); ++iColl) { + // Get seed tokens + auto const& seedToken = seedTokens_[iColl]; + auto const& seedTracks = iEvent.get(seedToken); + + if (seedTracks.empty()) + continue; + + // Get seed track refs + edm::RefToBaseVector seedTrackRefs; + for (edm::View::size_type i = 0; i < seedTracks.size(); ++i) { + seedTrackRefs.push_back(seedTracks.refAt(i)); + } + + edm::ProductID id = seedTracks[0].seedRef().id(); + + for (size_t iSeed = 0; iSeed < seedTrackRefs.size(); ++iSeed) { + auto const& seedTrackRef = seedTrackRefs[iSeed]; + auto const& seedTrack = *seedTrackRef; + auto const& seedRef = seedTrack.seedRef(); + auto const& seed = *seedRef; + + if (seedRef.id() != id) + throw cms::Exception("LogicError") + << "All tracks in 'TracksFromSeeds' collection should point to seeds in the same collection. 
Now the " + "element 0 had ProductID " + << id << " while the element " << seedTrackRef.key() << " had " << seedTrackRef.id() << "."; + + const bool seedFitOk = !trackFromSeedFitFailed(seedTrack); + + const TrackingRecHit* lastRecHit = &*(seed.recHits().end() - 1); + TrajectoryStateOnSurface tsos = + trajectoryStateTransform::transientState(seed.startingState(), lastRecHit->surface(), &mf); + auto const& stateGlobal = tsos.globalParameters(); + + std::vector hitIdx; + for (auto const& hit : seed.recHits()) { + int subid = hit.geographicalId().subdetId(); + if (subid == (int)PixelSubdetector::PixelBarrel || subid == (int)PixelSubdetector::PixelEndcap) { + const BaseTrackerRecHit* bhit = dynamic_cast(&hit); + const auto& clusterRef = bhit->firstClusterRef(); + const auto clusterKey = clusterRef.cluster_pixel().key(); + hitIdx.push_back(clusterKey); + } else { + throw cms::Exception("LSTPixelSeedInputProducer") << "Not pixel hits found!"; + } + } + + // Fill output + see_px.push_back(seedFitOk ? seedTrack.px() : 0); + see_py.push_back(seedFitOk ? seedTrack.py() : 0); + see_pz.push_back(seedFitOk ? seedTrack.pz() : 0); + see_dxy.push_back(seedFitOk ? seedTrack.dxy(bs.position()) : 0); + see_dz.push_back(seedFitOk ? seedTrack.dz(bs.position()) : 0); + see_ptErr.push_back(seedFitOk ? seedTrack.ptError() : 0); + see_etaErr.push_back(seedFitOk ? 
seedTrack.etaError() : 0); + see_stateTrajGlbX.push_back(stateGlobal.position().x()); + see_stateTrajGlbY.push_back(stateGlobal.position().y()); + see_stateTrajGlbZ.push_back(stateGlobal.position().z()); + see_stateTrajGlbPx.push_back(stateGlobal.momentum().x()); + see_stateTrajGlbPy.push_back(stateGlobal.momentum().y()); + see_stateTrajGlbPz.push_back(stateGlobal.momentum().z()); + see_q.push_back(seedTrack.charge()); + see_hitIdx.push_back(hitIdx); + see_seeds.push_back(seed); + } + } + + LSTPixelSeedInput pixelSeedInput(std::move(see_px), + std::move(see_py), + std::move(see_pz), + std::move(see_dxy), + std::move(see_dz), + std::move(see_ptErr), + std::move(see_etaErr), + std::move(see_stateTrajGlbX), + std::move(see_stateTrajGlbY), + std::move(see_stateTrajGlbZ), + std::move(see_stateTrajGlbPx), + std::move(see_stateTrajGlbPy), + std::move(see_stateTrajGlbPz), + std::move(see_q), + std::move(see_hitIdx)); + iEvent.emplace(lstPixelSeedInputPutToken_, std::move(pixelSeedInput)); + iEvent.emplace(lstPixelSeedsPutToken_, std::move(see_seeds)); +} + +DEFINE_FWK_MODULE(LSTPixelSeedInputProducer); diff --git a/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc new file mode 100644 index 0000000000000..d0e103b1e315b --- /dev/null +++ b/RecoTracker/LST/plugins/alpaka/LSTModulesDevESProducer.cc @@ -0,0 +1,31 @@ +// LST includes +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "FWCore/ParameterSet/interface/ParameterSet.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ESProducer.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/ModuleFactory.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class LSTModulesDevESProducer : public ESProducer { + public: + 
LSTModulesDevESProducer(edm::ParameterSet const& iConfig) : ESProducer(iConfig) { setWhatProduced(this); } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + descriptions.addWithDefaultLabel(desc); + } + + std::unique_ptr> produce(TrackerRecoGeometryRecord const& iRecord) { + return lst::loadAndFillESHost(); + } + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +DEFINE_FWK_EVENTSETUP_ALPAKA_MODULE(LSTModulesDevESProducer); diff --git a/RecoTracker/LST/plugins/alpaka/LSTProducer.cc b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc new file mode 100644 index 0000000000000..7eb6c57ade05c --- /dev/null +++ b/RecoTracker/LST/plugins/alpaka/LSTProducer.cc @@ -0,0 +1,99 @@ +#include + +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "FWCore/MessageLogger/interface/MessageLogger.h" +#include "FWCore/ParameterSet/interface/ConfigurationDescriptions.h" +#include "FWCore/ParameterSet/interface/ParameterSet.h" +#include "FWCore/ParameterSet/interface/ParameterSetDescription.h" +#include "FWCore/Utilities/interface/InputTag.h" + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDGetToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EDPutToken.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/Event.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/EventSetup.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/stream/SynchronizingEDProducer.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" + +#include "RecoTracker/LST/interface/LSTOutput.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" + +#include "RecoTracker/Record/interface/TrackerRecoGeometryRecord.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE { + + class LSTProducer : public stream::SynchronizingEDProducer<> { + public: + LSTProducer(edm::ParameterSet const& config) + : 
lstPixelSeedInputToken_{consumes(config.getParameter("pixelSeedInput"))}, + lstPhase2OTHitsInputToken_{consumes(config.getParameter("phase2OTHitsInput"))}, + lstESToken_{esConsumes()}, + verbose_(config.getParameter("verbose")), + nopLSDupClean_(config.getParameter("nopLSDupClean")), + tcpLSTriplets_(config.getParameter("tcpLSTriplets")), + lstOutputToken_{produces()} {} + + void acquire(device::Event const& event, device::EventSetup const& setup) override { + // Inputs + auto const& pixelSeeds = event.get(lstPixelSeedInputToken_); + auto const& phase2OTHits = event.get(lstPhase2OTHitsInputToken_); + + auto const& lstESDeviceData = setup.getData(lstESToken_); + + lst_.run(event.queue(), + verbose_, + &lstESDeviceData, + pixelSeeds.px(), + pixelSeeds.py(), + pixelSeeds.pz(), + pixelSeeds.dxy(), + pixelSeeds.dz(), + pixelSeeds.ptErr(), + pixelSeeds.etaErr(), + pixelSeeds.stateTrajGlbX(), + pixelSeeds.stateTrajGlbY(), + pixelSeeds.stateTrajGlbZ(), + pixelSeeds.stateTrajGlbPx(), + pixelSeeds.stateTrajGlbPy(), + pixelSeeds.stateTrajGlbPz(), + pixelSeeds.q(), + pixelSeeds.hitIdx(), + phase2OTHits.detId(), + phase2OTHits.x(), + phase2OTHits.y(), + phase2OTHits.z(), + nopLSDupClean_, + tcpLSTriplets_); + } + + void produce(device::Event& event, device::EventSetup const&) override { + // Output + LSTOutput lstOutput(lst_.hits(), lst_.len(), lst_.seedIdx(), lst_.trackCandidateType()); + event.emplace(lstOutputToken_, std::move(lstOutput)); + } + + static void fillDescriptions(edm::ConfigurationDescriptions& descriptions) { + edm::ParameterSetDescription desc; + desc.add("pixelSeedInput", edm::InputTag{"lstPixelSeedInputProducer"}); + desc.add("phase2OTHitsInput", edm::InputTag{"lstPhase2OTHitsInputProducer"}); + desc.add("verbose", false); + desc.add("nopLSDupClean", false); + desc.add("tcpLSTriplets", false); + descriptions.addWithDefaultLabel(desc); + } + + private: + edm::EDGetTokenT lstPixelSeedInputToken_; + edm::EDGetTokenT lstPhase2OTHitsInputToken_; + 
device::ESGetToken, TrackerRecoGeometryRecord> lstESToken_; + const bool verbose_, nopLSDupClean_, tcpLSTriplets_; + edm::EDPutTokenT lstOutputToken_; + + lst::LST lst_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE + +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/MakerMacros.h" +DEFINE_FWK_ALPAKA_MODULE(LSTProducer); diff --git a/RecoTracker/LST/python/lstProducerTask_cff.py b/RecoTracker/LST/python/lstProducerTask_cff.py new file mode 100644 index 0000000000000..588b354788635 --- /dev/null +++ b/RecoTracker/LST/python/lstProducerTask_cff.py @@ -0,0 +1,7 @@ +import FWCore.ParameterSet.Config as cms + +from RecoTracker.LST.lstProducer_cfi import lstProducer + +from RecoTracker.LST.lstModulesDevESProducer_cfi import lstModulesDevESProducer + +lstProducerTask = cms.Task(lstModulesDevESProducer, lstProducer) diff --git a/RecoTracker/LST/python/lstSeedTracks_cff.py b/RecoTracker/LST/python/lstSeedTracks_cff.py new file mode 100644 index 0000000000000..7046c616b0054 --- /dev/null +++ b/RecoTracker/LST/python/lstSeedTracks_cff.py @@ -0,0 +1,15 @@ +import FWCore.ParameterSet.Config as cms + +lstInitialStepSeedTracks = cms.EDProducer( + "TrackFromSeedProducer", + src = cms.InputTag("initialStepSeeds"), + beamSpot = cms.InputTag("offlineBeamSpot"), + TTRHBuilder = cms.string("WithoutRefit") +) + +lstHighPtTripletStepSeedTracks = cms.EDProducer( + "TrackFromSeedProducer", + src = cms.InputTag("highPtTripletStepSeeds"), + beamSpot = cms.InputTag("offlineBeamSpot"), + TTRHBuilder = cms.string("WithoutRefit") +) diff --git a/RecoTracker/LST/python/lst_cff.py b/RecoTracker/LST/python/lst_cff.py new file mode 100644 index 0000000000000..af3a80ae77e18 --- /dev/null +++ b/RecoTracker/LST/python/lst_cff.py @@ -0,0 +1,6 @@ +import FWCore.ParameterSet.Config as cms + +from RecoTracker.LST.lstSeedTracks_cff import * +from RecoTracker.LST.lstPixelSeedInputProducer_cfi import * +from RecoTracker.LST.lstPhase2OTHitsInputProducer_cfi import * +from 
RecoTracker.LST.lstOutputConverter_cfi import * diff --git a/RecoTracker/LST/src/ES_ModulesDev.cc b/RecoTracker/LST/src/ES_ModulesDev.cc new file mode 100644 index 0000000000000..06a357860a7d5 --- /dev/null +++ b/RecoTracker/LST/src/ES_ModulesDev.cc @@ -0,0 +1,5 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "FWCore/Utilities/interface/typelookup.h" + +TYPELOOKUP_DATA_REG(lst::LSTESData); diff --git a/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc b/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc new file mode 100644 index 0000000000000..54ded5e7a7c98 --- /dev/null +++ b/RecoTracker/LST/src/alpaka/ES_ModulesDev.cc @@ -0,0 +1,4 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "HeterogeneousCore/AlpakaCore/interface/alpaka/typelookup.h" + +TYPELOOKUP_ALPAKA_TEMPLATED_DATA_REG(lst::LSTESData); diff --git a/RecoTracker/LST/src/classes.h b/RecoTracker/LST/src/classes.h new file mode 100644 index 0000000000000..6a6817d9b538e --- /dev/null +++ b/RecoTracker/LST/src/classes.h @@ -0,0 +1,9 @@ +#ifndef RecoTracker_LST_classes_h +#define RecoTracker_LST_classes_h + +#include "DataFormats/Common/interface/Wrapper.h" +#include "RecoTracker/LST/interface/LSTPixelSeedInput.h" +#include "RecoTracker/LST/interface/LSTPhase2OTHitsInput.h" +#include "RecoTracker/LST/interface/LSTOutput.h" + +#endif diff --git a/RecoTracker/LST/src/classes_def.xml b/RecoTracker/LST/src/classes_def.xml new file mode 100644 index 0000000000000..d386e7b92a215 --- /dev/null +++ b/RecoTracker/LST/src/classes_def.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/RecoTracker/LSTCore/BuildFile.xml b/RecoTracker/LSTCore/BuildFile.xml new file mode 100644 index 0000000000000..a58a1898046ae --- /dev/null +++ b/RecoTracker/LSTCore/BuildFile.xml @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/RecoTracker/LSTCore/interface/Common.h b/RecoTracker/LSTCore/interface/Common.h new file mode 100644 index 
0000000000000..f65ca7a50d867 --- /dev/null +++ b/RecoTracker/LSTCore/interface/Common.h @@ -0,0 +1,107 @@ +#ifndef RecoTracker_LSTCore_interface_Common_h +#define RecoTracker_LSTCore_interface_Common_h + +#include "HeterogeneousCore/AlpakaInterface/interface/config.h" +#include "DataFormats/Common/interface/StdArray.h" + +#if defined(FP16_Base) +#if defined ALPAKA_ACC_GPU_CUDA_ENABLED +#include +#elif defined ALPAKA_ACC_GPU_HIP_ENABLED +#include +#endif +#endif + +namespace lst { + + // Named constants for pixelTypes + enum PixelType : int8_t { kInvalid = -1, kHighPt = 0, kLowPtPosCurv = 1, kLowPtNegCurv = 2 }; + + // Named types for LST objects + enum LSTObjType { T5 = 4, pT3 = 5, pT5 = 7, pLS = 8 }; + +// If a compile time flag does not define PT_CUT, default to 0.8 (GeV) +#ifndef PT_CUT + constexpr float PT_CUT = 0.8f; +#endif + + constexpr unsigned int max_blocks = 80; + constexpr unsigned int max_connected_modules = 40; + + constexpr unsigned int n_max_pixel_segments_per_module = 50000; + + constexpr unsigned int n_max_pixel_md_per_modules = 2 * n_max_pixel_segments_per_module; + + constexpr unsigned int n_max_pixel_triplets = 5000; + constexpr unsigned int n_max_pixel_quintuplets = 15000; + + constexpr unsigned int n_max_pixel_track_candidates = 30000; + constexpr unsigned int n_max_nonpixel_track_candidates = 1000; + + constexpr unsigned int size_superbins = 45000; + +// Half precision wrapper functions. +#if defined(FP16_Base) +#define __F2H __float2half +#define __H2F __half2float + typedef __half FPX; +#else +#define __F2H +#define __H2F + typedef float FPX; +#endif + +// Needed for files that are compiled by g++ to not throw an error. +// uint4 is defined only for CUDA, so we will have to revisit this soon when running on other backends. 
+#if !defined(ALPAKA_ACC_GPU_CUDA_ENABLED) && !defined(ALPAKA_ACC_GPU_HIP_ENABLED) + struct uint4 { + unsigned int x; + unsigned int y; + unsigned int z; + unsigned int w; + }; +#endif + + // Defining the constant host device variables right up here + // Currently pixel tracks are treated as LSs with 2 double layers (IT layers 1+2 and 3+4) and 4 hits. To be potentially handled better in the future. + struct Params_Modules { + using ArrayU16xMaxConnected = edm::StdArray; + }; + struct Params_pLS { + static constexpr int kLayers = 2, kHits = 4; + }; + struct Params_LS { + static constexpr int kLayers = 2, kHits = 4; + using ArrayUxLayers = edm::StdArray; + }; + struct Params_T3 { + static constexpr int kLayers = 3, kHits = 6; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + struct Params_pT3 { + static constexpr int kLayers = 5, kHits = 10; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + struct Params_T5 { + static constexpr int kLayers = 5, kHits = 10; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + struct Params_pT5 { + static constexpr int kLayers = 7, kHits = 14; + using ArrayU8xLayers = edm::StdArray; + using ArrayU16xLayers = edm::StdArray; + using ArrayUxHits = edm::StdArray; + }; + + using ArrayIx2 = edm::StdArray; + using ArrayUx2 = edm::StdArray; + +} //namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometry.h b/RecoTracker/LSTCore/interface/EndcapGeometry.h new file mode 100644 index 0000000000000..b8c44c14fb143 --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometry.h @@ -0,0 +1,29 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometry_h +#define RecoTracker_LSTCore_interface_EndcapGeometry_h + +#include +#include +#include + +namespace lst { + class EndcapGeometry { /* Endcap geometry data: two private maps documented as dx/dy slope and centroid phi, plus the public geoMapDetId_buf / geoMapPhi_buf vectors and the counter nEndCapMap. NOTE(review): load() and fillGeoMapArraysExplicit() are defined outside this header; presumably they fill the maps and the *_buf vectors - confirm. */ + private: + std::map
dxdy_slope_; // dx/dy slope + std::map centroid_phis_; // centroid phi + + public: + std::vector geoMapDetId_buf; + std::vector geoMapPhi_buf; + + unsigned int nEndCapMap = 0; /* default member initializer: with EndcapGeometry() = default this would otherwise be indeterminate after default-initialization */ + + EndcapGeometry() = default; + EndcapGeometry(std::string const& filename); + + void load(std::string const&); + void fillGeoMapArraysExplicit(); + float getdxdy_slope(unsigned int detid) const; + }; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h b/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h new file mode 100644 index 0000000000000..e761ac5942bf8 --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometryDevHostCollection_h +#define RecoTracker_LSTCore_interface_EndcapGeometryDevHostCollection_h + +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using EndcapGeometryDevHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h b/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h new file mode 100644 index 0000000000000..587abfdaec66a --- /dev/null +++ b/RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h @@ -0,0 +1,18 @@ +#ifndef RecoTracker_LSTCore_interface_EndcapGeometryDevSoA_h +#define RecoTracker_LSTCore_interface_EndcapGeometryDevSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(EndcapGeometryDevSoALayout, SOA_COLUMN(unsigned int, geoMapDetId), SOA_COLUMN(float, geoMapPhi)) + + using EndcapGeometryDevSoA = EndcapGeometryDevSoALayout<>; + + using EndcapGeometryDev = EndcapGeometryDevSoA::View; + using EndcapGeometryDevConst = EndcapGeometryDevSoA::ConstView; + +} // namespace lst + +#endif diff --git
a/RecoTracker/LSTCore/interface/HitsHostCollection.h b/RecoTracker/LSTCore/interface/HitsHostCollection.h new file mode 100644 index 0000000000000..f26c98c36e069 --- /dev/null +++ b/RecoTracker/LSTCore/interface/HitsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_HitsHostCollection_h +#define RecoTracker_LSTCore_interface_HitsHostCollection_h + +#include "RecoTracker/LSTCore/interface/HitsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using HitsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/HitsSoA.h b/RecoTracker/LSTCore/interface/HitsSoA.h new file mode 100644 index 0000000000000..b1f5de9eff46e --- /dev/null +++ b/RecoTracker/LSTCore/interface/HitsSoA.h @@ -0,0 +1,43 @@ +#ifndef RecoTracker_LSTCore_interface_HitsSoA_h +#define RecoTracker_LSTCore_interface_HitsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(HitsSoALayout, + SOA_COLUMN(float, xs), + SOA_COLUMN(float, ys), + SOA_COLUMN(float, zs), + SOA_COLUMN(uint16_t, moduleIndices), + SOA_COLUMN(unsigned int, idxs), + SOA_COLUMN(unsigned int, detid), + SOA_COLUMN(float, rts), + SOA_COLUMN(float, phis), + SOA_COLUMN(float, etas), + SOA_COLUMN(float, highEdgeXs), + SOA_COLUMN(float, highEdgeYs), + SOA_COLUMN(float, lowEdgeXs), + SOA_COLUMN(float, lowEdgeYs)) + + GENERATE_SOA_LAYOUT(HitsRangesSoALayout, + SOA_COLUMN(ArrayIx2, hitRanges), + SOA_COLUMN(int, hitRangesLower), + SOA_COLUMN(int, hitRangesUpper), + SOA_COLUMN(int8_t, hitRangesnLower), + SOA_COLUMN(int8_t, hitRangesnUpper)) + + using HitsSoA = HitsSoALayout<>; + using HitsRangesSoA = HitsRangesSoALayout<>; + + using Hits = HitsSoA::View; + using HitsConst = HitsSoA::ConstView; + using HitsRanges = HitsRangesSoA::View; + using 
HitsRangesConst = HitsRangesSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/LSTESData.h b/RecoTracker/LSTCore/interface/LSTESData.h new file mode 100644 index 0000000000000..45887d3cb1fea --- /dev/null +++ b/RecoTracker/LSTCore/interface/LSTESData.h @@ -0,0 +1,80 @@ +#ifndef RecoTracker_LSTCore_interface_LSTESData_h +#define RecoTracker_LSTCore_interface_LSTESData_h + +#include "RecoTracker/LSTCore/interface/Common.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevHostCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/CopyToDevice.h" + +#include + +namespace lst { + + template + struct LSTESData { + uint16_t nModules; + uint16_t nLowerModules; + unsigned int nPixels; + unsigned int nEndCapMap; + // Using shared_ptr so that for the serial backend all streams can use the same data + std::shared_ptr> modules; + std::shared_ptr> endcapGeometry; + // Host-side object that is shared between the LSTESData objects for different devices + std::shared_ptr pixelMapping; + + LSTESData(uint16_t const& nModulesIn, + uint16_t const& nLowerModulesIn, + unsigned int const& nPixelsIn, + unsigned int const& nEndCapMapIn, + std::shared_ptr> modulesIn, + std::shared_ptr> endcapGeometryIn, + std::shared_ptr const& pixelMappingIn) + : nModules(nModulesIn), + nLowerModules(nLowerModulesIn), + nPixels(nPixelsIn), + nEndCapMap(nEndCapMapIn), + modules(std::move(modulesIn)), + endcapGeometry(std::move(endcapGeometryIn)), + pixelMapping(pixelMappingIn) {} + }; + + std::unique_ptr> loadAndFillESHost(); + +} // namespace lst + +namespace cms::alpakatools { + + template <> + struct CopyToDevice> { + template + static lst::LSTESData> copyAsync(TQueue& queue, + lst::LSTESData const& srcData) { + using TDev = alpaka::Dev; + std::shared_ptr> deviceModules; + std::shared_ptr> deviceEndcapGeometry; + + if 
constexpr (std::is_same_v) { + deviceModules = srcData.modules; + deviceEndcapGeometry = srcData.endcapGeometry; + } else { + deviceModules = std::make_shared>( + CopyToDevice>::copyAsync( + queue, *srcData.modules)); + deviceEndcapGeometry = std::make_shared>( + CopyToDevice>::copyAsync(queue, *srcData.endcapGeometry)); + } + + return lst::LSTESData>(srcData.nModules, + srcData.nLowerModules, + srcData.nPixels, + srcData.nEndCapMap, + std::move(deviceModules), + std::move(deviceEndcapGeometry), + srcData.pixelMapping); + } + }; +} // namespace cms::alpakatools + +#endif diff --git a/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h new file mode 100644 index 0000000000000..33169a07b9e51 --- /dev/null +++ b/RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_MiniDoubletsHostCollection_h +#define RecoTracker_LSTCore_interface_MiniDoubletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using MiniDoubletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h new file mode 100644 index 0000000000000..84375502c34b0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/MiniDoubletsSoA.h @@ -0,0 +1,58 @@ +#ifndef RecoTracker_LSTCore_interface_MiniDoubletsSoA_h +#define RecoTracker_LSTCore_interface_MiniDoubletsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(MiniDoubletsSoALayout, + SOA_COLUMN(unsigned int, anchorHitIndices), + SOA_COLUMN(unsigned int, outerHitIndices), + SOA_COLUMN(uint16_t, moduleIndices), + SOA_COLUMN(float, dphichanges), + SOA_COLUMN(float, dzs), + 
SOA_COLUMN(float, dphis), + SOA_COLUMN(float, shiftedXs), + SOA_COLUMN(float, shiftedYs), + SOA_COLUMN(float, shiftedZs), + SOA_COLUMN(float, noShiftedDphis), + SOA_COLUMN(float, noShiftedDphiChanges), + SOA_COLUMN(float, anchorX), + SOA_COLUMN(float, anchorY), + SOA_COLUMN(float, anchorZ), + SOA_COLUMN(float, anchorRt), + SOA_COLUMN(float, anchorPhi), + SOA_COLUMN(float, anchorEta), + SOA_COLUMN(float, anchorHighEdgeX), + SOA_COLUMN(float, anchorHighEdgeY), + SOA_COLUMN(float, anchorLowEdgeX), + SOA_COLUMN(float, anchorLowEdgeY), + SOA_COLUMN(float, anchorLowEdgePhi), + SOA_COLUMN(float, anchorHighEdgePhi), + SOA_COLUMN(float, outerX), + SOA_COLUMN(float, outerY), + SOA_COLUMN(float, outerZ), + SOA_COLUMN(float, outerRt), + SOA_COLUMN(float, outerPhi), + SOA_COLUMN(float, outerEta), + SOA_COLUMN(float, outerHighEdgeX), + SOA_COLUMN(float, outerHighEdgeY), + SOA_COLUMN(float, outerLowEdgeX), + SOA_COLUMN(float, outerLowEdgeY)) + + GENERATE_SOA_LAYOUT(MiniDoubletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nMDs), + SOA_COLUMN(unsigned int, totOccupancyMDs)) + + using MiniDoubletsSoA = MiniDoubletsSoALayout<>; + using MiniDoubletsOccupancySoA = MiniDoubletsOccupancySoALayout<>; + + using MiniDoublets = MiniDoubletsSoA::View; + using MiniDoubletsConst = MiniDoubletsSoA::ConstView; + using MiniDoubletsOccupancy = MiniDoubletsOccupancySoA::View; + using MiniDoubletsOccupancyConst = MiniDoubletsOccupancySoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/ModuleConnectionMap.h b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h new file mode 100644 index 0000000000000..63c3496523c0d --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModuleConnectionMap.h @@ -0,0 +1,29 @@ +#ifndef RecoTracker_LSTCore_interface_ModuleConnectionMap_h +#define RecoTracker_LSTCore_interface_ModuleConnectionMap_h + +#include +#include +#include +#include + +namespace lst { + class ModuleConnectionMap { + private: + std::map> moduleConnections_; + + 
public: + ModuleConnectionMap(); + ModuleConnectionMap(std::string const& filename); + + void load(std::string const&); + void add(std::string const&); + void print(); + + const std::vector& getConnectedModuleDetIds(unsigned int detid) const; + int size() const; + }; + + using MapPLStoLayer = std::array, 3>; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/ModulesHostCollection.h b/RecoTracker/LSTCore/interface/ModulesHostCollection.h new file mode 100644 index 0000000000000..4119fb6ffb1a2 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModulesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_ModulesHostCollection_h +#define RecoTracker_LSTCore_interface_ModulesHostCollection_h + +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using ModulesHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/ModulesSoA.h b/RecoTracker/LSTCore/interface/ModulesSoA.h new file mode 100644 index 0000000000000..241dce953b293 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ModulesSoA.h @@ -0,0 +1,57 @@ +#ifndef RecoTracker_LSTCore_interface_ModulesSoA_h +#define RecoTracker_LSTCore_interface_ModulesSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + enum SubDet { InnerPixel = 0, Barrel = 5, Endcap = 4 }; + + enum Side { NegZ = 1, PosZ = 2, Center = 3 }; + + enum ModuleType { PS, TwoS, PixelModule }; + + enum ModuleLayerType { Pixel, Strip, InnerPixelLayer }; + + GENERATE_SOA_LAYOUT(ModulesSoALayout, + SOA_COLUMN(unsigned int, detIds), + SOA_COLUMN(Params_Modules::ArrayU16xMaxConnected, moduleMap), + SOA_COLUMN(unsigned int, mapdetId), + SOA_COLUMN(uint16_t, mapIdx), + SOA_COLUMN(uint16_t, nConnectedModules), + SOA_COLUMN(float, 
drdzs), + SOA_COLUMN(float, dxdys), + SOA_COLUMN(uint16_t, partnerModuleIndices), + SOA_COLUMN(short, layers), + SOA_COLUMN(short, rings), + SOA_COLUMN(short, modules), + SOA_COLUMN(short, rods), + SOA_COLUMN(short, subdets), + SOA_COLUMN(short, sides), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, r), + SOA_COLUMN(bool, isInverted), + SOA_COLUMN(bool, isLower), + SOA_COLUMN(bool, isAnchor), + SOA_COLUMN(ModuleType, moduleType), + SOA_COLUMN(ModuleLayerType, moduleLayerType), + SOA_COLUMN(int, lstLayers), + SOA_SCALAR(uint16_t, nModules), + SOA_SCALAR(uint16_t, nLowerModules)) + + GENERATE_SOA_LAYOUT(ModulesPixelSoALayout, SOA_COLUMN(unsigned int, connectedPixels)) + + using ModulesSoA = ModulesSoALayout<>; + using ModulesPixelSoA = ModulesPixelSoALayout<>; + + using Modules = ModulesSoA::View; + using ModulesConst = ModulesSoA::ConstView; + using ModulesPixel = ModulesPixelSoA::View; + using ModulesPixelConst = ModulesPixelSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h b/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h new file mode 100644 index 0000000000000..5a6d3e8ca13b4 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_ObjectRangesHostCollection_h +#define RecoTracker_LSTCore_interface_ObjectRangesHostCollection_h + +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using ObjectRangesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/ObjectRangesSoA.h b/RecoTracker/LSTCore/interface/ObjectRangesSoA.h new file mode 100644 index 0000000000000..ccab6b23909f6 --- /dev/null +++ b/RecoTracker/LSTCore/interface/ObjectRangesSoA.h @@ -0,0 +1,38 @@ +#ifndef RecoTracker_LSTCore_interface_ObjectRangesSoA_h +#define 
RecoTracker_LSTCore_interface_ObjectRangesSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(ObjectRangesSoALayout, + SOA_COLUMN(ArrayIx2, mdRanges), + SOA_COLUMN(ArrayIx2, segmentRanges), + SOA_COLUMN(ArrayIx2, tripletRanges), + SOA_COLUMN(ArrayIx2, quintupletRanges), + SOA_COLUMN(int, miniDoubletModuleIndices), + SOA_COLUMN(int, miniDoubletModuleOccupancy), + SOA_COLUMN(int, segmentModuleIndices), + SOA_COLUMN(int, segmentModuleOccupancy), + SOA_COLUMN(int, tripletModuleIndices), + SOA_COLUMN(int, tripletModuleOccupancy), + SOA_COLUMN(int, quintupletModuleIndices), + SOA_COLUMN(int, quintupletModuleOccupancy), + SOA_COLUMN(uint16_t, indicesOfEligibleT5Modules), + SOA_SCALAR(unsigned int, nTotalMDs), + SOA_SCALAR(unsigned int, nTotalSegs), + SOA_SCALAR(unsigned int, nTotalTrips), + SOA_SCALAR(unsigned int, nTotalQuints), + SOA_SCALAR(uint16_t, nEligibleT5Modules)) + + using ObjectRangesSoA = ObjectRangesSoALayout<>; + + using ObjectRanges = ObjectRangesSoA::View; + using ObjectRangesConst = ObjectRangesSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/PixelMap.h b/RecoTracker/LSTCore/interface/PixelMap.h new file mode 100644 index 0000000000000..763686142056c --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelMap.h @@ -0,0 +1,31 @@ +#ifndef RecoTracker_LSTCore_interface_PixelMap_h +#define RecoTracker_LSTCore_interface_PixelMap_h + +#include +#include + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + struct PixelMap { + uint16_t pixelModuleIndex; + + std::vector connectedPixelsIndex; + std::vector connectedPixelsSizes; + std::vector connectedPixelsIndexPos; + std::vector connectedPixelsSizesPos; + std::vector connectedPixelsIndexNeg; + std::vector connectedPixelsSizesNeg; + + PixelMap(unsigned int sizef = size_superbins) + : 
pixelModuleIndex(0), + connectedPixelsIndex(sizef), + connectedPixelsSizes(sizef), + connectedPixelsIndexPos(sizef), + connectedPixelsSizesPos(sizef), + connectedPixelsIndexNeg(sizef), + connectedPixelsSizesNeg(sizef) {} + }; +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h new file mode 100644 index 0000000000000..afb2560680621 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h +#define RecoTracker_LSTCore_interface_PixelQuintupletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using PixelQuintupletsHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h new file mode 100644 index 0000000000000..e8ea89b9a2547 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h @@ -0,0 +1,35 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h +#define RecoTracker_LSTCore_interface_PixelQuintupletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(PixelQuintupletsSoALayout, + SOA_COLUMN(unsigned int, pixelSegmentIndices), + SOA_COLUMN(unsigned int, quintupletIndices), + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // lower module index (OT part) + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(float, rPhiChiSquared), // chi2 from pLS to T5 + SOA_COLUMN(float, 
rPhiChiSquaredInwards), // chi2 from T5 to pLS + SOA_COLUMN(float, rzChiSquared), + SOA_COLUMN(FPX, pixelRadius), // pLS pt converted + SOA_COLUMN(FPX, quintupletRadius), // T5 circle + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, score), // used for ranking (in e.g. duplicate cleaning) + SOA_COLUMN(FPX, centerX), // T3-based circle center x + SOA_COLUMN(FPX, centerY), // T3-based circle center y + SOA_COLUMN(bool, isDup), + SOA_SCALAR(unsigned int, nPixelQuintuplets), + SOA_SCALAR(unsigned int, totOccupancyPixelQuintuplets)); + + using PixelQuintupletsSoA = PixelQuintupletsSoALayout<>; + using PixelQuintuplets = PixelQuintupletsSoA::View; + using PixelQuintupletsConst = PixelQuintupletsSoA::ConstView; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h new file mode 100644 index 0000000000000..67678e64bfc03 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h +#define RecoTracker_LSTCore_interface_PixelTripletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using PixelTripletsHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/PixelTripletsSoA.h b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h new file mode 100644 index 0000000000000..a0f2c9c416539 --- /dev/null +++ b/RecoTracker/LSTCore/interface/PixelTripletsSoA.h @@ -0,0 +1,39 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsSoA_h +#define RecoTracker_LSTCore_interface_PixelTripletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + 
GENERATE_SOA_LAYOUT(PixelTripletsSoALayout, + SOA_COLUMN(unsigned int, pixelSegmentIndices), + SOA_COLUMN(unsigned int, tripletIndices), + SOA_COLUMN(Params_pT3::ArrayU16xLayers, lowerModuleIndices), // lower module index (OT part) + SOA_COLUMN(Params_pT3::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_pT3::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(float, rPhiChiSquared), // chi2 from pLS to T3 + SOA_COLUMN(float, rPhiChiSquaredInwards), // chi2 from T3 to pLS + SOA_COLUMN(float, rzChiSquared), + SOA_COLUMN(FPX, pixelRadius), // pLS pt converted + SOA_COLUMN(FPX, tripletRadius), // T3 circle + SOA_COLUMN(FPX, pt), + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, eta_pix), // eta from pLS + SOA_COLUMN(FPX, phi_pix), // phi from pLS + SOA_COLUMN(FPX, score), // used for ranking (in e.g. duplicate cleaning) + SOA_COLUMN(FPX, centerX), // T3-based circle center x + SOA_COLUMN(FPX, centerY), // T3-based circle center y + SOA_COLUMN(bool, isDup), + SOA_SCALAR(unsigned int, nPixelTriplets), + SOA_SCALAR(unsigned int, totOccupancyPixelTriplets)); + + using PixelTripletsSoA = PixelTripletsSoALayout<>; + using PixelTriplets = PixelTripletsSoA::View; + using PixelTripletsConst = PixelTripletsSoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h new file mode 100644 index 0000000000000..734ce03057be7 --- /dev/null +++ b/RecoTracker/LSTCore/interface/QuintupletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsHostCollection_h +#define RecoTracker_LSTCore_interface_QuintupletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using QuintupletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git 
a/RecoTracker/LSTCore/interface/QuintupletsSoA.h b/RecoTracker/LSTCore/interface/QuintupletsSoA.h new file mode 100644 index 0000000000000..4ece80cd11ddd --- /dev/null +++ b/RecoTracker/LSTCore/interface/QuintupletsSoA.h @@ -0,0 +1,46 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsSoA_h +#define RecoTracker_LSTCore_interface_QuintupletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(QuintupletsSoALayout, /* Columns for quintuplets (T5); array columns use the Params_T5 typedefs and FPX is the float/__half alias from Common.h. */ + SOA_COLUMN(ArrayUx2, tripletIndices), // inner and outer triplet indices + SOA_COLUMN(Params_T5::ArrayU16xLayers, lowerModuleIndices), // lower module index in each layer + SOA_COLUMN(Params_T5::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_T5::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(FPX, innerRadius), // inner triplet circle radius + SOA_COLUMN(FPX, bridgeRadius), // "middle"/bridge triplet radius + SOA_COLUMN(FPX, outerRadius), // outer triplet radius + SOA_COLUMN(FPX, pt), + SOA_COLUMN(FPX, eta), + SOA_COLUMN(FPX, phi), + SOA_COLUMN(FPX, score_rphisum), // r-phi based score + SOA_COLUMN(char, isDup), // duplicate flag + SOA_COLUMN(bool, tightCutFlag), // tight pass to be a TC + SOA_COLUMN(bool, partOfPT5), /* NOTE(review): presumably marks T5s that are part of a pT5 - confirm */ + SOA_COLUMN(float, regressionRadius), + SOA_COLUMN(float, regressionG), + SOA_COLUMN(float, regressionF), + SOA_COLUMN(float, rzChiSquared), // r-z only chi2 + SOA_COLUMN(float, chiSquared), + SOA_COLUMN(float, nonAnchorChiSquared)); + + using QuintupletsSoA = QuintupletsSoALayout<>; + using Quintuplets = QuintupletsSoA::View; + using QuintupletsConst = QuintupletsSoA::ConstView; + + GENERATE_SOA_LAYOUT(QuintupletsOccupancySoALayout, /* NOTE(review): presumably per-module counters, by analogy with SegmentsOccupancySoALayout - confirm */ + SOA_COLUMN(unsigned int, nQuintuplets), + SOA_COLUMN(unsigned int, totOccupancyQuintuplets)); + + using QuintupletsOccupancySoA = QuintupletsOccupancySoALayout<>; + using QuintupletsOccupancy =
QuintupletsOccupancySoA::View; + using QuintupletsOccupancyConst = QuintupletsOccupancySoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/SegmentsHostCollection.h b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h new file mode 100644 index 0000000000000..2fa6ac912a732 --- /dev/null +++ b/RecoTracker/LSTCore/interface/SegmentsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_SegmentsHostCollection_h +#define RecoTracker_LSTCore_interface_SegmentsHostCollection_h + +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using SegmentsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/SegmentsSoA.h b/RecoTracker/LSTCore/interface/SegmentsSoA.h new file mode 100644 index 0000000000000..24df2fb5a42a0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/SegmentsSoA.h @@ -0,0 +1,63 @@ +#ifndef RecoTracker_LSTCore_interface_SegmentsSoA_h +#define RecoTracker_LSTCore_interface_SegmentsSoA_h + +#include "DataFormats/SoATemplate/interface/SoALayout.h" +#include "DataFormats/Portable/interface/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + + GENERATE_SOA_LAYOUT(SegmentsSoALayout, /* Columns for segments; FPX is the float/__half alias from Common.h and mdIndices uses the Params_LS array typedef. */ + SOA_COLUMN(FPX, dPhis), + SOA_COLUMN(FPX, dPhiMins), + SOA_COLUMN(FPX, dPhiMaxs), + SOA_COLUMN(FPX, dPhiChanges), + SOA_COLUMN(FPX, dPhiChangeMins), + SOA_COLUMN(FPX, dPhiChangeMaxs), + SOA_COLUMN(uint16_t, innerLowerModuleIndices), + SOA_COLUMN(uint16_t, outerLowerModuleIndices), + SOA_COLUMN(Params_LS::ArrayUxLayers, mdIndices), + SOA_COLUMN(unsigned int, innerMiniDoubletAnchorHitIndices), + SOA_COLUMN(unsigned int, outerMiniDoubletAnchorHitIndices)) + + GENERATE_SOA_LAYOUT(SegmentsOccupancySoALayout, + SOA_COLUMN(unsigned int, nSegments), // number of segments per inner lower module + SOA_COLUMN(unsigned int, totOccupancySegments)) +
+ GENERATE_SOA_LAYOUT(SegmentsPixelSoALayout, + SOA_COLUMN(unsigned int, seedIdx), + SOA_COLUMN(int, charge), + SOA_COLUMN(int, superbin), + SOA_COLUMN(uint4, pLSHitsIdxs), + SOA_COLUMN(PixelType, pixelType), + SOA_COLUMN(char, isQuad), + SOA_COLUMN(char, isDup), + SOA_COLUMN(bool, partOfPT5), + SOA_COLUMN(float, ptIn), + SOA_COLUMN(float, ptErr), + SOA_COLUMN(float, px), + SOA_COLUMN(float, py), + SOA_COLUMN(float, pz), + SOA_COLUMN(float, etaErr), + SOA_COLUMN(float, eta), + SOA_COLUMN(float, phi), + SOA_COLUMN(float, score), + SOA_COLUMN(float, circleCenterX), + SOA_COLUMN(float, circleCenterY), + SOA_COLUMN(float, circleRadius)) + + using SegmentsSoA = SegmentsSoALayout<>; + using SegmentsOccupancySoA = SegmentsOccupancySoALayout<>; + using SegmentsPixelSoA = SegmentsPixelSoALayout<>; + + using Segments = SegmentsSoA::View; + using SegmentsConst = SegmentsSoA::ConstView; + using SegmentsOccupancy = SegmentsOccupancySoA::View; + using SegmentsOccupancyConst = SegmentsOccupancySoA::ConstView; + using SegmentsPixel = SegmentsPixelSoA::View; + using SegmentsPixelConst = SegmentsPixelSoA::ConstView; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/TiltedGeometry.h b/RecoTracker/LSTCore/interface/TiltedGeometry.h new file mode 100644 index 0000000000000..7a17106195522 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TiltedGeometry.h @@ -0,0 +1,26 @@ +#ifndef RecoTracker_LSTCore_interface_TiltedGeometry_h +#define RecoTracker_LSTCore_interface_TiltedGeometry_h + +#include +#include +#include + +namespace lst { + class TiltedGeometry { + private: + std::map drdzs_; // dr/dz slope + std::map dxdys_; // dx/dy slope + + public: + TiltedGeometry() = default; + TiltedGeometry(std::string const& filename); + + void load(std::string const&); + + float getDrDz(unsigned int detid) const; + float getDxDy(unsigned int detid) const; + }; + +} // namespace lst + +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h 
b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h new file mode 100644 index 0000000000000..3ffd2bedf945e --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesHostCollection_h + +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TrackCandidatesHostCollection = PortableHostCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h new file mode 100644 index 0000000000000..b1fdecf75526a --- /dev/null +++ b/RecoTracker/LSTCore/interface/TrackCandidatesSoA.h @@ -0,0 +1,32 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesSoA_h +#define RecoTracker_LSTCore_interface_TrackCandidatesSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TrackCandidatesSoALayout, + SOA_COLUMN(short, trackCandidateType), // 4-T5 5-pT3 7-pT5 8-pLS + SOA_COLUMN(unsigned int, directObjectIndices), // direct indices to each type containers + SOA_COLUMN(ArrayUx2, objectIndices), // tracklet and triplet indices + SOA_COLUMN(Params_pT5::ArrayU8xLayers, logicalLayers), // + SOA_COLUMN(Params_pT5::ArrayUxHits, hitIndices), // + SOA_COLUMN(int, pixelSeedIndex), // + SOA_COLUMN(Params_pT5::ArrayU16xLayers, lowerModuleIndices), // + SOA_COLUMN(FPX, centerX), // + SOA_COLUMN(FPX, centerY), // + SOA_COLUMN(FPX, radius), // + SOA_SCALAR(unsigned int, nTrackCandidates), // + SOA_SCALAR(unsigned int, nTrackCandidatespT3), // + SOA_SCALAR(unsigned int, nTrackCandidatespT5), // + SOA_SCALAR(unsigned int, nTrackCandidatespLS), // + SOA_SCALAR(unsigned int, 
nTrackCandidatesT5)) // + + using TrackCandidatesSoA = TrackCandidatesSoALayout<>; + using TrackCandidates = TrackCandidatesSoA::View; + using TrackCandidatesConst = TrackCandidatesSoA::ConstView; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TripletsHostCollection.h b/RecoTracker/LSTCore/interface/TripletsHostCollection.h new file mode 100644 index 0000000000000..6eaebd97e5bf6 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TripletsHostCollection.h @@ -0,0 +1,10 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsHostCollection_h +#define RecoTracker_LSTCore_interface_TripletsHostCollection_h + +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "DataFormats/Portable/interface/PortableHostCollection.h" + +namespace lst { + using TripletsHostCollection = PortableHostMultiCollection; +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/TripletsSoA.h b/RecoTracker/LSTCore/interface/TripletsSoA.h new file mode 100644 index 0000000000000..69c2d97449df3 --- /dev/null +++ b/RecoTracker/LSTCore/interface/TripletsSoA.h @@ -0,0 +1,42 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsSoA_h +#define RecoTracker_LSTCore_interface_TripletsSoA_h + +#include +#include "DataFormats/Common/interface/StdArray.h" +#include "DataFormats/SoATemplate/interface/SoALayout.h" + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace lst { + GENERATE_SOA_LAYOUT(TripletsSoALayout, + SOA_COLUMN(ArrayUx2, segmentIndices), // inner and outer segment indices + SOA_COLUMN(Params_T3::ArrayU16xLayers, lowerModuleIndices), // lower module index in each layer + SOA_COLUMN(Params_T3::ArrayU8xLayers, logicalLayers), // layer ID + SOA_COLUMN(Params_T3::ArrayUxHits, hitIndices), // hit indices + SOA_COLUMN(FPX, betaIn), // beta/chord angle of the inner segment + SOA_COLUMN(float, centerX), // lower/anchor-hit based circle center x + SOA_COLUMN(float, centerY), // lower/anchor-hit based circle center y + SOA_COLUMN(float, radius), // 
lower/anchor-hit based circle radius +#ifdef CUT_VALUE_DEBUG + SOA_COLUMN(float, zOut), + SOA_COLUMN(float, rtOut), + SOA_COLUMN(float, betaInCut), +#endif + SOA_COLUMN(bool, partOfPT5), // is it used in a pT5 + SOA_COLUMN(bool, partOfT5), // is it used in a T5 + SOA_COLUMN(bool, partOfPT3)); // is it used in a pT3 + + using TripletsSoA = TripletsSoALayout<>; + using Triplets = TripletsSoA::View; + using TripletsConst = TripletsSoA::ConstView; + + GENERATE_SOA_LAYOUT(TripletsOccupancySoALayout, + SOA_COLUMN(unsigned int, nTriplets), + SOA_COLUMN(unsigned int, totOccupancyTriplets)); + + using TripletsOccupancySoA = TripletsOccupancySoALayout<>; + using TripletsOccupancy = TripletsOccupancySoA::View; + using TripletsOccupancyConst = TripletsOccupancySoA::ConstView; + +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/Common.h b/RecoTracker/LSTCore/interface/alpaka/Common.h new file mode 100644 index 0000000000000..7a1feabfcf076 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/Common.h @@ -0,0 +1,82 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_Common_h +#define RecoTracker_LSTCore_interface_alpaka_Common_h + +#include + +#include "RecoTracker/LSTCore/interface/Common.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + using namespace ::lst; + + Vec3D constexpr elementsPerThread(Vec3D::all(static_cast(1))); + + // Adjust grid and block sizes based on backend configuration + template > + ALPAKA_FN_HOST ALPAKA_FN_INLINE WorkDiv createWorkDiv(const Vec& blocksPerGrid, + const Vec& threadsPerBlock, + const Vec& elementsPerThreadArg) { + Vec adjustedBlocks = blocksPerGrid; + Vec adjustedThreads = threadsPerBlock; + + // special overrides for CPU/host cases + if constexpr (std::is_same_v) { + adjustedBlocks = Vec::all(static_cast(1)); + + if constexpr (alpaka::accMatchesTags) { + // Serial execution, set threads to 1 as well + adjustedThreads = Vec::all(static_cast(1)); // probably redundant + } + } + + return 
WorkDiv(adjustedBlocks, adjustedThreads, elementsPerThreadArg); + } + + // The constants below are usually used in functions like alpaka::math::min(), + // expecting a reference (T const&) in the arguments. Hence, + // ALPAKA_STATIC_ACC_MEM_GLOBAL needs to be used in addition to constexpr. + + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPi = std::numbers::pi_v; + // 15 MeV constant from the approximate Bethe-Bloch formula + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMulsInGeV = 0.015; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleBarrel[6] = { + 0.0052, 0.0038, 0.0034, 0.0034, 0.0032, 0.0034}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniMulsPtScaleEndcap[5] = {0.006, 0.006, 0.006, 0.006, 0.006}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanBarrel[6] = { + 25.007152356, 37.2186993757, 52.3104270826, 68.6658656666, 85.9770373007, 108.301772384}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kMiniRminMeanEndcap[5] = { + 130.992832231, 154.813883559, 185.352604327, 221.635123002, 265.022076742}; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float k2Rinv1GeVf = (2.99792458e-3 * 3.8) / 2; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kR1GeVf = 1. 
/ (2.99792458e-3 * 3.8); + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kSinAlphaMax = 0.95; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float ptCut = PT_CUT; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kDeltaZLum = 15.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPixelPSZpitch = 0.15; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStripPSZpitch = 2.4; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kStrip2SZpitch = 5.0; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidth2S = 0.009; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWidthPS = 0.01; + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kPt_betaMax = 7.0; + // To be updated with std::numeric_limits::infinity() in the code and data files + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kVerticalModuleSlope = 123456789.0; + + namespace t5dnn { + + // Working points matching LST fake rate (43.9%) or signal acceptance (82.0%) + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp1 = 0.3418833f; // 94.0% TPR, 43.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kLSTWp2 = 0.6177366f; // 82.0% TPR, 20.0% FPR + // Other working points + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp70 = 0.7776195f; // 70.0% TPR, 10.0% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp75 = 0.7181118f; // 75.0% TPR, 13.5% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp80 = 0.6492643f; // 80.0% TPR, 17.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp85 = 0.5655319f; // 85.0% TPR, 23.8% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp90 = 0.4592205f; // 90.0% TPR, 32.6% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp95 = 0.3073708f; // 95.0% TPR, 47.7% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp97p5 = 0.2001348f; // 97.5% TPR, 61.2% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99 = 0.1120605f; // 99.0% TPR, 75.9% FPR + ALPAKA_STATIC_ACC_MEM_GLOBAL constexpr float kWp99p9 = 0.0218196f; // 99.9% TPR, 95.4% FPR + + } // namespace t5dnn + +} // namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h new file mode 100644 index 0000000000000..12f510cf6b367 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_EndcapGeometryDevDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_EndcapGeometryDevDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometryDevSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using EndcapGeometryDevDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h new file mode 100644 index 0000000000000..5bafd9df246bc --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_HitsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_HitsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/HitsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using HitsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/LST.h b/RecoTracker/LSTCore/interface/alpaka/LST.h new file mode 100644 index 0000000000000..40d912de3f291 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/LST.h @@ -0,0 +1,102 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_LST_h +#define 
RecoTracker_LSTCore_interface_alpaka_LST_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/LSTESData.h" + +#include +#include +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + class LSTEvent; + + class LST { + public: + LST() = default; + + void run(Queue& queue, + bool verbose, + LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets); + std::vector> const& hits() const { return out_tc_hitIdxs_; } + std::vector const& len() const { return out_tc_len_; } + std::vector const& seedIdx() const { return out_tc_seedIdx_; } + std::vector const& trackCandidateType() const { return out_tc_trackCandidateType_; } + + private: + void prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z); + + 
void getOutput(LSTEvent& event); + + // Input and output vectors + std::vector in_trkX_; + std::vector in_trkY_; + std::vector in_trkZ_; + std::vector in_hitId_; + std::vector in_hitIdxs_; + std::vector in_hitIndices_vec0_; + std::vector in_hitIndices_vec1_; + std::vector in_hitIndices_vec2_; + std::vector in_hitIndices_vec3_; + std::vector in_deltaPhi_vec_; + std::vector in_ptIn_vec_; + std::vector in_ptErr_vec_; + std::vector in_px_vec_; + std::vector in_py_vec_; + std::vector in_pz_vec_; + std::vector in_eta_vec_; + std::vector in_etaErr_vec_; + std::vector in_phi_vec_; + std::vector in_charge_vec_; + std::vector in_seedIdx_vec_; + std::vector in_superbin_vec_; + std::vector in_pixelType_vec_; + std::vector in_isQuad_vec_; + std::vector> out_tc_hitIdxs_; + std::vector out_tc_len_; + std::vector out_tc_seedIdx_; + std::vector out_tc_trackCandidateType_; + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h new file mode 100644 index 0000000000000..7751f75ac5ec9 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_MiniDoubletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using MiniDoubletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h new file mode 100644 index 0000000000000..73152a47b6a42 --- /dev/null +++ 
b/RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_ModulesDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_ModulesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using ModulesDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h new file mode 100644 index 0000000000000..36c6584280fe0 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_ObjectRangesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using ObjectRangesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h new file mode 100644 index 0000000000000..e5feed7677c38 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_PixelQuintupletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_PixelQuintupletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include 
"RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using PixelQuintupletsDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h new file mode 100644 index 0000000000000..a5938ed82bd8b --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_PixelTripletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_PixelTripletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using PixelTripletsDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h new file mode 100644 index 0000000000000..13fb5484ea0fd --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_QuintupletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_QuintupletsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using QuintupletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h 
b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h new file mode 100644 index 0000000000000..934e6314ae320 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h @@ -0,0 +1,13 @@ +#ifndef RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h +#define RecoTracker_LSTCore_interface_alpaka_SegmentsDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using SegmentsDeviceCollection = PortableCollection3; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h new file mode 100644 index 0000000000000..387ca5a108453 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h +#define RecoTracker_LSTCore_interface_TrackCandidatesDeviceCollection_h + +#include "DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TrackCandidatesDeviceCollection = PortableCollection; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h new file mode 100644 index 0000000000000..6db827680cee3 --- /dev/null +++ b/RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h @@ -0,0 +1,12 @@ +#ifndef RecoTracker_LSTCore_interface_TripletsDeviceCollection_h +#define RecoTracker_LSTCore_interface_TripletsDeviceCollection_h + +#include 
"DataFormats/Portable/interface/alpaka/PortableCollection.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + using TripletsDeviceCollection = PortableCollection2; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/EndcapGeometry.cc b/RecoTracker/LSTCore/src/EndcapGeometry.cc new file mode 100644 index 0000000000000..17e72379bb2ec --- /dev/null +++ b/RecoTracker/LSTCore/src/EndcapGeometry.cc @@ -0,0 +1,59 @@ +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" + +#include +#include +#include +#include + +lst::EndcapGeometry::EndcapGeometry(std::string const& filename) { load(filename); } + +void lst::EndcapGeometry::load(std::string const& filename) { + dxdy_slope_.clear(); + centroid_phis_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid; + float dxdy_slope, centroid_phi; + + // Read the detid, dxdy_slope, and centroid_phi from binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + ifile.read(reinterpret_cast(&dxdy_slope), sizeof(dxdy_slope)); + ifile.read(reinterpret_cast(¢roid_phi), sizeof(centroid_phi)); + + if (ifile) { + dxdy_slope_[detid] = dxdy_slope; + centroid_phis_[detid] = centroid_phi; + } else { + // End of file or read failed + if (!ifile.eof()) { + throw std::runtime_error("Failed to read Endcap Geometry binary data."); + } + } + } + + fillGeoMapArraysExplicit(); +} + +void lst::EndcapGeometry::fillGeoMapArraysExplicit() { + nEndCapMap = centroid_phis_.size(); + + geoMapDetId_buf.reserve(nEndCapMap); + geoMapPhi_buf.reserve(nEndCapMap); + + for (auto it = centroid_phis_.begin(); it != centroid_phis_.end(); ++it) { + unsigned int detId = it->first; + float Phi = it->second; + geoMapPhi_buf.push_back(Phi); + 
geoMapDetId_buf.push_back(detId); + } +} + +float lst::EndcapGeometry::getdxdy_slope(unsigned int detid) const { + auto res = dxdy_slope_.find(detid); + return res == dxdy_slope_.end() ? 0.f : res->second; +} diff --git a/RecoTracker/LSTCore/src/LSTESData.cc b/RecoTracker/LSTCore/src/LSTESData.cc new file mode 100644 index 0000000000000..66163d39beb2e --- /dev/null +++ b/RecoTracker/LSTCore/src/LSTESData.cc @@ -0,0 +1,120 @@ +#include "RecoTracker/LSTCore/interface/LSTESData.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" +#include "RecoTracker/LSTCore/interface/PixelMap.h" + +#include "ModuleMethods.h" + +#include + +namespace { + std::string geometryDataDir() { + const char* path_lst_base = std::getenv("LST_BASE"); + const char* path_tracklooperdir = std::getenv("TRACKLOOPERDIR"); + std::string path_str; + if (path_lst_base != nullptr) { + path_str = path_lst_base; + } else if (path_tracklooperdir != nullptr) { + path_str = path_tracklooperdir; + path_str += "/../"; + } else { + std::stringstream search_path(std::getenv("CMSSW_SEARCH_PATH")); + std::string path; + while (std::getline(search_path, path, ':')) { + if (std::filesystem::exists(path + "/RecoTracker/LSTCore/data")) { + path_str = path; + break; + } + } + path_str += "/RecoTracker/LSTCore"; + } + return path_str; + } + + std::string get_absolute_path_after_check_file_exists(std::string const& name) { + std::filesystem::path fullpath = std::filesystem::absolute(name); + if (not std::filesystem::exists(fullpath)) { + throw std::runtime_error("Could not find the file = " + fullpath.string()); + } + return fullpath.string(); + } + + void loadMapsHost(lst::MapPLStoLayer& pLStoLayer, + lst::EndcapGeometry& endcapGeometry, + lst::TiltedGeometry& tiltedGeometry, + lst::ModuleConnectionMap& moduleConnectionMap) { + // Module orientation information (DrDz or phi angles) + auto 
endcap_geom = + get_absolute_path_after_check_file_exists(geometryDataDir() + "/data/OT800_IT615_pt0.8/endcap_orientation.bin"); + auto tilted_geom = get_absolute_path_after_check_file_exists( + geometryDataDir() + "/data/OT800_IT615_pt0.8/tilted_barrel_orientation.bin"); + // Module connection map (for line segment building) + auto mappath = get_absolute_path_after_check_file_exists( + geometryDataDir() + "/data/OT800_IT615_pt0.8/module_connection_tracing_merged.bin"); + + endcapGeometry.load(endcap_geom); + tiltedGeometry.load(tilted_geom); + moduleConnectionMap.load(mappath); + + auto pLSMapDir = geometryDataDir() + "/data/OT800_IT615_pt0.8/pixelmap/pLS_map"; + const std::array connects{ + {"_layer1_subdet5", "_layer2_subdet5", "_layer1_subdet4", "_layer2_subdet4"}}; + std::string path; + + static_assert(connects.size() == std::tuple_size>{}); + for (unsigned int i = 0; i < connects.size(); i++) { + auto connectData = connects[i].data(); + + path = pLSMapDir + connectData + ".bin"; + pLStoLayer[0][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + + path = pLSMapDir + "_pos" + connectData + ".bin"; + pLStoLayer[1][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + + path = pLSMapDir + "_neg" + connectData + ".bin"; + pLStoLayer[2][i] = lst::ModuleConnectionMap(get_absolute_path_after_check_file_exists(path)); + } + } +} // namespace + +std::unique_ptr> lst::loadAndFillESHost() { + uint16_t nModules; + uint16_t nLowerModules; + unsigned int nPixels; + MapPLStoLayer pLStoLayer; + EndcapGeometry endcapGeometry; + TiltedGeometry tiltedGeometry; + PixelMap pixelMapping; + ModuleConnectionMap moduleConnectionMap; + ::loadMapsHost(pLStoLayer, endcapGeometry, tiltedGeometry, moduleConnectionMap); + + auto endcapGeometryDev = + std::make_shared(endcapGeometry.nEndCapMap, cms::alpakatools::host()); + std::memcpy(endcapGeometryDev->view().geoMapDetId(), + endcapGeometry.geoMapDetId_buf.data(), + 
endcapGeometry.nEndCapMap * sizeof(unsigned int)); + std::memcpy(endcapGeometryDev->view().geoMapPhi(), + endcapGeometry.geoMapPhi_buf.data(), + endcapGeometry.nEndCapMap * sizeof(float)); + + auto path = + get_absolute_path_after_check_file_exists(geometryDataDir() + "/data/OT800_IT615_pt0.8/sensor_centroids.bin"); + auto modulesBuffers = lst::loadModulesFromFile(pLStoLayer, + path.c_str(), + nModules, + nLowerModules, + nPixels, + pixelMapping, + endcapGeometry, + tiltedGeometry, + moduleConnectionMap); + auto pixelMappingPtr = std::make_shared(std::move(pixelMapping)); + return std::make_unique>(nModules, + nLowerModules, + nPixels, + endcapGeometry.nEndCapMap, + std::move(modulesBuffers), + std::move(endcapGeometryDev), + pixelMappingPtr); +} diff --git a/RecoTracker/LSTCore/src/ModuleConnectionMap.cc b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc new file mode 100644 index 0000000000000..0da0f4cc4ac6f --- /dev/null +++ b/RecoTracker/LSTCore/src/ModuleConnectionMap.cc @@ -0,0 +1,108 @@ +#include "RecoTracker/LSTCore/interface/ModuleConnectionMap.h" + +#include +#include +#include +#include + +lst::ModuleConnectionMap::ModuleConnectionMap() {} + +lst::ModuleConnectionMap::ModuleConnectionMap(std::string const& filename) { load(filename); } + +void lst::ModuleConnectionMap::load(std::string const& filename) { + moduleConnections_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid, number_of_connections; + + // Read the detid and the number of connections from the binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + ifile.read(reinterpret_cast(&number_of_connections), sizeof(number_of_connections)); + + if (ifile) { + std::vector connected_detids; + connected_detids.reserve(number_of_connections); + + // Read the connections for the given detid + for (unsigned int i = 0; i < 
number_of_connections; ++i) { + unsigned int connected_detid; + ifile.read(reinterpret_cast(&connected_detid), sizeof(connected_detid)); + if (ifile) { + connected_detids.push_back(connected_detid); + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read connection data."); + } + break; // Exit loop on read failure that's not EOF + } + } + + if (ifile) { + moduleConnections_[detid] = std::move(connected_detids); + } + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read module connection binary data."); + } + } + } +} + +void lst::ModuleConnectionMap::add(std::string const& filename) { + std::ifstream ifile; + ifile.open(filename.c_str()); + std::string line; + + while (std::getline(ifile, line)) { + unsigned int detid; + int number_of_connections; + std::vector connected_detids; + unsigned int connected_detid; + + std::stringstream ss(line); + + ss >> detid >> number_of_connections; + connected_detids.reserve(number_of_connections); + + for (int ii = 0; ii < number_of_connections; ++ii) { + ss >> connected_detid; + connected_detids.push_back(connected_detid); + } + + auto& thisModuleConnections = moduleConnections_.at(detid); + + // Concatenate + thisModuleConnections.insert(thisModuleConnections.end(), connected_detids.begin(), connected_detids.end()); + + // Sort + std::sort(thisModuleConnections.begin(), thisModuleConnections.end()); + + // Unique + thisModuleConnections.erase(std::unique(thisModuleConnections.begin(), thisModuleConnections.end()), + thisModuleConnections.end()); + } +} + +void lst::ModuleConnectionMap::print() { + std::cout << "Printing ModuleConnectionMap" << std::endl; + for (auto& pair : moduleConnections_) { + unsigned int detid = pair.first; + std::vector const& connected_detids = pair.second; + std::cout << " detid: " << detid << std::endl; + for (auto& connected_detid : connected_detids) { + std::cout << " connected_detid: " << connected_detid << std::endl; + } + } +} + +const std::vector& 
// Decide whether a module is the "lower" one of its pair from the lowest bit
// of its detId: for a non-inverted module the bit being set means lower, for
// an inverted module the meaning is flipped.
//
// Declared inline because this definition lives in a header; without it every
// translation unit including the header would emit its own external definition.
inline bool parseIsLower(bool isInvertedx, unsigned int detId) {
  const bool lowBitSet = (detId & 1u) != 0;
  return isInvertedx ? !lowBitSet : lowBitSet;
}
detId + 1 : detId - 1); + } + + bool parseIsInverted(short subdet, short side, short module, short layer) { + if (subdet == Endcap) { + if (side == NegZ) { + return module % 2 == 1; + } else if (side == PosZ) { + return module % 2 == 0; + } else { + return false; + } + } else if (subdet == Barrel) { + if (side == Center) { + if (layer <= 3) { + return module % 2 == 1; + } else if (layer >= 4) { + return module % 2 == 0; + } else { + return false; + } + } else if (side == NegZ or side == PosZ) { + if (layer <= 2) { + return module % 2 == 1; + } else if (layer == 3) { + return module % 2 == 0; + } else { + return false; + } + } else { + return false; + } + } else { + return false; + } + } + + inline std::tuple, + unsigned int, + std::vector, + unsigned int, + std::vector> + getConnectedPixels(uint16_t nModules, unsigned int& nPixels, PixelMap& pixelMapping, MapPLStoLayer const& pLStoLayer) { + std::vector connectedModuleDetIds; + std::vector connectedModuleDetIds_pos; + std::vector connectedModuleDetIds_neg; + + unsigned int totalSizes = 0; + unsigned int totalSizes_pos = 0; + unsigned int totalSizes_neg = 0; + for (unsigned int isuperbin = 0; isuperbin < size_superbins; isuperbin++) { + int sizes = 0; + for (auto const& mCM_pLS : pLStoLayer[0]) { + std::vector connectedModuleDetIds_pLS = + mCM_pLS.getConnectedModuleDetIds(isuperbin + size_superbins); + connectedModuleDetIds.insert( + connectedModuleDetIds.end(), connectedModuleDetIds_pLS.begin(), connectedModuleDetIds_pLS.end()); + sizes += connectedModuleDetIds_pLS.size(); + } + pixelMapping.connectedPixelsIndex[isuperbin] = totalSizes; + pixelMapping.connectedPixelsSizes[isuperbin] = sizes; + totalSizes += sizes; + + int sizes_pos = 0; + for (auto const& mCM_pLS : pLStoLayer[1]) { + std::vector connectedModuleDetIds_pLS_pos = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_pos.insert(connectedModuleDetIds_pos.end(), + connectedModuleDetIds_pLS_pos.begin(), + 
connectedModuleDetIds_pLS_pos.end()); + sizes_pos += connectedModuleDetIds_pLS_pos.size(); + } + pixelMapping.connectedPixelsIndexPos[isuperbin] = totalSizes_pos; + pixelMapping.connectedPixelsSizesPos[isuperbin] = sizes_pos; + totalSizes_pos += sizes_pos; + + int sizes_neg = 0; + for (auto const& mCM_pLS : pLStoLayer[2]) { + std::vector connectedModuleDetIds_pLS_neg = mCM_pLS.getConnectedModuleDetIds(isuperbin); + connectedModuleDetIds_neg.insert(connectedModuleDetIds_neg.end(), + connectedModuleDetIds_pLS_neg.begin(), + connectedModuleDetIds_pLS_neg.end()); + sizes_neg += connectedModuleDetIds_pLS_neg.size(); + } + pixelMapping.connectedPixelsIndexNeg[isuperbin] = totalSizes_neg; + pixelMapping.connectedPixelsSizesNeg[isuperbin] = sizes_neg; + totalSizes_neg += sizes_neg; + } + + nPixels = totalSizes + totalSizes_pos + totalSizes_neg; + + return {totalSizes, + connectedModuleDetIds, + totalSizes_pos, + connectedModuleDetIds_pos, + totalSizes_neg, + connectedModuleDetIds_neg}; + } + + inline void fillConnectedModuleArrayExplicit(Modules modules, + ModuleMetaData const& mmd, + ModuleConnectionMap const& moduleConnectionMap) { + Params_Modules::ArrayU16xMaxConnected* moduleMap = modules.moduleMap(); + uint16_t* nConnectedModules = modules.nConnectedModules(); + + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { + unsigned int detId = it->first; + uint16_t index = it->second; + auto& connectedModules = moduleConnectionMap.getConnectedModuleDetIds(detId); + nConnectedModules[index] = connectedModules.size(); + for (uint16_t i = 0; i < nConnectedModules[index]; i++) { + moduleMap[index][i] = mmd.detIdToIndex.at(connectedModules[i]); + } + } + } + + inline void fillMapArraysExplicit(Modules modules, ModuleMetaData const& mmd) { + uint16_t* mapIdx = modules.mapIdx(); + unsigned int* mapdetId = modules.mapdetId(); + + unsigned int counter = 0; + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); ++it) { + unsigned int 
detId = it->first; + unsigned int index = it->second; + mapIdx[counter] = index; + mapdetId[counter] = detId; + counter++; + } + } + + inline void setDerivedQuantities(unsigned int detId, + unsigned short& layer, + unsigned short& ring, + unsigned short& rod, + unsigned short& module, + unsigned short& subdet, + unsigned short& side, + float m_x, + float m_y, + float m_z, + float& eta, + float& r) { + subdet = (detId & (7 << 25)) >> 25; + side = (subdet == Endcap) ? (detId & (3 << 23)) >> 23 : (detId & (3 << 18)) >> 18; + layer = (subdet == Endcap) ? (detId & (7 << 18)) >> 18 : (detId & (7 << 20)) >> 20; + ring = (subdet == Endcap) ? (detId & (15 << 12)) >> 12 : 0; + module = (detId & (127 << 2)) >> 2; + rod = (subdet == Endcap) ? 0 : (detId & (127 << 10)) >> 10; + + r = std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z); + eta = ((m_z > 0) - (m_z < 0)) * std::acosh(r / std::sqrt(m_x * m_x + m_y * m_y)); + } + + inline void loadCentroidsFromFile(const char* filePath, ModuleMetaData& mmd, uint16_t& nModules) { + std::ifstream ifile(filePath, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + std::string(filePath)); + } + + uint16_t counter = 0; + while (!ifile.eof()) { + unsigned int temp_detId; + float module_x, module_y, module_z; + int module_type; + + ifile.read(reinterpret_cast(&temp_detId), sizeof(temp_detId)); + ifile.read(reinterpret_cast(&module_x), sizeof(module_x)); + ifile.read(reinterpret_cast(&module_y), sizeof(module_y)); + ifile.read(reinterpret_cast(&module_z), sizeof(module_z)); + ifile.read(reinterpret_cast(&module_type), sizeof(module_type)); + + if (ifile) { + mmd.detIdToIndex[temp_detId] = counter; + mmd.module_x[temp_detId] = module_x; + mmd.module_y[temp_detId] = module_y; + mmd.module_z[temp_detId] = module_z; + mmd.module_type[temp_detId] = module_type; + counter++; + } else { + if (!ifile.eof()) { + throw std::runtime_error("Failed to read data for detId: " + std::to_string(temp_detId)); + } + } + } 
+ + mmd.detIdToIndex[1] = counter; //pixel module is the last module in the module list + counter++; + nModules = counter; + } + + inline std::shared_ptr loadModulesFromFile(MapPLStoLayer const& pLStoLayer, + const char* moduleMetaDataFilePath, + uint16_t& nModules, + uint16_t& nLowerModules, + unsigned int& nPixels, + PixelMap& pixelMapping, + const EndcapGeometry& endcapGeometry, + const TiltedGeometry& tiltedGeometry, + const ModuleConnectionMap& moduleConnectionMap) { + ModuleMetaData mmd; + + loadCentroidsFromFile(moduleMetaDataFilePath, mmd, nModules); + + // TODO: this whole section could use some refactoring + auto [totalSizes, + connectedModuleDetIds, + totalSizes_pos, + connectedModuleDetIds_pos, + totalSizes_neg, + connectedModuleDetIds_neg] = getConnectedPixels(nModules, nPixels, pixelMapping, pLStoLayer); + + std::array const modules_sizes{{static_cast(nModules), static_cast(nPixels)}}; + + auto modulesHC = std::make_shared(modules_sizes, cms::alpakatools::host()); + + auto modules_view = modulesHC->view(); + + // Getting the underlying data pointers + unsigned int* host_detIds = modules_view.detIds(); + short* host_layers = modules_view.layers(); + short* host_rings = modules_view.rings(); + short* host_rods = modules_view.rods(); + short* host_modules = modules_view.modules(); + short* host_subdets = modules_view.subdets(); + short* host_sides = modules_view.sides(); + float* host_eta = modules_view.eta(); + float* host_r = modules_view.r(); + bool* host_isInverted = modules_view.isInverted(); + bool* host_isLower = modules_view.isLower(); + bool* host_isAnchor = modules_view.isAnchor(); + ModuleType* host_moduleType = modules_view.moduleType(); + ModuleLayerType* host_moduleLayerType = modules_view.moduleLayerType(); + float* host_dxdys = modules_view.dxdys(); + float* host_drdzs = modules_view.drdzs(); + uint16_t* host_nModules = &modules_view.nModules(); + uint16_t* host_nLowerModules = &modules_view.nLowerModules(); + uint16_t* 
host_partnerModuleIndices = modules_view.partnerModuleIndices(); + int* host_lstLayers = modules_view.lstLayers(); + + //reassign detIdToIndex indices here + nLowerModules = (nModules - 1) / 2; + uint16_t lowerModuleCounter = 0; + uint16_t upperModuleCounter = nLowerModules + 1; + //0 to nLowerModules - 1 => only lower modules, nLowerModules - pixel module, nLowerModules + 1 to nModules => upper modules + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); it++) { + unsigned int detId = it->first; + float m_x = mmd.module_x[detId]; + float m_y = mmd.module_y[detId]; + float m_z = mmd.module_z[detId]; + unsigned int m_t = mmd.module_type[detId]; + + float eta, r; + + uint16_t index; + unsigned short layer, ring, rod, module, subdet, side; + bool isInverted, isLower; + if (detId == 1) { + layer = 0; + ring = 0; + rod = 0; + module = 0; + subdet = 0; + side = 0; + isInverted = false; + isLower = false; + eta = 0; + r = 0; + } else { + setDerivedQuantities(detId, layer, ring, rod, module, subdet, side, m_x, m_y, m_z, eta, r); + isInverted = parseIsInverted(subdet, side, module, layer); + isLower = parseIsLower(isInverted, detId); + } + if (isLower) { + index = lowerModuleCounter; + lowerModuleCounter++; + } else if (detId != 1) { + index = upperModuleCounter; + upperModuleCounter++; + } else { + index = nLowerModules; //pixel + } + //reassigning indices! + mmd.detIdToIndex[detId] = index; + host_detIds[index] = detId; + host_layers[index] = layer; + host_rings[index] = ring; + host_rods[index] = rod; + host_modules[index] = module; + host_subdets[index] = subdet; + host_sides[index] = side; + host_eta[index] = eta; + host_r[index] = r; + host_isInverted[index] = isInverted; + host_isLower[index] = isLower; + + //assigning other variables! 
+ if (detId == 1) { + host_moduleType[index] = PixelModule; + host_moduleLayerType[index] = lst::InnerPixelLayer; + host_dxdys[index] = 0; + host_drdzs[index] = 0; + host_isAnchor[index] = false; + } else { + host_moduleType[index] = (m_t == 25 ? lst::TwoS : lst::PS); + host_moduleLayerType[index] = (m_t == 23 ? lst::Pixel : lst::Strip); + + if (host_moduleType[index] == lst::PS and host_moduleLayerType[index] == lst::Pixel) { + host_isAnchor[index] = true; + } else if (host_moduleType[index] == lst::TwoS and host_isLower[index]) { + host_isAnchor[index] = true; + } else { + host_isAnchor[index] = false; + } + + host_dxdys[index] = (subdet == Endcap) ? endcapGeometry.getdxdy_slope(detId) : tiltedGeometry.getDxDy(detId); + host_drdzs[index] = (subdet == Barrel) ? tiltedGeometry.getDrDz(detId) : 0; + } + + host_lstLayers[index] = + layer + 6 * (subdet == lst::Endcap) + 5 * (subdet == lst::Endcap and host_moduleType[index] == lst::TwoS); + } + + //partner module stuff, and slopes and drdz move around + for (auto it = mmd.detIdToIndex.begin(); it != mmd.detIdToIndex.end(); it++) { + auto& detId = it->first; + auto& index = it->second; + if (detId != 1) { + host_partnerModuleIndices[index] = + mmd.detIdToIndex[parsePartnerModuleId(detId, host_isLower[index], host_isInverted[index])]; + //add drdz and slope importing stuff here! 
+ if (host_drdzs[index] == 0) { + host_drdzs[index] = host_drdzs[host_partnerModuleIndices[index]]; + } + if (host_dxdys[index] == 0) { + host_dxdys[index] = host_dxdys[host_partnerModuleIndices[index]]; + } + } + } + + *host_nModules = nModules; + *host_nLowerModules = nLowerModules; + + // Fill pixel part + pixelMapping.pixelModuleIndex = mmd.detIdToIndex.at(1); + + auto modulesPixel_view = modulesHC->view(); + auto connectedPixels = + cms::alpakatools::make_host_view(modulesPixel_view.connectedPixels(), modulesPixel_view.metadata().size()); + for (unsigned int icondet = 0; icondet < totalSizes; icondet++) { + connectedPixels[icondet] = mmd.detIdToIndex.at(connectedModuleDetIds[icondet]); + } + for (unsigned int icondet = 0; icondet < totalSizes_pos; icondet++) { + connectedPixels[icondet + totalSizes] = mmd.detIdToIndex.at(connectedModuleDetIds_pos[icondet]); + } + for (unsigned int icondet = 0; icondet < totalSizes_neg; icondet++) { + connectedPixels[icondet + totalSizes + totalSizes_pos] = mmd.detIdToIndex.at(connectedModuleDetIds_neg[icondet]); + } + + fillConnectedModuleArrayExplicit(modules_view, mmd, moduleConnectionMap); + fillMapArraysExplicit(modules_view, mmd); + + return modulesHC; + } +} // namespace lst +#endif diff --git a/RecoTracker/LSTCore/src/TiltedGeometry.cc b/RecoTracker/LSTCore/src/TiltedGeometry.cc new file mode 100644 index 0000000000000..d65a9a4a5f7b9 --- /dev/null +++ b/RecoTracker/LSTCore/src/TiltedGeometry.cc @@ -0,0 +1,48 @@ +#include "RecoTracker/LSTCore/interface/TiltedGeometry.h" + +#include +#include +#include +#include + +lst::TiltedGeometry::TiltedGeometry(std::string const& filename) { load(filename); } + +void lst::TiltedGeometry::load(std::string const& filename) { + drdzs_.clear(); + dxdys_.clear(); + + std::ifstream ifile(filename, std::ios::binary); + if (!ifile.is_open()) { + throw std::runtime_error("Unable to open file: " + filename); + } + + while (!ifile.eof()) { + unsigned int detid; + float drdz, dxdy; + + // Read 
the detid, drdz, and dxdy from binary file + ifile.read(reinterpret_cast(&detid), sizeof(detid)); + ifile.read(reinterpret_cast(&drdz), sizeof(drdz)); + ifile.read(reinterpret_cast(&dxdy), sizeof(dxdy)); + + if (ifile) { + drdzs_[detid] = drdz; + dxdys_[detid] = dxdy; + } else { + // End of file or read failed + if (!ifile.eof()) { + throw std::runtime_error("Failed to read Tilted Geometry binary data."); + } + } + } +} + +float lst::TiltedGeometry::getDrDz(unsigned int detid) const { + auto res = drdzs_.find(detid); + return res == drdzs_.end() ? 0.f : res->second; +} + +float lst::TiltedGeometry::getDxDy(unsigned int detid) const { + auto res = dxdys_.find(detid); + return res == dxdys_.end() ? 0.f : res->second; +} diff --git a/RecoTracker/LSTCore/src/alpaka/Hit.h b/RecoTracker/LSTCore/src/alpaka/Hit.h new file mode 100644 index 0000000000000..166be95cb432f --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Hit.h @@ -0,0 +1,164 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Hit_h +#define RecoTracker_LSTCore_src_alpaka_Hit_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/HitsDeviceCollection.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float eta(TAcc const& acc, float x, float y, float z) { + float r3 = alpaka::math::sqrt(acc, x * x + y * y + z * z); + float rt = alpaka::math::sqrt(acc, x * x + y * y); + float eta = ((z > 0) - (z < 0)) * alpaka::math::acosh(acc, r3 / rt); + return eta; + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi_mpi_pi(TAcc const& acc, float x) { + if (alpaka::math::abs(acc, x) <= kPi) + return x; + + constexpr float o2pi = 1.f / (2.f * kPi); + float n = alpaka::math::round(acc, x * o2pi); + return x - n * float(2.f * kPi); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float phi(TAcc const& acc, float x, float y) { + return phi_mpi_pi(acc, kPi + 
alpaka::math::atan2(acc, -y, -x)); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhi(TAcc const& acc, float x1, float y1, float x2, float y2) { + float phi1 = phi(acc, x1, y1); + float phi2 = phi(acc, x2, y2); + return phi_mpi_pi(acc, (phi2 - phi1)); + } + + template + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE float deltaPhiChange(TAcc const& acc, float x1, float y1, float x2, float y2) { + return deltaPhi(acc, x1, y1, x2 - x1, y2 - y1); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float calculate_dPhi(float phi1, float phi2) { + // Calculate dPhi + float dPhi = phi1 - phi2; + + // Normalize dPhi to be between -pi and pi + if (dPhi > kPi) { + dPhi -= 2 * kPi; + } else if (dPhi < -kPi) { + dPhi += 2 * kPi; + } + + return dPhi; + } + + ALPAKA_FN_HOST_ACC ALPAKA_FN_INLINE int binary_search(const unsigned int* data, // Array that we are searching over + unsigned int search_val, // Value we want to find in data array + unsigned int ndata) // Number of elements in data array + { + unsigned int low = 0; + unsigned int high = ndata - 1; + + while (low <= high) { + unsigned int mid = (low + high) / 2; + unsigned int test_val = data[mid]; + if (test_val == search_val) + return mid; + else if (test_val > search_val) + high = mid - 1; + else + low = mid + 1; + } + // Couldn't find search value in array. 
+ return -1; + } + + struct ModuleRangesKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + HitsRanges hitsRanges, + int nLowerModules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int lowerIndex = globalThreadIdx[2]; lowerIndex < nLowerModules; lowerIndex += gridThreadExtent[2]) { + uint16_t upperIndex = modules.partnerModuleIndices()[lowerIndex]; + if (hitsRanges.hitRanges()[lowerIndex][0] != -1 && hitsRanges.hitRanges()[upperIndex][0] != -1) { + hitsRanges.hitRangesLower()[lowerIndex] = hitsRanges.hitRanges()[lowerIndex][0]; + hitsRanges.hitRangesUpper()[lowerIndex] = hitsRanges.hitRanges()[upperIndex][0]; + hitsRanges.hitRangesnLower()[lowerIndex] = + hitsRanges.hitRanges()[lowerIndex][1] - hitsRanges.hitRanges()[lowerIndex][0] + 1; + hitsRanges.hitRangesnUpper()[lowerIndex] = + hitsRanges.hitRanges()[upperIndex][1] - hitsRanges.hitRanges()[upperIndex][0] + 1; + } + } + } + }; + + struct HitLoopKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t Endcap, // Integer corresponding to endcap in module subdets + uint16_t TwoS, // Integer corresponding to TwoS in moduleType + unsigned int nModules, // Number of modules + unsigned int nEndCapMap, // Number of elements in endcap map + EndcapGeometryDevConst endcapGeometry, + ModulesConst modules, + Hits hits, + HitsRanges hitsRanges, + unsigned int nHits) const // Total number of hits in event + { + auto geoMapDetId = endcapGeometry.geoMapDetId(); // DetId's from endcap map + auto geoMapPhi = endcapGeometry.geoMapPhi(); // Phi values from endcap map + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + for (unsigned int ihit = globalThreadIdx[2]; ihit < nHits; ihit += gridThreadExtent[2]) { + float ihit_x = hits.xs()[ihit]; + float ihit_y = hits.ys()[ihit]; + float ihit_z = hits.zs()[ihit]; + int iDetId = 
hits.detid()[ihit]; + + hits.rts()[ihit] = alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y); + hits.phis()[ihit] = phi(acc, ihit_x, ihit_y); + hits.etas()[ihit] = + ((ihit_z > 0) - (ihit_z < 0)) * + alpaka::math::acosh( + acc, alpaka::math::sqrt(acc, ihit_x * ihit_x + ihit_y * ihit_y + ihit_z * ihit_z) / hits.rts()[ihit]); + int found_index = binary_search(modules.mapdetId(), iDetId, nModules); + uint16_t lastModuleIndex = modules.mapIdx()[found_index]; + + hits.moduleIndices()[ihit] = lastModuleIndex; + + if (modules.subdets()[lastModuleIndex] == Endcap && modules.moduleType()[lastModuleIndex] == TwoS) { + found_index = binary_search(geoMapDetId, iDetId, nEndCapMap); + float phi = geoMapPhi[found_index]; + float cos_phi = alpaka::math::cos(acc, phi); + hits.highEdgeXs()[ihit] = ihit_x + 2.5f * cos_phi; + hits.lowEdgeXs()[ihit] = ihit_x - 2.5f * cos_phi; + float sin_phi = alpaka::math::sin(acc, phi); + hits.highEdgeYs()[ihit] = ihit_y + 2.5f * sin_phi; + hits.lowEdgeYs()[ihit] = ihit_y - 2.5f * sin_phi; + } + // Need to set initial value if index hasn't been seen before. + int old = alpaka::atomicCas(acc, + &(hitsRanges.hitRanges()[lastModuleIndex][0]), + -1, + static_cast(ihit), + alpaka::hierarchy::Threads{}); + // For subsequent visits, stores the min value. 
+ if (old != -1) + alpaka::atomicMin( + acc, &hitsRanges.hitRanges()[lastModuleIndex][0], static_cast(ihit), alpaka::hierarchy::Threads{}); + + alpaka::atomicMax( + acc, &hitsRanges.hitRanges()[lastModuleIndex][1], static_cast(ihit), alpaka::hierarchy::Threads{}); + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Kernels.h b/RecoTracker/LSTCore/src/alpaka/Kernels.h new file mode 100644 index 0000000000000..c642f2427fa84 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Kernels.h @@ -0,0 +1,421 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Kernels_h +#define RecoTracker_LSTCore_src_alpaka_Kernels_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmQuintupletFromMemory(Quintuplets quintuplets, + unsigned int quintupletIndex, + bool secondpass = false) { + quintuplets.isDup()[quintupletIndex] |= 1 + secondpass; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelTripletFromMemory(PixelTriplets pixelTriplets, + unsigned int pixelTripletIndex) { + pixelTriplets.isDup()[pixelTripletIndex] = true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelQuintupletFromMemory(PixelQuintuplets pixelQuintuplets, + unsigned int pixelQuintupletIndex) { + pixelQuintuplets.isDup()[pixelQuintupletIndex] = true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void rmPixelSegmentFromMemory(SegmentsPixel segmentsPixel, + unsigned int pixelSegmentArrayIndex, + bool secondpass = 
false) { + segmentsPixel.isDup()[pixelSegmentArrayIndex] |= 1 + secondpass; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitsT5(unsigned int ix, unsigned int jx, QuintupletsConst quintuplets) { + unsigned int hits1[Params_T5::kHits]; + unsigned int hits2[Params_T5::kHits]; + + for (int i = 0; i < Params_T5::kHits; i++) { + hits1[i] = quintuplets.hitIndices()[ix][i]; + hits2[i] = quintuplets.hitIndices()[jx][i]; + } + + int nMatched = 0; + for (int i = 0; i < Params_T5::kHits; i++) { + bool matched = false; + for (int j = 0; j < Params_T5::kHits; j++) { + if (hits1[i] == hits2[j]) { + matched = true; + break; + } + } + if (matched) { + nMatched++; + } + } + return nMatched; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkHitspT5(unsigned int ix, + unsigned int jx, + PixelQuintupletsConst pixelQuintuplets) { + unsigned int hits1[Params_pT5::kHits]; + unsigned int hits2[Params_pT5::kHits]; + + for (int i = 0; i < Params_pT5::kHits; i++) { + hits1[i] = pixelQuintuplets.hitIndices()[ix][i]; + hits2[i] = pixelQuintuplets.hitIndices()[jx][i]; + } + + int nMatched = 0; + for (int i = 0; i < Params_pT5::kHits; i++) { + bool matched = false; + for (int j = 0; j < Params_pT5::kHits; j++) { + if (hits1[i] == hits2[j]) { + matched = true; + break; + } + } + if (matched) { + nMatched++; + } + } + return nMatched; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void checkHitspT3(unsigned int ix, + unsigned int jx, + PixelTripletsConst pixelTriplets, + int* matched) { + int phits1[Params_pLS::kHits]; + int phits2[Params_pLS::kHits]; + + for (int i = 0; i < Params_pLS::kHits; i++) { + phits1[i] = pixelTriplets.hitIndices()[ix][i]; + phits2[i] = pixelTriplets.hitIndices()[jx][i]; + } + + int npMatched = 0; + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + for (int j = 0; j < Params_pLS::kHits; j++) { + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) { + npMatched++; + } + } + + int hits1[Params_T3::kHits]; + int 
hits2[Params_T3::kHits]; + + for (int i = 0; i < Params_T3::kHits; i++) { + hits1[i] = pixelTriplets.hitIndices()[ix][i + 4]; // Omitting the pLS hits + hits2[i] = pixelTriplets.hitIndices()[jx][i + 4]; // Omitting the pLS hits + } + + int nMatched = 0; + for (int i = 0; i < Params_T3::kHits; i++) { + bool tmatched = false; + for (int j = 0; j < Params_T3::kHits; j++) { + if (hits1[i] == hits2[j]) { + tmatched = true; + break; + } + } + if (tmatched) { + nMatched++; + } + } + + matched[0] = npMatched; + matched[1] = nMatched; + } + + struct RemoveDupQuintupletsAfterBuild { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int lowmod = globalThreadIdx[0]; lowmod < modules.nLowerModules(); lowmod += gridThreadExtent[0]) { + unsigned int nQuintuplets_lowmod = quintupletsOccupancy.nQuintuplets()[lowmod]; + int quintupletModuleIndices_lowmod = ranges.quintupletModuleIndices()[lowmod]; + + for (unsigned int ix1 = globalThreadIdx[1]; ix1 < nQuintuplets_lowmod; ix1 += gridThreadExtent[1]) { + unsigned int ix = quintupletModuleIndices_lowmod + ix1; + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = __H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); + + for (unsigned int jx1 = globalThreadIdx[2] + ix1 + 1; jx1 < nQuintuplets_lowmod; jx1 += gridThreadExtent[2]) { + unsigned int jx = quintupletModuleIndices_lowmod + jx1; + + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); + + if (dEta > 0.1f) + continue; + + if (alpaka::math::abs(acc, dPhi) > 0.1f) + continue; 
+ + int nMatched = checkHitsT5(ix, jx, quintuplets); + const int minNHitsForDup_T5 = 7; + if (nMatched >= minNHitsForDup_T5) { + if (score_rphisum1 >= score_rphisum2) { + rmQuintupletFromMemory(quintuplets, ix); + } else { + rmQuintupletFromMemory(quintuplets, jx); + } + } + } + } + } + } + }; + + struct RemoveDupQuintupletsBeforeTC { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int lowmodIdx1 = globalThreadIdx[1]; lowmodIdx1 < ranges.nEligibleT5Modules(); + lowmodIdx1 += gridThreadExtent[1]) { + uint16_t lowmod1 = ranges.indicesOfEligibleT5Modules()[lowmodIdx1]; + unsigned int nQuintuplets_lowmod1 = quintupletsOccupancy.nQuintuplets()[lowmod1]; + if (nQuintuplets_lowmod1 == 0) + continue; + + unsigned int quintupletModuleIndices_lowmod1 = ranges.quintupletModuleIndices()[lowmod1]; + + for (unsigned int lowmodIdx2 = globalThreadIdx[2] + lowmodIdx1; lowmodIdx2 < ranges.nEligibleT5Modules(); + lowmodIdx2 += gridThreadExtent[2]) { + uint16_t lowmod2 = ranges.indicesOfEligibleT5Modules()[lowmodIdx2]; + unsigned int nQuintuplets_lowmod2 = quintupletsOccupancy.nQuintuplets()[lowmod2]; + if (nQuintuplets_lowmod2 == 0) + continue; + + unsigned int quintupletModuleIndices_lowmod2 = ranges.quintupletModuleIndices()[lowmod2]; + + for (unsigned int ix1 = 0; ix1 < nQuintuplets_lowmod1; ix1 += 1) { + unsigned int ix = quintupletModuleIndices_lowmod1 + ix1; + if (quintuplets.partOfPT5()[ix] || (quintuplets.isDup()[ix] & 1)) + continue; + + for (unsigned int jx1 = 0; jx1 < nQuintuplets_lowmod2; jx1++) { + unsigned int jx = quintupletModuleIndices_lowmod2 + jx1; + if (ix == jx) + continue; + + if (quintuplets.partOfPT5()[jx] || (quintuplets.isDup()[jx] & 1)) + continue; + + float eta1 = __H2F(quintuplets.eta()[ix]); + float phi1 = 
__H2F(quintuplets.phi()[ix]); + float score_rphisum1 = __H2F(quintuplets.score_rphisum()[ix]); + + float eta2 = __H2F(quintuplets.eta()[jx]); + float phi2 = __H2F(quintuplets.phi()[jx]); + float score_rphisum2 = __H2F(quintuplets.score_rphisum()[jx]); + + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + if (dEta > 0.1f) + continue; + + if (alpaka::math::abs(acc, dPhi) > 0.1f) + continue; + + float dR2 = dEta * dEta + dPhi * dPhi; + int nMatched = checkHitsT5(ix, jx, quintuplets); + const int minNHitsForDup_T5 = 5; + if (dR2 < 0.001f || nMatched >= minNHitsForDup_T5) { + if (score_rphisum1 > score_rphisum2) { + rmQuintupletFromMemory(quintuplets, ix, true); + } else if (score_rphisum1 < score_rphisum2) { + rmQuintupletFromMemory(quintuplets, jx, true); + } else { + rmQuintupletFromMemory(quintuplets, (ix < jx ? ix : jx), true); + } + } + } + } + } + } + } + }; + + struct RemoveDupPixelTripletsFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelTriplets pixelTriplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int ix = globalThreadIdx[1]; ix < pixelTriplets.nPixelTriplets(); ix += gridThreadExtent[1]) { + for (unsigned int jx = globalThreadIdx[2]; jx < pixelTriplets.nPixelTriplets(); jx += gridThreadExtent[2]) { + if (ix == jx) + continue; + + int nMatched[2]; + checkHitspT3(ix, jx, pixelTriplets, nMatched); + const int minNHitsForDup_pT3 = 5; + if ((nMatched[0] + nMatched[1]) >= minNHitsForDup_pT3) { + // Check the layers + if (pixelTriplets.logicalLayers()[jx][2] < pixelTriplets.logicalLayers()[ix][2]) { + rmPixelTripletFromMemory(pixelTriplets, ix); + break; + } else if (pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + __H2F(pixelTriplets.score()[ix]) > __H2F(pixelTriplets.score()[jx])) { + rmPixelTripletFromMemory(pixelTriplets, ix); + break; + } else if 
(pixelTriplets.logicalLayers()[ix][2] == pixelTriplets.logicalLayers()[jx][2] && + (__H2F(pixelTriplets.score()[ix]) == __H2F(pixelTriplets.score()[jx])) && (ix < jx)) { + rmPixelTripletFromMemory(pixelTriplets, ix); + break; + } + } + } + } + } + }; + + struct RemoveDupPixelQuintupletsFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, PixelQuintuplets pixelQuintuplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); + for (unsigned int ix = globalThreadIdx[1]; ix < nPixelQuintuplets; ix += gridThreadExtent[1]) { + float score1 = __H2F(pixelQuintuplets.score()[ix]); + for (unsigned int jx = globalThreadIdx[2]; jx < nPixelQuintuplets; jx += gridThreadExtent[2]) { + if (ix == jx) + continue; + + int nMatched = checkHitspT5(ix, jx, pixelQuintuplets); + float score2 = __H2F(pixelQuintuplets.score()[jx]); + const int minNHitsForDup_pT5 = 7; + if (nMatched >= minNHitsForDup_pT5) { + if (score1 > score2 or ((score1 == score2) and (ix > jx))) { + rmPixelQuintupletFromMemory(pixelQuintuplets, ix); + break; + } + } + } + } + } + }; + + struct CheckHitspLS { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, + bool secondpass) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int pixelModuleIndex = modules.nLowerModules(); + unsigned int nPixelSegments = segmentsOccupancy.nSegments()[pixelModuleIndex]; + + if (nPixelSegments > n_max_pixel_segments_per_module) + nPixelSegments = n_max_pixel_segments_per_module; + + for (unsigned int ix = globalThreadIdx[1]; ix < nPixelSegments; ix += gridThreadExtent[1]) { + if (secondpass && (!segmentsPixel.isQuad()[ix] || (segmentsPixel.isDup()[ix] & 1))) + continue; + + unsigned int 
phits1[Params_pLS::kHits]; + phits1[0] = segmentsPixel.pLSHitsIdxs()[ix].x; + phits1[1] = segmentsPixel.pLSHitsIdxs()[ix].y; + phits1[2] = segmentsPixel.pLSHitsIdxs()[ix].z; + phits1[3] = segmentsPixel.pLSHitsIdxs()[ix].w; + float eta_pix1 = segmentsPixel.eta()[ix]; + float phi_pix1 = segmentsPixel.phi()[ix]; + + for (unsigned int jx = ix + 1 + globalThreadIdx[2]; jx < nPixelSegments; jx += gridThreadExtent[2]) { + float eta_pix2 = segmentsPixel.eta()[jx]; + float phi_pix2 = segmentsPixel.phi()[jx]; + + if (alpaka::math::abs(acc, eta_pix2 - eta_pix1) > 0.1f) + continue; + + if (secondpass && (!segmentsPixel.isQuad()[jx] || (segmentsPixel.isDup()[jx] & 1))) + continue; + + int8_t quad_diff = segmentsPixel.isQuad()[ix] - segmentsPixel.isQuad()[jx]; + float score_diff = segmentsPixel.score()[ix] - segmentsPixel.score()[jx]; + // Always keep quads over trips. If they are the same, we want the object with better score + int idxToRemove; + if (quad_diff > 0) + idxToRemove = jx; + else if (quad_diff < 0) + idxToRemove = ix; + else if (score_diff < 0) + idxToRemove = jx; + else if (score_diff > 0) + idxToRemove = ix; + else + idxToRemove = ix; + + unsigned int phits2[Params_pLS::kHits]; + phits2[0] = segmentsPixel.pLSHitsIdxs()[jx].x; + phits2[1] = segmentsPixel.pLSHitsIdxs()[jx].y; + phits2[2] = segmentsPixel.pLSHitsIdxs()[jx].z; + phits2[3] = segmentsPixel.pLSHitsIdxs()[jx].w; + + int npMatched = 0; + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + for (int j = 0; j < Params_pLS::kHits; j++) { + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) { + npMatched++; + // Only one hit is enough + if (secondpass) + break; + } + } + const int minNHitsForDup_pLS = 3; + if (npMatched >= minNHitsForDup_pLS) { + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); + } + if (secondpass) { + float dEta = alpaka::math::abs(acc, eta_pix1 - eta_pix2); + float dPhi = calculate_dPhi(phi_pix1, phi_pix2); + + float dR2 = 
dEta * dEta + dPhi * dPhi; + if ((npMatched >= 1) || (dR2 < 1e-5f)) { + rmPixelSegmentFromMemory(segmentsPixel, idxToRemove, secondpass); + } + } + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/LST.cc b/RecoTracker/LSTCore/src/alpaka/LST.cc new file mode 100644 index 0000000000000..3c1638677eab2 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/LST.cc @@ -0,0 +1,414 @@ +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" + +#include "LSTEvent.h" + +using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + +#include "Math/Vector3D.h" +#include "Math/VectorUtil.h" +using XYZVector = ROOT::Math::XYZVector; + +namespace { + /* Builds a position vector from the momentum p3 and the parameters dxy and dz. With pt = p3.rho() and p = p3.r() the result is x = -dxy * p3.y() / pt - p3.x() * p3.z() * dz / (p * p), y = dxy * p3.x() / pt - p3.y() * p3.z() * dz / (p * p), z = dz * pt * pt / (p * p). There is no guard against pt or p being zero. */ XYZVector calculateR3FromPCA(const XYZVector& p3, float dxy, float dz) { + const float pt = p3.rho(); + const float p = p3.r(); + const float vz = dz * pt * pt / p / p; + + const float vx = -dxy * p3.y() / pt - p3.x() / p * p3.z() / p * dz; + const float vy = dxy * p3.x() / pt - p3.y() / p * p3.z() / p * dz; + return {vx, vy, vz}; + } + + using namespace ALPAKA_ACCELERATOR_NAMESPACE::lst; + /* Returns the hit indices of one track candidate. The number of entries read from tcHitIndices is the kHits constant of the matching Params_ type for pT5, pT3, T5 and pLS, and 0 for any other type (empty result). For pLS the stored value is used directly; for the other types it is used as an index into hitIndices. For every type except T5, a 4th entry equal to the 3rd one is dropped. */ std::vector getHitIdxs(short trackCandidateType, + Params_pT5::ArrayUxHits const& tcHitIndices, + unsigned int const* hitIndices) { + std::vector hits; + + unsigned int maxNHits = 0; + if (trackCandidateType == LSTObjType::pT5) + maxNHits = Params_pT5::kHits; + else if (trackCandidateType == LSTObjType::pT3) + maxNHits = Params_pT3::kHits; + else if (trackCandidateType == LSTObjType::T5) + maxNHits = Params_T5::kHits; + else if (trackCandidateType == LSTObjType::pLS) + maxNHits = Params_pLS::kHits; + + for (unsigned int i = 0; i < maxNHits; i++) { + unsigned int hitIdxDev = tcHitIndices[i]; + unsigned int hitIdx = + (trackCandidateType == LSTObjType::pLS) + ? hitIdxDev + : hitIndices[hitIdxDev]; // Hit indices are stored differently in the standalone for pLS. + + // For p objects, the 3rd and 4th hit may be the same, + // due to the way pLS hits are stored in the standalone.
+ // This is because pixel seeds can be either triplets or quadruplets. + if (trackCandidateType != LSTObjType::T5 && hits.size() == 3 && + hits.back() == hitIdx) // Remove duplicate 4th hits. + continue; + + hits.push_back(hitIdx); + } + + return hits; + } + +} // namespace + +/* Fills the in_ member vectors from the seed (see_) and hit (ph2_) inputs. All in_ vectors are cleared first. The ph2 positions and detIds are copied as they are and in_hitIdxs_ is filled with 0, 1, 2, ... for them. For every seed with ptIn > 0.8 - 2 * ptErr (ptIn being the rho of the see_stateTrajGlbP momentum), three entries are appended to the hit vectors after the ph2 hits, or four when see_hitIdx[iSeed] has more than 3 entries, and one entry is appended to each per-seed vector (momentum, pt, eta, phi, charge, seed index, superbin, pixel type, isQuad). NOTE(review): in_superbin_vec_, in_pixelType_vec_ and in_isQuad_vec_ are not reserved like the other per-seed vectors, and see_hitIdx[iSeed] is read at positions 0..2 without a size check - confirm that callers always provide at least 3 hits per seed. */ void LST::prepareInput(std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z) { + in_trkX_.clear(); + in_trkY_.clear(); + in_trkZ_.clear(); + in_hitId_.clear(); + in_hitIdxs_.clear(); + in_hitIndices_vec0_.clear(); + in_hitIndices_vec1_.clear(); + in_hitIndices_vec2_.clear(); + in_hitIndices_vec3_.clear(); + in_deltaPhi_vec_.clear(); + in_ptIn_vec_.clear(); + in_ptErr_vec_.clear(); + in_px_vec_.clear(); + in_py_vec_.clear(); + in_pz_vec_.clear(); + in_eta_vec_.clear(); + in_etaErr_vec_.clear(); + in_phi_vec_.clear(); + in_charge_vec_.clear(); + in_seedIdx_vec_.clear(); + in_superbin_vec_.clear(); + in_pixelType_vec_.clear(); + in_isQuad_vec_.clear(); + + unsigned int count = 0; + auto n_see = see_stateTrajGlbPx.size(); + in_px_vec_.reserve(n_see); + in_py_vec_.reserve(n_see); + in_pz_vec_.reserve(n_see); + in_hitIndices_vec0_.reserve(n_see); + in_hitIndices_vec1_.reserve(n_see); + in_hitIndices_vec2_.reserve(n_see); + in_hitIndices_vec3_.reserve(n_see); + in_ptIn_vec_.reserve(n_see); + in_ptErr_vec_.reserve(n_see); + in_etaErr_vec_.reserve(n_see); + in_eta_vec_.reserve(n_see); + in_phi_vec_.reserve(n_see); +
in_charge_vec_.reserve(n_see); + in_seedIdx_vec_.reserve(n_see); + in_deltaPhi_vec_.reserve(n_see); + in_trkX_ = ph2_x; + in_trkY_ = ph2_y; + in_trkZ_ = ph2_z; + in_hitId_ = ph2_detId; + in_hitIdxs_.resize(ph2_detId.size()); + + std::iota(in_hitIdxs_.begin(), in_hitIdxs_.end(), 0); + const int hit_size = in_trkX_.size(); + + for (size_t iSeed = 0; iSeed < n_see; iSeed++) { + XYZVector p3LH(see_stateTrajGlbPx[iSeed], see_stateTrajGlbPy[iSeed], see_stateTrajGlbPz[iSeed]); + float ptIn = p3LH.rho(); + float eta = p3LH.eta(); + float ptErr = see_ptErr[iSeed]; + + if ((ptIn > 0.8 - 2 * ptErr)) { + XYZVector r3LH(see_stateTrajGlbX[iSeed], see_stateTrajGlbY[iSeed], see_stateTrajGlbZ[iSeed]); + XYZVector p3PCA(see_px[iSeed], see_py[iSeed], see_pz[iSeed]); + XYZVector r3PCA(calculateR3FromPCA(p3PCA, see_dxy[iSeed], see_dz[iSeed])); + + // The charge could be used directly in the line below + float pixelSegmentDeltaPhiChange = ROOT::Math::VectorUtil::DeltaPhi(p3LH, r3LH); + float etaErr = see_etaErr[iSeed]; + float px = p3LH.x(); + float py = p3LH.y(); + float pz = p3LH.z(); + + int charge = see_q[iSeed]; + PixelType pixtype = PixelType::kInvalid; + + if (ptIn >= 2.0) + pixtype = PixelType::kHighPt; + else if (ptIn >= (0.8 - 2 * ptErr) and ptIn < 2.0) { + if (pixelSegmentDeltaPhiChange >= 0) + pixtype = PixelType::kLowPtPosCurv; + else + pixtype = PixelType::kLowPtNegCurv; + } else + continue; + + unsigned int hitIdx0 = hit_size + count; + count++; + unsigned int hitIdx1 = hit_size + count; + count++; + unsigned int hitIdx2 = hit_size + count; + count++; + unsigned int hitIdx3; + if (see_hitIdx[iSeed].size() <= 3) + hitIdx3 = hitIdx2; + else { + hitIdx3 = hit_size + count; + count++; + } + + in_trkX_.push_back(r3PCA.x()); + in_trkY_.push_back(r3PCA.y()); + in_trkZ_.push_back(r3PCA.z()); + in_trkX_.push_back(p3PCA.rho()); + float p3PCA_Eta = p3PCA.eta(); + in_trkY_.push_back(p3PCA_Eta); + float p3PCA_Phi = p3PCA.phi(); + in_trkZ_.push_back(p3PCA_Phi); + 
in_trkX_.push_back(r3LH.x()); + in_trkY_.push_back(r3LH.y()); + in_trkZ_.push_back(r3LH.z()); + in_hitId_.push_back(1); + in_hitId_.push_back(1); + in_hitId_.push_back(1); + if (see_hitIdx[iSeed].size() > 3) { + in_trkX_.push_back(r3LH.x()); + in_trkY_.push_back(see_dxy[iSeed]); + in_trkZ_.push_back(see_dz[iSeed]); + in_hitId_.push_back(1); + } + in_px_vec_.push_back(px); + in_py_vec_.push_back(py); + in_pz_vec_.push_back(pz); + + in_hitIndices_vec0_.push_back(hitIdx0); + in_hitIndices_vec1_.push_back(hitIdx1); + in_hitIndices_vec2_.push_back(hitIdx2); + in_hitIndices_vec3_.push_back(hitIdx3); + in_ptIn_vec_.push_back(ptIn); + in_ptErr_vec_.push_back(ptErr); + in_etaErr_vec_.push_back(etaErr); + in_eta_vec_.push_back(eta); + float phi = p3LH.phi(); + in_phi_vec_.push_back(phi); + in_charge_vec_.push_back(charge); + in_seedIdx_vec_.push_back(iSeed); + in_deltaPhi_vec_.push_back(pixelSegmentDeltaPhiChange); + + in_hitIdxs_.push_back(see_hitIdx[iSeed][0]); + in_hitIdxs_.push_back(see_hitIdx[iSeed][1]); + in_hitIdxs_.push_back(see_hitIdx[iSeed][2]); + char isQuad = false; + if (see_hitIdx[iSeed].size() > 3) { + isQuad = true; + in_hitIdxs_.push_back(see_hitIdx[iSeed][3]); + } + float neta = 25.; + float nphi = 72.; + float nz = 25.; + int etabin = (p3PCA_Eta + 2.6) / ((2 * 2.6) / neta); + int phibin = (p3PCA_Phi + kPi) / ((2. 
* kPi) / nphi); + int dzbin = (see_dz[iSeed] + 30) / (2 * 30 / nz); + int isuperbin = (nz * nphi) * etabin + (nz)*phibin + dzbin; + in_superbin_vec_.push_back(isuperbin); + in_pixelType_vec_.push_back(pixtype); + in_isQuad_vec_.push_back(isQuad); + } + } +} + +/* Rebuilds the out_tc_ vectors from the event. The vectors are cleared, then for every track candidate the hit-index list returned by getHitIdxs, the length of that list, the candidate's pixelSeedIndex and its type are appended. Hits are fetched without a sync; the track-candidate fetch that follows is the one that requests it. */ void LST::getOutput(LSTEvent& event) { + out_tc_hitIdxs_.clear(); + out_tc_len_.clear(); + out_tc_seedIdx_.clear(); + out_tc_trackCandidateType_.clear(); + + auto const hits = event.getHits(/*inCMSSW*/ true, /*sync*/ false); // sync on next line + auto const& trackCandidates = event.getTrackCandidates(/*inCMSSW*/ true, /*sync*/ true); + + unsigned int nTrackCandidates = trackCandidates.nTrackCandidates(); + + for (unsigned int idx = 0; idx < nTrackCandidates; idx++) { + short trackCandidateType = trackCandidates.trackCandidateType()[idx]; + std::vector hit_idx = getHitIdxs(trackCandidateType, trackCandidates.hitIndices()[idx], hits.idxs()); + + out_tc_hitIdxs_.push_back(hit_idx); + out_tc_len_.push_back(hit_idx.size()); + out_tc_seedIdx_.push_back(trackCandidates.pixelSeedIndex()[idx]); + out_tc_trackCandidateType_.push_back(trackCandidateType); + } +} + +/* Processes one event: constructs an LSTEvent on the given queue, calls prepareInput with the see_ and ph2_ inputs, adds the hits and pixel segments to the event, then creates mini-doublets, segments, triplets and quintuplets, runs pixelLineSegmentCleaning(no_pls_dupclean), creates pixel quintuplets, pixel triplets and track candidates, and finally calls getOutput. With verbose set, the queue is waited on after each creation step and object counts are printed with printf. */ void LST::run(Queue& queue, + bool verbose, + LSTESData const* deviceESData, + std::vector const& see_px, + std::vector const& see_py, + std::vector const& see_pz, + std::vector const& see_dxy, + std::vector const& see_dz, + std::vector const& see_ptErr, + std::vector const& see_etaErr, + std::vector const& see_stateTrajGlbX, + std::vector const& see_stateTrajGlbY, + std::vector const& see_stateTrajGlbZ, + std::vector const& see_stateTrajGlbPx, + std::vector const& see_stateTrajGlbPy, + std::vector const& see_stateTrajGlbPz, + std::vector const& see_q, + std::vector> const& see_hitIdx, + std::vector const& ph2_detId, + std::vector const& ph2_x, + std::vector const& ph2_y, + std::vector const& ph2_z, + bool no_pls_dupclean, + bool tc_pls_triplets) { + auto event = LSTEvent(verbose, queue, deviceESData); + prepareInput(see_px, + see_py, + see_pz, + see_dxy, +
see_dz, + see_ptErr, + see_etaErr, + see_stateTrajGlbX, + see_stateTrajGlbY, + see_stateTrajGlbZ, + see_stateTrajGlbPx, + see_stateTrajGlbPy, + see_stateTrajGlbPz, + see_q, + see_hitIdx, + ph2_detId, + ph2_x, + ph2_y, + ph2_z); + + event.addHitToEvent(in_trkX_, in_trkY_, in_trkZ_, in_hitId_, in_hitIdxs_); + event.addPixelSegmentToEvent(in_hitIndices_vec0_, + in_hitIndices_vec1_, + in_hitIndices_vec2_, + in_hitIndices_vec3_, + in_deltaPhi_vec_, + in_ptIn_vec_, + in_ptErr_vec_, + in_px_vec_, + in_py_vec_, + in_pz_vec_, + in_eta_vec_, + in_etaErr_vec_, + in_phi_vec_, + in_charge_vec_, + in_seedIdx_vec_, + in_superbin_vec_, + in_pixelType_vec_, + in_isQuad_vec_); + event.createMiniDoublets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Mini-doublets produced: %d\n", event.getNumberOfMiniDoublets()); + printf("# of Mini-doublets produced barrel layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(0)); + printf("# of Mini-doublets produced barrel layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(1)); + printf("# of Mini-doublets produced barrel layer 3: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(2)); + printf("# of Mini-doublets produced barrel layer 4: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(3)); + printf("# of Mini-doublets produced barrel layer 5: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(4)); + printf("# of Mini-doublets produced barrel layer 6: %d\n", event.getNumberOfMiniDoubletsByLayerBarrel(5)); + printf("# of Mini-doublets produced endcap layer 1: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(0)); + printf("# of Mini-doublets produced endcap layer 2: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(1)); + printf("# of Mini-doublets produced endcap layer 3: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(2)); + printf("# of Mini-doublets produced endcap layer 4: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(3)); + printf("# of Mini-doublets produced 
endcap layer 5: %d\n", event.getNumberOfMiniDoubletsByLayerEndcap(4)); + } + + event.createSegmentsWithModuleMap(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Segments produced: %d\n", event.getNumberOfSegments()); + printf("# of Segments produced layer 1-2: %d\n", event.getNumberOfSegmentsByLayerBarrel(0)); + printf("# of Segments produced layer 2-3: %d\n", event.getNumberOfSegmentsByLayerBarrel(1)); + printf("# of Segments produced layer 3-4: %d\n", event.getNumberOfSegmentsByLayerBarrel(2)); + printf("# of Segments produced layer 4-5: %d\n", event.getNumberOfSegmentsByLayerBarrel(3)); + printf("# of Segments produced layer 5-6: %d\n", event.getNumberOfSegmentsByLayerBarrel(4)); + printf("# of Segments produced endcap layer 1: %d\n", event.getNumberOfSegmentsByLayerEndcap(0)); + printf("# of Segments produced endcap layer 2: %d\n", event.getNumberOfSegmentsByLayerEndcap(1)); + printf("# of Segments produced endcap layer 3: %d\n", event.getNumberOfSegmentsByLayerEndcap(2)); + printf("# of Segments produced endcap layer 4: %d\n", event.getNumberOfSegmentsByLayerEndcap(3)); + printf("# of Segments produced endcap layer 5: %d\n", event.getNumberOfSegmentsByLayerEndcap(4)); + } + + event.createTriplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of T3s produced: %d\n", event.getNumberOfTriplets()); + printf("# of T3s produced layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerBarrel(0)); + printf("# of T3s produced layer 2-3-4: %d\n", event.getNumberOfTripletsByLayerBarrel(1)); + printf("# of T3s produced layer 3-4-5: %d\n", event.getNumberOfTripletsByLayerBarrel(2)); + printf("# of T3s produced layer 4-5-6: %d\n", event.getNumberOfTripletsByLayerBarrel(3)); + printf("# of T3s produced endcap layer 1-2-3: %d\n", event.getNumberOfTripletsByLayerEndcap(0)); + printf("# of T3s produced endcap layer 2-3-4: %d\n", 
event.getNumberOfTripletsByLayerEndcap(1)); + printf("# of T3s produced endcap layer 3-4-5: %d\n", event.getNumberOfTripletsByLayerEndcap(2)); + printf("# of T3s produced endcap layer 1: %d\n", event.getNumberOfTripletsByLayerEndcap(0)); + printf("# of T3s produced endcap layer 2: %d\n", event.getNumberOfTripletsByLayerEndcap(1)); + printf("# of T3s produced endcap layer 3: %d\n", event.getNumberOfTripletsByLayerEndcap(2)); + printf("# of T3s produced endcap layer 4: %d\n", event.getNumberOfTripletsByLayerEndcap(3)); + printf("# of T3s produced endcap layer 5: %d\n", event.getNumberOfTripletsByLayerEndcap(4)); + } + + event.createQuintuplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Quintuplets produced: %d\n", event.getNumberOfQuintuplets()); + printf("# of Quintuplets produced layer 1-2-3-4-5-6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(0)); + printf("# of Quintuplets produced layer 2: %d\n", event.getNumberOfQuintupletsByLayerBarrel(1)); + printf("# of Quintuplets produced layer 3: %d\n", event.getNumberOfQuintupletsByLayerBarrel(2)); + printf("# of Quintuplets produced layer 4: %d\n", event.getNumberOfQuintupletsByLayerBarrel(3)); + printf("# of Quintuplets produced layer 5: %d\n", event.getNumberOfQuintupletsByLayerBarrel(4)); + printf("# of Quintuplets produced layer 6: %d\n", event.getNumberOfQuintupletsByLayerBarrel(5)); + printf("# of Quintuplets produced endcap layer 1: %d\n", event.getNumberOfQuintupletsByLayerEndcap(0)); + printf("# of Quintuplets produced endcap layer 2: %d\n", event.getNumberOfQuintupletsByLayerEndcap(1)); + printf("# of Quintuplets produced endcap layer 3: %d\n", event.getNumberOfQuintupletsByLayerEndcap(2)); + printf("# of Quintuplets produced endcap layer 4: %d\n", event.getNumberOfQuintupletsByLayerEndcap(3)); + printf("# of Quintuplets produced endcap layer 5: %d\n", event.getNumberOfQuintupletsByLayerEndcap(4)); + } + + 
event.pixelLineSegmentCleaning(no_pls_dupclean); + + event.createPixelQuintuplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Pixel Quintuplets produced: %d\n", event.getNumberOfPixelQuintuplets()); + } + + event.createPixelTriplets(); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of Pixel T3s produced: %d\n", event.getNumberOfPixelTriplets()); + } + + event.createTrackCandidates(no_pls_dupclean, tc_pls_triplets); + if (verbose) { + alpaka::wait(queue); // event calls are asynchronous: wait before printing + printf("# of TrackCandidates produced: %d\n", event.getNumberOfTrackCandidates()); + printf(" # of Pixel TrackCandidates produced: %d\n", event.getNumberOfPixelTrackCandidates()); + printf(" # of pT5 TrackCandidates produced: %d\n", event.getNumberOfPT5TrackCandidates()); + printf(" # of pT3 TrackCandidates produced: %d\n", event.getNumberOfPT3TrackCandidates()); + printf(" # of pLS TrackCandidates produced: %d\n", event.getNumberOfPLSTrackCandidates()); + printf(" # of T5 TrackCandidates produced: %d\n", event.getNumberOfT5TrackCandidates()); + } + + getOutput(event); +} diff --git a/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc new file mode 100644 index 0000000000000..be6c2b88b73c8 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/LSTEvent.dev.cc @@ -0,0 +1,1680 @@ +#include "HeterogeneousCore/AlpakaInterface/interface/memory.h" + +#include "LSTEvent.h" + +#include "MiniDoublet.h" +#include "PixelQuintuplet.h" +#include "PixelTriplet.h" +#include "Quintuplet.h" +#include "Segment.h" +#include "TrackCandidate.h" +#include "Triplet.h" + +using Device = ALPAKA_ACCELERATOR_NAMESPACE::Device; +using Queue = ALPAKA_ACCELERATOR_NAMESPACE::Queue; +using Acc1D = ALPAKA_ACCELERATOR_NAMESPACE::Acc1D; +using Acc3D = ALPAKA_ACCELERATOR_NAMESPACE::Acc3D; + +using namespace 
ALPAKA_ACCELERATOR_NAMESPACE::lst; + +/* Waits on queue_, then zeroes the per-layer counters for mini-doublets, segments, triplets and quintuplets (6 barrel entries and 5 endcap entries each). */ void LSTEvent::initSync() { + alpaka::wait(queue_); // other calls can be asynchronous + + //reset the arrays + for (int i = 0; i < 6; i++) { + n_minidoublets_by_layer_barrel_[i] = 0; + n_segments_by_layer_barrel_[i] = 0; + n_triplets_by_layer_barrel_[i] = 0; + n_quintuplets_by_layer_barrel_[i] = 0; + if (i < 5) { + n_minidoublets_by_layer_endcap_[i] = 0; + n_segments_by_layer_endcap_[i] = 0; + n_triplets_by_layer_endcap_[i] = 0; + n_quintuplets_by_layer_endcap_[i] = 0; + } + } +} + +/* Waits on queue_, zeroes the same per-layer counters as initSync, and then calls reset() on every DC_ and HC_ member listed below. */ void LSTEvent::resetEventSync() { + alpaka::wait(queue_); // synchronize to reset consistently + //reset the arrays + for (int i = 0; i < 6; i++) { + n_minidoublets_by_layer_barrel_[i] = 0; + n_segments_by_layer_barrel_[i] = 0; + n_triplets_by_layer_barrel_[i] = 0; + n_quintuplets_by_layer_barrel_[i] = 0; + if (i < 5) { + n_minidoublets_by_layer_endcap_[i] = 0; + n_segments_by_layer_endcap_[i] = 0; + n_triplets_by_layer_endcap_[i] = 0; + n_quintuplets_by_layer_endcap_[i] = 0; + } + } + hitsDC_.reset(); + miniDoubletsDC_.reset(); + rangesDC_.reset(); + segmentsDC_.reset(); + tripletsDC_.reset(); + quintupletsDC_.reset(); + trackCandidatesDC_.reset(); + pixelTripletsDC_.reset(); + pixelQuintupletsDC_.reset(); + + hitsHC_.reset(); + rangesHC_.reset(); + miniDoubletsHC_.reset(); + segmentsHC_.reset(); + tripletsHC_.reset(); + quintupletsHC_.reset(); + pixelTripletsHC_.reset(); + pixelQuintupletsHC_.reset(); + trackCandidatesHC_.reset(); + modulesHC_.reset(); +} + +/* Adds the hits of one event. The hit count is x.size(); y, z, detId and idxInNtuple are copied with that same count. hitsDC_ and rangesDC_ are created if they do not exist yet, with the hit-range columns and the ranges buffer memset to 0xff. The five input vectors are copied into the hits view, queue_ is waited on, and HitLoopKernel followed by ModuleRangesKernel are enqueued. */ void LSTEvent::addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple) { + // Use the actual number of hits instead of a max. + unsigned int nHits = x.size(); + + // Initialize space on device/host for next event.
+ if (!hitsDC_) { + std::array const hits_sizes{{static_cast(nHits), static_cast(nModules_)}}; + hitsDC_.emplace(hits_sizes, queue_); + + auto hitsRanges = hitsDC_->view(); + auto hitRanges_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRanges(), hitsRanges.metadata().size()); + auto hitRangesLower_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesLower(), hitsRanges.metadata().size()); + auto hitRangesUpper_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesUpper(), hitsRanges.metadata().size()); + auto hitRangesnLower_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesnLower(), hitsRanges.metadata().size()); + auto hitRangesnUpper_view = + cms::alpakatools::make_device_view(queue_, hitsRanges.hitRangesnUpper(), hitsRanges.metadata().size()); + alpaka::memset(queue_, hitRanges_view, 0xff); + alpaka::memset(queue_, hitRangesLower_view, 0xff); + alpaka::memset(queue_, hitRangesUpper_view, 0xff); + alpaka::memset(queue_, hitRangesnLower_view, 0xff); + alpaka::memset(queue_, hitRangesnUpper_view, 0xff); + } + + if (!rangesDC_) { + rangesDC_.emplace(nLowerModules_ + 1, queue_); + auto buf = rangesDC_->buffer(); + alpaka::memset(queue_, buf, 0xff); + } + + // Copy the host arrays to the GPU. 
+ auto hits = hitsDC_->view(); + auto xs_d = cms::alpakatools::make_device_view(queue_, hits.xs(), (Idx)hits.metadata().size()); + auto ys_d = cms::alpakatools::make_device_view(queue_, hits.ys(), (Idx)hits.metadata().size()); + auto zs_d = cms::alpakatools::make_device_view(queue_, hits.zs(), (Idx)hits.metadata().size()); + auto detId_d = cms::alpakatools::make_device_view(queue_, hits.detid(), (Idx)hits.metadata().size()); + auto idxs_d = cms::alpakatools::make_device_view(queue_, hits.idxs(), (Idx)hits.metadata().size()); + alpaka::memcpy(queue_, xs_d, x, (Idx)nHits); + alpaka::memcpy(queue_, ys_d, y, (Idx)nHits); + alpaka::memcpy(queue_, zs_d, z, (Idx)nHits); + alpaka::memcpy(queue_, detId_d, detId, (Idx)nHits); + alpaka::memcpy(queue_, idxs_d, idxInNtuple, (Idx)nHits); + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory + + Vec3D const threadsPerBlock1{1, 1, 256}; + Vec3D const blocksPerGrid1{1, 1, max_blocks}; + WorkDiv3D const hit_loop_workdiv = createWorkDiv(blocksPerGrid1, threadsPerBlock1, elementsPerThread); + + alpaka::exec(queue_, + hit_loop_workdiv, + HitLoopKernel{}, + Endcap, + TwoS, + nModules_, + nEndCapMap_, + endcapGeometry_.const_view(), + modules_.const_view(), + hitsDC_->view(), + hitsDC_->view(), + nHits); + + Vec3D const threadsPerBlock2{1, 1, 256}; + Vec3D const blocksPerGrid2{1, 1, max_blocks}; + WorkDiv3D const module_ranges_workdiv = createWorkDiv(blocksPerGrid2, threadsPerBlock2, elementsPerThread); + + alpaka::exec(queue_, + module_ranges_workdiv, + ModuleRangesKernel{}, + modules_.const_view(), + hitsDC_->view(), + nLowerModules_); +} + +/* Adds the pixel segments of one event. The segment count is ptIn.size(); if it exceeds n_max_pixel_segments_per_module a warning is printed and the count is truncated to that maximum. Two mini-doublets are counted per segment (mdSize = 2 * size). miniDoubletsDC_ and segmentsDC_ are created and zeroed if they do not exist yet, the per-segment inputs are copied into the pixel-segment view and into temporary device buffers, the segment and mini-doublet counts are written at pixelMapping_.pixelModuleIndex, queue_ is waited on, and AddPixelSegmentToEventKernel is enqueued. */ void LSTEvent::addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta,
+ std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad) { + unsigned int size = ptIn.size(); + + if (size > n_max_pixel_segments_per_module) { + printf( + "*********************************************************\n" + "* Warning: Pixel line segments will be truncated. *\n" + "* You need to increase n_max_pixel_segments_per_module. *\n" + "*********************************************************\n"); + size = n_max_pixel_segments_per_module; + } + + unsigned int mdSize = 2 * size; + uint16_t pixelModuleIndex = pixelMapping_.pixelModuleIndex; + + if (!miniDoubletsDC_) { + // Create a view for the element nLowerModules_ inside rangesOccupancy->miniDoubletModuleOccupancy + auto rangesOccupancy = rangesDC_->view(); + auto dst_view_miniDoubletModuleOccupancy = + cms::alpakatools::make_device_view(queue_, rangesOccupancy.miniDoubletModuleOccupancy()[pixelModuleIndex]); + + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createMDArrayRangesGPU_workDiv, + CreateMDArrayRangesGPU{}, + modules_.const_view(), + rangesDC_->view()); + + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nTotalMDs_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalMDs()); + alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d); + alpaka::wait(queue_); // wait to get the data before manipulation + + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); + + std::array const mds_sizes{{static_cast(nTotalMDs), 
static_cast(nLowerModules_ + 1)}}; + miniDoubletsDC_.emplace(mds_sizes, queue_); + + auto mdsOccupancy = miniDoubletsDC_->view(); + auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size()); + auto totOccupancyMDs_view = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size()); + alpaka::memset(queue_, nMDs_view, 0u); + alpaka::memset(queue_, totOccupancyMDs_view, 0u); + } + if (!segmentsDC_) { + // can be optimized here: because we didn't distinguish pixel segments and outer-tracker segments and call them both "segments", so they use the index continuously. + // If we want to further study the memory footprint in detail, we can separate the two and allocate different memories to them + + WorkDiv1D const createSegmentArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createSegmentArrayRanges_workDiv, + CreateSegmentArrayRanges{}, + modules_.const_view(), + rangesDC_->view(), + miniDoubletsDC_->const_view()); + + auto rangesOccupancy = rangesDC_->view(); + auto nTotalSegments_view_h = cms::alpakatools::make_host_view(nTotalSegments_); + auto nTotalSegments_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalSegs()); + alpaka::memcpy(queue_, nTotalSegments_view_h, nTotalSegments_view_d); + alpaka::wait(queue_); // wait to get the value before manipulation + + nTotalSegments_ += n_max_pixel_segments_per_module; + + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = + cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size()); + auto totOccupancySegments_view = cms::alpakatools::make_device_view( + queue_, segmentsOccupancy.totOccupancySegments(), 
segmentsOccupancy.metadata().size()); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); + } + + auto hitIndices0_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto hitIndices1_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto hitIndices2_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto hitIndices3_dev = cms::alpakatools::make_device_buffer(queue_, size); + auto dPhiChange_dev = cms::alpakatools::make_device_buffer(queue_, size); + + alpaka::memcpy(queue_, hitIndices0_dev, hitIndices0, size); + alpaka::memcpy(queue_, hitIndices1_dev, hitIndices1, size); + alpaka::memcpy(queue_, hitIndices2_dev, hitIndices2, size); + alpaka::memcpy(queue_, hitIndices3_dev, hitIndices3, size); + alpaka::memcpy(queue_, dPhiChange_dev, dPhiChange, size); + + SegmentsPixel segmentsPixel = segmentsDC_->view(); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.ptIn(), size), ptIn, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.ptErr(), size), ptErr, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.px(), size), px, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.py(), size), py, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.pz(), size), pz, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.etaErr(), size), etaErr, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.isQuad(), size), isQuad, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.eta(), size), eta, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.phi(), size), phi, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.charge(), size), charge, size); + 
alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.seedIdx(), size), seedIdx, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.superbin(), size), superbin, size); + alpaka::memcpy(queue_, cms::alpakatools::make_device_view(queue_, segmentsPixel.pixelType(), size), pixelType, size); + + // Create source views for size and mdSize + auto src_view_size = cms::alpakatools::make_host_view(size); + auto src_view_mdSize = cms::alpakatools::make_host_view(mdSize); + + auto segmentsOccupancy = segmentsDC_->view(); + auto dst_view_segments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_segments, src_view_size); + + auto dst_view_totOccupancySegments = + cms::alpakatools::make_device_view(queue_, segmentsOccupancy.totOccupancySegments()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_totOccupancySegments, src_view_size); + + auto mdsOccupancy = miniDoubletsDC_->view(); + auto dst_view_nMDs = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_nMDs, src_view_mdSize); + + auto dst_view_totOccupancyMDs = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs()[pixelModuleIndex]); + alpaka::memcpy(queue_, dst_view_totOccupancyMDs, src_view_mdSize); + + alpaka::wait(queue_); // FIXME: remove synch after inputs refactored to be in pinned memory + + Vec3D const threadsPerBlock{1, 1, 256}; + Vec3D const blocksPerGrid{1, 1, max_blocks}; + WorkDiv3D const addPixelSegmentToEvent_workdiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + + alpaka::exec(queue_, + addPixelSegmentToEvent_workdiv, + AddPixelSegmentToEventKernel{}, + modules_.const_view(), + rangesDC_->const_view(), + hitsDC_->view(), + miniDoubletsDC_->view(), + segmentsDC_->view(), + segmentsDC_->view(), + hitIndices0_dev.data(), + hitIndices1_dev.data(), + 
hitIndices2_dev.data(), + hitIndices3_dev.data(), + dPhiChange_dev.data(), + pixelModuleIndex, + size); +} + +void LSTEvent::createMiniDoublets() { + // Create a view for the element nLowerModules_ inside rangesOccupancy->miniDoubletModuleOccupancy + auto rangesOccupancy = rangesDC_->view(); + auto dst_view_miniDoubletModuleOccupancy = + cms::alpakatools::make_device_view(queue_, rangesOccupancy.miniDoubletModuleOccupancy()[nLowerModules_]); + + // Create a host buffer for a value to be passed to the device + auto pixelMaxMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + *pixelMaxMDs_buf_h.data() = n_max_pixel_md_per_modules; + + alpaka::memcpy(queue_, dst_view_miniDoubletModuleOccupancy, pixelMaxMDs_buf_h); + + WorkDiv1D const createMDArrayRangesGPU_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createMDArrayRangesGPU_workDiv, + CreateMDArrayRangesGPU{}, + modules_.const_view(), + rangesDC_->view()); + + auto nTotalMDs_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nTotalMDs_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalMDs()); + alpaka::memcpy(queue_, nTotalMDs_buf_h, nTotalMDs_buf_d); + alpaka::wait(queue_); // wait to get the data before manipulation + + *nTotalMDs_buf_h.data() += n_max_pixel_md_per_modules; + unsigned int nTotalMDs = *nTotalMDs_buf_h.data(); + + if (!miniDoubletsDC_) { + std::array const mds_sizes{{static_cast(nTotalMDs), static_cast(nLowerModules_ + 1)}}; + miniDoubletsDC_.emplace(mds_sizes, queue_); + + auto mdsOccupancy = miniDoubletsDC_->view(); + auto nMDs_view = cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), mdsOccupancy.metadata().size()); + auto totOccupancyMDs_view = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.totOccupancyMDs(), mdsOccupancy.metadata().size()); + alpaka::memset(queue_, nMDs_view, 0u); + alpaka::memset(queue_, totOccupancyMDs_view, 0u); + } + + Vec3D const threadsPerBlockCreateMD{1, 16, 32}; + Vec3D const 
blocksPerGridCreateMD{1, nLowerModules_ / threadsPerBlockCreateMD[1], 1}; + WorkDiv3D const createMiniDoublets_workDiv = + createWorkDiv(blocksPerGridCreateMD, threadsPerBlockCreateMD, elementsPerThread); + + alpaka::exec(queue_, + createMiniDoublets_workDiv, + CreateMiniDoublets{}, + modules_.const_view(), + hitsDC_->const_view(), + hitsDC_->const_view(), + miniDoubletsDC_->view(), + miniDoubletsDC_->view(), + rangesDC_->const_view()); + + WorkDiv1D const addMiniDoubletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addMiniDoubletRangesToEventExplicit_workDiv, + AddMiniDoubletRangesToEventExplicit{}, + modules_.const_view(), + miniDoubletsDC_->view(), + rangesDC_->view(), + hitsDC_->const_view()); + + if (addObjects_) { + addMiniDoubletsToEventExplicit(); + } +} + +void LSTEvent::createSegmentsWithModuleMap() { + if (!segmentsDC_) { + std::array const segments_sizes{{static_cast(nTotalSegments_), + static_cast(nLowerModules_ + 1), + static_cast(n_max_pixel_segments_per_module)}}; + segmentsDC_.emplace(segments_sizes, queue_); + + auto segmentsOccupancy = segmentsDC_->view(); + auto nSegments_view = + cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), segmentsOccupancy.metadata().size()); + auto totOccupancySegments_view = cms::alpakatools::make_device_view( + queue_, segmentsOccupancy.totOccupancySegments(), segmentsOccupancy.metadata().size()); + alpaka::memset(queue_, nSegments_view, 0u); + alpaka::memset(queue_, totOccupancySegments_view, 0u); + } + + Vec3D const threadsPerBlockCreateSeg{1, 1, 64}; + Vec3D const blocksPerGridCreateSeg{1, 1, nLowerModules_}; + WorkDiv3D const createSegments_workDiv = + createWorkDiv(blocksPerGridCreateSeg, threadsPerBlockCreateSeg, elementsPerThread); + + alpaka::exec(queue_, + createSegments_workDiv, + CreateSegments{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->view(), + segmentsDC_->view(), + 
rangesDC_->const_view()); + + WorkDiv1D const addSegmentRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addSegmentRangesToEventExplicit_workDiv, + AddSegmentRangesToEventExplicit{}, + modules_.const_view(), + segmentsDC_->view(), + rangesDC_->view()); + + if (addObjects_) { + addSegmentsToEventExplicit(); + } +} + +void LSTEvent::createTriplets() { + if (!tripletsDC_) { + WorkDiv1D const createTripletArrayRanges_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createTripletArrayRanges_workDiv, + CreateTripletArrayRanges{}, + modules_.const_view(), + rangesDC_->view(), + segmentsDC_->const_view()); + + // TODO: Why are we pulling this back down only to put it back on the device in a new struct? + auto rangesOccupancy = rangesDC_->view(); + auto maxTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto maxTriplets_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalTrips()); + alpaka::memcpy(queue_, maxTriplets_buf_h, maxTriplets_buf_d); + alpaka::wait(queue_); // wait to get the value before using it + + std::array const triplets_sizes{ + {static_cast(*maxTriplets_buf_h.data()), static_cast(nLowerModules_)}}; + tripletsDC_.emplace(triplets_sizes, queue_); + + auto tripletsOccupancy = tripletsDC_->view(); + auto nTriplets_view = + cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, nTriplets_view, 0u); + auto totOccupancyTriplets_view = cms::alpakatools::make_device_view( + queue_, tripletsOccupancy.totOccupancyTriplets(), tripletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyTriplets_view, 0u); + auto triplets = tripletsDC_->view(); + auto partOfPT5_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT5(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); + auto partOfT5_view = cms::alpakatools::make_device_view(queue_, 
triplets.partOfT5(), triplets.metadata().size()); + alpaka::memset(queue_, partOfT5_view, 0u); + auto partOfPT3_view = cms::alpakatools::make_device_view(queue_, triplets.partOfPT3(), triplets.metadata().size()); + alpaka::memset(queue_, partOfPT3_view, 0u); + } + + uint16_t nonZeroModules = 0; + unsigned int max_InnerSeg = 0; + + // Allocate and copy nSegments from device to host (only nLowerModules in OT, not the +1 with pLSs) + auto nSegments_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto nSegments_buf_d = cms::alpakatools::make_device_view( + queue_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegments_buf_h, nSegments_buf_d, nLowerModules_); + + // ... same for module_nConnectedModules + // FIXME: replace by ES host data + auto modules = modules_.const_view(); + auto module_nConnectedModules_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_nConnectedModules_buf_d = + cms::alpakatools::make_device_view(queue_, modules.nConnectedModules(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_nConnectedModules_buf_h, module_nConnectedModules_buf_d, nLowerModules_); + + alpaka::wait(queue_); // wait for nSegments and module_nConnectedModules before using + + auto const* nSegments = nSegments_buf_h.data(); + auto const* module_nConnectedModules = module_nConnectedModules_buf_h.data(); + + // Allocate host index and fill it directly + auto index_buf_h = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto* index = index_buf_h.data(); + + for (uint16_t innerLowerModuleIndex = 0; innerLowerModuleIndex < nLowerModules_; innerLowerModuleIndex++) { + uint16_t nConnectedModules = module_nConnectedModules[innerLowerModuleIndex]; + unsigned int nInnerSegments = nSegments[innerLowerModuleIndex]; + if (nConnectedModules != 0 and nInnerSegments != 0) { + index[nonZeroModules] = innerLowerModuleIndex; + nonZeroModules++; + } + max_InnerSeg = 
std::max(max_InnerSeg, nInnerSegments); + } + + // Allocate and copy to device index + auto index_gpu_buf = cms::alpakatools::make_device_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, index_gpu_buf, index_buf_h, nonZeroModules); + + Vec3D const threadsPerBlockCreateTrip{1, 16, 16}; + Vec3D const blocksPerGridCreateTrip{max_blocks, 1, 1}; + WorkDiv3D const createTriplets_workDiv = + createWorkDiv(blocksPerGridCreateTrip, threadsPerBlockCreateTrip, elementsPerThread); + + alpaka::exec(queue_, + createTriplets_workDiv, + CreateTriplets{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->view(), + rangesDC_->const_view(), + index_gpu_buf.data(), + nonZeroModules); + + WorkDiv1D const addTripletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addTripletRangesToEventExplicit_workDiv, + AddTripletRangesToEventExplicit{}, + modules_.const_view(), + tripletsDC_->const_view(), + rangesDC_->view()); + + if (addObjects_) { + addTripletsToEventExplicit(); + } +} + +void LSTEvent::createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets) { + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); + } + + Vec3D const threadsPerBlock_crossCleanpT3{1, 16, 64}; + Vec3D const blocksPerGrid_crossCleanpT3{1, 4, 20}; + WorkDiv3D const crossCleanpT3_workDiv = + createWorkDiv(blocksPerGrid_crossCleanpT3, threadsPerBlock_crossCleanpT3, elementsPerThread); + + alpaka::exec(queue_, + crossCleanpT3_workDiv, + CrossCleanpT3{}, + modules_.const_view(), + rangesDC_->const_view(), + pixelTripletsDC_->view(), + segmentsDC_->const_view(), + pixelQuintupletsDC_->const_view()); + + WorkDiv1D const addpT3asTrackCandidates_workDiv = createWorkDiv({1}, {512}, {1}); + + alpaka::exec(queue_, + 
addpT3asTrackCandidates_workDiv, + AddpT3asTrackCandidates{}, + nLowerModules_, + pixelTripletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + rangesDC_->const_view()); + + // Pull nEligibleT5Modules from the device. + auto rangesOccupancy = rangesDC_->view(); + auto nEligibleModules_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nEligibleModules_buf_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nEligibleT5Modules()); + alpaka::memcpy(queue_, nEligibleModules_buf_h, nEligibleModules_buf_d); + alpaka::wait(queue_); // wait to get the value before using + auto const nEligibleModules = *nEligibleModules_buf_h.data(); + + Vec3D const threadsPerBlockRemoveDupQuints{1, 16, 32}; + Vec3D const blocksPerGridRemoveDupQuints{1, std::max(nEligibleModules / 16, 1), std::max(nEligibleModules / 32, 1)}; + WorkDiv3D const removeDupQuintupletsBeforeTC_workDiv = + createWorkDiv(blocksPerGridRemoveDupQuints, threadsPerBlockRemoveDupQuints, elementsPerThread); + + alpaka::exec(queue_, + removeDupQuintupletsBeforeTC_workDiv, + RemoveDupQuintupletsBeforeTC{}, + quintupletsDC_->view(), + quintupletsDC_->view(), + rangesDC_->const_view()); + + Vec3D const threadsPerBlock_crossCleanT5{32, 1, 32}; + Vec3D const blocksPerGrid_crossCleanT5{(13296 / 32) + 1, 1, max_blocks}; + WorkDiv3D const crossCleanT5_workDiv = + createWorkDiv(blocksPerGrid_crossCleanT5, threadsPerBlock_crossCleanT5, elementsPerThread); + + alpaka::exec(queue_, + crossCleanT5_workDiv, + CrossCleanT5{}, + modules_.const_view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->const_view(), + pixelTripletsDC_->const_view(), + rangesDC_->const_view()); + + Vec3D const threadsPerBlock_addT5asTrackCandidate{1, 8, 128}; + Vec3D const blocksPerGrid_addT5asTrackCandidate{1, 8, 10}; + WorkDiv3D const addT5asTrackCandidate_workDiv = + createWorkDiv(blocksPerGrid_addT5asTrackCandidate, threadsPerBlock_addT5asTrackCandidate, elementsPerThread); 
+ + alpaka::exec(queue_, + addT5asTrackCandidate_workDiv, + AddT5asTrackCandidate{}, + nLowerModules_, + quintupletsDC_->const_view(), + quintupletsDC_->const_view(), + trackCandidatesDC_->view(), + rangesDC_->const_view()); + + if (!no_pls_dupclean) { + Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + WorkDiv3D const checkHitspLS_workDiv = + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + modules_.const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + true); + } + + Vec3D const threadsPerBlock_crossCleanpLS{1, 16, 32}; + Vec3D const blocksPerGrid_crossCleanpLS{1, 4, 20}; + WorkDiv3D const crossCleanpLS_workDiv = + createWorkDiv(blocksPerGrid_crossCleanpLS, threadsPerBlock_crossCleanpLS, elementsPerThread); + + alpaka::exec(queue_, + crossCleanpLS_workDiv, + CrossCleanpLS{}, + modules_.const_view(), + rangesDC_->const_view(), + pixelTripletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + miniDoubletsDC_->const_view(), + hitsDC_->const_view(), + quintupletsDC_->const_view()); + + Vec3D const threadsPerBlock_addpLSasTrackCandidate{1, 1, 384}; + Vec3D const blocksPerGrid_addpLSasTrackCandidate{1, 1, max_blocks}; + WorkDiv3D const addpLSasTrackCandidate_workDiv = + createWorkDiv(blocksPerGrid_addpLSasTrackCandidate, threadsPerBlock_addpLSasTrackCandidate, elementsPerThread); + + alpaka::exec(queue_, + addpLSasTrackCandidate_workDiv, + AddpLSasTrackCandidate{}, + nLowerModules_, + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + tc_pls_triplets); + + // Check if either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates was reached + auto nTrackCanpT5Host_buf = cms::alpakatools::make_host_buffer(queue_); + auto nTrackCanpT3Host_buf = 
cms::alpakatools::make_host_buffer(queue_); + auto nTrackCanpLSHost_buf = cms::alpakatools::make_host_buffer(queue_); + auto nTrackCanT5Host_buf = cms::alpakatools::make_host_buffer(queue_); + alpaka::memcpy(queue_, + nTrackCanpT5Host_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT5())); + alpaka::memcpy(queue_, + nTrackCanpT3Host_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT3())); + alpaka::memcpy(queue_, + nTrackCanpLSHost_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespLS())); + alpaka::memcpy(queue_, + nTrackCanT5Host_buf, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5())); + alpaka::wait(queue_); // wait to get the values before using them + + auto nTrackCandidatespT5 = *nTrackCanpT5Host_buf.data(); + auto nTrackCandidatespT3 = *nTrackCanpT3Host_buf.data(); + auto nTrackCandidatespLS = *nTrackCanpLSHost_buf.data(); + auto nTrackCandidatesT5 = *nTrackCanT5Host_buf.data(); + if ((nTrackCandidatespT5 + nTrackCandidatespT3 + nTrackCandidatespLS == n_max_pixel_track_candidates) || + (nTrackCandidatesT5 == n_max_nonpixel_track_candidates)) { + printf( + "****************************************************************************************************\n" + "* Warning: Track candidates were possibly truncated. *\n" + "* You may need to increase either n_max_pixel_track_candidates or n_max_nonpixel_track_candidates. *\n" + "* Run the code with the WARNINGS flag activated for more details. 
*\n" + "****************************************************************************************************\n"); + } +} + +void LSTEvent::createPixelTriplets() { + if (!pixelTripletsDC_) { + pixelTripletsDC_.emplace(n_max_pixel_triplets, queue_); + auto nPixelTriplets_view = cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets()); + alpaka::memset(queue_, nPixelTriplets_view, 0u); + auto totOccupancyPixelTriplets_view = + cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->totOccupancyPixelTriplets()); + alpaka::memset(queue_, totOccupancyPixelTriplets_view, 0u); + } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); + + auto superbins_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + auto pixelTypes_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + + alpaka::memcpy(queue_, + superbins_buf, + cms::alpakatools::make_device_view(queue_, segmentsPixel.superbin(), n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, + pixelTypes_buf, + cms::alpakatools::make_device_view(queue_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + + unsigned int nInnerSegments; + auto nInnerSegments_src_view = cms::alpakatools::make_host_view(nInnerSegments); + + // Create a sub-view for the device buffer + auto dev_view_nSegments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[nLowerModules_]); + + alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); + alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using + + auto connectedPixelSize_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto 
connectedPixelSize_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + + unsigned int* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + unsigned int* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); + + int pixelIndexOffsetPos = + pixelMapping_.connectedPixelsIndex[size_superbins - 1] + pixelMapping_.connectedPixelsSizes[size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[size_superbins - 1] + pixelIndexOffsetPos; + + // TODO: check if a map/reduction to just eligible pLSs would speed up the kernel + // the current selection still leaves a significant fraction of unmatchable pLSs + for (unsigned int i = 0; i < nInnerSegments; i++) { // loop over # pLS + PixelType pixelType = pixelTypes[i]; // Get pixel type for this pLS + int superbin = superbins[i]; // Get superbin for this pixel + if ((superbin < 0) or (superbin >= (int)size_superbins) or + ((pixelType != PixelType::kHighPt) and (pixelType != PixelType::kLowPtPosCurv) and + (pixelType != PixelType::kLowPtNegCurv))) { + connectedPixelSize_host[i] = 0; + connectedPixelIndex_host[i] = 0; + continue; + } + + // Used pixel type to select correct size-index arrays + switch (pixelType) { + case PixelType::kInvalid: + break; + case PixelType::kHighPt: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin]; + break; + case PixelType::kLowPtPosCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin]; + // index to get start of connected modules for this superbin in map + 
connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; + break; + case PixelType::kLowPtNegCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; + break; + } + } + + alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); + + Vec3D const threadsPerBlock{1, 4, 32}; + Vec3D const blocksPerGrid{16 /* above median of connected modules*/, 4096, 1}; + WorkDiv3D const createPixelTripletsFromMap_workDiv = createWorkDiv(blocksPerGrid, threadsPerBlock, elementsPerThread); + + alpaka::exec(queue_, + createPixelTripletsFromMap_workDiv, + CreatePixelTripletsFromMap{}, + modules_.const_view(), + modules_.const_view(), + rangesDC_->const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->const_view(), + pixelTripletsDC_->view(), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), + nInnerSegments); + +#ifdef WARNINGS + auto nPixelTriplets_buf = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy( + queue_, nPixelTriplets_buf, cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets())); + alpaka::wait(queue_); // wait to get the value before using it + + std::cout << "number of pixel triplets = " << *nPixelTriplets_buf.data() << std::endl; +#endif + + //pT3s can be cleaned here because they're not used in making pT5s! 
+ Vec3D const threadsPerBlockDupPixTrip{1, 16, 16}; + //seems like more blocks lead to conflicting writes + Vec3D const blocksPerGridDupPixTrip{1, 40, 1}; + WorkDiv3D const removeDupPixelTripletsFromMap_workDiv = + createWorkDiv(blocksPerGridDupPixTrip, threadsPerBlockDupPixTrip, elementsPerThread); + + alpaka::exec( + queue_, removeDupPixelTripletsFromMap_workDiv, RemoveDupPixelTripletsFromMap{}, pixelTripletsDC_->view()); +} + +void LSTEvent::createQuintuplets() { + WorkDiv1D const createEligibleModulesListForQuintuplets_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + createEligibleModulesListForQuintuplets_workDiv, + CreateEligibleModulesListForQuintuplets{}, + modules_.const_view(), + tripletsDC_->const_view(), + rangesDC_->view()); + + auto nEligibleT5Modules_buf = cms::alpakatools::make_host_buffer(queue_); + auto nTotalQuintuplets_buf = cms::alpakatools::make_host_buffer(queue_); + auto rangesOccupancy = rangesDC_->view(); + auto nEligibleT5Modules_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nEligibleT5Modules()); + auto nTotalQuintuplets_view_d = cms::alpakatools::make_device_view(queue_, rangesOccupancy.nTotalQuints()); + alpaka::memcpy(queue_, nEligibleT5Modules_buf, nEligibleT5Modules_view_d); + alpaka::memcpy(queue_, nTotalQuintuplets_buf, nTotalQuintuplets_view_d); + alpaka::wait(queue_); // wait for the values before using them + + auto nEligibleT5Modules = *nEligibleT5Modules_buf.data(); + auto nTotalQuintuplets = *nTotalQuintuplets_buf.data(); + + if (!quintupletsDC_) { + std::array const quintuplets_sizes{{static_cast(nTotalQuintuplets), static_cast(nLowerModules_)}}; + quintupletsDC_.emplace(quintuplets_sizes, queue_); + auto quintupletsOccupancy = quintupletsDC_->view(); + auto nQuintuplets_view = cms::alpakatools::make_device_view( + queue_, quintupletsOccupancy.nQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, nQuintuplets_view, 0u); + auto 
totOccupancyQuintuplets_view = cms::alpakatools::make_device_view( + queue_, quintupletsOccupancy.totOccupancyQuintuplets(), quintupletsOccupancy.metadata().size()); + alpaka::memset(queue_, totOccupancyQuintuplets_view, 0u); + auto quintuplets = quintupletsDC_->view(); + auto isDup_view = cms::alpakatools::make_device_view(queue_, quintuplets.isDup(), quintuplets.metadata().size()); + alpaka::memset(queue_, isDup_view, 0u); + auto tightCutFlag_view = + cms::alpakatools::make_device_view(queue_, quintuplets.tightCutFlag(), quintuplets.metadata().size()); + alpaka::memset(queue_, tightCutFlag_view, 0u); + auto partOfPT5_view = + cms::alpakatools::make_device_view(queue_, quintuplets.partOfPT5(), quintuplets.metadata().size()); + alpaka::memset(queue_, partOfPT5_view, 0u); + } + + Vec3D const threadsPerBlockQuints{1, 8, 32}; + Vec3D const blocksPerGridQuints{std::max((int)nEligibleT5Modules, 1), 1, 1}; + WorkDiv3D const createQuintuplets_workDiv = + createWorkDiv(blocksPerGridQuints, threadsPerBlockQuints, elementsPerThread); + + alpaka::exec(queue_, + createQuintuplets_workDiv, + CreateQuintuplets{}, + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + tripletsDC_->view(), + tripletsDC_->const_view(), + quintupletsDC_->view(), + quintupletsDC_->view(), + rangesDC_->const_view(), + nEligibleT5Modules); + + Vec3D const threadsPerBlockDupQuint{1, 16, 16}; + Vec3D const blocksPerGridDupQuint{max_blocks, 1, 1}; + WorkDiv3D const removeDupQuintupletsAfterBuild_workDiv = + createWorkDiv(blocksPerGridDupQuint, threadsPerBlockDupQuint, elementsPerThread); + + alpaka::exec(queue_, + removeDupQuintupletsAfterBuild_workDiv, + RemoveDupQuintupletsAfterBuild{}, + modules_.const_view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + rangesDC_->const_view()); + + WorkDiv1D const addQuintupletRangesToEventExplicit_workDiv = createWorkDiv({1}, {1024}, {1}); + + alpaka::exec(queue_, + addQuintupletRangesToEventExplicit_workDiv, + 
AddQuintupletRangesToEventExplicit{}, + modules_.const_view(), + quintupletsDC_->const_view(), + rangesDC_->view()); + + if (addObjects_) { + addQuintupletsToEventExplicit(); + } +} + +void LSTEvent::pixelLineSegmentCleaning(bool no_pls_dupclean) { + if (!no_pls_dupclean) { + Vec3D const threadsPerBlockCheckHitspLS{1, 16, 16}; + Vec3D const blocksPerGridCheckHitspLS{1, max_blocks * 4, max_blocks / 4}; + WorkDiv3D const checkHitspLS_workDiv = + createWorkDiv(blocksPerGridCheckHitspLS, threadsPerBlockCheckHitspLS, elementsPerThread); + + alpaka::exec(queue_, + checkHitspLS_workDiv, + CheckHitspLS{}, + modules_.const_view(), + segmentsDC_->const_view(), + segmentsDC_->view(), + false); + } +} + +void LSTEvent::createPixelQuintuplets() { + if (!pixelQuintupletsDC_) { + pixelQuintupletsDC_.emplace(n_max_pixel_quintuplets, queue_); + auto nPixelQuintuplets_view = + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets()); + alpaka::memset(queue_, nPixelQuintuplets_view, 0u); + auto totOccupancyPixelQuintuplets_view = + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->totOccupancyPixelQuintuplets()); + alpaka::memset(queue_, totOccupancyPixelQuintuplets_view, 0u); + } + if (!trackCandidatesDC_) { + trackCandidatesDC_.emplace(n_max_nonpixel_track_candidates + n_max_pixel_track_candidates, queue_); + auto buf = trackCandidatesDC_->buffer(); + alpaka::memset(queue_, buf, 0u); + } + SegmentsOccupancy segmentsOccupancy = segmentsDC_->view(); + SegmentsPixelConst segmentsPixel = segmentsDC_->view(); + + auto superbins_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + auto pixelTypes_buf = cms::alpakatools::make_host_buffer(queue_, n_max_pixel_segments_per_module); + + alpaka::memcpy(queue_, + superbins_buf, + cms::alpakatools::make_device_view(queue_, segmentsPixel.superbin(), n_max_pixel_segments_per_module)); + alpaka::memcpy( + queue_, + pixelTypes_buf, + 
cms::alpakatools::make_device_view(queue_, segmentsPixel.pixelType(), n_max_pixel_segments_per_module)); + auto const* superbins = superbins_buf.data(); + auto const* pixelTypes = pixelTypes_buf.data(); + + unsigned int nInnerSegments; + auto nInnerSegments_src_view = cms::alpakatools::make_host_view(nInnerSegments); + + // Create a sub-view for the device buffer + unsigned int totalModules = nLowerModules_ + 1; + auto dev_view_nSegments_buf = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments(), totalModules); + auto dev_view_nSegments = cms::alpakatools::make_device_view(queue_, segmentsOccupancy.nSegments()[nLowerModules_]); + + alpaka::memcpy(queue_, nInnerSegments_src_view, dev_view_nSegments); + alpaka::wait(queue_); // wait to get nInnerSegments (also superbins and pixelTypes) before using + + auto connectedPixelSize_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_host_buf = cms::alpakatools::make_host_buffer(queue_, nInnerSegments); + auto connectedPixelSize_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + auto connectedPixelIndex_dev_buf = cms::alpakatools::make_device_buffer(queue_, nInnerSegments); + + auto* connectedPixelSize_host = connectedPixelSize_host_buf.data(); + auto* connectedPixelIndex_host = connectedPixelIndex_host_buf.data(); + + int pixelIndexOffsetPos = pixelMapping_.connectedPixelsIndex[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizes[::size_superbins - 1]; + int pixelIndexOffsetNeg = pixelMapping_.connectedPixelsIndexPos[::size_superbins - 1] + + pixelMapping_.connectedPixelsSizesPos[::size_superbins - 1] + pixelIndexOffsetPos; + + // Loop over # pLS + for (unsigned int i = 0; i < nInnerSegments; i++) { + PixelType pixelType = pixelTypes[i]; // Get pixel type for this pLS + int superbin = superbins[i]; // Get superbin for this pixel + if ((superbin < 0) or (superbin >= (int)size_superbins) or + ((pixelType != PixelType::kHighPt) 
and (pixelType != PixelType::kLowPtPosCurv) and + (pixelType != PixelType::kLowPtNegCurv))) { + connectedPixelSize_host[i] = 0; + connectedPixelIndex_host[i] = 0; + continue; + } + + // Used pixel type to select correct size-index arrays + switch (pixelType) { + case PixelType::kInvalid: + break; + case PixelType::kHighPt: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizes[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndex[superbin]; + break; + case PixelType::kLowPtPosCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesPos[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexPos[superbin] + pixelIndexOffsetPos; + break; + case PixelType::kLowPtNegCurv: + // number of connected modules to this pixel + connectedPixelSize_host[i] = pixelMapping_.connectedPixelsSizesNeg[superbin]; + // index to get start of connected modules for this superbin in map + connectedPixelIndex_host[i] = pixelMapping_.connectedPixelsIndexNeg[superbin] + pixelIndexOffsetNeg; + break; + } + } + + alpaka::memcpy(queue_, connectedPixelSize_dev_buf, connectedPixelSize_host_buf, nInnerSegments); + alpaka::memcpy(queue_, connectedPixelIndex_dev_buf, connectedPixelIndex_host_buf, nInnerSegments); + + Vec3D const threadsPerBlockCreatePixQuints{1, 16, 16}; + Vec3D const blocksPerGridCreatePixQuints{16, max_blocks, 1}; + WorkDiv3D const createPixelQuintupletsFromMap_workDiv = + createWorkDiv(blocksPerGridCreatePixQuints, threadsPerBlockCreatePixQuints, elementsPerThread); + + alpaka::exec(queue_, + createPixelQuintupletsFromMap_workDiv, + CreatePixelQuintupletsFromMap{}, + modules_.const_view(), + modules_.const_view(), + miniDoubletsDC_->const_view(), + segmentsDC_->const_view(), + 
segmentsDC_->view(), + tripletsDC_->view(), + quintupletsDC_->view(), + quintupletsDC_->const_view(), + pixelQuintupletsDC_->view(), + connectedPixelSize_dev_buf.data(), + connectedPixelIndex_dev_buf.data(), + nInnerSegments, + rangesDC_->const_view()); + + Vec3D const threadsPerBlockDupPix{1, 16, 16}; + Vec3D const blocksPerGridDupPix{1, max_blocks, 1}; + WorkDiv3D const removeDupPixelQuintupletsFromMap_workDiv = + createWorkDiv(blocksPerGridDupPix, threadsPerBlockDupPix, elementsPerThread); + + alpaka::exec(queue_, + removeDupPixelQuintupletsFromMap_workDiv, + RemoveDupPixelQuintupletsFromMap{}, + pixelQuintupletsDC_->view()); + + WorkDiv1D const addpT5asTrackCandidate_workDiv = createWorkDiv({1}, {256}, {1}); + + alpaka::exec(queue_, + addpT5asTrackCandidate_workDiv, + AddpT5asTrackCandidate{}, + nLowerModules_, + pixelQuintupletsDC_->const_view(), + trackCandidatesDC_->view(), + segmentsDC_->const_view(), + rangesDC_->const_view()); + +#ifdef WARNINGS + auto nPixelQuintuplets_buf = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nPixelQuintuplets_buf, + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets())); + alpaka::wait(queue_); // wait to get the value before using it + + std::cout << "number of pixel quintuplets = " << *nPixelQuintuplets_buf.data() << std::endl; +#endif +} + +void LSTEvent::addMiniDoubletsToEventExplicit() { + auto nMDsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto mdsOccupancy = miniDoubletsDC_->const_view(); + auto nMDs_view = + cms::alpakatools::make_device_view(queue_, mdsOccupancy.nMDs(), nLowerModules_); // exclude pixel part + alpaka::memcpy(queue_, nMDsCPU_buf, nMDs_view, nLowerModules_); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = + cms::alpakatools::make_device_view(queue_, modules.subdets(), 
nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + auto module_hitRanges_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto hits = hitsDC_->view(); + auto hitRanges_view = + cms::alpakatools::make_device_view(queue_, hits.hitRanges(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_hitRanges_buf, hitRanges_view, nLowerModules_); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nMDsCPU = nMDsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_hitRanges = module_hitRanges_buf.data(); + + for (unsigned int i = 0; i < nLowerModules_; i++) { + if (!(nMDsCPU[i] == 0 or module_hitRanges[i][0] == -1)) { + if (module_subdets[i] == Barrel) { + n_minidoublets_by_layer_barrel_[module_layers[i] - 1] += nMDsCPU[i]; + } else { + n_minidoublets_by_layer_endcap_[module_layers[i] - 1] += nMDsCPU[i]; + } + } + } +} + +void LSTEvent::addSegmentsToEventExplicit() { + auto nSegmentsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto nSegments_buf = cms::alpakatools::make_device_view( + queue_, segmentsDC_->const_view().nSegments(), nLowerModules_); + alpaka::memcpy(queue_, nSegmentsCPU_buf, nSegments_buf, nLowerModules_); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = + cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); // only lower modules + 
alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nSegmentsCPU = nSegmentsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + + for (unsigned int i = 0; i < nLowerModules_; i++) { + if (!(nSegmentsCPU[i] == 0)) { + if (module_subdets[i] == Barrel) { + n_segments_by_layer_barrel_[module_layers[i] - 1] += nSegmentsCPU[i]; + } else { + n_segments_by_layer_endcap_[module_layers[i] - 1] += nSegmentsCPU[i]; + } + } + } +} + +void LSTEvent::addQuintupletsToEventExplicit() { + auto quintupletsOccupancy = quintupletsDC_->const_view(); + auto nQuintuplets_view = + cms::alpakatools::make_device_view(queue_, quintupletsOccupancy.nQuintuplets(), nLowerModules_); + auto nQuintupletsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, nQuintupletsCPU_buf, nQuintuplets_view); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); // only lower modules + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + auto 
module_quintupletModuleIndices_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto rangesOccupancy = rangesDC_->view(); + auto quintupletModuleIndices_view_d = + cms::alpakatools::make_device_view(queue_, rangesOccupancy.quintupletModuleIndices(), nLowerModules_); + alpaka::memcpy(queue_, module_quintupletModuleIndices_buf, quintupletModuleIndices_view_d); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nQuintupletsCPU = nQuintupletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + auto const* module_quintupletModuleIndices = module_quintupletModuleIndices_buf.data(); + + for (uint16_t i = 0; i < nLowerModules_; i++) { + if (!(nQuintupletsCPU[i] == 0 or module_quintupletModuleIndices[i] == -1)) { + if (module_subdets[i] == Barrel) { + n_quintuplets_by_layer_barrel_[module_layers[i] - 1] += nQuintupletsCPU[i]; + } else { + n_quintuplets_by_layer_endcap_[module_layers[i] - 1] += nQuintupletsCPU[i]; + } + } + } +} + +void LSTEvent::addTripletsToEventExplicit() { + auto tripletsOccupancy = tripletsDC_->const_view(); + auto nTriplets_view = cms::alpakatools::make_device_view(queue_, tripletsOccupancy.nTriplets(), nLowerModules_); + auto nTripletsCPU_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + alpaka::memcpy(queue_, nTripletsCPU_buf, nTriplets_view); + + auto modules = modules_.const_view(); + + // FIXME: replace by ES host data + auto module_subdets_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_subdets_view = + cms::alpakatools::make_device_view(queue_, modules.subdets(), nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_subdets_buf, module_subdets_view, nLowerModules_); + + auto module_layers_buf = cms::alpakatools::make_host_buffer(queue_, nLowerModules_); + auto module_layers_view = + cms::alpakatools::make_device_view(queue_, modules.layers(), 
nLowerModules_); // only lower modules + alpaka::memcpy(queue_, module_layers_buf, module_layers_view, nLowerModules_); + + alpaka::wait(queue_); // wait for inputs before using them + + auto const* nTripletsCPU = nTripletsCPU_buf.data(); + auto const* module_subdets = module_subdets_buf.data(); + auto const* module_layers = module_layers_buf.data(); + + for (uint16_t i = 0; i < nLowerModules_; i++) { + if (nTripletsCPU[i] != 0) { + if (module_subdets[i] == Barrel) { + n_triplets_by_layer_barrel_[module_layers[i] - 1] += nTripletsCPU[i]; + } else { + n_triplets_by_layer_endcap_[module_layers[i] - 1] += nTripletsCPU[i]; + } + } + } +} + +unsigned int LSTEvent::getNumberOfMiniDoublets() { + unsigned int miniDoublets = 0; + for (auto& it : n_minidoublets_by_layer_barrel_) { + miniDoublets += it; + } + for (auto& it : n_minidoublets_by_layer_endcap_) { + miniDoublets += it; + } + + return miniDoublets; +} + +unsigned int LSTEvent::getNumberOfMiniDoubletsByLayer(unsigned int layer) { + if (layer >= n_minidoublets_by_layer_endcap_.size()) // zero-based index with no endcap entry: barrel only + return n_minidoublets_by_layer_barrel_[layer]; + else + return n_minidoublets_by_layer_barrel_[layer] + n_minidoublets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer) { + return n_minidoublets_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer) { + return n_minidoublets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfSegments() { + unsigned int segments = 0; + for (auto& it : n_segments_by_layer_barrel_) { + segments += it; + } + for (auto& it : n_segments_by_layer_endcap_) { + segments += it; + } + + return segments; +} + +unsigned int LSTEvent::getNumberOfSegmentsByLayer(unsigned int layer) { + if (layer >= n_segments_by_layer_endcap_.size()) // zero-based index with no endcap entry: barrel only + return n_segments_by_layer_barrel_[layer]; + else + return n_segments_by_layer_barrel_[layer] + n_segments_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfSegmentsByLayerBarrel(unsigned int layer) { + return 
n_segments_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfSegmentsByLayerEndcap(unsigned int layer) { + return n_segments_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfTriplets() { + unsigned int triplets = 0; + for (auto& it : n_triplets_by_layer_barrel_) { + triplets += it; + } + for (auto& it : n_triplets_by_layer_endcap_) { + triplets += it; + } + + return triplets; +} + +unsigned int LSTEvent::getNumberOfTripletsByLayer(unsigned int layer) { + if (layer >= n_triplets_by_layer_endcap_.size()) // zero-based index with no endcap entry: barrel only + return n_triplets_by_layer_barrel_[layer]; + else + return n_triplets_by_layer_barrel_[layer] + n_triplets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfTripletsByLayerBarrel(unsigned int layer) { + return n_triplets_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfTripletsByLayerEndcap(unsigned int layer) { + return n_triplets_by_layer_endcap_[layer]; +} + +int LSTEvent::getNumberOfPixelTriplets() { + auto nPixelTriplets_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy( + queue_, nPixelTriplets_buf_h, cms::alpakatools::make_device_view(queue_, (*pixelTripletsDC_)->nPixelTriplets())); + alpaka::wait(queue_); + + return *nPixelTriplets_buf_h.data(); +} + +int LSTEvent::getNumberOfPixelQuintuplets() { + auto nPixelQuintuplets_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nPixelQuintuplets_buf_h, + cms::alpakatools::make_device_view(queue_, (*pixelQuintupletsDC_)->nPixelQuintuplets())); + alpaka::wait(queue_); + + return *nPixelQuintuplets_buf_h.data(); +} + +unsigned int LSTEvent::getNumberOfQuintuplets() { + unsigned int quintuplets = 0; + for (auto& it : n_quintuplets_by_layer_barrel_) { + quintuplets += it; + } + for (auto& it : n_quintuplets_by_layer_endcap_) { + quintuplets += it; + } + + return quintuplets; +} + +unsigned int LSTEvent::getNumberOfQuintupletsByLayer(unsigned int layer) { + if (layer >= n_quintuplets_by_layer_endcap_.size()) // zero-based index with no endcap entry: barrel only + return n_quintuplets_by_layer_barrel_[layer]; + else + return 
n_quintuplets_by_layer_barrel_[layer] + n_quintuplets_by_layer_endcap_[layer]; +} + +unsigned int LSTEvent::getNumberOfQuintupletsByLayerBarrel(unsigned int layer) { + return n_quintuplets_by_layer_barrel_[layer]; +} + +unsigned int LSTEvent::getNumberOfQuintupletsByLayerEndcap(unsigned int layer) { + return n_quintuplets_by_layer_endcap_[layer]; +} + +int LSTEvent::getNumberOfTrackCandidates() { + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidates_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates())); + alpaka::wait(queue_); + + return *nTrackCandidates_buf_h.data(); +} + +int LSTEvent::getNumberOfPT5TrackCandidates() { + auto nTrackCandidatesPT5_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesPT5_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT5())); + alpaka::wait(queue_); + + return *nTrackCandidatesPT5_buf_h.data(); +} + +int LSTEvent::getNumberOfPT3TrackCandidates() { + auto nTrackCandidatesPT3_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesPT3_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespT3())); + alpaka::wait(queue_); + + return *nTrackCandidatesPT3_buf_h.data(); +} + +int LSTEvent::getNumberOfPLSTrackCandidates() { + auto nTrackCandidatesPLS_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesPLS_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatespLS())); + alpaka::wait(queue_); + + return *nTrackCandidatesPLS_buf_h.data(); +} + +int LSTEvent::getNumberOfPixelTrackCandidates() { + auto nTrackCandidates_buf_h = cms::alpakatools::make_host_buffer(queue_); + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + 
nTrackCandidates_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates())); + alpaka::memcpy(queue_, + nTrackCandidatesT5_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5())); + alpaka::wait(queue_); + + return (*nTrackCandidates_buf_h.data()) - (*nTrackCandidatesT5_buf_h.data()); +} + +int LSTEvent::getNumberOfT5TrackCandidates() { + auto nTrackCandidatesT5_buf_h = cms::alpakatools::make_host_buffer(queue_); + + alpaka::memcpy(queue_, + nTrackCandidatesT5_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidatesT5())); + alpaka::wait(queue_); + + return *nTrackCandidatesT5_buf_h.data(); +} + +template +typename TSoA::ConstView LSTEvent::getHits(bool inCMSSW, bool sync) { + if constexpr (std::is_same_v) { + return hitsDC_->const_view(); + } else { + if (!hitsHC_) { + if (inCMSSW) { + auto hits_d = hitsDC_->view(); + auto nHits = hits_d.metadata().size(); + std::array const hits_sizes{{static_cast(nHits), static_cast(nModules_)}}; + hitsHC_.emplace(hits_sizes, queue_); + auto hits_h = hitsHC_->view(); + auto idxs_h = cms::alpakatools::make_host_view(hits_h.idxs(), nHits); + auto idxs_d = cms::alpakatools::make_device_view(queue_, hits_d.idxs(), nHits); + alpaka::memcpy(queue_, idxs_h, idxs_d); + } else { + hitsHC_.emplace(cms::alpakatools::CopyToHost>::copyAsync( + queue_, *hitsDC_)); + } + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return hitsHC_->const_view(); + } +} +template HitsConst LSTEvent::getHits(bool, bool); +template HitsRangesConst LSTEvent::getHits(bool, bool); + +template +ObjectRangesConst LSTEvent::getRanges(bool sync) { + if constexpr (std::is_same_v) { + return rangesDC_->const_view(); + } else { + if (!rangesHC_) { + rangesHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync(queue_, *rangesDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return 
rangesHC_->const_view(); + } +} +template ObjectRangesConst LSTEvent::getRanges<>(bool); + +template +typename TSoA::ConstView LSTEvent::getMiniDoublets(bool sync) { + if constexpr (std::is_same_v) { + return miniDoubletsDC_->const_view(); + } else { + if (!miniDoubletsHC_) { + miniDoubletsHC_.emplace( + cms::alpakatools::CopyToHost< + PortableMultiCollection>::copyAsync(queue_, + *miniDoubletsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return miniDoubletsHC_->const_view(); + } +} +template MiniDoubletsConst LSTEvent::getMiniDoublets(bool); +template MiniDoubletsOccupancyConst LSTEvent::getMiniDoublets(bool); + +template +typename TSoA::ConstView LSTEvent::getSegments(bool sync) { + if constexpr (std::is_same_v) { + return segmentsDC_->const_view(); + } else { + if (!segmentsHC_) { + segmentsHC_.emplace( + cms::alpakatools:: + CopyToHost>::copyAsync( + queue_, *segmentsDC_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return segmentsHC_->const_view(); + } +} +template SegmentsConst LSTEvent::getSegments(bool); +template SegmentsOccupancyConst LSTEvent::getSegments(bool); +template SegmentsPixelConst LSTEvent::getSegments(bool); + +template +typename TSoA::ConstView LSTEvent::getTriplets(bool sync) { + if constexpr (std::is_same_v) { + return tripletsDC_->const_view(); + } else { + if (!tripletsHC_) { + tripletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, *tripletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return tripletsHC_->const_view(); +} +template TripletsConst LSTEvent::getTriplets(bool); +template TripletsOccupancyConst LSTEvent::getTriplets(bool); + +template +typename TSoA::ConstView LSTEvent::getQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return quintupletsDC_->const_view(); + } else { + if (!quintupletsHC_) { + quintupletsHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, 
*quintupletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return quintupletsHC_->const_view(); +} +template QuintupletsConst LSTEvent::getQuintuplets(bool); +template QuintupletsOccupancyConst LSTEvent::getQuintuplets(bool); + +template +PixelTripletsConst LSTEvent::getPixelTriplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelTripletsDC_->const_view(); + } else { + if (!pixelTripletsHC_) { + pixelTripletsHC_.emplace(cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelTripletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return pixelTripletsHC_->const_view(); +} +template PixelTripletsConst LSTEvent::getPixelTriplets<>(bool); + +template +PixelQuintupletsConst LSTEvent::getPixelQuintuplets(bool sync) { + if constexpr (std::is_same_v) { + return pixelQuintupletsDC_->const_view(); + } else { + if (!pixelQuintupletsHC_) { + pixelQuintupletsHC_.emplace( + cms::alpakatools::CopyToHost<::PortableCollection>::copyAsync( + queue_, *pixelQuintupletsDC_)); + + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + } + return pixelQuintupletsHC_->const_view(); +} +template PixelQuintupletsConst LSTEvent::getPixelQuintuplets<>(bool); + +const TrackCandidatesConst& LSTEvent::getTrackCandidates(bool inCMSSW, bool sync) { + if (!trackCandidatesHC_) { + // Get nTrackCanHost parameter to initialize host based instance + auto nTrackCanHost_buf_h = cms::alpakatools::make_host_buffer(queue_); + alpaka::memcpy(queue_, + nTrackCanHost_buf_h, + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->nTrackCandidates())); + alpaka::wait(queue_); // wait here before we get nTrackCanHost and trackCandidatesInCPU becomes usable + + auto const nTrackCanHost = *nTrackCanHost_buf_h.data(); + trackCandidatesHC_.emplace(nTrackCanHost, queue_); + + (*trackCandidatesHC_)->nTrackCandidates() = nTrackCanHost; + alpaka::memcpy(queue_, + 
cms::alpakatools::make_host_view((*trackCandidatesHC_)->hitIndices()->data(), + Params_pT5::kHits * nTrackCanHost), + cms::alpakatools::make_device_view( + queue_, (*trackCandidatesDC_)->hitIndices()->data(), Params_pT5::kHits * nTrackCanHost)); + alpaka::memcpy(queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->pixelSeedIndex(), nTrackCanHost), + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->pixelSeedIndex(), nTrackCanHost)); + if (not inCMSSW) { + alpaka::memcpy(queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->logicalLayers()->data(), + Params_pT5::kLayers * nTrackCanHost), + cms::alpakatools::make_device_view( + queue_, (*trackCandidatesDC_)->logicalLayers()->data(), Params_pT5::kLayers * nTrackCanHost)); + alpaka::memcpy( + queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->directObjectIndices(), nTrackCanHost), + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->directObjectIndices(), nTrackCanHost)); + alpaka::memcpy( + queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->objectIndices()->data(), 2 * nTrackCanHost), + cms::alpakatools::make_device_view( + queue_, (*trackCandidatesDC_)->objectIndices()->data(), 2 * nTrackCanHost)); + } + alpaka::memcpy( + queue_, + cms::alpakatools::make_host_view((*trackCandidatesHC_)->trackCandidateType(), nTrackCanHost), + cms::alpakatools::make_device_view(queue_, (*trackCandidatesDC_)->trackCandidateType(), nTrackCanHost)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return trackCandidatesHC_.value().const_view(); +} + +template +typename TSoA::ConstView LSTEvent::getModules(bool sync) { + if constexpr (std::is_same_v) { + return modules_.const_view(); + } else { + if (!modulesHC_) { + modulesHC_.emplace( + cms::alpakatools::CopyToHost>::copyAsync( + queue_, modules_)); + if (sync) + alpaka::wait(queue_); // host consumers expect filled data + } + return modulesHC_->const_view(); + } +} 
+template ModulesConst LSTEvent::getModules(bool); +template ModulesPixelConst LSTEvent::getModules(bool); diff --git a/RecoTracker/LSTCore/src/alpaka/LSTEvent.h b/RecoTracker/LSTCore/src/alpaka/LSTEvent.h new file mode 100644 index 0000000000000..59f249aa9405f --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/LSTEvent.h @@ -0,0 +1,195 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_LSTEvent_h +#define RecoTracker_LSTCore_src_alpaka_LSTEvent_h + +#include + +#include "RecoTracker/LSTCore/interface/HitsHostCollection.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/QuintupletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/SegmentsHostCollection.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesHostCollection.h" +#include "RecoTracker/LSTCore/interface/TripletsHostCollection.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesHostCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesHostCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/alpaka/LST.h" +#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/PixelQuintupletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/PixelTripletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/QuintupletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TrackCandidatesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/TripletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/ModulesDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/alpaka/ObjectRangesDeviceCollection.h" +#include 
"RecoTracker/LSTCore/interface/alpaka/EndcapGeometryDevDeviceCollection.h" + +#include "Hit.h" +#include "Kernels.h" + +#include "HeterogeneousCore/AlpakaInterface/interface/host.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + class LSTEvent { + private: + Queue& queue_; + + std::array n_minidoublets_by_layer_barrel_{}; + std::array n_minidoublets_by_layer_endcap_{}; + std::array n_segments_by_layer_barrel_{}; + std::array n_segments_by_layer_endcap_{}; + std::array n_triplets_by_layer_barrel_{}; + std::array n_triplets_by_layer_endcap_{}; + std::array n_quintuplets_by_layer_barrel_{}; + std::array n_quintuplets_by_layer_endcap_{}; + unsigned int nTotalSegments_; + + //Device stuff + std::optional rangesDC_; + std::optional hitsDC_; + std::optional miniDoubletsDC_; + std::optional segmentsDC_; + std::optional tripletsDC_; + std::optional quintupletsDC_; + std::optional trackCandidatesDC_; + std::optional pixelTripletsDC_; + std::optional pixelQuintupletsDC_; + + //CPU interface stuff + std::optional rangesHC_; + std::optional hitsHC_; + std::optional miniDoubletsHC_; + std::optional segmentsHC_; + std::optional tripletsHC_; + std::optional trackCandidatesHC_; + std::optional modulesHC_; + std::optional quintupletsHC_; + std::optional pixelTripletsHC_; + std::optional pixelQuintupletsHC_; + + const uint16_t nModules_; + const uint16_t nLowerModules_; + const unsigned int nPixels_; + const unsigned int nEndCapMap_; + ModulesDeviceCollection const& modules_; + PixelMap const& pixelMapping_; + EndcapGeometryDevDeviceCollection const& endcapGeometry_; + bool addObjects_; + + public: + // Constructor used for CMSSW integration. Uses an external queue. 
+ LSTEvent(bool verbose, Queue& q, const LSTESData* deviceESData) + : queue_(q), + nModules_(deviceESData->nModules), + nLowerModules_(deviceESData->nLowerModules), + nPixels_(deviceESData->nPixels), + nEndCapMap_(deviceESData->nEndCapMap), + modules_(*deviceESData->modules), + pixelMapping_(*deviceESData->pixelMapping), + endcapGeometry_(*deviceESData->endcapGeometry), + addObjects_(verbose) {} + void initSync(); // synchronizes, for standalone usage + void resetEventSync(); // synchronizes, for standalone usage + void wait() const { alpaka::wait(queue_); } + + // Calls the appropriate hit function, then increments the counter + void addHitToEvent(std::vector const& x, + std::vector const& y, + std::vector const& z, + std::vector const& detId, + std::vector const& idxInNtuple); + void addPixelSegmentToEvent(std::vector const& hitIndices0, + std::vector const& hitIndices1, + std::vector const& hitIndices2, + std::vector const& hitIndices3, + std::vector const& dPhiChange, + std::vector const& ptIn, + std::vector const& ptErr, + std::vector const& px, + std::vector const& py, + std::vector const& pz, + std::vector const& eta, + std::vector const& etaErr, + std::vector const& phi, + std::vector const& charge, + std::vector const& seedIdx, + std::vector const& superbin, + std::vector const& pixelType, + std::vector const& isQuad); + + void createMiniDoublets(); + void createSegmentsWithModuleMap(); + void createTriplets(); + void createTrackCandidates(bool no_pls_dupclean, bool tc_pls_triplets); + void createPixelTriplets(); + void createQuintuplets(); + void pixelLineSegmentCleaning(bool no_pls_dupclean); + void createPixelQuintuplets(); + + // functions that map the objects to the appropriate modules + void addMiniDoubletsToEventExplicit(); + void addSegmentsToEventExplicit(); + void addQuintupletsToEventExplicit(); + void addTripletsToEventExplicit(); + void resetObjectsInModule(); + + unsigned int getNumberOfMiniDoublets(); + unsigned int 
getNumberOfMiniDoubletsByLayer(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfMiniDoubletsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfSegments(); + unsigned int getNumberOfSegmentsByLayer(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfSegmentsByLayerEndcap(unsigned int layer); + + unsigned int getNumberOfTriplets(); + unsigned int getNumberOfTripletsByLayer(unsigned int layer); + unsigned int getNumberOfTripletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfTripletsByLayerEndcap(unsigned int layer); + + int getNumberOfPixelTriplets(); + int getNumberOfPixelQuintuplets(); + + unsigned int getNumberOfQuintuplets(); + unsigned int getNumberOfQuintupletsByLayer(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerBarrel(unsigned int layer); + unsigned int getNumberOfQuintupletsByLayerEndcap(unsigned int layer); + + int getNumberOfTrackCandidates(); + int getNumberOfPT5TrackCandidates(); + int getNumberOfPT3TrackCandidates(); + int getNumberOfPLSTrackCandidates(); + int getNumberOfPixelTrackCandidates(); + int getNumberOfT5TrackCandidates(); + + // sync adds alpaka::wait at the end of filling a buffer during lazy fill + // (has no effect on repeated calls) + // set to false may allow faster operation with concurrent calls of get* + // HANDLE WITH CARE + template + typename TSoA::ConstView getHits(bool inCMSSW = false, bool sync = true); + template + ObjectRangesConst getRanges(bool sync = true); + template + typename TSoA::ConstView getMiniDoublets(bool sync = true); + template + typename TSoA::ConstView getSegments(bool sync = true); + template + typename TSoA::ConstView getTriplets(bool sync = true); + template + typename TSoA::ConstView getQuintuplets(bool sync = true); + template + PixelTripletsConst getPixelTriplets(bool sync = true); + template + PixelQuintupletsConst 
getPixelQuintuplets(bool sync = true); + const TrackCandidatesConst& getTrackCandidates(bool inCMSSW = false, bool sync = true); + template + typename TSoA::ConstView getModules(bool sync = true); + }; + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h new file mode 100644 index 0000000000000..0a0abff8b6986 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/MiniDoublet.h @@ -0,0 +1,914 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_MiniDoublet_h +#define RecoTracker_LSTCore_src_alpaka_MiniDoublet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/MiniDoubletsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +#include "Hit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addMDToMemory(TAcc const& acc, + MiniDoublets mds, + HitsConst hits, + ModulesConst modules, + unsigned int lowerHitIdx, + unsigned int upperHitIdx, + uint16_t lowerModuleIdx, + float dz, + float dPhi, + float dPhiChange, + float shiftedX, + float shiftedY, + float shiftedZ, + float noShiftedDphi, + float noShiftedDPhiChange, + unsigned int idx) { + //the index into which this MD needs to be written will be computed in the kernel + //nMDs variable will be incremented in the kernel, no need to worry about that here + + mds.moduleIndices()[idx] = lowerModuleIdx; + unsigned int anchorHitIndex, outerHitIndex; + if (modules.moduleType()[lowerModuleIdx] == PS and modules.moduleLayerType()[lowerModuleIdx] == Strip) { + mds.anchorHitIndices()[idx] = upperHitIdx; + mds.outerHitIndices()[idx] = lowerHitIdx; + + 
anchorHitIndex = upperHitIdx; + outerHitIndex = lowerHitIdx; + } else { + mds.anchorHitIndices()[idx] = lowerHitIdx; + mds.outerHitIndices()[idx] = upperHitIdx; + + anchorHitIndex = lowerHitIdx; + outerHitIndex = upperHitIdx; + } + + mds.dphichanges()[idx] = dPhiChange; + + mds.dphis()[idx] = dPhi; + mds.dzs()[idx] = dz; + mds.shiftedXs()[idx] = shiftedX; + mds.shiftedYs()[idx] = shiftedY; + mds.shiftedZs()[idx] = shiftedZ; + + mds.noShiftedDphis()[idx] = noShiftedDphi; + mds.noShiftedDphiChanges()[idx] = noShiftedDPhiChange; + + mds.anchorX()[idx] = hits.xs()[anchorHitIndex]; + mds.anchorY()[idx] = hits.ys()[anchorHitIndex]; + mds.anchorZ()[idx] = hits.zs()[anchorHitIndex]; + mds.anchorRt()[idx] = hits.rts()[anchorHitIndex]; + mds.anchorPhi()[idx] = hits.phis()[anchorHitIndex]; + mds.anchorEta()[idx] = hits.etas()[anchorHitIndex]; + mds.anchorHighEdgeX()[idx] = hits.highEdgeXs()[anchorHitIndex]; + mds.anchorHighEdgeY()[idx] = hits.highEdgeYs()[anchorHitIndex]; + mds.anchorLowEdgeX()[idx] = hits.lowEdgeXs()[anchorHitIndex]; + mds.anchorLowEdgeY()[idx] = hits.lowEdgeYs()[anchorHitIndex]; + mds.anchorHighEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorHighEdgeY()[idx], mds.anchorHighEdgeX()[idx]); + mds.anchorLowEdgePhi()[idx] = alpaka::math::atan2(acc, mds.anchorLowEdgeY()[idx], mds.anchorLowEdgeX()[idx]); + + mds.outerX()[idx] = hits.xs()[outerHitIndex]; + mds.outerY()[idx] = hits.ys()[outerHitIndex]; + mds.outerZ()[idx] = hits.zs()[outerHitIndex]; + mds.outerRt()[idx] = hits.rts()[outerHitIndex]; + mds.outerPhi()[idx] = hits.phis()[outerHitIndex]; + mds.outerEta()[idx] = hits.etas()[outerHitIndex]; + mds.outerHighEdgeX()[idx] = hits.highEdgeXs()[outerHitIndex]; + mds.outerHighEdgeY()[idx] = hits.highEdgeYs()[outerHitIndex]; + mds.outerLowEdgeX()[idx] = hits.lowEdgeXs()[outerHitIndex]; + mds.outerLowEdgeY()[idx] = hits.lowEdgeYs()[outerHitIndex]; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules(ModulesConst modules, uint16_t moduleIndex) { + 
// The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + short subdet = modules.subdets()[moduleIndex]; + short layer = modules.layers()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + short rod = modules.rods()[moduleIndex]; + + if (subdet == Barrel) { + if ((side != Center and layer == 3) or (side == NegZ and layer == 2 and rod > 5) or + (side == PosZ and layer == 2 and rod < 8) or (side == NegZ and layer == 1 and rod > 9) or + (side == PosZ and layer == 1 and rod < 4)) + return true; + else + return false; + } else + return false; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize(ModulesConst modules, uint16_t moduleIndex) { + float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + float miniDeltaEndcap[5][15]; + + for (size_t i = 0; i < 5; i++) { + for (size_t j = 0; j < 15; j++) { + if (i == 0 || i == 1) { + if (j < 10) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } else if (i == 2 || i == 3) { + if (j < 8) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } else { + if (j < 9) { + miniDeltaEndcap[i][j] = 0.4f; + } else { + miniDeltaEndcap[i][j] = 0.18f; + } + } + } + } + + unsigned int iL = modules.layers()[moduleIndex] - 1; + unsigned int iR = modules.rings()[moduleIndex] - 1; + short subdet = modules.subdets()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules(modules, moduleIndex)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = 
miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float dPhiThreshold( + TAcc const& acc, float rt, ModulesConst modules, uint16_t moduleIndex, float dPhi = 0, float dz = 0) { + // ================================================================= + // Various constants + // ================================================================= + //mean of the horizontal layer position in y; treat this as R below + + // ================================================================= + // Computing some components that make up the cut threshold + // ================================================================= + + unsigned int iL = modules.layers()[moduleIndex] - 1; + const float miniSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rt * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float rLayNominal = + ((modules.subdets()[moduleIndex] == Barrel) ? kMiniRminMeanBarrel[iL] : kMiniRminMeanEndcap[iL]); + const float miniPVoff = 0.1f / rLayNominal; + const float miniMuls = ((modules.subdets()[moduleIndex] == Barrel) ? kMiniMulsPtScaleBarrel[iL] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[iL] * 3.f / ptCut); + const bool isTilted = modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] != Center; + //the lower module is sent in irrespective of its layer type. We need to fetch the drdz properly + + float drdz; + if (isTilted) { + if (modules.moduleType()[moduleIndex] == PS and modules.moduleLayerType()[moduleIndex] == Strip) { + drdz = modules.drdzs()[moduleIndex]; + } else { + drdz = modules.drdzs()[modules.partnerModuleIndices()[moduleIndex]]; + } + } else { + drdz = 0; + } + const float miniTilt2 = ((isTilted) ? 
(0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdz * drdz) / + (1.f + drdz * drdz) / moduleGapSize(modules, moduleIndex) + : 0); + + // Compute luminous region requirement for endcap + const float miniLum = alpaka::math::abs(acc, dPhi * kDeltaZLum / dz); // Balaji's new error + + // ================================================================= + // Return the threshold value + // ================================================================= + // Following condition is met if the module is central and flatly lying + if (modules.subdets()[moduleIndex] == Barrel and modules.sides()[moduleIndex] == Center) { + return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff); + } + // Following condition is met if the module is central and tilted + else if (modules.subdets()[moduleIndex] == Barrel and + modules.sides()[moduleIndex] != Center) //all types of tilted modules + { + return miniSlope + + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniTilt2 * miniSlope * miniSlope); + } + // If not barrel, it is Endcap + else { + return miniSlope + alpaka::math::sqrt(acc, miniMuls * miniMuls + miniPVoff * miniPVoff + miniLum * miniLum); + } + } + + template + ALPAKA_FN_INLINE ALPAKA_FN_ACC void shiftStripHits(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float* shiftedCoords, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + // This is the strip shift scheme that is explained in http://uaf-10.t2.ucsd.edu/~phchang/talks/PhilipChang20190607_SDL_Update.pdf (see backup slides) + // The main feature of this shifting is that the strip hits are shifted to be "aligned" in the line of sight from interaction point to the the pixel hit. 
+ // (since pixel hit is well defined in 3-d) + // The strip hit is shifted along the strip detector to be placed in a guessed position where we think they would have actually crossed + // The size of the radial direction shift due to module separation gap is computed in "radial" size, while the shift is done along the actual strip orientation + // This means that there may be very very subtle edge effects coming from whether the strip hit is center of the module or the at the edge of the module + // But this should be relatively minor effect + + // dependent variables for this if statement + // lowerModule + // lowerHit + // upperHit + // endcapGeometry + // tiltedGeometry + + // Some variables relevant to the function + float xp; // pixel x (pixel hit x) + float yp; // pixel y (pixel hit y) + float zp; // pixel y (pixel hit y) + float rtp; // pixel y (pixel hit y) + float xa; // "anchor" x (the anchor position on the strip module plane from pixel hit) + float ya; // "anchor" y (the anchor position on the strip module plane from pixel hit) + float xo; // old x (before the strip hit is moved up or down) + float yo; // old y (before the strip hit is moved up or down) + float xn; // new x (after the strip hit is moved up or down) + float yn; // new y (after the strip hit is moved up or down) + float abszn; // new z in absolute value + float zn; // new z with the sign (+/-) accounted + float angleA; // in r-z plane the theta of the pixel hit in polar coordinate is the angleA + float angleB; // this is the angle of tilted module in r-z plane ("drdz"), for endcap this is 90 degrees + bool isEndcap; // If endcap, drdz = infinity + float moduleSeparation; + float drprime; // The radial shift size in x-y plane projection + float drprime_x; // x-component of drprime + float drprime_y; // y-component of drprime + const float& slope = + modules.dxdys()[lowerModuleIndex]; // The slope of the possible strip hits for a given module in x-y plane + float absArctanSlope; + float 
angleM; // the angle M is the angle of rotation of the module in x-y plane if the possible strip hits are along the x-axis, then angleM = 0, and if the possible strip hits are along y-axis angleM = 90 degrees + float absdzprime; // The distance between the two points after shifting + const float& drdz_ = modules.drdzs()[lowerModuleIndex]; + // Assign hit pointers based on their hit type + if (modules.moduleType()[lowerModuleIndex] == PS) { + // TODO: This is somewhat of an mystery.... somewhat confused why this is the case + if (modules.subdets()[lowerModuleIndex] == Barrel ? modules.moduleLayerType()[lowerModuleIndex] != Pixel + : modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + xo = xUpper; + yo = yUpper; + xp = xLower; + yp = yLower; + zp = zLower; + rtp = rtLower; + } else { + xo = xLower; + yo = yLower; + xp = xUpper; + yp = yUpper; + zp = zUpper; + rtp = rtUpper; + } + } else { + xo = xUpper; + yo = yUpper; + xp = xLower; + yp = yLower; + zp = zLower; + rtp = rtLower; + } + + // If it is endcap some of the math gets simplified (and also computers don't like infinities) + isEndcap = modules.subdets()[lowerModuleIndex] == Endcap; + + // NOTE: TODO: Keep in mind that the sin(atan) function can be simplified to something like x / sqrt(1 + x^2) and similar for cos + // I am not sure how slow sin, atan, cos, functions are in c++. If x / sqrt(1 + x^2) are faster change this later to reduce arithmetic computation time + angleA = alpaka::math::abs(acc, alpaka::math::atan(acc, rtp / zp)); + angleB = + ((isEndcap) + ? 
kPi / 2.f + : alpaka::math::atan( + acc, + drdz_)); // The tilt module on the positive z-axis has negative drdz slope in r-z plane and vice versa + + moduleSeparation = moduleGapSize(modules, lowerModuleIndex); + + // Sign flips if the pixel is later layer + if (modules.moduleType()[lowerModuleIndex] == PS and modules.moduleLayerType()[lowerModuleIndex] != Pixel) { + moduleSeparation *= -1; + } + + drprime = (moduleSeparation / alpaka::math::sin(acc, angleA + angleB)) * alpaka::math::sin(acc, angleA); + + // Compute arctan of the slope and take care of the slope = infinity case + absArctanSlope = ((slope != kVerticalModuleSlope) ? fabs(alpaka::math::atan(acc, slope)) : kPi / 2.f); + + // Depending on which quadrant the pixel hit lies, we define the angleM by shifting them slightly differently + if (xp > 0 and yp > 0) { + angleM = absArctanSlope; + } else if (xp > 0 and yp < 0) { + angleM = kPi - absArctanSlope; + } else if (xp < 0 and yp < 0) { + angleM = kPi + absArctanSlope; + } else // if (xp < 0 and yp > 0) + { + angleM = 2.f * kPi - absArctanSlope; + } + + // Then since the angleM sign is taken care of properly + drprime_x = drprime * alpaka::math::sin(acc, angleM); + drprime_y = drprime * alpaka::math::cos(acc, angleM); + + // The new anchor position is + xa = xp + drprime_x; + ya = yp + drprime_y; + + // Compute the new strip hit position (if the slope value is in special condition take care of the exceptions) + if (slope == + kVerticalModuleSlope) // Designated for tilted module when the slope is infinity (module lying along y-axis) + { + xn = xa; // New x point is simply where the anchor is + yn = yo; // No shift in y + } else if (slope == 0) { + xn = xo; // New x point is simply where the anchor is + yn = ya; // No shift in y + } else { + xn = (slope * xa + (1.f / slope) * xo - ya + yo) / (slope + (1.f / slope)); // new xn + yn = (xn - xa) * slope + ya; // new yn + } + + // Computing new Z position + absdzprime = alpaka::math::abs( + acc, + 
moduleSeparation / alpaka::math::sin(acc, angleA + angleB) * + alpaka::math::cos( + acc, + angleA)); // module separation sign is for shifting in radial direction for z-axis direction take care of the sign later + + // Depending on which one as closer to the interactin point compute the new z wrt to the pixel properly + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + abszn = alpaka::math::abs(acc, zp) + absdzprime; + } else { + abszn = alpaka::math::abs(acc, zp) - absdzprime; + } + + zn = abszn * ((zp > 0) ? 1 : -1); // Apply the sign of the zn + + shiftedCoords[0] = xn; + shiftedCoords[1] = yn; + shiftedCoords[2] = zn; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoBarrel(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + dz = zLower - zUpper; + const float dzCut = modules.moduleType()[lowerModuleIndex] == PS ? 2.f : 10.f; + const float sign = ((dz > 0) - (dz < 0)) * ((zLower > 0) - (zLower < 0)); + const float invertedcrossercut = (alpaka::math::abs(acc, dz) > 2) * sign; + + if ((alpaka::math::abs(acc, dz) >= dzCut) || (invertedcrossercut > 0)) + return false; + + float miniCut = 0; + + miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel + ? 
dPhiThreshold(acc, rtLower, modules, lowerModuleIndex) + : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex); + + // Cut #2: dphi difference + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3085 + float xn = 0.f, yn = 0.f; // , zn = 0; + float shiftedRt2; + if (modules.sides()[lowerModuleIndex] != Center) // If barrel and not center it is tilted + { + // Shift the hits and calculate new xn, yn position + float shiftedCoords[3]; + shiftStripHits(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + shiftedCoords, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + xn = shiftedCoords[0]; + yn = shiftedCoords[1]; + + // Lower or the upper hit needs to be modified depending on which one was actually shifted + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + shiftedRt2 = xn * xn + yn * yn; + + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); //function from Hit.cc + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zLower; + shiftedRt2 = xn * xn + yn * yn; + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + } else { + shiftedX = 0; + shiftedY = 0; + shiftedZ = 0; + dPhi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + noShiftedDphi = dPhi; + } + + if (alpaka::math::abs(acc, dPhi) >= miniCut) + return false; + + // Cut #3: The dphi change going from lower Hit to upper Hit + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3076 + if (modules.sides()[lowerModuleIndex] != Center) { + // When it is tilted, use the new shifted positions + // TODO: This is somewhat of an mystery.... 
somewhat confused why this is the case + if (modules.moduleLayerType()[lowerModuleIndex] != Pixel) { + // dPhi Change should be calculated so that the upper hit has higher rt. + // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. + // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) + // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) + // setdeltaPhiChange(lowerHit.rt() < upperHitMod.rt() ? lowerHit.deltaPhiChange(upperHitMod) : upperHitMod.deltaPhiChange(lowerHit)); + + dPhiChange = (rtLower * rtLower < shiftedRt2) ? deltaPhiChange(acc, xLower, yLower, shiftedX, shiftedY) + : deltaPhiChange(acc, shiftedX, shiftedY, xLower, yLower); + noShiftedDphiChange = rtLower < rtUpper ? deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + } else { + // dPhi Change should be calculated so that the upper hit has higher rt. + // In principle, this kind of check rt_lower < rt_upper should not be necessary because the hit shifting should have taken care of this. + // (i.e. the strip hit is shifted to be aligned in the line of sight from interaction point to pixel hit of PS module guaranteeing rt ordering) + // But I still placed this check for safety. (TODO: After checking explicitly if not needed remove later?) + + dPhiChange = (shiftedRt2 < rtUpper * rtUpper) ? deltaPhiChange(acc, shiftedX, shiftedY, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, shiftedX, shiftedY); + noShiftedDphiChange = rtLower < rtUpper ? 
deltaPhiChange(acc, xLower, yLower, xUpper, yUpper) + : deltaPhiChange(acc, xUpper, yUpper, xLower, yLower); + } + } else { + // When it is flat lying module, whichever is the lowerSide will always have rt lower + dPhiChange = deltaPhiChange(acc, xLower, yLower, xUpper, yUpper); + noShiftedDphiChange = dPhiChange; + } + + return alpaka::math::abs(acc, dPhiChange) < miniCut; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgoEndcap(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& drt, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphichange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + // There are series of cuts that applies to mini-doublet in a "endcap" region + // Cut #1 : dz cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3093 + // For PS module in case when it is tilted a different dz (after the strip hit shift) is calculated later. + + float dz = zLower - zUpper; // Not const since later it might change depending on the type of module + + const float dzCut = 1.f; + + if (alpaka::math::abs(acc, dz) >= dzCut) + return false; + // Cut #2 : drt cut. The dz difference can't be larger than 1cm. (max separation is 4mm for modules in the endcap) + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3100 + const float drtCut = modules.moduleType()[lowerModuleIndex] == PS ? 
2.f : 10.f; + drt = rtLower - rtUpper; + if (alpaka::math::abs(acc, drt) >= drtCut) + return false; + // The new scheme shifts strip hits to be "aligned" along the line of sight from interaction point to the pixel hit (if it is PS modules) + float xn = 0, yn = 0, zn = 0; + + float shiftedCoords[3]; + shiftStripHits(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + shiftedCoords, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + + xn = shiftedCoords[0]; + yn = shiftedCoords[1]; + zn = shiftedCoords[2]; + + if (modules.moduleType()[lowerModuleIndex] == PS) { + // Appropriate lower or upper hit is modified after checking which one was actually shifted + if (modules.moduleLayerType()[lowerModuleIndex] == Pixel) { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + dPhi = deltaPhi(acc, xLower, yLower, shiftedX, shiftedY); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zLower; + dPhi = deltaPhi(acc, shiftedX, shiftedY, xUpper, yUpper); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + } else { + shiftedX = xn; + shiftedY = yn; + shiftedZ = zUpper; + dPhi = deltaPhi(acc, xLower, yLower, xn, yn); + noShiftedDphi = deltaPhi(acc, xLower, yLower, xUpper, yUpper); + } + + // dz needs to change if it is a PS module where the strip hits are shifted in order to properly account for the case when a tilted module falls under "endcap logic" + // if it was an endcap it will have zero effect + if (modules.moduleType()[lowerModuleIndex] == PS) { + dz = modules.moduleLayerType()[lowerModuleIndex] == Pixel ? zLower - zn : zUpper - zn; + } + + float miniCut = 0; + miniCut = modules.moduleLayerType()[lowerModuleIndex] == Pixel + ? 
dPhiThreshold(acc, rtLower, modules, lowerModuleIndex, dPhi, dz) + : dPhiThreshold(acc, rtUpper, modules, lowerModuleIndex, dPhi, dz); + + if (alpaka::math::abs(acc, dPhi) >= miniCut) + return false; + + // Cut #4: Another cut on the dphi after some modification + // Ref to original code: https://github.com/slava77/cms-tkph2-ntuple/blob/184d2325147e6930030d3d1f780136bc2dd29ce6/doubletAnalysis.C#L3119-L3124 + + float dzFrac = alpaka::math::abs(acc, dz) / alpaka::math::abs(acc, zLower); + dPhiChange = dPhi / dzFrac * (1.f + dzFrac); + noShiftedDphichange = noShiftedDphi / dzFrac * (1.f + dzFrac); + + return alpaka::math::abs(acc, dPhiChange) < miniCut; + } + + template + ALPAKA_FN_ACC bool runMiniDoubletDefaultAlgo(TAcc const& acc, + ModulesConst modules, + uint16_t lowerModuleIndex, + uint16_t upperModuleIndex, + unsigned int lowerHitIndex, + unsigned int upperHitIndex, + float& dz, + float& dPhi, + float& dPhiChange, + float& shiftedX, + float& shiftedY, + float& shiftedZ, + float& noShiftedDphi, + float& noShiftedDphiChange, + float xLower, + float yLower, + float zLower, + float rtLower, + float xUpper, + float yUpper, + float zUpper, + float rtUpper) { + if (modules.subdets()[lowerModuleIndex] == Barrel) { + return runMiniDoubletDefaultAlgoBarrel(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } else { + return runMiniDoubletDefaultAlgoEndcap(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitIndex, + upperHitIndex, + dz, + dPhi, + dPhiChange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + } + } + + struct CreateMiniDoublets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, 
+ HitsConst hits, + HitsRangesConst hitsRanges, + MiniDoublets mds, + MiniDoubletsOccupancy mdsOccupancy, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t lowerModuleIndex = globalThreadIdx[1]; lowerModuleIndex < modules.nLowerModules(); + lowerModuleIndex += gridThreadExtent[1]) { + uint16_t upperModuleIndex = modules.partnerModuleIndices()[lowerModuleIndex]; + int nLowerHits = hitsRanges.hitRangesnLower()[lowerModuleIndex]; + int nUpperHits = hitsRanges.hitRangesnUpper()[lowerModuleIndex]; + if (hitsRanges.hitRangesLower()[lowerModuleIndex] == -1) + continue; + unsigned int upHitArrayIndex = hitsRanges.hitRangesUpper()[lowerModuleIndex]; + unsigned int loHitArrayIndex = hitsRanges.hitRangesLower()[lowerModuleIndex]; + int limit = nUpperHits * nLowerHits; + + for (int hitIndex = globalThreadIdx[2]; hitIndex < limit; hitIndex += gridThreadExtent[2]) { + int lowerHitIndex = hitIndex / nUpperHits; + int upperHitIndex = hitIndex % nUpperHits; + if (upperHitIndex >= nUpperHits) + continue; + if (lowerHitIndex >= nLowerHits) + continue; + unsigned int lowerHitArrayIndex = loHitArrayIndex + lowerHitIndex; + float xLower = hits.xs()[lowerHitArrayIndex]; + float yLower = hits.ys()[lowerHitArrayIndex]; + float zLower = hits.zs()[lowerHitArrayIndex]; + float rtLower = hits.rts()[lowerHitArrayIndex]; + unsigned int upperHitArrayIndex = upHitArrayIndex + upperHitIndex; + float xUpper = hits.xs()[upperHitArrayIndex]; + float yUpper = hits.ys()[upperHitArrayIndex]; + float zUpper = hits.zs()[upperHitArrayIndex]; + float rtUpper = hits.rts()[upperHitArrayIndex]; + + float dz, dphi, dphichange, shiftedX, shiftedY, shiftedZ, noShiftedDphi, noShiftedDphiChange; + bool success = runMiniDoubletDefaultAlgo(acc, + modules, + lowerModuleIndex, + upperModuleIndex, + lowerHitArrayIndex, + upperHitArrayIndex, + dz, + dphi, + dphichange, + shiftedX, + shiftedY, + shiftedZ, + 
noShiftedDphi, + noShiftedDphiChange, + xLower, + yLower, + zLower, + rtLower, + xUpper, + yUpper, + zUpper, + rtUpper); + if (success) { + int totOccupancyMDs = alpaka::atomicAdd( + acc, &mdsOccupancy.totOccupancyMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyMDs >= (ranges.miniDoubletModuleOccupancy()[lowerModuleIndex])) { +#ifdef WARNINGS + printf("Mini-doublet excess alert! Module index = %d\n", lowerModuleIndex); +#endif + } else { + int mdModuleIndex = + alpaka::atomicAdd(acc, &mdsOccupancy.nMDs()[lowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int mdIndex = ranges.miniDoubletModuleIndices()[lowerModuleIndex] + mdModuleIndex; + + addMDToMemory(acc, + mds, + hits, + modules, + lowerHitArrayIndex, + upperHitArrayIndex, + lowerModuleIndex, + dz, + dphi, + dphichange, + shiftedX, + shiftedY, + shiftedZ, + noShiftedDphi, + noShiftedDphiChange, + mdIndex); + } + } + } + } + } + }; + + struct CreateMDArrayRangesGPU { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, ModulesConst modules, ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Declare variables in shared memory and set to 0 + int& nTotalMDs = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalMDs = 0; + } + alpaka::syncBlockThreads(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && 
module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 49; + else if (category_number == 0 && eta_number == 1) + occupancy = 42; + else if (category_number == 0 && eta_number == 2) + occupancy = 37; + else if (category_number == 0 && eta_number == 3) + occupancy = 41; + else if (category_number == 1) + occupancy = 100; + else if (category_number == 2 && eta_number == 1) + occupancy = 16; + else if (category_number == 2 && eta_number == 2) + occupancy = 19; + else if (category_number == 3 && eta_number == 1) + occupancy = 14; + else if (category_number == 3 && eta_number == 2) + occupancy = 20; + else if (category_number == 3 && eta_number == 3) + occupancy = 25; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createMDArrayRangesGPU! 
Module index = %i\n", i); +#endif + } + + unsigned int nTotMDs = alpaka::atomicAdd(acc, &nTotalMDs, occupancy, alpaka::hierarchy::Threads{}); + + ranges.miniDoubletModuleIndices()[i] = nTotMDs; + ranges.miniDoubletModuleOccupancy()[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + ranges.miniDoubletModuleIndices()[modules.nLowerModules()] = nTotalMDs; + ranges.nTotalMDs() = nTotalMDs; + } + } + }; + + struct AddMiniDoubletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsOccupancy mdsOccupancy, + ObjectRanges ranges, + HitsRangesConst hitsRanges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (mdsOccupancy.nMDs()[i] == 0 or hitsRanges.hitRanges()[i][0] == -1) { + ranges.mdRanges()[i][0] = -1; + ranges.mdRanges()[i][1] = -1; + } else { + ranges.mdRanges()[i][0] = ranges.miniDoubletModuleIndices()[i]; + ranges.mdRanges()[i][1] = ranges.miniDoubletModuleIndices()[i] + mdsOccupancy.nMDs()[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h new file mode 100644 index 0000000000000..42605c80e9434 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetwork.h @@ -0,0 +1,165 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_NeuralNetwork_h +#define RecoTracker_LSTCore_src_alpaka_NeuralNetwork_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include 
"RecoTracker/LSTCore/interface/HitsSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +#include "NeuralNetworkWeights.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + namespace t5dnn { + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float runInference(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + TripletsConst triplets, + const float* xVec, + const float* yVec, + const unsigned int* mdIndices, + const uint16_t* lowerModuleIndices, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float innerRadius, + float outerRadius, + float bridgeRadius) { + // Unpack x-coordinates of hits + float x1 = xVec[0]; + float x2 = xVec[1]; + float x3 = xVec[2]; + float x4 = xVec[3]; + float x5 = xVec[4]; + // Unpack y-coordinates of hits + float y1 = yVec[0]; + float y2 = yVec[1]; + float y3 = yVec[2]; + float y4 = yVec[3]; + float y5 = yVec[4]; + // Unpack module indices + unsigned int mdIndex1 = mdIndices[0]; + unsigned int mdIndex2 = mdIndices[1]; + unsigned int mdIndex3 = mdIndices[2]; + unsigned int mdIndex4 = mdIndices[3]; + unsigned int mdIndex5 = mdIndices[4]; + // Unpack module indices + uint16_t lowerModuleIndex1 = lowerModuleIndices[0]; + uint16_t lowerModuleIndex2 = lowerModuleIndices[1]; + uint16_t lowerModuleIndex3 = lowerModuleIndices[2]; + uint16_t lowerModuleIndex4 = lowerModuleIndices[3]; + uint16_t lowerModuleIndex5 = lowerModuleIndices[4]; + // Compute some convenience variables + short layer2_adjustment = 0; + if (modules.layers()[lowerModuleIndex1] == 1) { + layer2_adjustment = 1; // get upper segment to be in second layer + } + unsigned int md_idx_for_t5_eta_phi = + segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]][layer2_adjustment]; + bool is_endcap1 = (modules.subdets()[lowerModuleIndex1] == 4); // true if anchor hit 1 is in the endcap + 
bool is_endcap2 = (modules.subdets()[lowerModuleIndex2] == 4); // true if anchor hit 2 is in the endcap + bool is_endcap3 = (modules.subdets()[lowerModuleIndex3] == 4); // true if anchor hit 3 is in the endcap + bool is_endcap4 = (modules.subdets()[lowerModuleIndex4] == 4); // true if anchor hit 4 is in the endcap + bool is_endcap5 = (modules.subdets()[lowerModuleIndex5] == 4); // true if anchor hit 5 is in the endcap + + // Build DNN input vector (corresponding output N-tuple branch noted in parenthetical in comment) + float x[38] = { + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * innerRadius), // inner T3 pT (t3_pt) + mds.anchorEta()[mdIndex1], // inner T3 anchor hit 1 eta (t3_0_eta) + mds.anchorPhi()[mdIndex1], // inner T3 anchor hit 1 phi (t3_0_phi) + mds.anchorZ()[mdIndex1], // inner T3 anchor hit 1 z (t3_0_z) + alpaka::math::sqrt(acc, x1 * x1 + y1 * y1), // inner T3 anchor hit 1 r (t3_0_r) + float(modules.layers()[lowerModuleIndex1] + 6 * is_endcap1), // inner T3 anchor hit 1 layer (t3_0_layer) + mds.anchorEta()[mdIndex2], // inner T3 anchor hit 2 eta (t3_2_eta) + mds.anchorPhi()[mdIndex2], // inner T3 anchor hit 2 phi (t3_2_phi) + mds.anchorZ()[mdIndex2], // inner T3 anchor hit 2 z (t3_2_z) + alpaka::math::sqrt(acc, x2 * x2 + y2 * y2), // inner T3 anchor hit 2 r (t3_2_r) + float(modules.layers()[lowerModuleIndex2] + 6 * is_endcap2), // inner T3 anchor hit 2 layer (t3_2_layer) + mds.anchorEta()[mdIndex3], // inner T3 anchor hit 3 eta (t3_4_eta) + mds.anchorPhi()[mdIndex3], // inner T3 anchor hit 3 phi (t3_4_phi) + mds.anchorZ()[mdIndex3], // inner T3 anchor hit 3 z (t3_4_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // inner T3 anchor hit 3 r (t3_4_r) + float(modules.layers()[lowerModuleIndex3] + 6 * is_endcap3), // inner T3 anchor hit 3 layer (t3_4_layer) + alpaka::math::log10(acc, 2 * k2Rinv1GeVf * outerRadius), // outer T3 pT (t3_pt) + mds.anchorEta()[mdIndex3], // outer T3 anchor hit 4 eta (t3_0_eta) + mds.anchorPhi()[mdIndex3], // outer T3 anchor hit 4 
phi (t3_0_phi) + mds.anchorZ()[mdIndex3], // outer T3 anchor hit 3 eta (t3_0_z) + alpaka::math::sqrt(acc, x3 * x3 + y3 * y3), // outer T3 anchor hit 3 r (t3_0_r) + float(modules.layers()[lowerModuleIndex3] + 6 * is_endcap3), // outer T3 anchor hit 3 layer (t3_0_layer) + mds.anchorEta()[mdIndex4], // outer T3 anchor hit 4 eta (t3_2_eta) + mds.anchorPhi()[mdIndex4], // outer T3 anchor hit 4 phi (t3_2_phi) + mds.anchorZ()[mdIndex4], // outer T3 anchor hit 4 z (t3_2_z) + alpaka::math::sqrt(acc, x4 * x4 + y4 * y4), // outer T3 anchor hit 4 r (t3_2_r) + float(modules.layers()[lowerModuleIndex4] + 6 * is_endcap4), // outer T3 anchor hit 4 layer (t3_2_layer) + mds.anchorEta()[mdIndex5], // outer T3 anchor hit 5 eta (t3_4_eta) + mds.anchorPhi()[mdIndex5], // outer T3 anchor hit 5 phi (t3_4_phi) + mds.anchorZ()[mdIndex5], // outer T3 anchor hit 5 z (t3_4_z) + alpaka::math::sqrt(acc, x5 * x5 + y5 * y5), // outer T3 anchor hit 5 r (t3_4_r) + float(modules.layers()[lowerModuleIndex5] + 6 * is_endcap5), // outer T3 anchor hit 5 layer (t3_4_layer) + alpaka::math::log10(acc, (innerRadius + outerRadius) * k2Rinv1GeVf), // T5 pT (t5_pt) + mds.anchorEta()[md_idx_for_t5_eta_phi], // T5 eta (t5_eta) + mds.anchorPhi()[md_idx_for_t5_eta_phi], // T5 phi (t5_phi) + alpaka::math::log10(acc, innerRadius), // T5 inner radius (t5_innerRadius) + alpaka::math::log10(acc, bridgeRadius), // T5 bridge radius (t5_bridgeRadius) + alpaka::math::log10(acc, outerRadius) // T5 outer radius (t5_outerRadius) + }; + + // (0): Linear(in_features=38, out_features=32, bias=True) => x = x*W_T + b + float x_0[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_0[col] = 0; + for (unsigned int inner = 0; inner < 38; ++inner) { + x_0[col] += x[inner] * wgtT_0[inner][col]; + } + x_0[col] += bias_0[col]; + } + + // (1): ReLU() + float x_1[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_1[col] = (x_0[col] > 0.f) ? 
x_0[col] : 0.f; + } + + // (2): Linear(in_features=32, out_features=32, bias=True) => x = x*W_T + b + float x_2[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_2[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_2[col] += x_1[inner] * wgtT_2[inner][col]; + } + x_2[col] += bias_2[col]; + } + + // (3): ReLU() + float x_3[32]; + for (unsigned int col = 0; col < 32; ++col) { + x_3[col] = (x_2[col] > 0.f) ? x_2[col] : 0.f; + } + + // (4): Linear(in_features=32, out_features=1, bias=True) => x = x*W_T + b + float x_4[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_4[col] = 0; + for (unsigned int inner = 0; inner < 32; ++inner) { + x_4[col] += x_3[inner] * wgtT_4[inner][col]; + } + x_4[col] += bias_4[col]; + } + + // (5): Sigmoid() + float x_5[1]; + for (unsigned int col = 0; col < 1; ++col) { + x_5[col] = alpaka::math::exp(acc, x_4[col]) / (alpaka::math::exp(acc, x_4[col]) + 1); + } + + return x_5[0]; + } + + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h new file mode 100644 index 0000000000000..d5321fea07a6e --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/NeuralNetworkWeights.h @@ -0,0 +1,315 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_NeuralNetworkWeights_h +#define RecoTracker_LSTCore_src_alpaka_NeuralNetworkWeights_h + +#include + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + namespace t5dnn { + + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_0[32] = { + -4.5069356f, -5.8842053f, 1.0793180f, -0.1540973f, -0.4705772f, 6.4027028f, -0.6620818f, -7.0734525f, + 0.6211641f, 4.9630723f, 3.4310920f, -0.8856288f, 4.5843782f, -6.0180559f, 0.0126438f, -1.5725276f, + -0.8549317f, -6.8545237f, -1.2129461f, 3.0617838f, -0.3911322f, 0.0799793f, -2.5398655f, -0.5780622f, + 2.8533990f, -0.1777968f, -2.6457164f, -0.7976936f, 4.5644889f, -2.1747942f, 3.4286616f, -10.1073380f}; + 
ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_0[38][32] = { + {6.1269712f, -10.6625051f, 17.4907818f, -0.0019928f, -3.4468415f, 1.6674044f, -7.8957767f, 2.2077549f, + 9.5517254f, -5.1345053f, -30.1643391f, 4.0148559f, -19.8330841f, -18.3806915f, 0.1334764f, 1.6213616f, + -4.1423774f, -15.3062429f, -1.0209556f, 1.5580219f, 0.7426265f, 0.0033929f, 1.3924170f, 0.9196110f, + -0.8995734f, 1.0594707f, 39.4390869f, 8.7642002f, 28.4583893f, -5.9235659f, 3.7221889f, 14.4167147f}, + {1.7863803f, -0.6068707f, 0.3166098f, -0.0608759f, 0.5939785f, 0.4870262f, -3.1375074f, -17.7147388f, + -0.7231818f, -9.3808413f, 2.2070611f, 15.7461920f, 0.9355862f, 2.3942475f, -0.0671409f, 3.5954301f, + -3.0463996f, -2.0748904f, -0.5450584f, -4.4800100f, 0.6074556f, -0.0161482f, 3.0624702f, -4.5688419f, + 2.9881518f, -0.3714012f, -0.0387531f, -0.7699140f, 4.4028845f, 5.0333014f, -4.7350726f, -8.6568584f}, + {5.6548429f, -0.0207700f, 0.1785973f, 0.0881671f, 0.2530097f, -0.1893259f, -0.1105739f, -0.5183877f, + 1.0728362f, 0.1833011f, 1.7765219f, 0.3127359f, 0.0455277f, -0.1442616f, -0.1048361f, -0.1235604f, + -0.1217661f, -0.5487315f, 0.7575656f, -0.1177454f, -17.0993137f, 0.1628031f, 0.2789381f, 0.5304270f, + 0.0837841f, -3.1120780f, 0.0074821f, -0.1648044f, -0.3395336f, 0.3958135f, 0.8718957f, -1.1980486f}, + {0.2401041f, -0.0585765f, -0.0144584f, 0.0411095f, 0.0752229f, 0.0292672f, -0.2437613f, -1.4396472f, + -0.0971315f, -1.7181139f, 0.2417643f, 2.2030578f, 0.0566049f, 0.1081589f, -0.1060181f, 0.3473758f, + -0.7095683f, -0.0345675f, 0.2794849f, -1.1702278f, 0.2622930f, -0.0072611f, 0.5026371f, -1.2882922f, + -0.4712771f, 0.0597130f, -0.0039970f, -0.6050836f, 0.1554724f, 1.0991164f, -0.4975886f, 0.2597970f}, + {0.0766028f, 0.0218421f, -0.1739017f, -0.0076569f, 0.0384461f, -0.1841756f, 0.9677940f, -3.1114254f, + 2.3830564f, 2.0706992f, -0.9643140f, 0.7361387f, -0.0060253f, -0.1554846f, -0.0831100f, 2.8754771f, + -1.4403527f, -0.5281797f, 0.5157787f, 4.2405987f, 0.4807618f, 0.0217647f, 
-1.2626950f, 0.9145837f, + -0.3931780f, 0.3426280f, -0.0065206f, -0.7510439f, -0.4555758f, 2.7724340f, -1.2173026f, 0.1039017f}, + {0.5685715f, 0.3927337f, 0.4942532f, -0.0671033f, -0.2808350f, -0.0336000f, -1.3983957f, 0.9876546f, + -2.3840380f, 0.7315395f, -2.2009561f, -1.4631602f, -0.4672308f, -0.4994236f, 0.1169335f, -1.1894208f, + -1.2692982f, 0.3303853f, -2.0147655f, -0.9912014f, 1.0042895f, 0.1121151f, -1.0789106f, -2.2821584f, + -6.6459913f, -0.0959398f, -0.0068429f, -2.8177626f, 0.3213172f, -2.6832986f, -4.7613306f, -0.9985733f}, + {1.4419515f, -0.3864825f, -0.6756768f, -0.1273375f, 0.4321181f, 0.3354745f, -0.8236564f, -2.8190827f, + 0.7090831f, 1.9072700f, -3.1834064f, -2.6938572f, 0.5051147f, 1.4382831f, 0.1241910f, -0.7352629f, + 0.7703634f, -1.7556250f, -2.1104112f, 3.0603442f, 1.9873468f, -0.0358815f, -1.0087154f, 3.8253262f, + -0.5466214f, 0.0875162f, 0.2691758f, 0.7121435f, 1.9314718f, -0.1580560f, 3.6484149f, -5.3173709f}, + {6.9104381f, -0.0033664f, -1.4405546f, -0.1768288f, 0.2028089f, -0.1012344f, -4.4735684f, 0.6354278f, + 4.3039737f, 0.2056303f, 1.8338999f, -1.1351355f, 0.1015760f, -0.0733253f, -0.0561627f, 2.5292397f, + 1.6314448f, -0.9333628f, -0.7773662f, 0.8313186f, -0.7829623f, 0.1265118f, 0.5922315f, -0.3463379f, + -1.3269740f, -3.3302619f, -0.0061799f, 2.3374722f, 0.0880938f, 0.7470241f, -0.4205743f, -4.7557602f}, + {0.0380794f, 0.0947470f, 0.0419397f, 0.0582226f, -0.0603404f, 0.0234028f, -0.2575402f, 0.4125248f, + 0.3035339f, 0.2663808f, -0.6092452f, -1.4727812f, 0.0247187f, -0.0539688f, -0.0150413f, 0.2094955f, + 0.5379737f, -0.3255228f, -0.5639279f, 0.0786276f, 0.6703192f, 0.1557026f, -0.2753083f, 1.1463971f, + -0.9372965f, 0.5657740f, 0.0041413f, 0.0870248f, 0.0101520f, -0.8214461f, 0.1212932f, 1.5648646f}, + {-0.0969819f, 0.0137566f, 1.3515147f, -0.0155047f, -0.1416170f, -0.1636726f, 0.5184190f, 0.4732984f, + 0.6815788f, -1.0522166f, -0.4486531f, -0.0516016f, 0.0201894f, -0.0849667f, -0.0861271f, -1.2027841f, + 1.2458711f, 
-0.7061657f, 1.0381308f, -0.3450044f, -0.1300479f, -0.0828402f, 0.6859242f, -1.0575374f, + 0.6947553f, -0.0922188f, 0.0199132f, 0.8038982f, -0.1734094f, -0.1057449f, 1.6305015f, -0.0688597f}, + {-1.8151448f, 0.1024327f, 1.7063105f, 0.1130912f, -0.1081472f, -0.2904744f, -1.3465070f, -1.0455177f, + -0.4581082f, -3.2220871f, 0.5221398f, -5.1637673f, 0.0811146f, -0.1326323f, -0.0379338f, -3.0439703f, + -2.4246936f, -0.3670847f, -3.1256330f, -1.6595014f, -3.4715190f, -0.1526113f, -1.0420206f, 0.9536474f, + -3.2932863f, 1.6048199f, 0.0025162f, -3.6049840f, 0.0604250f, -2.2404826f, 1.8406851f, -3.1381185f}, + {1.2985691f, -1.1044264f, 0.9062797f, -0.0788333f, 0.2694912f, 0.0032800f, -0.0574267f, 0.9734111f, + 1.1532565f, 2.6786125f, -3.8574269f, -2.2871449f, -0.1261243f, 1.0545347f, -0.1454154f, -0.5609738f, + 1.8385800f, -0.8035598f, -1.7668265f, 5.1665063f, 0.7966110f, 0.0940206f, -2.3943975f, 2.3344002f, + 1.0342182f, 0.4806454f, -0.3880928f, 0.6998246f, 1.4011886f, -1.7313483f, 4.9702630f, -6.0058608f}, + {1.0300356f, 0.0616315f, -0.1113776f, -0.1694220f, 0.7159944f, 0.0626456f, 2.0994680f, 0.3452290f, + -3.0487001f, 0.0654031f, -1.1510723f, 0.5370992f, -0.0290704f, -0.0300795f, 0.0751569f, -0.2345951f, + -0.3472281f, 0.4424143f, 1.2444530f, -0.2114656f, 0.7865694f, -0.0709381f, -0.1839961f, -0.0529834f, + 0.5867608f, -3.8793530f, -0.0814745f, -0.6368676f, 0.0361213f, -0.5549288f, 0.5661780f, 1.8374584f}, + {0.3345098f, 0.0068199f, -0.4205509f, -0.1088801f, -0.1043202f, -0.0040804f, 0.3400922f, 0.2673528f, + -0.6050695f, 0.4443954f, -0.4319905f, -0.6044132f, -0.0260679f, 0.0137036f, 0.0765494f, -0.0095099f, + 0.5880439f, -0.0083854f, -0.2407522f, 0.1942379f, 0.6554548f, -0.1322891f, -0.8298992f, 0.7909554f, + 1.0528831f, 0.1970959f, 0.0754069f, -0.0947960f, -0.0279494f, -0.5888316f, 0.8919419f, 0.4828835f}, + {0.3995822f, -0.2139665f, 0.3982936f, -0.1285759f, -0.3445527f, -0.1167238f, -0.1263519f, 0.8393803f, + -0.7758383f, 0.0719291f, -0.0134762f, 0.1715237f, 
0.0796666f, 0.1023507f, -0.1172728f, -1.2364722f, + 1.2592632f, -0.3168479f, 0.7487004f, -1.5170647f, -0.2235429f, -0.1620898f, 1.4064828f, -1.0821995f, + 0.0740103f, -1.0412805f, -0.0621277f, 0.2439800f, 0.2684972f, -1.1661061f, 0.7859434f, -0.6170313f}, + {2.1615884f, 0.1431713f, 0.0642652f, -0.0522325f, -0.2658786f, -0.0245810f, -1.6857448f, -0.6685011f, + -0.6978170f, -0.8716729f, 0.3129902f, -2.5870812f, -0.2855283f, -0.3205920f, -0.0084069f, 1.3182145f, + -0.6923816f, -0.3730274f, -2.3638811f, -1.1128502f, -2.4709859f, 0.1349022f, -0.3574466f, -0.6597407f, + -4.1122031f, 0.2240651f, 0.1806145f, -1.6836300f, -0.0766231f, -3.2611966f, 0.0091456f, -0.0997367f}, + {5.2476101f, -0.1966512f, 4.8935304f, -0.1551689f, 1.6919724f, -0.8324367f, 14.3318472f, -0.3503132f, + 10.3614969f, -9.1522884f, -0.2543063f, -1.8476851f, 16.7961140f, 9.9541416f, -0.0434563f, -9.6973553f, + -5.0469398f, 6.1688442f, 7.6429725f, -7.3149266f, 1.2345183f, 0.1412155f, 0.7114770f, -1.6378664f, + 5.1548996f, 0.3686100f, -45.3027611f, 3.0492647f, -37.3445892f, 2.7421410f, -2.7958770f, -25.2034016f}, + {1.4597454f, -1.0561740f, 0.9751291f, 0.0446527f, 0.3691662f, 0.1006782f, 0.1418435f, 0.8871480f, + 1.1603093f, 2.8034730f, -4.0856910f, -1.9786842f, -0.2206208f, 0.9539357f, 0.0868183f, -0.6811873f, + 1.9642411f, -0.8065316f, -2.0244894f, 5.2936082f, 0.6120632f, -0.1194160f, -2.3925939f, 2.5555069f, + 1.0149733f, 0.4607603f, -0.2197217f, 0.5703423f, 1.4049014f, -1.5900208f, 5.1645074f, -6.0569463f}, + {0.9000676f, -0.0028781f, -0.1967366f, 0.1039593f, 0.7993248f, 0.0655172f, 2.2296758f, 0.4391927f, + -3.0292840f, 0.0334536f, -1.1728534f, 0.3479103f, -0.1190938f, 0.0410203f, 0.1146637f, -0.2958017f, + -0.3240463f, 0.4361866f, 1.0564958f, -0.1989332f, 0.5194008f, -0.0628912f, -0.1733121f, -0.1255383f, + 0.5990249f, -3.7692382f, 0.0995128f, -0.7101220f, -0.0785123f, -0.3514554f, 0.6662078f, 2.0991604f}, + {0.1781942f, -0.1873588f, -0.4653996f, -0.0153059f, -0.1399561f, -0.0498718f, 0.4552556f, 
0.2300792f, + -0.7682312f, 0.4342302f, -0.3787803f, -0.6089386f, -0.1049337f, 0.0395331f, 0.0220332f, 0.0114750f, + 0.4672548f, 0.1284784f, -0.2472819f, 0.2892784f, 0.4788667f, 0.0472555f, -0.6593549f, 0.6508777f, + 0.9286987f, 0.3043948f, -0.0635985f, 0.0814399f, -0.1168853f, -0.6688027f, 0.8876534f, 0.4865684f}, + {0.4024099f, 0.0480259f, 0.4588822f, -0.1793082f, -0.2151573f, -0.1871128f, -0.1502780f, 1.1011307f, + -0.9467706f, 0.2632496f, -0.1257263f, -0.0241331f, 0.2280627f, 0.0878608f, -0.1334262f, -1.1642927f, + 1.0943586f, -0.4799654f, 0.5981907f, -1.5051398f, -0.4235946f, 0.0012827f, 1.2342577f, -0.8281875f, + 0.2776567f, -1.0362227f, 0.0408372f, 0.1540821f, 0.1777556f, -1.2684357f, 0.8836584f, -0.4001710f}, + {2.1558056f, 0.2082023f, 0.0863442f, 0.0364868f, -0.3985825f, 0.0307202f, -1.8889453f, -0.5614714f, + -0.7311882f, -0.8075573f, 0.4895108f, -2.7770483f, -0.3121874f, -0.1671291f, -0.1281284f, 1.3212786f, + -0.5310181f, -0.1974759f, -2.6240873f, -0.8320529f, -2.3875966f, -0.0286360f, -0.6263188f, -0.6553424f, + -4.1658955f, -0.0601300f, 0.0946256f, -1.6795633f, -0.1251303f, -3.0974686f, 0.2412274f, -0.0687501f}, + {2.0523887f, -0.6387668f, 2.0633900f, -0.0550964f, 0.5181718f, -0.4202190f, 1.8569367f, 0.8295385f, + 0.8555872f, 2.4727983f, -0.2072828f, -1.9006120f, 0.5379534f, 0.4463673f, 0.1468820f, 0.4918649f, + -3.4016700f, 0.2884440f, -1.9418719f, 4.5157170f, -0.5160927f, -0.0199372f, 3.1353824f, -0.9863126f, + -1.5135859f, 0.7576568f, 0.6715558f, 2.7409093f, 0.9291748f, -0.3247162f, 1.8204515f, -8.9181070f}, + {-0.1428107f, -0.0829889f, 0.4213613f, 0.0225415f, 1.2238166f, 0.0477106f, 0.3031853f, -0.7466553f, + 2.0663500f, 0.7588379f, 0.3689216f, -0.2003786f, 0.1242338f, 0.1693589f, -0.0351716f, -0.0186597f, + -0.0189417f, 0.5468715f, -0.2862698f, -0.1311738f, 3.0747476f, -0.0310747f, 0.0943165f, 0.3139819f, + 0.6274695f, -1.8314874f, 0.0147495f, 0.3554756f, 0.3829916f, 0.4891713f, 0.1328600f, 1.0535098f}, + {0.0534900f, 0.1787969f, -0.0571320f, 
-0.0685673f, 0.1968977f, 0.0374476f, 0.7876674f, 0.0828491f, + 0.6444036f, -0.2203166f, -0.2383427f, 0.5397566f, 0.0106769f, -0.1230072f, -0.0135021f, -0.5691944f, + -1.5040319f, 0.0406933f, -0.0025478f, 0.9251419f, -1.7180276f, -0.1112956f, 1.4840862f, 0.0407115f, + -0.0100329f, 0.0583593f, -0.0110524f, 0.7431355f, -0.0971857f, -0.5501527f, -0.6371027f, -0.1935233f}, + {-0.6455778f, 0.2317368f, 0.9285696f, -0.1415854f, 0.0822560f, 0.2488030f, -2.6992166f, 0.0884904f, + 0.6735302f, -0.1467820f, 0.5641044f, 0.6436581f, 0.0818401f, -0.0336634f, -0.0729000f, -0.1206900f, + -2.5739892f, 0.5776953f, 0.9531668f, -1.2362405f, -0.0615577f, -0.0143544f, -2.7525210f, 1.3738545f, + 0.2751348f, -1.7463943f, -0.0020144f, 2.4814103f, 0.1716725f, -0.7055540f, -0.3474010f, 0.4482578f}, + {-0.2526205f, -0.7463821f, -3.6076138f, -0.1511098f, 0.1216256f, 0.0888247f, -1.0190924f, -1.3260181f, + -0.0443211f, -4.8911066f, -3.4385188f, -6.0057454f, 0.3340450f, 0.2997236f, -0.0907855f, 0.7500492f, + -0.4007562f, 1.9382039f, 0.5687234f, 2.6511824f, 4.7703862f, 0.0006749f, -0.0201394f, -3.5885489f, + -4.1518898f, 0.0807014f, -0.0584071f, -0.8100027f, 0.7697087f, -0.8038046f, -1.2945876f, -4.0110312f}, + {0.4337017f, -1.1532011f, 2.0740633f, 0.0271806f, 0.6654227f, 0.1012998f, -4.0791736f, 1.2631345f, + 1.9511020f, 2.3272331f, 1.2707534f, 1.6306664f, 0.4936035f, 0.8285242f, 0.0807625f, 3.8652387f, + 0.0281145f, 1.6877037f, 1.2557380f, -0.3036775f, 0.5604967f, 0.1551418f, -0.9599600f, -6.3067718f, + -0.6352320f, 0.8058553f, 0.3657880f, -2.0491202f, -0.3926269f, 2.5650854f, 1.3697821f, -8.3070078f}, + {5.1334143f, -0.0351738f, -0.4774780f, -0.0679726f, 1.4569254f, 0.0580191f, -0.3649136f, -0.2298838f, + -3.3826666f, -0.7392708f, -0.6036060f, -0.2612940f, -0.1877640f, -0.1145124f, -0.0042578f, -0.0311193f, + -0.0320479f, 0.5270581f, -0.4324475f, 0.2681437f, 4.7813129f, -0.0222701f, -0.0525629f, -0.2861001f, + -0.1251072f, 3.9112861f, 0.0045046f, -0.0426071f, -0.3299106f, -0.0686970f, 
-0.1602017f, -0.0070103f}, + {-0.6633690f, 0.0103367f, 0.5998458f, 0.1256577f, -0.0359184f, -0.0176820f, -0.6458368f, -0.0370536f, + 0.3542259f, 0.1394724f, 0.8255956f, 0.2501569f, 0.0320156f, -0.0256806f, 0.0277949f, 0.0036392f, + 0.2825173f, 0.1400358f, 1.0011463f, -0.6792242f, 0.0672508f, 0.0728705f, -0.1089695f, -1.0414587f, + -0.4135485f, 0.4293025f, -0.0041241f, -0.9564193f, 0.0314900f, 0.8658463f, -0.7734696f, -0.7610567f}, + {-0.0200122f, -0.0749178f, -1.5026549f, -0.0387432f, -0.0713735f, 0.1214790f, 1.8730290f, -0.0552839f, + -1.6867150f, 0.2282097f, 0.7161849f, -0.1018546f, -0.1092003f, 0.0365504f, -0.1326883f, 1.2310545f, + 0.1800210f, 0.7024739f, -2.9606545f, 1.2275347f, -0.2050014f, 0.0940569f, 0.4761694f, 0.8812068f, + -0.0083424f, -1.5406264f, 0.0061815f, -2.7606382f, 0.0248556f, 1.1086880f, -1.3608936f, 1.0795454f}, + {0.9734020f, 0.3905411f, -3.7008634f, 0.0013557f, 0.1649124f, 0.9935362f, 1.3489184f, 0.9505764f, + 0.7966231f, -0.1627246f, -2.5754328f, 1.4892205f, 0.8586300f, 0.6974363f, 0.1320204f, -0.7840260f, + 0.3121157f, 0.0966901f, 2.7447381f, 1.8256680f, 0.7229405f, -0.1723188f, 0.9145948f, -2.1376033f, + 0.5259342f, 0.0731194f, -0.2908303f, -0.2603913f, -0.2326528f, 3.6684167f, -0.2883157f, -2.8546307f}, + {-4.8917460f, 6.7944999f, -0.2255474f, 0.1051999f, 3.9000113f, 2.0624907f, 5.3019547f, 10.0209141f, + 1.1268179f, 2.2669628f, -6.5002980f, 1.8408583f, 5.3039579f, 2.2055962f, 0.1055369f, 1.7230233f, + 6.9605255f, 7.7025104f, 2.9880707f, -0.9274251f, -0.2287160f, -0.0206735f, 0.6885675f, 2.8179996f, + -7.1129837f, -1.3772345f, 3.8655453f, -5.9388318f, -0.0469947f, 7.2763596f, -6.3536129f, -17.0069847f}, + {1.8787041f, -0.9953383f, -1.4839923f, 0.1308209f, 0.3657510f, 0.3106483f, -1.4158971f, -6.7449651f, + 0.6553892f, -4.5046172f, -3.5489719f, 3.5363002f, 0.5454772f, 2.3521471f, 0.1612140f, -0.9744226f, + 0.6546553f, -2.7179255f, -1.7758157f, 0.3089439f, 1.7462813f, 0.1654593f, -0.2440207f, 3.9501827f, + 1.3750844f, 0.0596805f, 
-0.1977254f, 0.0264880f, 2.6396444f, 1.0816911f, 3.6413448f, -6.0299959f}, + {-4.1295738f, 0.1044480f, 0.2131937f, 0.0420826f, 0.5292229f, 0.0090477f, -0.0973486f, 0.9596778f, + 2.9579651f, -0.6364226f, -1.7556342f, 0.1539868f, -0.1273174f, -0.1348504f, 0.1257833f, -1.4168571f, + -1.0960362f, 0.0482449f, -1.4395387f, -0.2524115f, -2.9162085f, -0.0451428f, -0.4021681f, -0.5756381f, + 0.0515293f, -3.1996479f, -0.0007676f, -1.3878343f, -0.2864279f, -0.9579773f, -1.0999249f, 1.6500067f}, + {-2.4806111f, -6.8115449f, 3.2805641f, 0.1187415f, -0.9950783f, 6.2553434f, -1.6450261f, -6.1463733f, + 2.7507148f, 4.2995782f, 0.0461297f, -0.5417359f, 2.4306326f, -7.3530145f, 0.0698273f, -0.9394333f, + -1.3595498f, -7.5141478f, -1.4911395f, 3.2300410f, 0.1203540f, 0.0314884f, -2.0116949f, -0.8167119f, + 2.4133310f, 0.1920709f, 1.0619365f, 0.2459123f, 6.9166069f, -2.6384118f, 3.6829739f, -7.2385545f}, + {0.9408096f, 14.9067144f, 1.7709646f, 0.1105646f, -0.5600107f, -15.3188124f, -12.3718462f, -1.8893757f, + 13.6364670f, -5.7327847f, -14.1805468f, 1.0581509f, -14.2186184f, 14.8948650f, 0.0190344f, 5.4395180f, + 6.7243400f, 9.8468456f, 4.5144215f, -1.4551491f, 1.1032411f, -0.0317988f, 2.3398454f, -3.1671596f, + -7.7541409f, 1.1255593f, 6.7340465f, -4.4448423f, -9.1472626f, -3.1959128f, 4.4181323f, -2.7904994f}, + {-2.1621978f, -4.7202382f, 1.7378219f, 0.1417439f, -0.5000908f, 5.4468708f, 1.4260571f, -6.6136570f, + 1.5713804f, 3.4479704f, 2.7354901f, -0.7388076f, 5.4666147f, -3.8697338f, -0.1368596f, -2.7903373f, + -1.2043713f, -4.9554005f, 0.3324645f, 1.6767365f, 0.1156244f, -0.0326964f, -2.0945346f, -0.4590589f, + 3.0942657f, 0.0015020f, -6.2626700f, -0.3969755f, 0.7717427f, -1.9667094f, 2.9664171f, -11.9477053f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_2[32] = { + 9.8383608f, 3.6922295f, 3.5774977f, -4.4619012f, 6.5087032f, -0.9540017f, -0.5059246f, 0.0706402f, + 14.3396597f, -0.2771132f, -4.8409863f, -8.3581600f, -3.5078344f, 4.3287506f, -5.7808843f, 3.9264839f, + 
-2.1697845f, -0.0040514f, -0.2095029f, -6.8678174f, 1.7911285f, -0.4510343f, 1.2410443f, -4.5678806f, + -0.5693849f, 2.3320096f, 4.4606552f, -6.3771009f, -4.3149071f, -0.1905672f, -3.5726390f, -1.0744030f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_2[32][32] = { + {-0.0155548f, 0.0243339f, 0.0037967f, -0.2771824f, 0.0111955f, -0.0115980f, 0.0079653f, -2.9803498f, + -0.0061037f, -0.0956634f, 0.0332446f, 0.0179244f, -0.0080377f, -9.0180779f, 0.1720033f, 0.0350694f, + -0.0146588f, -0.2135506f, -0.3158041f, 1.3697664f, 0.0119146f, 0.0119120f, -0.0986927f, 0.0297492f, + 0.0355827f, -0.1196868f, -0.0745119f, 0.0281862f, -0.0422190f, -0.3069138f, -0.0477367f, -0.0550450f}, + {-1.7374619f, 1.4822800f, -2.1885235f, 1.8354234f, -0.5380136f, 1.6621803f, 0.6251035f, 0.1008954f, + -0.8387129f, -0.2063313f, 1.0661691f, -0.9799694f, -5.1710258f, -3.2260630f, -1.5073707f, -1.0792168f, + 1.8569958f, -0.2289213f, 0.0563821f, -1.6398847f, -4.1649504f, -2.7527378f, -0.0134577f, 3.0424533f, + 0.0364320f, 0.6762254f, -3.1551330f, 2.4888904f, 1.4757305f, -0.3141717f, -2.0126467f, -0.1675602f}, + {-0.9571826f, 0.0914152f, 0.0404339f, 0.2927902f, 0.2933607f, 0.0619171f, 0.0772318f, -1.3796169f, + -0.8194544f, -0.2179988f, -1.1241078f, -0.1443964f, 0.0559355f, -1.2914546f, -0.3445117f, 0.2031156f, + 0.0273864f, -0.0193422f, -0.2136522f, 0.0429592f, 0.0212854f, 0.0414394f, -1.1734651f, 0.0582848f, + 0.0136039f, -0.1892604f, 0.0764908f, -0.0130132f, -0.1272559f, -0.0818855f, -0.0408583f, -0.1563294f}, + {-0.0213695f, 0.0596942f, -0.0641309f, -0.0146449f, 0.0416586f, -0.0378931f, 0.1234860f, 0.1622967f, + 0.0794091f, -0.0639933f, -0.1030663f, 0.0579078f, 0.1050275f, -0.0136866f, 0.0149978f, 0.0876813f, + 0.0693554f, 0.1612417f, -0.0595916f, -0.1008234f, -0.0579058f, 0.0915138f, 0.1321436f, -0.1484535f, + -0.0920316f, -0.0024532f, -0.1045300f, 0.0924260f, 0.0277524f, -0.0287276f, -0.1271127f, 0.1164243f}, + {0.0713067f, 0.0198056f, -0.3023696f, -0.0025908f, -0.0085885f, -1.1157553f, 
0.0236462f, -0.0704844f, + -0.0189257f, -0.0997382f, 0.3379845f, -0.1229390f, -0.0616165f, -0.8968034f, 0.0401445f, -0.1144476f, + -0.0532077f, 0.0604580f, 0.0609454f, -0.1613472f, 0.0103525f, -0.1653874f, 0.0205189f, 0.0758978f, + -0.1514593f, 0.0151441f, 0.2043469f, 0.0349607f, -0.1361278f, -0.1255922f, 0.0631648f, 0.3570991f}, + {0.3371337f, -3.7541580f, 2.2215877f, -0.3390516f, 0.1912718f, -4.1861577f, -1.2264019f, 2.8179801f, + 0.0667294f, -0.0093539f, 2.3029909f, 3.1814916f, 3.9780347f, 0.2310601f, 0.3986159f, -0.8544636f, + 0.4139664f, -0.1876569f, -0.2448732f, -2.8053334f, 4.0488625f, 2.1094146f, -6.7310257f, -4.9950023f, + -0.8315823f, 0.0555959f, 2.4573720f, -3.7234364f, -4.2910552f, -0.2995245f, -3.2605181f, 2.3620574f}, + {-1.5522735f, -0.1866350f, -0.0067679f, 0.3196557f, 1.4052233f, 2.8143549f, -0.9992948f, -0.5309914f, + -25.8852596f, -0.1218249f, 0.6625420f, 0.3007106f, -0.2767264f, -0.1847300f, -0.5313534f, -0.0383462f, + -0.1987552f, 0.0581405f, -0.3376078f, 1.2621028f, 0.0818709f, -0.1401216f, -0.4550788f, -0.1592657f, + 0.0597123f, 0.1344101f, -0.1005317f, -0.1538406f, 2.9142656f, -0.0806051f, -0.4267367f, -31.9512234f}, + {0.6859627f, 0.1212986f, 0.1291616f, 0.0459838f, -0.0899920f, 0.0287645f, 0.1987007f, -2.7079368f, + -0.2628384f, -0.1402464f, -0.6302179f, -0.2923960f, -0.1106663f, 0.8256195f, -2.8054097f, -0.0296494f, + -0.5632019f, -0.1335654f, -0.1558440f, -6.8611612f, 0.0203786f, 0.0046566f, -0.4401442f, -0.0471430f, + 0.4535986f, -0.8657981f, 0.0684740f, 0.0518814f, -0.0123748f, -0.2270164f, 0.0922878f, -0.3863277f}, + {0.0127175f, 2.3346109f, -0.4390767f, -0.4657893f, 0.1659466f, -0.1132782f, -0.4928388f, 0.7652873f, + 1.1510741f, -0.0879600f, 0.2721785f, -0.1878961f, -0.3477249f, -0.8473209f, -0.8931856f, -0.4328294f, + -11.9181929f, -0.0282545f, -0.0217915f, 1.6676594f, -0.2122232f, -0.6190930f, 1.9053432f, -0.7592348f, + -1.0739189f, -0.7170524f, 0.3864411f, -0.8849231f, 0.1393488f, 0.0738489f, 0.4460345f, 1.9020857f}, + 
{0.4453296f, -0.0767821f, 0.1638939f, 1.6997167f, -0.1098599f, -0.0551604f, 0.0040561f, -13.5290670f, + -0.1285677f, -0.0590394f, 0.6499141f, -0.7617344f, 0.0453151f, 0.3104213f, -1.0711143f, 0.1361838f, + -0.4365610f, -0.1300649f, 0.2013344f, -0.5308123f, 0.1451896f, 0.1030715f, -0.6487910f, -0.3136590f, + -0.0280079f, 0.5394178f, 0.1318262f, -0.0159292f, 0.0636870f, -0.3224248f, -0.1868187f, -0.2468304f}, + {-0.0333494f, -0.0834255f, -0.1221875f, 0.6861304f, 0.0521738f, -0.0416543f, -0.4437352f, -19.3246250f, + -0.1520821f, 0.0528602f, -0.6375434f, -0.5803806f, -0.0958465f, -2.0058544f, -0.8282642f, 0.0259000f, + 0.4846996f, 0.1211179f, 0.0356884f, 1.0009497f, 0.0635682f, -0.0314105f, -0.0011147f, 0.0131714f, + -0.3410152f, 0.2798154f, 0.0961889f, 0.1266228f, -0.0934717f, -0.0904307f, 0.1355542f, 0.5722573f}, + {0.2146454f, 0.2143834f, 0.1290650f, -0.9063646f, 0.2100945f, 0.1331054f, -0.2620614f, -0.1264993f, + 0.1313979f, 0.0455465f, -0.8395286f, -0.4967833f, -0.0538581f, 0.9155380f, 0.6627046f, 0.1691243f, + 0.9887002f, -0.1597013f, -0.1236713f, -1.9041336f, 0.0427585f, 0.0849747f, -5.2559652f, -0.3133100f, + 0.0141170f, -0.1635530f, 0.4938746f, 0.0162943f, 0.2107756f, -0.3413893f, -0.0657575f, 1.0542560f}, + {-2.8868380f, -2.0837426f, -1.0611480f, -0.6143807f, -0.6398501f, -2.8018746f, 0.5166737f, -1.0814301f, + -1.9272422f, -0.1017482f, -0.4651161f, -1.4021232f, 1.8854499f, 0.1815407f, 0.5965426f, -2.3344259f, + -0.0690846f, -0.1678239f, -0.4219488f, 0.6215640f, 1.0270095f, -0.3473049f, -0.3926674f, -0.7942593f, + 1.1305071f, -1.4621233f, -0.8051161f, -0.7698632f, -2.6038630f, -0.3090037f, -1.6365144f, -1.0179478f}, + {0.0046026f, 1.1319581f, -2.6405678f, -2.0353596f, -2.1687336f, 0.3364883f, 2.1122196f, 0.2584647f, + -2.4344857f, -0.0378498f, 0.6158544f, -0.6060749f, -4.9598379f, 0.1570698f, 2.2436838f, -2.6198347f, + -2.0935996f, -0.1845744f, -0.0716080f, -1.9338604f, -4.1995640f, -3.6706774f, -1.6762524f, 3.9646862f, + -0.9677961f, 1.8319578f, -3.1916575f, 
3.7312632f, 0.0820446f, -0.0497568f, -0.0898171f, -0.2499462f}, + {-0.0780375f, -0.0286571f, 0.1007227f, 0.0012229f, -0.0531285f, 0.0840718f, 0.1013894f, 0.1312424f, + -0.0673772f, 0.1603183f, 0.0074385f, -0.0718321f, -0.1549873f, 0.1616689f, 0.0405887f, -0.1558588f, + 0.0740745f, 0.1696893f, -0.0064026f, -0.1656420f, -0.1186674f, -0.1262667f, -0.0784757f, -0.1280154f, + 0.0909976f, 0.0853046f, -0.1075811f, 0.1310615f, 0.0610194f, 0.0647223f, 0.1360559f, 0.0440074f}, + {-0.2106480f, 0.0087131f, 0.1119385f, -1.0611318f, 0.5250220f, 0.0525479f, -0.2733742f, -1.0799565f, + -0.5601607f, -0.0651806f, -1.9793440f, -0.3373334f, -0.1550518f, 0.8932216f, 0.7264332f, -0.0450735f, + 1.2373760f, -0.1236272f, 0.0680048f, -3.0446634f, -0.1533586f, -0.0127355f, -0.3326311f, -0.0225603f, + -0.2265739f, -2.3752897f, -0.3771705f, -0.0728938f, 0.1741305f, 0.1111639f, 0.4131119f, 0.2239323f}, + {-2.5691276f, -1.4011253f, -2.0640867f, -3.7236946f, 1.5542637f, -0.9456654f, -1.7575809f, 3.6794879f, + -0.4439790f, -0.1009826f, 3.6702275f, -0.1935008f, -0.4423219f, -0.3825364f, -0.4784791f, 0.5927492f, + -2.3482494f, 0.0801714f, -0.1567418f, -1.7934613f, -0.1706410f, -0.6326947f, 0.6260155f, 0.3631033f, + -0.9325932f, 1.9647995f, -1.3409088f, 1.3501998f, 0.0367797f, -0.1744210f, 1.8690013f, -1.0737898f}, + {-0.5934777f, 0.6232591f, -0.3391055f, 0.2640936f, -0.2824444f, 0.4815128f, 0.6625078f, -0.1103976f, + 0.9555223f, -0.0624896f, -0.6778919f, 0.1181502f, -0.5425385f, 0.7297349f, -1.7261271f, -0.2917557f, + 1.1873137f, -0.2725933f, 0.0975242f, 1.7756181f, -0.5735835f, -0.4453230f, 0.9800369f, 0.9344145f, + -1.8692539f, 0.0120440f, -0.7315661f, 0.6250805f, 0.3839143f, -0.0376306f, 0.3816243f, 0.6059195f}, + {0.5522162f, -1.8043815f, -10.9379101f, 0.5719097f, -0.2246755f, -1.4856353f, 0.4877502f, 0.7163438f, + -11.8135147f, -0.0180790f, -0.9928634f, 0.1107815f, -0.0005064f, -0.3824990f, -0.7453306f, -1.9909632f, + -7.4362645f, -0.0245507f, -0.1815712f, -3.5507584f, -0.0075889f, 
-11.0296011f, -1.1292133f, -0.0710276f, + 0.5675677f, 0.2017778f, -0.0684891f, -0.0367653f, -1.6674192f, 0.0281711f, -0.8356591f, -0.0447807f}, + {0.2537312f, -3.0178010f, -0.3493635f, 1.8573236f, 0.4017631f, 0.9912633f, -0.8625028f, -0.7783228f, + -1.7815375f, -0.1204695f, 1.8551122f, 0.3344182f, -0.2828701f, -1.3226960f, -1.4470471f, 0.2895959f, + 0.6780876f, -0.2010069f, 0.0425280f, -2.1786852f, -0.1274053f, -0.2549899f, -0.2233993f, -0.1561645f, + -0.4640818f, 0.6375850f, 0.7733670f, -0.2388286f, 1.0447853f, -0.1503223f, 0.3823584f, -13.8176088f}, + {0.2575197f, -2.2127593f, -0.0389457f, -0.0215759f, 0.1659477f, -0.0097748f, -0.1935415f, -0.9091369f, + -0.1453371f, 0.0442428f, -0.1206519f, 0.1435609f, -0.0186047f, -5.0154042f, 0.0538177f, 0.0403250f, + 0.0240955f, 0.0331080f, 0.0517951f, 0.7422639f, 0.0069818f, 0.0248351f, -0.2205741f, -0.0082387f, + 0.2043269f, 0.0459435f, 0.0876343f, 0.0140607f, 0.1056308f, 0.0062555f, 0.0184278f, -0.5539715f}, + {-0.0398742f, 0.1075264f, 0.1725024f, -0.0755192f, -0.0360048f, 0.1325573f, 0.0903103f, -0.0882263f, + 0.1207692f, 0.0032722f, 0.0048489f, -0.1257241f, 0.1450990f, -0.0713558f, 0.1116815f, 0.1107689f, + -0.1447252f, 0.1581838f, -0.0160124f, -0.0425587f, 0.1411217f, 0.0865060f, -0.0643460f, -0.0431262f, + -0.1452804f, -0.0195101f, 0.1234572f, 0.0520887f, 0.1117576f, -0.0751791f, 0.1511539f, 0.1224861f}, + {0.7728126f, 2.3075340f, -0.0385258f, -3.1270287f, 0.9414487f, 3.5251477f, -0.8043440f, 0.7212446f, + -7.6850162f, -0.1609414f, -3.7687578f, -1.0751100f, -0.2052089f, 5.0728245f, 2.2835267f, 0.5930225f, + 0.1303335f, -0.1428799f, -0.3715075f, 0.5136011f, -0.4755619f, -0.2192461f, -3.8696294f, -0.0062392f, + -1.3774812f, -0.0034140f, -1.5944362f, 0.9773729f, 3.2859125f, -0.1616932f, -1.2785367f, -13.5732412f}, + {0.5535743f, 0.1461481f, -0.2218016f, -0.2971808f, -0.2169309f, 0.1564545f, -0.0390397f, 1.1558976f, + -0.0119933f, -0.0774637f, 1.1907971f, -0.5127968f, -0.0066028f, -1.6794037f, -0.3650940f, 0.2555613f, + 
-0.9488379f, 0.0449603f, -0.1620417f, 0.1583214f, 0.0000908f, 0.0152763f, -1.0660053f, -0.0139402f, + -1.7440189f, 0.2515209f, 0.3333162f, 0.1904725f, 0.1116094f, -0.2287960f, -0.0007165f, -1.7047704f}, + {-5.9897852f, -0.1316296f, -0.0218074f, -0.4602887f, 0.3288545f, -0.0882939f, -0.5929499f, 0.4294790f, + -0.0383545f, 0.0556869f, 0.1975944f, 0.1341491f, 0.0629570f, -2.2742157f, 0.0175826f, -0.1439869f, + -24.8701649f, -0.1582915f, -0.2460304f, -3.9643264f, 0.0863483f, 0.0180861f, -0.2210452f, -0.0868723f, + -0.4175525f, -0.8231756f, 0.0247534f, -0.1473545f, -0.0021330f, -0.0410253f, -1.1944869f, -1.1523768f}, + {0.1031547f, -3.3402514f, -4.3636522f, -0.1534714f, -0.0622189f, 0.0374694f, -0.0870097f, -4.1865788f, + -0.0555377f, 0.0252329f, 0.1339467f, 0.0461691f, -0.0503090f, 0.0289890f, -0.0095674f, -0.3289992f, + -0.0279080f, 0.0274977f, -0.0903500f, 0.5610157f, -0.0478177f, 0.4346960f, 0.4822784f, -0.1058945f, + -0.2026870f, -0.0560638f, 0.0910069f, -0.0818529f, 0.0819198f, -0.0292193f, 0.3040628f, -0.1275230f}, + {-5.8789845f, -17.1114635f, -4.6755161f, 0.1016624f, -0.8685016f, -0.3898779f, -2.3363957f, 0.1413794f, + -2.4254086f, -0.2171030f, -0.0901150f, 0.7058705f, 0.4166250f, -0.0231085f, -0.1789686f, -9.4244318f, + -0.6418229f, -0.0857969f, 0.1683681f, -0.0310597f, -0.0247807f, -5.3748040f, -7.4730940f, 0.1019564f, + -1.2126822f, -0.3726285f, -1.0287101f, 0.1803891f, -0.2227769f, -0.0791530f, -0.0159770f, -1.4883354f}, + {-17.9394970f, -0.5228514f, -11.3547935f, -0.0672671f, -2.0371394f, -0.9076943f, 2.4331825f, -6.9409127f, + 0.8286008f, 0.0208618f, -0.8009814f, 1.2268484f, 0.1943726f, -1.7297083f, -0.7668949f, -6.5505466f, + -0.6495168f, -0.0404727f, -0.1260914f, -3.5029383f, -0.0852898f, -2.9679556f, 1.6404767f, -0.0251449f, + 1.1460075f, -0.7877688f, -0.0586593f, -0.4741839f, -1.7420560f, 0.0295600f, -2.3574052f, 0.0974777f}, + {0.4443443f, 0.6384261f, 1.3317494f, -1.0085982f, 0.9508762f, 1.3168396f, -0.1862490f, -0.1801148f, + 1.1106120f, 
-0.0654911f, 0.1186706f, -0.7198273f, 0.5449172f, -0.5886080f, 0.7504217f, 1.8046317f, + -0.1294390f, -0.1939137f, -0.2383934f, 0.4131435f, 0.6910310f, 1.2821866f, -0.1088722f, -0.5660405f, + -0.1188610f, 0.0364403f, 0.3597929f, -0.6409024f, 1.2114668f, -0.0212278f, 0.8423592f, 0.4848156f}, + {-0.8772649f, -13.5265112f, -4.5540547f, -0.2856667f, 0.7604876f, -0.6829260f, -0.8320626f, 0.6541347f, + 0.4020181f, 0.0009324f, -10.9660740f, -0.3540186f, -0.2316812f, 0.3576394f, 0.0998953f, -1.5738430f, + 1.2089975f, 0.0706465f, -0.2538019f, 0.7016497f, -0.0282650f, -3.1291001f, -0.4375663f, -0.3979468f, + -0.1588882f, 0.3978875f, 0.2038192f, -0.4281644f, -0.5787544f, -0.0922198f, 0.9595569f, 0.0212818f}, + {0.3392667f, 0.1170919f, -0.0705636f, -0.1025443f, -0.1192213f, -0.0495686f, 0.0284667f, -0.1226804f, + 0.0050191f, -0.0516545f, -1.0892097f, 0.0033689f, 0.0471462f, 1.4266804f, 0.0288870f, -0.0110408f, + -1.1283765f, -0.1299917f, -0.4318301f, -0.9854419f, -0.0190479f, -0.0269406f, 0.3697925f, -0.0757695f, + -0.3632923f, -0.1714077f, 0.0669245f, 0.0557428f, -0.1713906f, -0.4307863f, -0.1749060f, -2.1246362f}, + {0.8383662f, -3.8122442f, 0.1568939f, -2.2105119f, -0.7086993f, -0.4664145f, -0.3578597f, 0.5554636f, + 0.6965880f, -0.1506968f, 0.2646832f, 0.2874083f, 0.1901203f, -2.4997077f, -0.3519035f, -0.0518054f, + 1.0862818f, -0.2502540f, -0.3133347f, -0.7411230f, 0.1268138f, 0.1069811f, -0.8109779f, 0.0264679f, + 0.1604289f, -0.7534032f, -0.1419461f, 0.0688303f, -0.1570919f, -0.3055144f, -0.7415189f, 2.5547018f}, + }; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float bias_4[1] = {1.4616280f}; + ALPAKA_STATIC_ACC_MEM_GLOBAL const float wgtT_4[32][1] = { + {0.0609813f}, {0.0685224f}, {0.1655236f}, {-0.0599842f}, {0.0669006f}, {-0.1817371f}, {-0.0539167f}, + {-0.0737955f}, {0.0654664f}, {0.0302955f}, {-0.0586768f}, {0.0717433f}, {0.1472274f}, {-0.0610073f}, + {-0.0601061f}, {0.2086218f}, {-0.0545418f}, {-0.0388369f}, {-0.0613536f}, {-0.1141072f}, {-0.2289097f}, + {-0.3354485f}, 
{0.0831025f}, {0.1333673f}, {0.0490410f}, {0.0484894f}, {0.0436755f}, {-0.1479877f}, + {0.1540713f}, {0.0021261f}, {-0.0845848f}, {-0.0564973f}, + }; + + } // namespace t5dnn +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h new file mode 100644 index 0000000000000..08feb0dfe3384 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/PixelQuintuplet.h @@ -0,0 +1,818 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h +#define RecoTracker_LSTCore_src_alpaka_PixelQuintuplet_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +#include "Hit.h" +#include "PixelTriplet.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelQuintupletToMemory(ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + QuintupletsConst quintuplets, + PixelQuintuplets pixelQuintuplets, + unsigned int pixelIndex, + unsigned int t5Index, + unsigned int pixelQuintupletIndex, + float rzChiSquared, + float rPhiChiSquared, + float rPhiChiSquaredInwards, + float score, + float eta, + float phi, + float pixelRadius, + float quintupletRadius, + float centerX, + float centerY) { + pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex] = pixelIndex; + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex] = t5Index; + pixelQuintuplets.isDup()[pixelQuintupletIndex] = false; + pixelQuintuplets.score()[pixelQuintupletIndex] = __F2H(score); + pixelQuintuplets.eta()[pixelQuintupletIndex] = __F2H(eta); 
+ pixelQuintuplets.phi()[pixelQuintupletIndex] = __F2H(phi); + + pixelQuintuplets.pixelRadius()[pixelQuintupletIndex] = __F2H(pixelRadius); + pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex] = __F2H(quintupletRadius); + pixelQuintuplets.centerX()[pixelQuintupletIndex] = __F2H(centerX); + pixelQuintuplets.centerY()[pixelQuintupletIndex] = __F2H(centerY); + + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][0] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][1] = 0; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][2] = quintuplets.logicalLayers()[t5Index][0]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][3] = quintuplets.logicalLayers()[t5Index][1]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][4] = quintuplets.logicalLayers()[t5Index][2]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][5] = quintuplets.logicalLayers()[t5Index][3]; + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex][6] = quintuplets.logicalLayers()[t5Index][4]; + + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][0] = segments.innerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][1] = segments.outerLowerModuleIndices()[pixelIndex]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][2] = quintuplets.lowerModuleIndices()[t5Index][0]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][3] = quintuplets.lowerModuleIndices()[t5Index][1]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][4] = quintuplets.lowerModuleIndices()[t5Index][2]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][5] = quintuplets.lowerModuleIndices()[t5Index][3]; + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex][6] = quintuplets.lowerModuleIndices()[t5Index][4]; + + unsigned int pixelInnerMD = segments.mdIndices()[pixelIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelIndex][1]; + + 
pixelQuintuplets.hitIndices()[pixelQuintupletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][4] = quintuplets.hitIndices()[t5Index][0]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][5] = quintuplets.hitIndices()[t5Index][1]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][6] = quintuplets.hitIndices()[t5Index][2]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][7] = quintuplets.hitIndices()[t5Index][3]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][8] = quintuplets.hitIndices()[t5Index][4]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][9] = quintuplets.hitIndices()[t5Index][5]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][10] = quintuplets.hitIndices()[t5Index][6]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][11] = quintuplets.hitIndices()[t5Index][7]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][12] = quintuplets.hitIndices()[t5Index][8]; + pixelQuintuplets.hitIndices()[pixelQuintupletIndex][13] = quintuplets.hitIndices()[t5Index][9]; + + pixelQuintuplets.rzChiSquared()[pixelQuintupletIndex] = rzChiSquared; + pixelQuintuplets.rPhiChiSquared()[pixelQuintupletIndex] = rPhiChiSquared; + pixelQuintuplets.rPhiChiSquaredInwards()[pixelQuintupletIndex] = rPhiChiSquaredInwards; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RZChiSquaredCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rzChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * 
(modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + const int layer4 = + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); + const int layer5 = + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rzChiSquared < 451.141f; + } else if (layer4 == 4 and layer5 == 12) { + return rzChiSquared < 392.654f; + } else if (layer4 == 4 and layer5 == 5) { + return rzChiSquared < 225.322f; + } else if (layer4 == 7 and layer5 == 13) { + return rzChiSquared < 595.546f; + } else if (layer4 == 7 and layer5 == 8) { + return rzChiSquared < 196.111f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rzChiSquared < 297.446f; + } else if (layer4 == 8 and layer5 == 14) { + return rzChiSquared < 451.141f; + } else if (layer4 == 8 and layer5 == 9) { + return rzChiSquared < 518.339f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rzChiSquared < 341.75f; + } else if (layer4 == 9 and layer5 == 15) { + return rzChiSquared < 341.75f; + } + } else if (layer1 == 2 and layer2 == 3 and 
layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rzChiSquared < 392.655f; + } else if (layer4 == 5 and layer5 == 12) { + return rzChiSquared < 341.75f; + } else if (layer4 == 5 and layer5 == 6) { + return rzChiSquared < 112.537f; + } + } else if (layer1 == 2 and layer2 == 3 and layer4 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rzChiSquared < 595.545f; + } else if (layer4 == 8 and layer5 == 14) { + return rzChiSquared < 74.198f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 14 and layer5 == 15) { + return rzChiSquared < 518.339f; + } else if (layer4 == 9 and layer5 == 10) { + return rzChiSquared < 8.046f; + } else if (layer4 == 9 and layer5 == 15) { + return rzChiSquared < 451.141f; + } + } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) { + return rzChiSquared < 56.207f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + if (layer4 == 10 and layer5 == 11) { + return rzChiSquared < 64.578f; + } else if (layer4 == 10 and layer5 == 16) { + return rzChiSquared < 85.250f; + } else if (layer4 == 15 and layer5 == 16) { + return rzChiSquared < 85.250f; + } + } + return true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rPhiChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + modules.layers()[lowerModuleIndex3] + 6 * 
(modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + const int layer4 = + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); + const int layer5 = + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 48.921f; + } else if (layer4 == 4 and layer5 == 12) { + return rPhiChiSquared < 97.948f; + } else if (layer4 == 4 and layer5 == 5) { + return rPhiChiSquared < 129.3f; + } else if (layer4 == 7 and layer5 == 13) { + return rPhiChiSquared < 56.21f; + } else if (layer4 == 7 and layer5 == 8) { + return rPhiChiSquared < 74.198f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 21.265f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 37.058f; + } else if (layer4 == 8 and layer5 == 9) { + return rPhiChiSquared < 42.578f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 32.253f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 37.058f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 97.947f; + } else if (layer4 == 5 and layer5 == 12) { + return rPhiChiSquared < 129.3f; + } else if (layer4 == 5 and layer5 == 6) { + return rPhiChiSquared < 170.68f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 48.92f; 
+      } else if (layer4 == 8 and layer5 == 14) {
+        return rPhiChiSquared < 74.2f;
+      }
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) {
+      if (layer4 == 14 and layer5 == 15) {
+        return rPhiChiSquared < 42.58f;
+      } else if (layer4 == 9 and layer5 == 10) {
+        return rPhiChiSquared < 37.06f;
+      } else if (layer4 == 9 and layer5 == 15) {
+        return rPhiChiSquared < 48.92f;
+      }
+    } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) {
+      return rPhiChiSquared < 85.25f;
+    } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) {
+      if (layer4 == 10 and layer5 == 11) {
+        return rPhiChiSquared < 42.58f;
+      } else if (layer4 == 10 and layer5 == 16) {
+        return rPhiChiSquared < 37.06f;
+      } else if (layer4 == 15 and layer5 == 16) {
+        return rPhiChiSquared < 37.06f;
+      }
+    }
+    return true;
+  }
+
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT5(TAcc const& acc,
+                                                            unsigned int nPoints,  // number of entries read per array
+                                                            float* xs,
+                                                            float* ys,
+                                                            float* delta1,  // per-hit factor applied to xPrime
+                                                            float* delta2,  // per-hit factor applied to yPrime
+                                                            float* slopes,
+                                                            bool* isFlat,  // true: use (x, y) unrotated
+                                                            float g,
+                                                            float f,
+                                                            float radius) {
+    /* Chi squared of nPoints hits (xs, ys) against the circle with center (g, f) and the given radius.
+    Per hit: (x^2 + y^2 - 2*g*x - 2*f*y + c)^2 / sigma2, with c = g^2 + f^2 - radius^2 and
+    sigma2 = 4 * ((xPrime * delta1)^2 + (yPrime * delta2)^2). The sum over hits is returned. */
+    float c = g * g + f * f - radius * radius;
+    float chiSquared = 0.f;
+    float absArctanSlope, angleM, xPrime, yPrime, sigma2;
+    for (size_t i = 0; i < nPoints; i++) {
+      absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i]))
+                                                            : kPi / 2.f);  // sentinel slope is treated as pi/2
+      if (xs[i] > 0 and ys[i] > 0) {  // rotation angle is picked from the quadrant of (x, y)
+        angleM = kPi / 2.f - absArctanSlope;
+      } else if (xs[i] < 0 and ys[i] > 0) {
+        angleM = absArctanSlope + kPi / 2.f;
+      } else if (xs[i] < 0 and ys[i] < 0) {
+        angleM = -(absArctanSlope + kPi / 2.f);
+      } else if (xs[i] > 0 and ys[i] < 0) {
+        angleM = -(kPi / 2.f - absArctanSlope);
+      } else {
+        angleM = 0;  // a coordinate is exactly zero: no rotation
+      }
+      if (not isFlat[i]) {
+        xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM);
+        yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM);
+      } else {
+        xPrime = xs[i];
+        yPrime = ys[i];
+      }
+      sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i]));
+      chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) *
+                    (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / (sigma2);
+    }
+    return chiSquared;
+  }
+
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression_pT5(TAcc const& acc,
+                                                                     ModulesConst modules,
+                                                                     const uint16_t* lowerModuleIndices,
+                                                                     float* delta1,
+                                                                     float* delta2,
+                                                                     float* slopes,
+                                                                     bool* isFlat,
+                                                                     unsigned int nPoints = 5,
+                                                                     bool anchorHits = true) {
+    /*
+    bool anchorHits required to deal with a weird edge case wherein
+    the hits ultimately used in the regression are anchor hits, but the
+    lower modules need not all be Pixel Modules (in case of PS). Similarly,
+    when we compute the chi squared for the non-anchor hits, the "partner module"
+    need not always be a PS strip module, but all non-anchor hits sit on strip
+    modules.
+ */ + ModuleType moduleType; + short moduleSubdet, moduleSide; + float inv1 = kWidthPS / kWidth2S; + float inv2 = kPixelPSZpitch / kWidth2S; + float inv3 = kStripPSZpitch / kWidth2S; + for (size_t i = 0; i < nPoints; i++) { + moduleType = modules.moduleType()[lowerModuleIndices[i]]; + moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + moduleSide = modules.sides()[lowerModuleIndices[i]]; + const float& drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 1.f; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + + if (anchorHits) { + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } else { + delta2[i] = (inv3 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + /* + despite the type of the module layer of the lower module index, + all anchor hits are on the pixel side and all non-anchor hits are + on the strip side! + */ + if (anchorHits) { + delta2[i] = inv2; + } else { + delta2[i] = inv3; + } + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 500.f * inv1; + isFlat[i] = false; + } +#ifdef WARNINGS + else { + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! 
subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); + } +#endif + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquared(TAcc const& acc, + ModulesConst modules, + uint16_t* lowerModuleIndices, + float g, + float f, + float radius, + float* xs, + float* ys) { + /* + Compute circle parameters from 3 pixel hits, and then use them to compute the chi squared for the outer hits + */ + + float delta1[5], delta2[5], slopes[5]; + bool isFlat[5]; + float chiSquared = 0; + + computeSigmasForRegression_pT5(acc, modules, lowerModuleIndices, delta1, delta2, slopes, isFlat); + chiSquared = computeChiSquaredpT5(acc, 5, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); + + return chiSquared; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RPhiChiSquaredInwards( + float g, float f, float r, float* xPix, float* yPix) { + /* + Using the computed regression center and radius, compute the chi squared for the pixels + */ + + float chiSquared = 0; + for (size_t i = 0; i < 2; i++) { + float residual = (xPix[i] - g) * (xPix[i] - g) + (yPix[i] - f) * (yPix[i] - f) - r * r; + chiSquared += residual * residual; + } + chiSquared *= 0.5f; + return chiSquared; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT5RPhiChiSquaredInwardsCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float rPhiChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + 
modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + const int layer4 = + modules.layers()[lowerModuleIndex4] + 6 * (modules.subdets()[lowerModuleIndex4] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS); + const int layer5 = + modules.layers()[lowerModuleIndex5] + 6 * (modules.subdets()[lowerModuleIndex5] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS); + + if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 451.141f; + } else if (layer4 == 4 and layer5 == 12) { + return rPhiChiSquared < 786.173f; + } else if (layer4 == 4 and layer5 == 5) { + return rPhiChiSquared < 595.545f; + } else if (layer4 == 7 and layer5 == 13) { + return rPhiChiSquared < 581.339f; + } else if (layer4 == 7 and layer5 == 8) { + return rPhiChiSquared < 112.537f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 13 and layer5 == 14) { + return rPhiChiSquared < 225.322f; + } else if (layer4 == 8 and layer5 == 14) { + return rPhiChiSquared < 1192.402f; + } else if (layer4 == 8 and layer5 == 9) { + return rPhiChiSquared < 786.173f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return rPhiChiSquared < 1037.817f; + } else if (layer4 == 9 and layer5 == 15) { + return rPhiChiSquared < 1808.536f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 12 and layer5 == 13) { + return rPhiChiSquared < 684.253f; + } else if (layer4 == 5 and layer5 == 12) { + return rPhiChiSquared < 684.253f; + } else if (layer4 == 5 and layer5 == 6) { + return rPhiChiSquared < 684.253f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 
== 13 and layer5 == 14) {
+        return rPhiChiSquared < 451.141f;
+      } else if (layer4 == 8 and layer5 == 14) {
+        return rPhiChiSquared < 518.34f;
+      }
+    } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) {
+      if (layer4 == 14 and layer5 == 15) {
+        return rPhiChiSquared < 2077.92f;
+      } else if (layer4 == 9 and layer5 == 10) {
+        return rPhiChiSquared < 74.20f;
+      } else if (layer4 == 9 and layer5 == 15) {
+        return rPhiChiSquared < 1808.536f;
+      }
+    } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) {
+      return rPhiChiSquared < 786.173f;
+    } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) {
+      if (layer4 == 10 and layer5 == 11) {
+        return rPhiChiSquared < 1574.076f;
+      } else if (layer4 == 10 and layer5 == 16) {
+        return rPhiChiSquared < 5492.11f;
+      } else if (layer4 == 15 and layer5 == 16) {
+        return rPhiChiSquared < 2743.037f;
+      }
+    }
+    return true;
+  }
+
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT5RZChiSquared(TAcc const& acc,
+                                                              ModulesConst modules,
+                                                              uint16_t* lowerModuleIndices,
+                                                              float* rtPix,
+                                                              float* zPix,
+                                                              float* rts,
+                                                              float* zs) {
+    //the slope dz/drt is taken from the two pixel hits (rtPix, zPix); each of the Params_T5::kLayers outer hits
+    //(rts, zs) adds its squared residual to that line divided by a pitch-based error2; returns sqrt(sum / 5)
+
+    float slope = (zPix[1] - zPix[0]) / (rtPix[1] - rtPix[0]);
+    float residual = 0;
+    float error2 = 0;
+    //running sum of residual^2 / error2; the 0.2f applied at the end hard-codes five outer hits
+    float RMSE = 0;
+    for (size_t i = 0; i < Params_T5::kLayers; i++) {
+      uint16_t& lowerModuleIndex = lowerModuleIndices[i];
+      const int moduleType = modules.moduleType()[lowerModuleIndex];
+      const int moduleSide = modules.sides()[lowerModuleIndex];
+      const int moduleSubdet = modules.subdets()[lowerModuleIndex];
+
+      residual = (moduleSubdet == Barrel) ? (zs[i] - zPix[0]) - slope * (rts[i] - rtPix[0])  // barrel: residual in z
+                                          : (rts[i] - rtPix[0]) - (zs[i] - zPix[0]) / slope;  // else: residual in rt
+      const float& drdz = modules.drdzs()[lowerModuleIndex];
+      //PS Modules
+      if (moduleType == 0) {
+        error2 = kPixelPSZpitch * kPixelPSZpitch;
+      } else  //2S modules
+      {
+        error2 = kStrip2SZpitch * kStrip2SZpitch;
+      }
+
+      //special dispensation to tilted PS modules!
+      if (moduleType == 0 and moduleSubdet == Barrel and moduleSide != Center) {
+        error2 /= (1.f + drdz * drdz);
+      }
+      RMSE += (residual * residual) / error2;
+    }
+
+    RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE);  // Divided by the degree of freedom 5.
+    return RMSE;
+  }
+
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelQuintupletDefaultAlgo(TAcc const& acc,
+                                                                    ModulesConst modules,
+                                                                    ObjectRangesConst ranges,
+                                                                    MiniDoubletsConst mds,
+                                                                    SegmentsConst segments,
+                                                                    SegmentsPixelConst segmentsPixel,
+                                                                    TripletsConst triplets,
+                                                                    QuintupletsConst quintuplets,
+                                                                    unsigned int pixelSegmentIndex,
+                                                                    unsigned int quintupletIndex,
+                                                                    float& rzChiSquared,
+                                                                    float& rPhiChiSquared,
+                                                                    float& rPhiChiSquaredInwards,
+                                                                    float& pixelRadius,
+                                                                    float& quintupletRadius,
+                                                                    float& centerX,
+                                                                    float& centerY,
+                                                                    unsigned int pixelSegmentArrayIndex) {
+    unsigned int t5InnerT3Index = quintuplets.tripletIndices()[quintupletIndex][0];
+    unsigned int t5OuterT3Index = quintuplets.tripletIndices()[quintupletIndex][1];
+
+    float pixelRadiusTemp, tripletRadius, rPhiChiSquaredTemp, rzChiSquaredTemp, rPhiChiSquaredInwardsTemp, centerXTemp,
+        centerYTemp;
+
+    if (not runPixelTripletDefaultAlgo(acc,
+                                       modules,
+                                       ranges,
+                                       mds,
+                                       segments,
+                                       segmentsPixel,
+                                       triplets,
+                                       pixelSegmentIndex,
+                                       t5InnerT3Index,
+                                       pixelRadiusTemp,
+                                       tripletRadius,
+                                       centerXTemp,
+                                       centerYTemp,
+                                       rzChiSquaredTemp,
+                                       rPhiChiSquaredTemp,
+                                       rPhiChiSquaredInwardsTemp,
+                                       false))
+      return false;
+
+    unsigned int firstSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][0];
+    unsigned int secondSegmentIndex = triplets.segmentIndices()[t5InnerT3Index][1];
+    unsigned int
thirdSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[t5OuterT3Index][1]; + + unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1]; + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; + + uint16_t lowerModuleIndex1 = quintuplets.lowerModuleIndices()[quintupletIndex][0]; + uint16_t lowerModuleIndex2 = quintuplets.lowerModuleIndices()[quintupletIndex][1]; + uint16_t lowerModuleIndex3 = quintuplets.lowerModuleIndices()[quintupletIndex][2]; + uint16_t lowerModuleIndex4 = quintuplets.lowerModuleIndices()[quintupletIndex][3]; + uint16_t lowerModuleIndex5 = quintuplets.lowerModuleIndices()[quintupletIndex][4]; + + uint16_t lowerModuleIndices[Params_T5::kLayers] = { + lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; + + float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]}; + float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]}; + float zs[Params_T5::kLayers] = {mds.anchorZ()[firstMDIndex], + mds.anchorZ()[secondMDIndex], + mds.anchorZ()[thirdMDIndex], + mds.anchorZ()[fourthMDIndex], + mds.anchorZ()[fifthMDIndex]}; + float rts[Params_T5::kLayers] = {mds.anchorRt()[firstMDIndex], + mds.anchorRt()[secondMDIndex], + mds.anchorRt()[thirdMDIndex], + mds.anchorRt()[fourthMDIndex], + mds.anchorRt()[fifthMDIndex]}; + + rzChiSquared = computePT5RZChiSquared(acc, modules, lowerModuleIndices, rtPix, zPix, rts, zs); + + if (/*pixelRadius*/ 0 < 5.0f 
* kR1GeVf) { // FIXME: pixelRadius is not defined yet + if (not passPT5RZChiSquaredCuts(modules, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rzChiSquared)) + return false; + } + + //outer T5 + float xs[Params_T5::kLayers] = {mds.anchorX()[firstMDIndex], + mds.anchorX()[secondMDIndex], + mds.anchorX()[thirdMDIndex], + mds.anchorX()[fourthMDIndex], + mds.anchorX()[fifthMDIndex]}; + float ys[Params_T5::kLayers] = {mds.anchorY()[firstMDIndex], + mds.anchorY()[secondMDIndex], + mds.anchorY()[thirdMDIndex], + mds.anchorY()[fourthMDIndex], + mds.anchorY()[fifthMDIndex]}; + + //get the appropriate radii and centers + centerX = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex]; + centerY = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex]; + pixelRadius = segmentsPixel.circleRadius()[pixelSegmentArrayIndex]; + + float T5CenterX = quintuplets.regressionG()[quintupletIndex]; + float T5CenterY = quintuplets.regressionF()[quintupletIndex]; + quintupletRadius = quintuplets.regressionRadius()[quintupletIndex]; + + rPhiChiSquared = computePT5RPhiChiSquared(acc, modules, lowerModuleIndices, centerX, centerY, pixelRadius, xs, ys); + + if (pixelRadius < 5.0f * kR1GeVf) { + if (not passPT5RPhiChiSquaredCuts(modules, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rPhiChiSquared)) + return false; + } + + float xPix[] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]}; + float yPix[] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]}; + rPhiChiSquaredInwards = computePT5RPhiChiSquaredInwards(T5CenterX, T5CenterY, quintupletRadius, xPix, yPix); + + if (quintuplets.regressionRadius()[quintupletIndex] < 5.0f * kR1GeVf) { + if (not passPT5RPhiChiSquaredInwardsCuts(modules, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rPhiChiSquaredInwards)) + return false; + } + 
//trusting the T5 regression center to also be a good estimate.. + centerX = (centerX + T5CenterX) / 2; + centerY = (centerY + T5CenterY) / 2; + + return true; + } + + struct CreatePixelQuintupletsFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ModulesPixelConst modulesPixel, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixel segmentsPixel, + Triplets triplets, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + PixelQuintuplets pixelQuintuplets, + unsigned int* connectedPixelSize, + unsigned int* connectedPixelIndex, + unsigned int nPixelSegments, + ObjectRangesConst ranges) const { + auto const globalBlockIdx = alpaka::getIdx(acc); + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridBlockExtent = alpaka::getWorkDiv(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int i_pLS = globalThreadIdx[1]; i_pLS < nPixelSegments; i_pLS += gridThreadExtent[1]) { + auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS]; + for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max; + iLSModule += gridBlockExtent[0]) { + //these are actual module indices + uint16_t quintupletLowerModuleIndex = modulesPixel.connectedPixels()[iLSModule]; + if (quintupletLowerModuleIndex >= modules.nLowerModules()) + continue; + if (modules.moduleType()[quintupletLowerModuleIndex] == TwoS) + continue; + uint16_t pixelModuleIndex = modules.nLowerModules(); + if (segmentsPixel.isDup()[i_pLS]) + continue; + unsigned int nOuterQuintuplets = quintupletsOccupancy.nQuintuplets()[quintupletLowerModuleIndex]; + + if (nOuterQuintuplets == 0) + continue; + + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; + + //fetch the quintuplet + for (unsigned int outerQuintupletArrayIndex = globalThreadIdx[2]; + outerQuintupletArrayIndex < nOuterQuintuplets; + 
outerQuintupletArrayIndex += gridThreadExtent[2]) { + unsigned int quintupletIndex = + ranges.quintupletModuleIndices()[quintupletLowerModuleIndex] + outerQuintupletArrayIndex; + + if (quintuplets.isDup()[quintupletIndex]) + continue; + + float rzChiSquared, rPhiChiSquared, rPhiChiSquaredInwards, pixelRadius, quintupletRadius, centerX, centerY; + + bool success = runPixelQuintupletDefaultAlgo(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + triplets, + quintuplets, + pixelSegmentIndex, + quintupletIndex, + rzChiSquared, + rPhiChiSquared, + rPhiChiSquaredInwards, + pixelRadius, + quintupletRadius, + centerX, + centerY, + static_cast(i_pLS)); + if (success) { + unsigned int totOccupancyPixelQuintuplets = alpaka::atomicAdd( + acc, &pixelQuintuplets.totOccupancyPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyPixelQuintuplets >= n_max_pixel_quintuplets) { +#ifdef WARNINGS + printf("Pixel Quintuplet excess alert!\n"); +#endif + } else { + unsigned int pixelQuintupletIndex = + alpaka::atomicAdd(acc, &pixelQuintuplets.nPixelQuintuplets(), 1u, alpaka::hierarchy::Threads{}); + float eta = __H2F(quintuplets.eta()[quintupletIndex]); + float phi = __H2F(quintuplets.phi()[quintupletIndex]); + + addPixelQuintupletToMemory(modules, + mds, + segments, + quintuplets, + pixelQuintuplets, + pixelSegmentIndex, + quintupletIndex, + pixelQuintupletIndex, + rzChiSquared, + rPhiChiSquared, + rPhiChiSquaredInwards, + rPhiChiSquared, + eta, + phi, + pixelRadius, + quintupletRadius, + centerX, + centerY); + + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfPT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; + segmentsPixel.partOfPT5()[i_pLS] = true; + quintuplets.partOfPT5()[quintupletIndex] = true; + } // tot occupancy + } // end success + } // end T5 + } // end iLS + } // end i_pLS + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git 
a/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h new file mode 100644 index 0000000000000..a8be90fff5227 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/PixelTriplet.h @@ -0,0 +1,1587 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_PixelTriplet_h +#define RecoTracker_LSTCore_src_alpaka_PixelTriplet_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex); + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex); + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelTripletToMemory(MiniDoubletsConst 
mds, + SegmentsConst segments, + TripletsConst triplets, + PixelTriplets pixelTriplets, + unsigned int pixelSegmentIndex, + unsigned int tripletIndex, + float pixelRadius, + float tripletRadius, + float centerX, + float centerY, + float rPhiChiSquared, + float rPhiChiSquaredInwards, + float rzChiSquared, + unsigned int pixelTripletIndex, + float pt, + float eta, + float phi, + float eta_pix, + float phi_pix, + float score) { + pixelTriplets.pixelSegmentIndices()[pixelTripletIndex] = pixelSegmentIndex; + pixelTriplets.tripletIndices()[pixelTripletIndex] = tripletIndex; + pixelTriplets.pixelRadius()[pixelTripletIndex] = __F2H(pixelRadius); + pixelTriplets.tripletRadius()[pixelTripletIndex] = __F2H(tripletRadius); + pixelTriplets.pt()[pixelTripletIndex] = __F2H(pt); + pixelTriplets.eta()[pixelTripletIndex] = __F2H(eta); + pixelTriplets.phi()[pixelTripletIndex] = __F2H(phi); + pixelTriplets.eta_pix()[pixelTripletIndex] = __F2H(eta_pix); + pixelTriplets.phi_pix()[pixelTripletIndex] = __F2H(phi_pix); + pixelTriplets.isDup()[pixelTripletIndex] = false; + pixelTriplets.score()[pixelTripletIndex] = __F2H(score); + + pixelTriplets.centerX()[pixelTripletIndex] = __F2H(centerX); + pixelTriplets.centerY()[pixelTripletIndex] = __F2H(centerY); + pixelTriplets.logicalLayers()[pixelTripletIndex][0] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][1] = 0; + pixelTriplets.logicalLayers()[pixelTripletIndex][2] = triplets.logicalLayers()[tripletIndex][0]; + pixelTriplets.logicalLayers()[pixelTripletIndex][3] = triplets.logicalLayers()[tripletIndex][1]; + pixelTriplets.logicalLayers()[pixelTripletIndex][4] = triplets.logicalLayers()[tripletIndex][2]; + + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][0] = segments.innerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][1] = segments.outerLowerModuleIndices()[pixelSegmentIndex]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][2] = 
triplets.lowerModuleIndices()[tripletIndex][0]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][3] = triplets.lowerModuleIndices()[tripletIndex][1]; + pixelTriplets.lowerModuleIndices()[pixelTripletIndex][4] = triplets.lowerModuleIndices()[tripletIndex][2]; + + unsigned int pixelInnerMD = segments.mdIndices()[pixelSegmentIndex][0]; + unsigned int pixelOuterMD = segments.mdIndices()[pixelSegmentIndex][1]; + + pixelTriplets.hitIndices()[pixelTripletIndex][0] = mds.anchorHitIndices()[pixelInnerMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][1] = mds.outerHitIndices()[pixelInnerMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][2] = mds.anchorHitIndices()[pixelOuterMD]; + pixelTriplets.hitIndices()[pixelTripletIndex][3] = mds.outerHitIndices()[pixelOuterMD]; + + pixelTriplets.hitIndices()[pixelTripletIndex][4] = triplets.hitIndices()[tripletIndex][0]; + pixelTriplets.hitIndices()[pixelTripletIndex][5] = triplets.hitIndices()[tripletIndex][1]; + pixelTriplets.hitIndices()[pixelTripletIndex][6] = triplets.hitIndices()[tripletIndex][2]; + pixelTriplets.hitIndices()[pixelTripletIndex][7] = triplets.hitIndices()[tripletIndex][3]; + pixelTriplets.hitIndices()[pixelTripletIndex][8] = triplets.hitIndices()[tripletIndex][4]; + pixelTriplets.hitIndices()[pixelTripletIndex][9] = triplets.hitIndices()[tripletIndex][5]; + pixelTriplets.rPhiChiSquared()[pixelTripletIndex] = rPhiChiSquared; + pixelTriplets.rPhiChiSquaredInwards()[pixelTripletIndex] = rPhiChiSquaredInwards; + pixelTriplets.rzChiSquared()[pixelTripletIndex] = rzChiSquared; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTrackletDefaultAlgopT3(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex) { + short 
outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; + + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; + + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int fourthMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + + if (outerInnerLowerModuleSubdet == Barrel and + (outerOuterLowerModuleSubdet == Barrel or outerOuterLowerModuleSubdet == Endcap)) { + return runTripletDefaultAlgoPPBB(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + pixelLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runTripletDefaultAlgoPPEE(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + pixelLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } + return false; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RZChiSquaredCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + float rzChiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + 
modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + + if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return rzChiSquared < 13.6067f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) { + return rzChiSquared < 5.5953f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return rzChiSquared < 3.9263f; + } + /* + else if(layer1 == 7 and layer2 == 8 and layer3 == 14) + { + // PS+PS+2S in endcap layers 1+2+3, which is not really feasible in the current geometry, + // without skipping barrel layers 1 and 2 (not allowed by algorithm logic). + } + */ + else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return rzChiSquared < 9.4377f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return rzChiSquared < 9.9975f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return rzChiSquared < 8.6369f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return rzChiSquared < 37.945f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) { + return rzChiSquared < 43.0167f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return rzChiSquared < 8.6923f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return rzChiSquared < 11.9672f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 13) { + return rzChiSquared < 16.2133f; + } + + //default - category not found! 
+ return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquaredpT3(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float g, + float f, + float radius) { + //given values of (g, f, radius) and a set of points (and its uncertainties) + //compute chi squared + float c = g * g + f * f - radius * radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma2; + for (size_t i = 0; i < nPoints; i++) { + absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) + : kPi / 2.f); + if (xs[i] > 0 and ys[i] > 0) { + angleM = kPi / 2.f - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + kPi / 2.f; + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + kPi / 2.f); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(kPi / 2.f - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2; + } + return chiSquared; + } + + //TODO: merge this one and the pT5 function later into a single function + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquared(TAcc const& acc, + ModulesConst modules, + uint16_t* lowerModuleIndices, + float g, + float f, + float radius, + float* xs, + float* ys) { + float delta1[3]{}, delta2[3]{}, slopes[3]{}; + bool isFlat[3]{}; + float chiSquared = 0; + float inv1 = kWidthPS / kWidth2S; + 
float inv2 = kPixelPSZpitch / kWidth2S; + for (size_t i = 0; i < 3; i++) { + ModuleType moduleType = modules.moduleType()[lowerModuleIndices[i]]; + short moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + short moduleSide = modules.sides()[lowerModuleIndices[i]]; + float drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1; + delta2[i] = 1; + slopes[i] = -999; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + + /* + despite the type of the module layer of the lower module index, all anchor + hits are on the pixel side and all non-anchor hits are on the strip side! + */ + delta2[i] = inv2; + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1; + delta2[i] = 500 * inv1; + isFlat[i] = false; + } +#ifdef WARNINGS + else { + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! 
subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); + } +#endif + } + chiSquared = computeChiSquaredpT3(acc, 3, xs, ys, delta1, delta2, slopes, isFlat, g, f, radius); + + return chiSquared; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RPhiChiSquaredInwards( + float g, float f, float r, float* xPix, float* yPix) { + float residual = (xPix[0] - g) * (xPix[0] - g) + (yPix[0] - f) * (yPix[0] - f) - r * r; + float chiSquared = residual * residual; + residual = (xPix[1] - g) * (xPix[1] - g) + (yPix[1] - f) * (yPix[1] - f) - r * r; + chiSquared += residual * residual; + + chiSquared *= 0.5f; + return chiSquared; + } + + //90pc threshold + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + float chiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + + if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return chiSquared < 7.003f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) { + return chiSquared < 0.5f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return chiSquared < 8.046f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 14) { + return chiSquared < 0.575f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return chiSquared < 5.304f; + } 
else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return chiSquared < 10.6211f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return chiSquared < 4.617f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return chiSquared < 8.046f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 13) { + return chiSquared < 0.435f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return chiSquared < 9.244f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) { + return chiSquared < 0.287f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return chiSquared < 18.509f; + } + + return true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPT3RPhiChiSquaredInwardsCuts(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + float chiSquared) { + const int layer1 = + modules.layers()[lowerModuleIndex1] + 6 * (modules.subdets()[lowerModuleIndex1] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS); + const int layer2 = + modules.layers()[lowerModuleIndex2] + 6 * (modules.subdets()[lowerModuleIndex2] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS); + const int layer3 = + modules.layers()[lowerModuleIndex3] + 6 * (modules.subdets()[lowerModuleIndex3] == Endcap) + + 5 * (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS); + + if (layer1 == 7 and layer2 == 8 and layer3 == 9) // endcap layer 1,2,3, ps + { + return chiSquared < 22016.8055f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 14) // endcap layer 1,2,3 layer3->2s + { + return chiSquared < 935179.56807f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 10) // endcap layer 2,3,4 + { + return chiSquared < 29064.12959f; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 15) // endcap layer 2,3,4, layer3->2s 
+ { + return chiSquared < 935179.5681f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) // barrel 1,2,3 + { + return chiSquared < 1370.0113195101474f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) // barrel 1,2 endcap 1 + { + return chiSquared < 5492.110048314815f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) // barrel 2,3,4 + { + return chiSquared < 4160.410806470067f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) // barrel 1, endcap 1,2 + { + return chiSquared < 29064.129591225726f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) // barrel 2,3 endcap 1 + { + return chiSquared < 12634.215376250893f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 12) // barrel 2,3, endcap 1->2s + { + return chiSquared < 353821.69361145404f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) // barrel2, endcap 1,2 + { + return chiSquared < 33393.26076341235f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 13) //barrel 2, endcap 1, endcap2->2s + { + return chiSquared < 935179.5680742573f; + } + + return true; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlappT3(float firstMin, + float firstMax, + float secondMin, + float secondMax) { + return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < firstMin) && (firstMin < secondMax)); + } + + /*bounds for high Pt taken from : http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBBB(TAcc const& acc, + float pixelRadius, + float pixelRadiusError, + float tripletRadius) { + float tripletInvRadiusErrorBound = 0.15624f; + float pixelInvRadiusErrorBound = 0.17235f; + + if (pixelRadius > 2.0f * kR1GeVf) { + pixelInvRadiusErrorBound = 0.6375f; + tripletInvRadiusErrorBound = 0.6588f; + } + + float tripletRadiusInvMax = (1 + 
tripletInvRadiusErrorBound) / tripletRadius; + float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f); + + float pixelRadiusInvMax = + alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError)); + float pixelRadiusInvMin = + alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError)); + + return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBBE(TAcc const& acc, + float pixelRadius, + float pixelRadiusError, + float tripletRadius) { + float tripletInvRadiusErrorBound = 0.45972f; + float pixelInvRadiusErrorBound = 0.19644f; + + if (pixelRadius > 2.0f * kR1GeVf) { + pixelInvRadiusErrorBound = 0.6805f; + tripletInvRadiusErrorBound = 0.8557f; + } + + float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius; + float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f); + + float pixelRadiusInvMax = + alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError)); + float pixelRadiusInvMin = + alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError)); + + return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionBEE(TAcc const& acc, + float pixelRadius, + float pixelRadiusError, + float tripletRadius) { + float tripletInvRadiusErrorBound = 1.59294f; + float pixelInvRadiusErrorBound = 0.255181f; + + if (pixelRadius > 2.0f * kR1GeVf) //as good as not having selections + { + pixelInvRadiusErrorBound = 2.2091f; + tripletInvRadiusErrorBound = 2.3548f; + } + + float tripletRadiusInvMax = (1 + 
tripletInvRadiusErrorBound) / tripletRadius; + float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f); + + float pixelRadiusInvMax = + alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError)); + float pixelRadiusInvMin = + alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError)); + pixelRadiusInvMin = alpaka::math::max(acc, pixelRadiusInvMin, 0.0f); + + return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterionEEE(TAcc const& acc, + float pixelRadius, + float pixelRadiusError, + float tripletRadius) { + float tripletInvRadiusErrorBound = 1.7006f; + float pixelInvRadiusErrorBound = 0.26367f; + + if (pixelRadius > 2.0f * kR1GeVf) //as good as not having selections + { + pixelInvRadiusErrorBound = 2.286f; + tripletInvRadiusErrorBound = 2.436f; + } + + float tripletRadiusInvMax = (1 + tripletInvRadiusErrorBound) / tripletRadius; + float tripletRadiusInvMin = alpaka::math::max(acc, (1 - tripletInvRadiusErrorBound) / tripletRadius, 0.0f); + + float pixelRadiusInvMax = + alpaka::math::max(acc, (1 + pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius - pixelRadiusError)); + float pixelRadiusInvMin = + alpaka::math::min(acc, (1 - pixelInvRadiusErrorBound) / pixelRadius, 1.f / (pixelRadius + pixelRadiusError)); + pixelRadiusInvMin = alpaka::math::max(acc, 0.0f, pixelRadiusInvMin); + + return checkIntervalOverlappT3(tripletRadiusInvMin, tripletRadiusInvMax, pixelRadiusInvMin, pixelRadiusInvMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRadiusCriterion(TAcc const& acc, + ModulesConst modules, + float pixelRadius, + float pixelRadiusError, + float tripletRadius, + int16_t lowerModuleIndex, + uint16_t middleModuleIndex, + uint16_t upperModuleIndex) { + if 
(modules.subdets()[lowerModuleIndex] == Endcap) { + return passRadiusCriterionEEE(acc, pixelRadius, pixelRadiusError, tripletRadius); + } else if (modules.subdets()[middleModuleIndex] == Endcap) { + return passRadiusCriterionBEE(acc, pixelRadius, pixelRadiusError, tripletRadius); + } else if (modules.subdets()[upperModuleIndex] == Endcap) { + return passRadiusCriterionBBE(acc, pixelRadius, pixelRadiusError, tripletRadius); + } else { + return passRadiusCriterionBBB(acc, pixelRadius, pixelRadiusError, tripletRadius); + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computePT3RZChiSquared(TAcc const& acc, + ModulesConst modules, + const uint16_t* lowerModuleIndices, + const float* rtPix, + const float* xPix, + const float* yPix, + const float* zPix, + const float* rts, + const float* xs, + const float* ys, + const float* zs, + float pixelSegmentPt, + float pixelSegmentPx, + float pixelSegmentPy, + float pixelSegmentPz, + int pixelSegmentCharge) { + float residual = 0; + float error2 = 0; + float RMSE = 0; + + float Px = pixelSegmentPx, Py = pixelSegmentPy, Pz = pixelSegmentPz; + int charge = pixelSegmentCharge; + float x1 = xPix[1] / 100; + float y1 = yPix[1] / 100; + float z1 = zPix[1] / 100; + float r1 = rtPix[1] / 100; + + float a = -2.f * k2Rinv1GeVf * 100 * charge; // multiply by 100 to make the correct length units + + for (size_t i = 0; i < Params_T3::kLayers; i++) { + float zsi = zs[i] / 100; + float rtsi = rts[i] / 100; + uint16_t lowerModuleIndex = lowerModuleIndices[i]; + const int moduleType = modules.moduleType()[lowerModuleIndex]; + const int moduleSide = modules.sides()[lowerModuleIndex]; + const int moduleSubdet = modules.subdets()[lowerModuleIndex]; + + // calculation is detailed documented here https://indico.cern.ch/event/1185895/contributions/4982756/attachments/2526561/4345805/helix%20pT3%20summarize.pdf + float diffr, diffz; + float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz); + + float rou = a / p; + if (moduleSubdet == 
Endcap) { + float s = (zsi - z1) * p / Pz; + float x = x1 + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s)); + float y = y1 + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s)); + diffr = alpaka::math::abs(acc, rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100; + } + + if (moduleSubdet == Barrel) { + float paraA = r1 * r1 + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y1 * Px - x1 * Py) / a - rtsi * rtsi; + float paraB = 2 * (x1 * Px + y1 * Py) / a; + float paraC = 2 * (y1 * Px - x1 * Py) / a + 2 * (Px * Px + Py * Py) / (a * a); + float A = paraB * paraB + paraC * paraC; + float B = 2 * paraA * paraB; + float C = paraA * paraA - paraC * paraC; + float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float solz1 = alpaka::math::asin(acc, sol1) / rou * Pz / p + z1; + float solz2 = alpaka::math::asin(acc, sol2) / rou * Pz / p + z1; + float diffz1 = alpaka::math::abs(acc, solz1 - zsi) * 100; + float diffz2 = alpaka::math::abs(acc, solz2 - zsi) * 100; + diffz = alpaka::math::min(acc, diffz1, diffz2); + } + + residual = moduleSubdet == Barrel ? diffz : diffr; + + //PS Modules + if (moduleType == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //special dispensation to tilted PS modules! + if (moduleType == 0 and moduleSubdet == Barrel and moduleSide != Center) { + float drdz = modules.drdzs()[lowerModuleIndex]; + error2 /= (1 + drdz * drdz); + } + RMSE += (residual * residual) / error2; + } + + RMSE = alpaka::math::sqrt(acc, 0.2f * RMSE); // Divided by the degree of freedom 5. 
+ + return RMSE; + } +
+  // Decides whether a pixel segment and an outer-tracker triplet can be combined into a pixel triplet.
+  //
+  // The checks run in the order below and the function returns false at the first one that fails:
+  //   1. runPixelTrackletDefaultAlgopT3 for the pixel segment against the inner segment of the triplet,
+  //      then against the outer segment of the triplet;
+  //   2. passRadiusCriterion on the pixel radius (ptIn * kR1GeVf) against the stored triplet radius;
+  //   3. the r-z, r-phi and inward r-phi chi-squared cuts, applied only when runChiSquaredCuts is true
+  //      and the pixel segment pt is below 5.
+  //
+  // Outputs:
+  //   pixelRadius, tripletRadius  written once step 1 has passed
+  //   rzChiSquared                computed value, or -1 when the chi-squared cuts are not applied
+  //   rPhiChiSquared              always computed once step 2 has passed
+  //   rPhiChiSquaredInwards       computed once the r-phi cut has passed (or is not applied)
+  //   centerX, centerY            set to 0, and only when the function returns true
+  // Because of the early returns, outputs are only partially filled when the result is false.
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runPixelTripletDefaultAlgo(TAcc const& acc,
+                                                                 ModulesConst modules,
+                                                                 ObjectRangesConst ranges,
+                                                                 MiniDoubletsConst mds,
+                                                                 SegmentsConst segments,
+                                                                 SegmentsPixelConst segmentsPixel,
+                                                                 TripletsConst triplets,
+                                                                 unsigned int pixelSegmentIndex,
+                                                                 unsigned int tripletIndex,
+                                                                 float& pixelRadius,
+                                                                 float& tripletRadius,
+                                                                 float& centerX,
+                                                                 float& centerY,
+                                                                 float& rzChiSquared,
+                                                                 float& rPhiChiSquared,
+                                                                 float& rPhiChiSquaredInwards,
+                                                                 bool runChiSquaredCuts = true) {
+    //run pT4 compatibility between the pixel segment and inner segment, and between the pixel and outer segment of the triplet
+    uint16_t pixelModuleIndex = segments.innerLowerModuleIndices()[pixelSegmentIndex];
+
+    uint16_t lowerModuleIndex = triplets.lowerModuleIndices()[tripletIndex][0];
+    uint16_t middleModuleIndex = triplets.lowerModuleIndices()[tripletIndex][1];
+    uint16_t upperModuleIndex = triplets.lowerModuleIndices()[tripletIndex][2];
+
+    // The two segments of the triplet; read once and reused for the tracklet checks and the MD lookups.
+    unsigned int tripletInnerSegmentIndex = triplets.segmentIndices()[tripletIndex][0];
+    unsigned int tripletOuterSegmentIndex = triplets.segmentIndices()[tripletIndex][1];
+
+    // pixel segment vs inner segment of the triplet
+    if (not runPixelTrackletDefaultAlgopT3(acc,
+                                           modules,
+                                           ranges,
+                                           mds,
+                                           segments,
+                                           segmentsPixel,
+                                           pixelModuleIndex,
+                                           lowerModuleIndex,
+                                           middleModuleIndex,
+                                           pixelSegmentIndex,
+                                           tripletInnerSegmentIndex))
+      return false;
+
+    //pixel segment vs outer segment of triplet
+    if (not runPixelTrackletDefaultAlgopT3(acc,
+                                           modules,
+                                           ranges,
+                                           mds,
+                                           segments,
+                                           segmentsPixel,
+                                           pixelModuleIndex,
+                                           middleModuleIndex,
+                                           upperModuleIndex,
+                                           pixelSegmentIndex,
+                                           tripletOuterSegmentIndex))
+      return false;
+
+    //pt matching between the pixel ptin and the triplet circle pt
+    // Pixel-segment quantities are stored relative to the first segment of the pixel module.
+    unsigned int pixelSegmentArrayIndex = pixelSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex];
+    float pixelSegmentPt = segmentsPixel.ptIn()[pixelSegmentArrayIndex];
+    float pixelSegmentPtError = segmentsPixel.ptErr()[pixelSegmentArrayIndex];
+
+    pixelRadius = pixelSegmentPt * kR1GeVf;
+    float pixelRadiusError = pixelSegmentPtError * kR1GeVf;
+    tripletRadius = triplets.radius()[tripletIndex];
+
+    if (not passRadiusCriterion(acc,
+                                modules,
+                                pixelRadius,
+                                pixelRadiusError,
+                                tripletRadius,
+                                lowerModuleIndex,
+                                middleModuleIndex,
+                                upperModuleIndex))
+      return false;
+
+    // Mini-doublets of the pixel segment and of the three triplet layers.
+    unsigned int pixelInnerMDIndex = segments.mdIndices()[pixelSegmentIndex][0];
+    unsigned int pixelOuterMDIndex = segments.mdIndices()[pixelSegmentIndex][1];
+
+    unsigned int firstMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][0];
+    unsigned int secondMDIndex = segments.mdIndices()[tripletInnerSegmentIndex][1];
+    unsigned int thirdMDIndex = segments.mdIndices()[tripletOuterSegmentIndex][1];
+
+    float xs[Params_T3::kLayers] = {
+        mds.anchorX()[firstMDIndex], mds.anchorX()[secondMDIndex], mds.anchorX()[thirdMDIndex]};
+    float ys[Params_T3::kLayers] = {
+        mds.anchorY()[firstMDIndex], mds.anchorY()[secondMDIndex], mds.anchorY()[thirdMDIndex]};
+
+    uint16_t lowerModuleIndices[Params_T3::kLayers] = {lowerModuleIndex, middleModuleIndex, upperModuleIndex};
+
+    // One gate for all three chi-squared cuts: requested by the caller and pixel pt below 5.
+    const bool applyChiSquaredCuts = runChiSquaredCuts and pixelSegmentPt < 5.0f;
+
+    if (applyChiSquaredCuts) {
+      float pixelSegmentPx = segmentsPixel.px()[pixelSegmentArrayIndex];
+      float pixelSegmentPy = segmentsPixel.py()[pixelSegmentArrayIndex];
+      float pixelSegmentPz = segmentsPixel.pz()[pixelSegmentArrayIndex];
+      int pixelSegmentCharge = segmentsPixel.charge()[pixelSegmentArrayIndex];
+
+      float rts[Params_T3::kLayers] = {
+          mds.anchorRt()[firstMDIndex], mds.anchorRt()[secondMDIndex], mds.anchorRt()[thirdMDIndex]};
+      float zs[Params_T3::kLayers] = {
+          mds.anchorZ()[firstMDIndex], mds.anchorZ()[secondMDIndex], mds.anchorZ()[thirdMDIndex]};
+      float rtPix[Params_pLS::kLayers] = {mds.anchorRt()[pixelInnerMDIndex], mds.anchorRt()[pixelOuterMDIndex]};
+      float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]};
+      float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]};
+      float zPix[Params_pLS::kLayers] = {mds.anchorZ()[pixelInnerMDIndex], mds.anchorZ()[pixelOuterMDIndex]};
+
+      rzChiSquared = computePT3RZChiSquared(acc,
+                                            modules,
+                                            lowerModuleIndices,
+                                            rtPix,
+                                            xPix,
+                                            yPix,
+                                            zPix,
+                                            rts,
+                                            xs,
+                                            ys,
+                                            zs,
+                                            pixelSegmentPt,
+                                            pixelSegmentPx,
+                                            pixelSegmentPy,
+                                            pixelSegmentPz,
+                                            pixelSegmentCharge);
+      if (not passPT3RZChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rzChiSquared))
+        return false;
+    } else {
+      rzChiSquared = -1;
+    }
+
+    // r-phi chi-squared of the triplet anchors against the circle stored for the pixel segment.
+    float pixelG = segmentsPixel.circleCenterX()[pixelSegmentArrayIndex];
+    float pixelF = segmentsPixel.circleCenterY()[pixelSegmentArrayIndex];
+    float pixelRadiusPCA = segmentsPixel.circleRadius()[pixelSegmentArrayIndex];
+
+    rPhiChiSquared = computePT3RPhiChiSquared(acc, modules, lowerModuleIndices, pixelG, pixelF, pixelRadiusPCA, xs, ys);
+
+    if (applyChiSquaredCuts and
+        not passPT3RPhiChiSquaredCuts(modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquared))
+      return false;
+
+    // Inward r-phi chi-squared: the pixel anchors against the circle stored for the triplet.
+    // Fresh xPix/yPix arrays are built here, exactly as before, rather than shared with the r-z step.
+    float g = triplets.centerX()[tripletIndex];
+    float f = triplets.centerY()[tripletIndex];
+    float xPix[Params_pLS::kLayers] = {mds.anchorX()[pixelInnerMDIndex], mds.anchorX()[pixelOuterMDIndex]};
+    float yPix[Params_pLS::kLayers] = {mds.anchorY()[pixelInnerMDIndex], mds.anchorY()[pixelOuterMDIndex]};
+    rPhiChiSquaredInwards = computePT3RPhiChiSquaredInwards(g, f, tripletRadius, xPix, yPix);
+
+    if (applyChiSquaredCuts and
+        not passPT3RPhiChiSquaredInwardsCuts(
+            modules, lowerModuleIndex, middleModuleIndex, upperModuleIndex, rPhiChiSquaredInwards))
+      return false;
+
+    centerX = 0;
+    centerY = 0;
+    return true;
+  }
+ + struct CreatePixelTripletsFromMap { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ModulesPixelConst modulesPixel, + ObjectRangesConst
ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + Triplets triplets, + TripletsOccupancyConst tripletsOccupancy, + PixelTriplets pixelTriplets, + unsigned int* connectedPixelSize, + unsigned int* connectedPixelIndex, + unsigned int nPixelSegments) const { + auto const globalBlockIdx = alpaka::getIdx(acc); + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridBlockExtent = alpaka::getWorkDiv(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (unsigned int i_pLS = globalThreadIdx[1]; i_pLS < nPixelSegments; i_pLS += gridThreadExtent[1]) { + auto iLSModule_max = connectedPixelIndex[i_pLS] + connectedPixelSize[i_pLS]; + + for (unsigned int iLSModule = connectedPixelIndex[i_pLS] + globalBlockIdx[0]; iLSModule < iLSModule_max; + iLSModule += gridBlockExtent[0]) { + uint16_t tripletLowerModuleIndex = + modulesPixel.connectedPixels() + [iLSModule]; //connected pixels will have the appropriate lower module index by default! 
+#ifdef WARNINGS + if (tripletLowerModuleIndex >= modules.nLowerModules()) { + printf("tripletLowerModuleIndex %d >= modules.nLowerModules %d \n", + tripletLowerModuleIndex, + modules.nLowerModules()); + continue; //sanity check + } +#endif + //Removes 2S-2S :FIXME: filter these out in the pixel map + if (modules.moduleType()[tripletLowerModuleIndex] == TwoS) + continue; + + uint16_t pixelModuleIndex = modules.nLowerModules(); + unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[tripletLowerModuleIndex]; + if (nOuterTriplets == 0) + continue; + + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + i_pLS; + + if (segmentsPixel.isDup()[i_pLS]) + continue; + if (segmentsPixel.partOfPT5()[i_pLS]) + continue; //don't make pT3s for those pixels that are part of pT5 + + short layer2_adjustment; + if (modules.layers()[tripletLowerModuleIndex] == 1) { + layer2_adjustment = 1; + } //get upper segment to be in second layer + else if (modules.layers()[tripletLowerModuleIndex] == 2) { + layer2_adjustment = 0; + } // get lower segment to be in second layer + else { + continue; + } + + //fetch the triplet + for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; + outerTripletArrayIndex += gridThreadExtent[2]) { + unsigned int outerTripletIndex = + ranges.tripletModuleIndices()[tripletLowerModuleIndex] + outerTripletArrayIndex; + if (modules.moduleType()[triplets.lowerModuleIndices()[outerTripletIndex][1]] == TwoS) + continue; //REMOVES PS-2S + + if (triplets.partOfPT5()[outerTripletIndex]) + continue; //don't create pT3s for T3s accounted in pT5s + + float pixelRadius, tripletRadius, rPhiChiSquared, rzChiSquared, rPhiChiSquaredInwards, centerX, centerY; + bool success = runPixelTripletDefaultAlgo(acc, + modules, + ranges, + mds, + segments, + segmentsPixel, + triplets, + pixelSegmentIndex, + outerTripletIndex, + pixelRadius, + tripletRadius, + centerX, + centerY, + rzChiSquared, + 
rPhiChiSquared, + rPhiChiSquaredInwards); + + if (success) { + float phi = + mds.anchorPhi()[segments + .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; + float eta = + mds.anchorEta()[segments + .mdIndices()[triplets.segmentIndices()[outerTripletIndex][0]][layer2_adjustment]]; + float eta_pix = segmentsPixel.eta()[i_pLS]; + float phi_pix = segmentsPixel.phi()[i_pLS]; + float pt = segmentsPixel.ptIn()[i_pLS]; + float score = rPhiChiSquared + rPhiChiSquaredInwards; + unsigned int totOccupancyPixelTriplets = + alpaka::atomicAdd(acc, &pixelTriplets.totOccupancyPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyPixelTriplets >= n_max_pixel_triplets) { +#ifdef WARNINGS + printf("Pixel Triplet excess alert!\n"); +#endif + } else { + unsigned int pixelTripletIndex = + alpaka::atomicAdd(acc, &pixelTriplets.nPixelTriplets(), 1u, alpaka::hierarchy::Threads{}); + addPixelTripletToMemory(mds, + segments, + triplets, + pixelTriplets, + pixelSegmentIndex, + outerTripletIndex, + pixelRadius, + tripletRadius, + centerX, + centerY, + rPhiChiSquared, + rPhiChiSquaredInwards, + rzChiSquared, + pixelTripletIndex, + pt, + eta, + phi, + eta_pix, + phi_pix, + score); + triplets.partOfPT3()[outerTripletIndex] = true; + } + } + } // for outerTripletArrayIndex + } // for iLSModule < iLSModule_max + } // for i_pLS + } + }; +
+  // Helper for runDeltaBetaIterationspT3: asin of `sinAlpha` after capping it at kSinAlphaMax,
+  // returned with the sign of `signRef`.
+  //FIXME: need a faster version
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE float signedBetaShiftpT3(TAcc const& acc, float sinAlpha, float signRef) {
+    return alpaka::math::copysign(
+        acc, alpaka::math::asin(acc, alpaka::math::min(acc, sinAlpha, kSinAlphaMax)), signRef);
+  }
+
+  // Updates betaIn, betaOut and pt_beta in place.
+  //
+  //   lIn == 0
+  //       Only betaOut is shifted, using sdOut_dr and the incoming |pt_beta|; betaIn and pt_beta are
+  //       left untouched. runTripletDefaultAlgoPPBB and runTripletDefaultAlgoPPEE call with lIn = 0.
+  //   betaIn and betaOut have the same sign, and |pt_beta| < 4 * kPt_betaMax
+  //   (or < 8 * kPt_betaMax when lIn >= 11)
+  //       Two-step update: both angles are shifted with the incoming pt_beta to get a first average,
+  //       that average gives a better pt estimate, the angles are shifted again with it, and pt_beta
+  //       is finally recomputed from the new average.
+  //   lIn < 11, |betaOut| < 0.2 * |betaIn| and |pt_beta| < 12 * kPt_betaMax
+  //       Same two steps, but the sign of betaIn is the reference for both shifts and the first pt
+  //       estimate comes from the shifted betaIn alone.
+  //   otherwise
+  //       Nothing is modified.
+  //
+  // betaAv is taken by value: it is scratch space here and the caller's copy is never updated.
+  template <typename TAcc>
+  ALPAKA_FN_ACC ALPAKA_FN_INLINE void runDeltaBetaIterationspT3(TAcc const& acc,
+                                                                float& betaIn,
+                                                                float& betaOut,
+                                                                float betaAv,
+                                                                float& pt_beta,
+                                                                float sdIn_dr,
+                                                                float sdOut_dr,
+                                                                float dr,
+                                                                float lIn) {
+    // |pt_beta| as passed in; pt_beta itself is only overwritten further down inside the branches.
+    const float ptBetaAbs = alpaka::math::abs(acc, pt_beta);
+
+    if (lIn == 0) {
+      betaOut += signedBetaShiftpT3(acc, sdOut_dr * k2Rinv1GeVf / ptBetaAbs, betaOut);
+      return;
+    }
+
+    if (betaIn * betaOut > 0.f and
+        (ptBetaAbs < 4.f * kPt_betaMax or
+         (lIn >= 11 and ptBetaAbs < 8.f * kPt_betaMax)))  //and the pt_beta is well-defined; less strict for endcap-endcap
+    {
+      const float betaInUpd = betaIn + signedBetaShiftpT3(acc, sdIn_dr * k2Rinv1GeVf / ptBetaAbs, betaIn);
+      const float betaOutUpd = betaOut + signedBetaShiftpT3(acc, sdOut_dr * k2Rinv1GeVf / ptBetaAbs, betaOut);
+      betaAv = 0.5f * (betaInUpd + betaOutUpd);
+
+      //1st update
+      const float pt_beta_inv =
+          1.f / alpaka::math::abs(acc, dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv));  //get a better pt estimate
+
+      betaIn += signedBetaShiftpT3(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, betaIn);
+      betaOut += signedBetaShiftpT3(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, betaOut);
+      //update the av and pt
+      betaAv = 0.5f * (betaIn + betaOut);
+      //2nd update
+      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
+    } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) &&
+               ptBetaAbs < 12.f * kPt_betaMax)  //use betaIn sign as ref
+    {
+      const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn);
+
+      // The guard above already established |betaOut| < 0.2 * |betaIn|, so the first estimate is
+      // always the shifted betaIn; the former "average with betaOut" alternative could never be taken.
+      betaAv = betaIn + signedBetaShiftpT3(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), betaIn);
+
+      //1st update
+      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
+      betaIn += signedBetaShiftpT3(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), betaIn);
+      // betaIn (already shifted on the previous line) is the sign reference for betaOut as well
+      betaOut += signedBetaShiftpT3(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), betaIn);
+      //update the av and pt
+      betaAv = 0.5f * (betaIn + betaOut);
+      //2nd update
+      pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv);  //get a better pt estimate
+    }
+  }
+ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPBB(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + float dPhi, betaIn, betaOut, pt_beta, zLo, zHi, zLoPointed, zHiPointed, dPhiCut, betaOutCut; + + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InUp = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + float rt_OutUp = mds.anchorRt()[fourthMDIndex]; + + float z_InUp = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + float x_InLo = mds.anchorX()[firstMDIndex]; + float x_InUp = mds.anchorX()[secondMDIndex]; + float x_OutLo =
mds.anchorX()[thirdMDIndex]; + float x_OutUp = mds.anchorX()[fourthMDIndex]; + + float y_InLo = mds.anchorY()[firstMDIndex]; + float y_InUp = mds.anchorY()[secondMDIndex]; + float y_OutLo = mds.anchorY()[thirdMDIndex]; + float y_OutUp = mds.anchorY()[fourthMDIndex]; + + float rt_InOut = rt_InUp; + + if (alpaka::math::abs(acc, deltaPhi(acc, x_InUp, y_InUp, x_OutLo, y_OutLo)) > kPi / 2.f) + return false; + + unsigned int pixelSegmentArrayIndex = innerSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; + float ptSLo = ptIn; + float px = segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; + ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); + ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float rtRatio_OutLoInOut = + rt_OutLo / rt_InOut; // Outer segment beginning rt divided by inner segment beginning rt; + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + const float zpitch_InLo = 0.05f; + const float zpitch_InOut = 0.05f; + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + zHi = z_InUp + (z_InUp + kDeltaZLum) * (rtRatio_OutLoInOut - 1.f) * (z_InUp < 0.f ? 1.f : dzDrtScale) + + (zpitch_InOut + zpitch_OutLo); + zLo = z_InUp + (z_InUp - kDeltaZLum) * (rtRatio_OutLoInOut - 1.f) * (z_InUp > 0.f ? 
1.f : dzDrtScale) - + (zpitch_InOut + zpitch_OutLo); //slope-correction only on outer end + + if ((z_OutLo < zLo) || (z_OutLo > zHi)) + return false; + + const float cosh2Eta = 1.f + (pz * pz) / (ptIn * ptIn); + + const float drt_OutLo_InUp = (rt_OutLo - rt_InUp); + + const float r3_InUp = alpaka::math::sqrt(acc, z_InUp * z_InUp + rt_InUp * rt_InUp); + + float drt_InSeg = rt_InOut - rt_InLo; + + const float thetaMuls2 = + (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InUp) / 50.f) * (r3_InUp / rt_InUp); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float dzErr = (drt_OutLo_InUp * drt_OutLo_InUp) * (etaErr * etaErr) * cosh2Eta; + dzErr += 0.03f * 0.03f; // Approximately account for IT module size + dzErr *= 9.f; // 3 sigma + dzErr += muls2 * (drt_OutLo_InUp * drt_OutLo_InUp) / 3.f * cosh2Eta; + dzErr += zGeom * zGeom; + dzErr = alpaka::math::sqrt(acc, dzErr); + + const float dzDrIn = pz / ptIn; + const float zWindow = dzErr / drt_InSeg * drt_OutLo_InUp + zGeom; + const float dzMean = dzDrIn * drt_OutLo_InUp * + (1.f + drt_OutLo_InUp * drt_OutLo_InUp * 4 * k2Rinv1GeVf * k2Rinv1GeVf / ptIn / ptIn / + 24.f); // with curved path correction + // Constructing upper and lower bound + zLoPointed = z_InUp + dzMean - zWindow; + zHiPointed = z_InUp + dzMean + zWindow; + + if ((z_OutLo < zLoPointed) || (z_OutLo > zHiPointed)) + return false; + + const float pvOffset = 0.1f / rt_OutLo; + dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + //no dphipos cut + float midPointX = 0.5f * (x_InLo + x_OutLo); + float midPointY = 0.5f * (y_InLo + y_OutLo); + + float diffX = x_OutLo - x_InLo; + float diffY = y_OutLo - y_InLo; + + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + //lots of array accesses below this... 
+ + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = x_OutUp - x_InUp; + float tl_axis_y = y_OutUp - y_InUp; + + float tl_axis_highEdge_x = tl_axis_x; + float tl_axis_highEdge_y = tl_axis_y; + + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if (isEC_lastLayer) { + alpha_OutUp_highEdge = deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorHighEdgeY()[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorLowEdgeY()[fourthMDIndex] - y_OutLo); + + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - y_InUp; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - y_InUp; + + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + 
deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = + alpaka::math::sqrt(acc, (x_InUp - x_InLo) * (x_InUp - x_InLo) + (y_InUp - y_InLo) * (y_InUp - y_InLo)); + + //no betaIn cut for the pixels + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = ptIn; + + int lIn = 0; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = + alpaka::math::sqrt(acc, (x_OutUp - x_OutLo) * (x_OutUp - x_OutLo) + (y_OutUp - y_OutLo) * (y_OutUp - y_OutLo)); + float sdOut_d = rt_OutUp - rt_OutLo; + + runDeltaBetaIterationspT3(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_ptBetaMax = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float sinDPhi = alpaka::math::sin(acc, dPhi); + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + //FIXME: need faster version + betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if 
(alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + const float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg); + const float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletDefaultAlgoPPEE(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsPixelConst segmentsPixel, + uint16_t pixelModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + float dPhi, betaIn, betaOut, pt_beta, rtLo, rtHi, dPhiCut, betaOutCut; + + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); + + float z_InUp = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + if (z_InUp * z_OutLo <= 0) + return false; + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InUp = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + float rt_OutUp = mds.anchorRt()[fourthMDIndex]; + + float x_InLo = mds.anchorX()[firstMDIndex]; + float x_InUp = mds.anchorX()[secondMDIndex]; + float x_OutLo = mds.anchorX()[thirdMDIndex]; + float x_OutUp = mds.anchorX()[fourthMDIndex]; + + float y_InLo = mds.anchorY()[firstMDIndex]; + float y_InUp = mds.anchorY()[secondMDIndex]; + float y_OutLo = mds.anchorY()[thirdMDIndex]; + float y_OutUp = mds.anchorY()[fourthMDIndex]; + + unsigned int pixelSegmentArrayIndex = 
innerSegmentIndex - ranges.segmentModuleIndices()[pixelModuleIndex]; + + float ptIn = segmentsPixel.ptIn()[pixelSegmentArrayIndex]; + float ptSLo = ptIn; + float px = segmentsPixel.px()[pixelSegmentArrayIndex]; + float py = segmentsPixel.py()[pixelSegmentArrayIndex]; + float pz = segmentsPixel.pz()[pixelSegmentArrayIndex]; + float ptErr = segmentsPixel.ptErr()[pixelSegmentArrayIndex]; + float etaErr = segmentsPixel.etaErr()[pixelSegmentArrayIndex]; + + ptSLo = alpaka::math::max(acc, ptCut, ptSLo - 10.0f * alpaka::math::max(acc, ptErr, 0.005f * ptSLo)); + ptSLo = alpaka::math::min(acc, 10.0f, ptSLo); + + const float zpitch_InLo = 0.05f; + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + + const float slope = alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float dzDrtScale = alpaka::math::tan(acc, slope) / slope; //FIXME: need approximate value + + const float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InUp); + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; + + const float rtGeom1 = isOutSgInnerMDPS + ? 
kPixelPSZpitch + : kStrip2SZpitch; //FIXME: make this chosen by configuration for lay11,12 full PS + const float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InUp); //used in B-E region + rtLo = rt_InUp * (1.f + (z_OutLo - z_InUp - zGeom1) / (z_InUp + zGeom1 + dLum) / dzDrtScale) - + rtGeom1; //slope correction only on the lower end + + float zInForHi = z_InUp - zGeom1 - dLum; + if (zInForHi * z_InUp < 0) + zInForHi = alpaka::math::copysign(acc, 0.1f, z_InUp); + rtHi = rt_InUp * (1.f + (z_OutLo - z_InUp + zGeom1) / zInForHi) + rtGeom1; + + // Cut #2: rt condition + if ((rt_OutLo < rtLo) || (rt_OutLo > rtHi)) + return false; + + const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InUp); + const float cosh2Eta = 1.f + (pz * pz) / (ptIn * ptIn); + const float multDzDr2 = (dzOutInAbs * dzOutInAbs) * cosh2Eta / ((cosh2Eta - 1.f) * (cosh2Eta - 1.f)); + const float r3_InUp = alpaka::math::sqrt(acc, z_InUp * z_InUp + rt_InUp * rt_InUp); + const float thetaMuls2 = + (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InUp) / 50.f) * (r3_InUp / rt_InUp); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float drtErr = (etaErr * etaErr) * multDzDr2; + drtErr += 0.03f * 0.03f; // Approximately account for IT module size + drtErr *= 9.f; // 3 sigma + drtErr += muls2 * multDzDr2 / 3.f * cosh2Eta; + drtErr = alpaka::math::sqrt(acc, drtErr); + const float drtDzIn = alpaka::math::abs(acc, ptIn / pz); + + const float drt_OutLo_InUp = (rt_OutLo - rt_InUp); // drOutIn + + const float rtWindow = drtErr + rtGeom1; + const float drtMean = drtDzIn * dzOutInAbs * + (1.f - drt_OutLo_InUp * drt_OutLo_InUp * 4 * k2Rinv1GeVf * k2Rinv1GeVf / ptIn / ptIn / + 24.f); // with curved path correction + const float rtLo_point = rt_InUp + drtMean - rtWindow; + const float rtHi_point = rt_InUp + drtMean + rtWindow; + + // Cut #3: rt-z pointed + if ((rt_OutLo < rtLo_point) || (rt_OutLo > rtHi_point)) + return false; + + const float alpha1GeV_OutLo = + 
alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + const float pvOffset = 0.1f / rt_OutLo; + dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float midPointX = 0.5f * (x_InLo + x_OutLo); + float midPointY = 0.5f * (y_InLo + y_OutLo); + + float diffX = x_OutLo - x_InLo; + float diffY = y_OutLo - y_InLo; + + dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + // Cut #5: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + + alpha_OutUp = deltaPhi(acc, x_OutUp, y_OutUp, x_OutUp - x_OutLo, y_OutUp - y_OutLo); + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = x_OutUp - x_InUp; + float tl_axis_y = y_OutUp - y_InUp; + + float tl_axis_highEdge_x = tl_axis_x; + float tl_axis_highEdge_y = tl_axis_y; + + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + betaIn = -deltaPhi(acc, px, py, tl_axis_x, tl_axis_y); + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + betaOut = -alpha_OutUp + deltaPhi(acc, x_OutUp, y_OutUp, tl_axis_x, tl_axis_y); + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + if (isEC_lastLayer) { + alpha_OutUp_highEdge = deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + mds.anchorHighEdgeX()[fourthMDIndex] - x_OutLo, + mds.anchorHighEdgeY()[fourthMDIndex] - y_OutLo); + alpha_OutUp_lowEdge = deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + mds.anchorLowEdgeX()[fourthMDIndex] - x_OutLo, + 
mds.anchorLowEdgeY()[fourthMDIndex] - y_OutLo); + + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - y_InUp; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - x_InUp; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - y_InUp; + + betaOutRHmin = -alpha_OutUp_highEdge + deltaPhi(acc, + mds.anchorHighEdgeX()[fourthMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex], + tl_axis_highEdge_x, + tl_axis_highEdge_y); + betaOutRHmax = -alpha_OutUp_lowEdge + deltaPhi(acc, + mds.anchorLowEdgeX()[fourthMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex], + tl_axis_lowEdge_x, + tl_axis_lowEdge_y); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + //no betaIn cut for the pixels + const float rt_InSeg = + alpaka::math::sqrt(acc, (x_InUp - x_InLo) * (x_InUp - x_InLo) + (y_InUp - y_InLo) * (y_InUp - y_InLo)); + + float betaAv = 0.5f * (betaIn + betaOut); + pt_beta = ptIn; + + int lIn = 0; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = + alpaka::math::sqrt(acc, (x_OutUp - x_OutLo) * (x_OutUp - x_OutLo) + (y_OutUp - y_OutLo) * (y_OutUp - y_OutLo)); + float sdOut_d = rt_OutUp - rt_OutLo; + + runDeltaBetaIterationspT3(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? 
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_ptBetaMax = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_ptBetaMax * min_ptBeta_ptBetaMax); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InUp * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InUp); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float sinDPhi = alpaka::math::sin(acc, dPhi); + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + betaOutCut = + alpaka::math::asin( + acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) //FIXME: need faster version + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if 
(alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + + float drt_InSeg = rt_InUp - rt_InLo; + + const float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg); + const float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + return dBeta * dBeta <= dBetaCut2; + } + +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Quintuplet.h b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h new file mode 100644 index 0000000000000..24ce2d1d53e22 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Quintuplet.h @@ -0,0 +1,2592 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Quintuplet_h +#define RecoTracker_LSTCore_src_alpaka_Quintuplet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +#include "NeuralNetwork.h" +#include "Hit.h" +#include "Triplet.h" // FIXME: need to refactor common functions to a common place + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool checkIntervalOverlap(float firstMin, + float firstMax, + float secondMin, + float secondMax) { + return ((firstMin <= secondMin) && (secondMin < firstMax)) || ((secondMin < 
firstMin) && (firstMin < secondMax)); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addQuintupletToMemory(TripletsConst triplets, + Quintuplets quintuplets, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + uint16_t lowerModule1, + uint16_t lowerModule2, + uint16_t lowerModule3, + uint16_t lowerModule4, + uint16_t lowerModule5, + float innerRadius, + float bridgeRadius, + float outerRadius, + float regressionG, + float regressionF, + float regressionRadius, + float rzChiSquared, + float rPhiChiSquared, + float nonAnchorChiSquared, + float pt, + float eta, + float phi, + float scores, + uint8_t layer, + unsigned int quintupletIndex, + bool tightCutFlag) { + quintuplets.tripletIndices()[quintupletIndex][0] = innerTripletIndex; + quintuplets.tripletIndices()[quintupletIndex][1] = outerTripletIndex; + + quintuplets.lowerModuleIndices()[quintupletIndex][0] = lowerModule1; + quintuplets.lowerModuleIndices()[quintupletIndex][1] = lowerModule2; + quintuplets.lowerModuleIndices()[quintupletIndex][2] = lowerModule3; + quintuplets.lowerModuleIndices()[quintupletIndex][3] = lowerModule4; + quintuplets.lowerModuleIndices()[quintupletIndex][4] = lowerModule5; + quintuplets.innerRadius()[quintupletIndex] = __F2H(innerRadius); + quintuplets.outerRadius()[quintupletIndex] = __F2H(outerRadius); + quintuplets.pt()[quintupletIndex] = __F2H(pt); + quintuplets.eta()[quintupletIndex] = __F2H(eta); + quintuplets.phi()[quintupletIndex] = __F2H(phi); + quintuplets.score_rphisum()[quintupletIndex] = __F2H(scores); + quintuplets.isDup()[quintupletIndex] = 0; + quintuplets.tightCutFlag()[quintupletIndex] = tightCutFlag; + quintuplets.regressionRadius()[quintupletIndex] = regressionRadius; + quintuplets.regressionG()[quintupletIndex] = regressionG; + quintuplets.regressionF()[quintupletIndex] = regressionF; + quintuplets.logicalLayers()[quintupletIndex][0] = triplets.logicalLayers()[innerTripletIndex][0]; + quintuplets.logicalLayers()[quintupletIndex][1] = 
triplets.logicalLayers()[innerTripletIndex][1]; + quintuplets.logicalLayers()[quintupletIndex][2] = triplets.logicalLayers()[innerTripletIndex][2]; + quintuplets.logicalLayers()[quintupletIndex][3] = triplets.logicalLayers()[outerTripletIndex][1]; + quintuplets.logicalLayers()[quintupletIndex][4] = triplets.logicalLayers()[outerTripletIndex][2]; + + quintuplets.hitIndices()[quintupletIndex][0] = triplets.hitIndices()[innerTripletIndex][0]; + quintuplets.hitIndices()[quintupletIndex][1] = triplets.hitIndices()[innerTripletIndex][1]; + quintuplets.hitIndices()[quintupletIndex][2] = triplets.hitIndices()[innerTripletIndex][2]; + quintuplets.hitIndices()[quintupletIndex][3] = triplets.hitIndices()[innerTripletIndex][3]; + quintuplets.hitIndices()[quintupletIndex][4] = triplets.hitIndices()[innerTripletIndex][4]; + quintuplets.hitIndices()[quintupletIndex][5] = triplets.hitIndices()[innerTripletIndex][5]; + quintuplets.hitIndices()[quintupletIndex][6] = triplets.hitIndices()[outerTripletIndex][2]; + quintuplets.hitIndices()[quintupletIndex][7] = triplets.hitIndices()[outerTripletIndex][3]; + quintuplets.hitIndices()[quintupletIndex][8] = triplets.hitIndices()[outerTripletIndex][4]; + quintuplets.hitIndices()[quintupletIndex][9] = triplets.hitIndices()[outerTripletIndex][5]; + quintuplets.bridgeRadius()[quintupletIndex] = bridgeRadius; + quintuplets.rzChiSquared()[quintupletIndex] = rzChiSquared; + quintuplets.chiSquared()[quintupletIndex] = rPhiChiSquared; + quintuplets.nonAnchorChiSquared()[quintupletIndex] = nonAnchorChiSquared; + } + + //90% constraint + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passChiSquaredConstraint(ModulesConst modules, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float chiSquared) { + // Using lstLayer numbering convention defined in ModuleMethods.h + const int layer1 = modules.lstLayers()[lowerModuleIndex1]; + const int layer2 = 
modules.lstLayers()[lowerModuleIndex2]; + const int layer3 = modules.lstLayers()[lowerModuleIndex3]; + const int layer4 = modules.lstLayers()[lowerModuleIndex4]; + const int layer5 = modules.lstLayers()[lowerModuleIndex5]; + + if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + if (layer4 == 10 and layer5 == 11) { + return chiSquared < 0.01788f; + } else if (layer4 == 10 and layer5 == 16) { + return chiSquared < 0.04725f; + } else if (layer4 == 15 and layer5 == 16) { + return chiSquared < 0.04725f; + } + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return chiSquared < 0.01788f; + } else if (layer4 == 9 and layer5 == 15) { + return chiSquared < 0.08234f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 8 and layer5 == 9) { + return chiSquared < 0.02360f; + } else if (layer4 == 8 and layer5 == 14) { + return chiSquared < 0.07167f; + } else if (layer4 == 13 and layer5 == 14) { + return chiSquared < 0.08234f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 7 and layer5 == 8) { + return chiSquared < 0.01026f; + } else if (layer4 == 7 and layer5 == 13) { + return chiSquared < 0.06238f; + } else if (layer4 == 12 and layer5 == 13) { + return chiSquared < 0.06238f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3 and layer4 == 4) { + if (layer5 == 5) { + return chiSquared < 0.04725f; + } else if (layer5 == 12) { + return chiSquared < 0.09461f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 10) { + return chiSquared < 0.00512f; + } + if (layer4 == 9 and layer5 == 15) { + return chiSquared < 0.04112f; + } else if (layer4 == 14 and layer5 == 15) { + return chiSquared < 0.06238f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 == 8 and layer5 == 14) { + return chiSquared < 0.07167f; + } else if (layer4 == 13 and layer5 == 14) { + return chiSquared < 0.06238f; + } + } else if 
(layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 5 and layer5 == 6) { + return chiSquared < 0.08234f; + } else if (layer4 == 5 and layer5 == 12) { + return chiSquared < 0.10870f; + } else if (layer4 == 12 and layer5 == 13) { + return chiSquared < 0.10870f; + } + } else if (layer1 == 3 and layer2 == 7 and layer3 == 8 and layer4 == 14 and layer5 == 15) { + return chiSquared < 0.09461f; + } else if (layer1 == 3 and layer2 == 4 and layer3 == 5 and layer4 == 12 and layer5 == 13) { + return chiSquared < 0.09461f; + } + + return true; + } + + //bounds can be found at http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_RZFix/t5_rz_thresholds.txt + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passT5RZConstraint(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex, + unsigned int fifthMDIndex, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + float& rzChiSquared, + float inner_pt, + float innerRadius, + float g, + float f, + bool& tightCutFlag) { + //(g,f) is the center of the circle fitted by the innermost 3 points on x,y coordinates + const float rt1 = mds.anchorRt()[firstMDIndex] / 100; //in the unit of m instead of cm + const float rt2 = mds.anchorRt()[secondMDIndex] / 100; + const float rt3 = mds.anchorRt()[thirdMDIndex] / 100; + const float rt4 = mds.anchorRt()[fourthMDIndex] / 100; + const float rt5 = mds.anchorRt()[fifthMDIndex] / 100; + + const float z1 = mds.anchorZ()[firstMDIndex] / 100; + const float z2 = mds.anchorZ()[secondMDIndex] / 100; + const float z3 = mds.anchorZ()[thirdMDIndex] / 100; + const float z4 = mds.anchorZ()[fourthMDIndex] / 100; + const float z5 = mds.anchorZ()[fifthMDIndex] / 100; + + // Using lst_layer numbering convention defined in ModuleMethods.h + const int layer1 = 
modules.lstLayers()[lowerModuleIndex1]; + const int layer2 = modules.lstLayers()[lowerModuleIndex2]; + const int layer3 = modules.lstLayers()[lowerModuleIndex3]; + const int layer4 = modules.lstLayers()[lowerModuleIndex4]; + const int layer5 = modules.lstLayers()[lowerModuleIndex5]; + + //slope computed using the internal T3s + const int moduleType1 = modules.moduleType()[lowerModuleIndex1]; //0 is ps, 1 is 2s + const int moduleType2 = modules.moduleType()[lowerModuleIndex2]; + const int moduleType3 = modules.moduleType()[lowerModuleIndex3]; + const int moduleType4 = modules.moduleType()[lowerModuleIndex4]; + const int moduleType5 = modules.moduleType()[lowerModuleIndex5]; + + const float x1 = mds.anchorX()[firstMDIndex] / 100; + const float x2 = mds.anchorX()[secondMDIndex] / 100; + const float x3 = mds.anchorX()[thirdMDIndex] / 100; + const float x4 = mds.anchorX()[fourthMDIndex] / 100; + const float y1 = mds.anchorY()[firstMDIndex] / 100; + const float y2 = mds.anchorY()[secondMDIndex] / 100; + const float y3 = mds.anchorY()[thirdMDIndex] / 100; + const float y4 = mds.anchorY()[fourthMDIndex] / 100; + + float residual = 0; + float error2 = 0; + float x_center = g / 100, y_center = f / 100; + float x_init = mds.anchorX()[thirdMDIndex] / 100; + float y_init = mds.anchorY()[thirdMDIndex] / 100; + float z_init = mds.anchorZ()[thirdMDIndex] / 100; + float rt_init = mds.anchorRt()[thirdMDIndex] / 100; //use the second MD as initial point + + if (moduleType3 == 1) // 1: if MD3 is in 2s layer + { + x_init = mds.anchorX()[secondMDIndex] / 100; + y_init = mds.anchorY()[secondMDIndex] / 100; + z_init = mds.anchorZ()[secondMDIndex] / 100; + rt_init = mds.anchorRt()[secondMDIndex] / 100; + } + + // start from a circle of inner T3. + // to determine the charge + int charge = 0; + float slope3c = (y3 - y_center) / (x3 - x_center); + float slope1c = (y1 - y_center) / (x1 - x_center); + // these 4 "if"s basically separate the x-y plane into 4 quarters. 
It determines geometrically how a circle and line slope goes and their positions, and we can get the charges correspondingly. + if ((y3 - y_center) > 0 && (y1 - y_center) > 0) { + if (slope1c > 0 && slope3c < 0) + charge = -1; // on x axis of a quarter, 3 hits go anti-clockwise + else if (slope1c < 0 && slope3c > 0) + charge = 1; // on x axis of a quarter, 3 hits go clockwise + else if (slope3c > slope1c) + charge = -1; + else if (slope3c < slope1c) + charge = 1; + } else if ((y3 - y_center) < 0 && (y1 - y_center) < 0) { + if (slope1c < 0 && slope3c > 0) + charge = 1; + else if (slope1c > 0 && slope3c < 0) + charge = -1; + else if (slope3c > slope1c) + charge = -1; + else if (slope3c < slope1c) + charge = 1; + } else if ((y3 - y_center) < 0 && (y1 - y_center) > 0) { + if ((x3 - x_center) > 0 && (x1 - x_center) > 0) + charge = 1; + else if ((x3 - x_center) < 0 && (x1 - x_center) < 0) + charge = -1; + } else if ((y3 - y_center) > 0 && (y1 - y_center) < 0) { + if ((x3 - x_center) > 0 && (x1 - x_center) > 0) + charge = -1; + else if ((x3 - x_center) < 0 && (x1 - x_center) < 0) + charge = 1; + } + + float pseudo_phi = alpaka::math::atan( + acc, (y_init - y_center) / (x_init - x_center)); //actually represent pi/2-phi, wrt helix axis z + float Pt = inner_pt, Px = Pt * alpaka::math::abs(acc, alpaka::math::sin(acc, pseudo_phi)), + Py = Pt * alpaka::math::abs(acc, cos(pseudo_phi)); + + // Above line only gives you the correct value of Px and Py, but signs of Px and Py calculated below. + // We look at if the circle is clockwise or anti-clock wise, to make it simpler, we separate the x-y plane into 4 quarters. 
+ if (x_init > x_center && y_init > y_center) //1st quad + { + if (charge == 1) + Py = -Py; + if (charge == -1) + Px = -Px; + } + if (x_init < x_center && y_init > y_center) //2nd quad + { + if (charge == -1) { + Px = -Px; + Py = -Py; + } + } + if (x_init < x_center && y_init < y_center) //3rd quad + { + if (charge == 1) + Px = -Px; + if (charge == -1) + Py = -Py; + } + if (x_init > x_center && y_init < y_center) //4th quad + { + if (charge == 1) { + Px = -Px; + Py = -Py; + } + } + + // But if the initial T5 curve goes across quarters(i.e. cross axis to separate the quarters), need special redeclaration of Px,Py signs on these to avoid errors + if (moduleType3 == 0) { // 0 is ps + if (x4 < x3 && x3 < x2) + Px = -alpaka::math::abs(acc, Px); + else if (x4 > x3 && x3 > x2) + Px = alpaka::math::abs(acc, Px); + if (y4 < y3 && y3 < y2) + Py = -alpaka::math::abs(acc, Py); + else if (y4 > y3 && y3 > y2) + Py = alpaka::math::abs(acc, Py); + } else if (moduleType3 == 1) // 1 is 2s + { + if (x3 < x2 && x2 < x1) + Px = -alpaka::math::abs(acc, Px); + else if (x3 > x2 && x2 > x1) + Px = alpaka::math::abs(acc, Px); + if (y3 < y2 && y2 < y1) + Py = -alpaka::math::abs(acc, Py); + else if (y3 > y2 && y2 > y1) + Py = alpaka::math::abs(acc, Py); + } + + //to get Pz, we use pt/pz=ds/dz, ds is the arclength between MD1 and MD3. 
+ float AO = alpaka::math::sqrt(acc, (x1 - x_center) * (x1 - x_center) + (y1 - y_center) * (y1 - y_center)); + float BO = + alpaka::math::sqrt(acc, (x_init - x_center) * (x_init - x_center) + (y_init - y_center) * (y_init - y_center)); + float AB2 = (x1 - x_init) * (x1 - x_init) + (y1 - y_init) * (y1 - y_init); + float dPhi = alpaka::math::acos(acc, (AO * AO + BO * BO - AB2) / (2 * AO * BO)); + float ds = innerRadius / 100 * dPhi; + + float Pz = (z_init - z1) / ds * Pt; + float p = alpaka::math::sqrt(acc, Px * Px + Py * Py + Pz * Pz); + + float a = -2.f * k2Rinv1GeVf * 100 * charge; // multiply by 100 to make the correct length units + + float zsi, rtsi; + int layeri, moduleTypei; + rzChiSquared = 0; + for (size_t i = 2; i < 6; i++) { + if (i == 2) { + zsi = z2; + rtsi = rt2; + layeri = layer2; + moduleTypei = moduleType2; + } else if (i == 3) { + zsi = z3; + rtsi = rt3; + layeri = layer3; + moduleTypei = moduleType3; + } else if (i == 4) { + zsi = z4; + rtsi = rt4; + layeri = layer4; + moduleTypei = moduleType4; + } else if (i == 5) { + zsi = z5; + rtsi = rt5; + layeri = layer5; + moduleTypei = moduleType5; + } + + if (moduleType3 == 0) { //0: ps + if (i == 3) + continue; + } else { + if (i == 2) + continue; + } + + // calculation is copied from PixelTriplet.h computePT3RZChiSquared + float diffr = 0, diffz = 0; + + float rou = a / p; + // for endcap + float s = (zsi - z_init) * p / Pz; + float x = x_init + Px / a * alpaka::math::sin(acc, rou * s) - Py / a * (1 - alpaka::math::cos(acc, rou * s)); + float y = y_init + Py / a * alpaka::math::sin(acc, rou * s) + Px / a * (1 - alpaka::math::cos(acc, rou * s)); + diffr = (rtsi - alpaka::math::sqrt(acc, x * x + y * y)) * 100; + + // for barrel + if (layeri <= 6) { + float paraA = + rt_init * rt_init + 2 * (Px * Px + Py * Py) / (a * a) + 2 * (y_init * Px - x_init * Py) / a - rtsi * rtsi; + float paraB = 2 * (x_init * Px + y_init * Py) / a; + float paraC = 2 * (y_init * Px - x_init * Py) / a + 2 * (Px * Px + Py * Py) / (a 
* a); + float A = paraB * paraB + paraC * paraC; + float B = 2 * paraA * paraB; + float C = paraA * paraA - paraC * paraC; + float sol1 = (-B + alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float sol2 = (-B - alpaka::math::sqrt(acc, B * B - 4 * A * C)) / (2 * A); + float solz1 = alpaka::math::asin(acc, sol1) / rou * Pz / p + z_init; + float solz2 = alpaka::math::asin(acc, sol2) / rou * Pz / p + z_init; + float diffz1 = (solz1 - zsi) * 100; + float diffz2 = (solz2 - zsi) * 100; + if (alpaka::math::isnan(acc, diffz1)) + diffz = diffz2; + else if (alpaka::math::isnan(acc, diffz2)) + diffz = diffz1; + else { + diffz = (alpaka::math::abs(acc, diffz1) < alpaka::math::abs(acc, diffz2)) ? diffz1 : diffz2; + } + } + residual = (layeri > 6) ? diffr : diffz; + + //PS Modules + if (moduleTypei == 0) { + error2 = kPixelPSZpitch * kPixelPSZpitch; + } else //2S modules + { + error2 = kStrip2SZpitch * kStrip2SZpitch; + } + + //check the tilted module, side: PosZ, NegZ, Center(for not tilted) + float drdz; + short side, subdets; + if (i == 2) { + drdz = alpaka::math::abs(acc, modules.drdzs()[lowerModuleIndex2]); + side = modules.sides()[lowerModuleIndex2]; + subdets = modules.subdets()[lowerModuleIndex2]; + } + if (i == 3) { + drdz = alpaka::math::abs(acc, modules.drdzs()[lowerModuleIndex3]); + side = modules.sides()[lowerModuleIndex3]; + subdets = modules.subdets()[lowerModuleIndex3]; + } + if (i == 2 || i == 3) { + residual = (layeri <= 6 && ((side == Center) or (drdz < 1))) ? diffz : diffr; + float projection_missing2 = 1.f; + if (drdz < 1) + projection_missing2 = + ((subdets == Endcap) or (side == Center)) ? 1.f : 1.f / (1 + drdz * drdz); // cos(atan(drdz)), if dr/dz<1 + if (drdz > 1) + projection_missing2 = ((subdets == Endcap) or (side == Center)) + ? 
1.f + : (drdz * drdz) / (1 + drdz * drdz); //sin(atan(drdz)), if dr/dz>1 + error2 = error2 * projection_missing2; + } + rzChiSquared += 12 * (residual * residual) / error2; + } + // for set rzchi2 cut + // if the 5 points are linear, helix calculation gives nan + if (inner_pt > 100 || alpaka::math::isnan(acc, rzChiSquared)) { + float slope; + if (moduleType1 == 0 and moduleType2 == 0 and moduleType3 == 1) //PSPS2S + { + slope = (z2 - z1) / (rt2 - rt1); + } else { + slope = (z3 - z1) / (rt3 - rt1); + } + float residual4_linear = (layer4 <= 6) ? ((z4 - z1) - slope * (rt4 - rt1)) : ((rt4 - rt1) - (z4 - z1) / slope); + float residual5_linear = (layer4 <= 6) ? ((z5 - z1) - slope * (rt5 - rt1)) : ((rt5 - rt1) - (z5 - z1) / slope); + + // creating a chi squared type quantity + // 0-> PS, 1->2S + residual4_linear = (moduleType4 == 0) ? residual4_linear / kPixelPSZpitch : residual4_linear / kStrip2SZpitch; + residual5_linear = (moduleType5 == 0) ? residual5_linear / kPixelPSZpitch : residual5_linear / kStrip2SZpitch; + residual4_linear = residual4_linear * 100; + residual5_linear = residual5_linear * 100; + + rzChiSquared = 12 * (residual4_linear * residual4_linear + residual5_linear * residual5_linear); + return rzChiSquared < 4.677f; + } + + // when building T5, apply 99% chi2 cuts as default, and add to pT5 collection. But when adding T5 to TC collections, apply 95% cut to reduce the fake rate + tightCutFlag = false; + // The category numbers are related to module regions and layers, decoding of the region numbers can be found here in slide 2 table. https://github.com/SegmentLinking/TrackLooper/files/11420927/part.2.pdf + // The commented numbers after each case is the region code, and can look it up from the table to see which category it belongs to. 
For example, //0 means T5 built with Endcap 1,2,3,4,5 ps modules + if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 11) //0 + { + if (rzChiSquared < 94.470f) + tightCutFlag = true; + return true; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 10 and layer5 == 16) //1 + { + if (rzChiSquared < 22.099f) + tightCutFlag = true; + return rzChiSquared < 37.956f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9 and layer4 == 15 and layer5 == 16) //2 + { + if (rzChiSquared < 7.992f) + tightCutFlag = true; + return rzChiSquared < 11.622f; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8 and layer4 == 9) { + if (layer5 == 10) //3 + { + if (rzChiSquared < 111.390f) + tightCutFlag = true; + return true; + } + if (layer5 == 15) //4 + { + if (rzChiSquared < 18.351f) + tightCutFlag = true; + return rzChiSquared < 37.941f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + if (layer4 == 8 and layer5 == 9) //5 + { + if (rzChiSquared < 116.148f) + tightCutFlag = true; + return true; + } + if (layer4 == 8 and layer5 == 14) //6 + { + if (rzChiSquared < 19.352f) + tightCutFlag = true; + return rzChiSquared < 52.561f; + } else if (layer4 == 13 and layer5 == 14) //7 + { + if (rzChiSquared < 10.392f) + tightCutFlag = true; + return rzChiSquared < 13.76f; + } + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + if (layer4 == 7 and layer5 == 8) //8 + { + if (rzChiSquared < 27.824f) + tightCutFlag = true; + return rzChiSquared < 44.247f; + } else if (layer4 == 7 and layer5 == 13) //9 + { + if (rzChiSquared < 18.145f) + tightCutFlag = true; + return rzChiSquared < 33.752f; + } else if (layer4 == 12 and layer5 == 13) //10 + { + if (rzChiSquared < 13.308f) + tightCutFlag = true; + return rzChiSquared < 21.213f; + } else if (layer4 == 4 and layer5 == 5) //11 + { + if (rzChiSquared < 15.627f) + tightCutFlag = true; + return rzChiSquared < 29.035f; + } else if (layer4 == 4 and layer5 == 12) //12 + { + if 
(rzChiSquared < 14.64f) + tightCutFlag = true; + return rzChiSquared < 23.037f; + } + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + if (layer4 == 9 and layer5 == 15) //14 + { + if (rzChiSquared < 24.662f) + tightCutFlag = true; + return rzChiSquared < 41.036f; + } else if (layer4 == 14 and layer5 == 15) //15 + { + if (rzChiSquared < 8.866f) + tightCutFlag = true; + return rzChiSquared < 14.092f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + if (layer4 == 8 and layer5 == 14) //16 + { + if (rzChiSquared < 23.730f) + tightCutFlag = true; + return rzChiSquared < 23.748f; + } + if (layer4 == 13 and layer5 == 14) //17 + { + if (rzChiSquared < 10.772f) + tightCutFlag = true; + return rzChiSquared < 17.945f; + } + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + if (layer4 == 5 and layer5 == 6) //18 + { + if (rzChiSquared < 6.065f) + tightCutFlag = true; + return rzChiSquared < 8.803f; + } else if (layer4 == 5 and layer5 == 12) //19 + { + if (rzChiSquared < 5.693f) + tightCutFlag = true; + return rzChiSquared < 7.930f; + } + + else if (layer4 == 12 and layer5 == 13) //20 + { + if (rzChiSquared < 5.473f) + tightCutFlag = true; + return rzChiSquared < 7.626f; + } + } + return true; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool T5HasCommonMiniDoublet(TripletsConst triplets, + SegmentsConst segments, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex) { + unsigned int innerOuterSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int outerInnerSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; + unsigned int innerOuterOuterMiniDoubletIndex = + segments.mdIndices()[innerOuterSegmentIndex][1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = + segments.mdIndices()[outerInnerSegmentIndex][0]; //outer triplet inner segment inner MD index + + return (innerOuterOuterMiniDoubletIndex == outerInnerInnerMiniDoubletIndex); + } + + template 
+ ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeErrorInRadius(TAcc const& acc, + float* x1Vec, + float* y1Vec, + float* x2Vec, + float* y2Vec, + float* x3Vec, + float* y3Vec, + float& minimumRadius, + float& maximumRadius) { + //brute force + float candidateRadius; + float g, f; + minimumRadius = kVerticalModuleSlope; + maximumRadius = 0.f; + for (size_t i = 0; i < 3; i++) { + float x1 = x1Vec[i]; + float y1 = y1Vec[i]; + for (size_t j = 0; j < 3; j++) { + float x2 = x2Vec[j]; + float y2 = y2Vec[j]; + for (size_t k = 0; k < 3; k++) { + float x3 = x3Vec[k]; + float y3 = y3Vec[k]; + candidateRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, g, f); + maximumRadius = alpaka::math::max(acc, candidateRadius, maximumRadius); + minimumRadius = alpaka::math::min(acc, candidateRadius, minimumRadius); + } + } + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE12378(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.178f; + float bridgeInvRadiusErrorBound = 0.507f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + + /*bounds for high Pt taken from : 
http://uaf-10.t2.ucsd.edu/~bsathian/SDL/T5_efficiency/efficiencies/new_efficiencies/efficiencies_20210513_T5_recovering_high_Pt_efficiencies/highE_radius_matching/highE_bounds.txt */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBB(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.1512f; + float bridgeInvRadiusErrorBound = 0.1781f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 0.4449f; + bridgeInvRadiusErrorBound = 0.4033f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBBE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.1781f; + float bridgeInvRadiusErrorBound = 0.2167f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 0.4750f; + bridgeInvRadiusErrorBound = 0.3903f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, 
bridgeInvRadiusMax); + } + /* matchRadiiBBBEE23478: forms the inverse-radius window [(1 - b) / r, (1 + b) / r] for innerRadius (b = 0.2097) and bridgeRadius (b = 0.8557), each lower edge clamped at 0; the bridge window is extended to include 1 / bridgeRadiusMax2S and 1 / bridgeRadiusMin2S. Returns checkIntervalOverlap of the inner and bridge windows. outerRadius is not read. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE23478(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.2097f; + float bridgeInvRadiusErrorBound = 0.8557f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + /* matchRadiiBBBEE34578: inverse-radius window overlap test with error bounds 0.066 (inner) and 0.617 (bridge); lower edges are clamped at 0 and the bridge window is extended with 1 / bridgeRadiusMax2S and 1 / bridgeRadiusMin2S. outerRadius is not read. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBBBEE34578(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.066f; + float bridgeInvRadiusErrorBound = 0.617f; + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + /* matchRadiiBBEEE: inverse-radius window overlap test with error bounds 0.6376 (inner) and 2.1381 (bridge), replaced by 12.9173 and 5.1700 when innerRadius * k2Rinv1GeVf > 1.f. Lower edges are clamped at 0 and the bridge window is extended with 1 / bridgeRadiusMax2S and 1 / bridgeRadiusMin2S. outerRadius is not read. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool
matchRadiiBBEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 0.6376f; + float bridgeInvRadiusErrorBound = 2.1381f; + + if (innerRadius * k2Rinv1GeVf > 1.f) //as good as no selections! + { + innerInvRadiusErrorBound = 12.9173f; + bridgeInvRadiusErrorBound = 5.1700f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(innerInvRadiusMin, + innerInvRadiusMax, + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + /* matchRadiiBEEEE: inverse-radius window overlap test in which both windows are extended with their 2S limits (1 / innerRadiusMax2S, 1 / innerRadiusMin2S for the inner one; 1 / bridgeRadiusMax2S, 1 / bridgeRadiusMin2S for the bridge one). Error bounds 1.9382 (inner) and 3.7280 (bridge), replaced by 23.2713 and 21.7980 when innerRadius * k2Rinv1GeVf > 1.f. outerRadius is not read. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiBEEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float innerRadiusMin2S, + float innerRadiusMax2S, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 3.7280f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 23.2713f; + bridgeInvRadiusErrorBound = 21.7980f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return
checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0f / innerRadiusMax2S), + alpaka::math::max(acc, innerInvRadiusMax, 1.0f / innerRadiusMin2S), + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + /* matchRadiiEEEEE: inverse-radius window overlap test in which both windows are extended with their 2S limits. Error bounds 1.9382 (inner) and 2.2091 (bridge), replaced by 22.5226 and 21.0966 when innerRadius * k2Rinv1GeVf > 1.f. outerRadius is not read. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool matchRadiiEEEEE(TAcc const& acc, + float innerRadius, + float bridgeRadius, + float outerRadius, + float innerRadiusMin2S, + float innerRadiusMax2S, + float bridgeRadiusMin2S, + float bridgeRadiusMax2S) { + float innerInvRadiusMin, innerInvRadiusMax, bridgeInvRadiusMin, bridgeInvRadiusMax; + + float innerInvRadiusErrorBound = 1.9382f; + float bridgeInvRadiusErrorBound = 2.2091f; + + if (innerRadius * k2Rinv1GeVf > 1.f) { + innerInvRadiusErrorBound = 22.5226f; + bridgeInvRadiusErrorBound = 21.0966f; + } + + innerInvRadiusMax = (1.f + innerInvRadiusErrorBound) / innerRadius; + innerInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - innerInvRadiusErrorBound) / innerRadius); + + bridgeInvRadiusMax = (1.f + bridgeInvRadiusErrorBound) / bridgeRadius; + bridgeInvRadiusMin = alpaka::math::max(acc, 0.f, (1.f - bridgeInvRadiusErrorBound) / bridgeRadius); + + return checkIntervalOverlap(alpaka::math::min(acc, innerInvRadiusMin, 1.0f / innerRadiusMax2S), + alpaka::math::max(acc, innerInvRadiusMax, 1.0f / innerRadiusMin2S), + alpaka::math::min(acc, bridgeInvRadiusMin, 1.0f / bridgeRadiusMax2S), + alpaka::math::max(acc, bridgeInvRadiusMax, 1.0f / bridgeRadiusMin2S)); + } + /* computeSigmasForRegression: for each of the nPoints entries of lowerModuleIndices, reads the module type, subdet, side, drdz and dxdy and fills delta1[i], delta2[i], slopes[i] and isFlat[i]. Barrel PS centre and barrel 2S modules are marked flat with slopes[i] = -999.f; the other categories keep slopes[i] = dxdy and are marked not flat. anchorHits selects inv2 versus inv3 in the delta2 of barrel PS tilted and endcap PS modules. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void computeSigmasForRegression(TAcc const& acc, + ModulesConst modules, + const uint16_t* lowerModuleIndices, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + unsigned int nPoints = 5, + bool anchorHits = true) { + /* + Bool anchorHits required to deal with a weird edge case wherein + the hits ultimately used in the regression are anchor hits, but the + lower modules need not all be Pixel
Modules (in case of PS). Similarly, + when we compute the chi squared for the non-anchor hits, the "partner module" + need not always be a PS strip module, but all non-anchor hits sit on strip + modules. + */ + /* inv1, inv2, inv3: kWidthPS, kPixelPSZpitch and kStripPSZpitch expressed relative to kWidth2S */ + ModuleType moduleType; + short moduleSubdet, moduleSide; + float inv1 = kWidthPS / kWidth2S; + float inv2 = kPixelPSZpitch / kWidth2S; + float inv3 = kStripPSZpitch / kWidth2S; + for (size_t i = 0; i < nPoints; i++) { + moduleType = modules.moduleType()[lowerModuleIndices[i]]; + moduleSubdet = modules.subdets()[lowerModuleIndices[i]]; + moduleSide = modules.sides()[lowerModuleIndices[i]]; + const float& drdz = modules.drdzs()[lowerModuleIndices[i]]; + slopes[i] = modules.dxdys()[lowerModuleIndices[i]]; + //category 1 - barrel PS flat + if (moduleSubdet == Barrel and moduleType == PS and moduleSide == Center) { + delta1[i] = inv1; + delta2[i] = inv1; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 2 - barrel 2S + else if (moduleSubdet == Barrel and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 1.f; + slopes[i] = -999.f; + isFlat[i] = true; + } + //category 3 - barrel PS tilted + else if (moduleSubdet == Barrel and moduleType == PS and moduleSide != Center) { + delta1[i] = inv1; + isFlat[i] = false; + + if (anchorHits) { + delta2[i] = (inv2 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } else { + delta2[i] = (inv3 * drdz / alpaka::math::sqrt(acc, 1 + drdz * drdz)); + } + } + //category 4 - endcap PS + else if (moduleSubdet == Endcap and moduleType == PS) { + delta1[i] = inv1; + isFlat[i] = false; + + /* + despite the type of the module layer of the lower module index, + all anchor hits are on the pixel side and all non-anchor hits are + on the strip side!
+ */ + if (anchorHits) { + delta2[i] = inv2; + } else { + delta2[i] = inv3; + } + } + //category 5 - endcap 2S + else if (moduleSubdet == Endcap and moduleType == TwoS) { + delta1[i] = 1.f; + delta2[i] = 500.f * inv1; + isFlat[i] = false; + } else { /* no category matched: delta1[i], delta2[i] and isFlat[i] are left unwritten for this point */ +#ifdef WARNINGS + printf("ERROR!!!!! I SHOULDN'T BE HERE!!!! subdet = %d, type = %d, side = %d\n", + moduleSubdet, + moduleType, + moduleSide); +#endif + } + } + } + /* computeRadiusUsingRegression: weighted least-squares fit of the circle x^2 + y^2 - 2*g*x - 2*f*y + c = 0 to the nPoints points (xs, ys), each weighted by 1 / sigmas2[i]. Writes g, f, sigmas2[] and chiSquared and returns the radius sqrt(g*g + f*f - c); when g*g + f*f - c is negative it sets chiSquared = -1 and returns -1. NOTE(review): denominator, sigmaOne and sigmas2[i] are divided by without a zero check - confirm callers cannot produce degenerate input. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusUsingRegression(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float& g, + float& f, + float* sigmas2, + float& chiSquared) { + float radius = 0.f; + + // Some extra variables + // the two variables will be called x1 and x2, and y (which is x^2 + y^2) + + float sigmaX1Squared = 0.f; + float sigmaX2Squared = 0.f; + float sigmaX1X2 = 0.f; + float sigmaX1y = 0.f; + float sigmaX2y = 0.f; + float sigmaY = 0.f; + float sigmaX1 = 0.f; + float sigmaX2 = 0.f; + float sigmaOne = 0.f; + + float xPrime, yPrime, absArctanSlope, angleM; + for (size_t i = 0; i < nPoints; i++) { + // Computing sigmas is a very tricky affair + // if the module is tilted or endcap, we need to use the slopes properly! + + absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ?
alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) + : kPi / 2.f); + /* angleM is chosen by the quadrant of (xs[i], ys[i]); it is 0 when the point is not strictly inside a quadrant */ + if (xs[i] > 0 and ys[i] > 0) { + angleM = kPi / 2.f - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + kPi / 2.f; + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + kPi / 2.f); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(kPi / 2.f - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigmas2[i] = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + + sigmaX1Squared += (xs[i] * xs[i]) / sigmas2[i]; + sigmaX2Squared += (ys[i] * ys[i]) / sigmas2[i]; + sigmaX1X2 += (xs[i] * ys[i]) / sigmas2[i]; + sigmaX1y += (xs[i] * (xs[i] * xs[i] + ys[i] * ys[i])) / sigmas2[i]; + sigmaX2y += (ys[i] * (xs[i] * xs[i] + ys[i] * ys[i])) / sigmas2[i]; + sigmaY += (xs[i] * xs[i] + ys[i] * ys[i]) / sigmas2[i]; + sigmaX1 += xs[i] / sigmas2[i]; + sigmaX2 += ys[i] / sigmas2[i]; + sigmaOne += 1.0f / sigmas2[i]; + } + float denominator = (sigmaX1X2 - sigmaX1 * sigmaX2) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX1Squared - sigmaX1 * sigmaX1) * (sigmaX2Squared - sigmaX2 * sigmaX2); + + float twoG = ((sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX1y - sigmaX1 * sigmaY) * (sigmaX2Squared - sigmaX2 * sigmaX2)) / + denominator; + float twoF = ((sigmaX1y - sigmaX1 * sigmaY) * (sigmaX1X2 - sigmaX1 * sigmaX2) - + (sigmaX2y - sigmaX2 * sigmaY) * (sigmaX1Squared - sigmaX1 * sigmaX1)) / + denominator; + + float c = -(sigmaY - twoG * sigmaX1 - twoF * sigmaX2) / sigmaOne; + g = 0.5f * twoG; + f = 0.5f * twoF; + if (g * g + f * f - c < 0) { +#ifdef WARNINGS + printf("FATAL!
r^2 < 0!\n"); +#endif + chiSquared = -1; + return -1; + } + + radius = alpaka::math::sqrt(acc, g * g + f * f - c); + // compute chi squared + chiSquared = 0.f; + for (size_t i = 0; i < nPoints; i++) { + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - twoG * xs[i] - twoF * ys[i] + c) / sigmas2[i]; + } + return radius; + } + /* computeChiSquared: for a fixed circle (g, f, radius) returns the sum over nPoints of (x^2 + y^2 - 2*g*x - 2*f*y + c)^2 / sigma2, with c = g*g + f*f - radius*radius and sigma2 = 4 * ((xPrime * delta1[i])^2 + (yPrime * delta2[i])^2); (xPrime, yPrime) is the point rotated by angleM unless isFlat[i]. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeChiSquared(TAcc const& acc, + unsigned int nPoints, + float* xs, + float* ys, + float* delta1, + float* delta2, + float* slopes, + bool* isFlat, + float g, + float f, + float radius) { + // given values of (g, f, radius) and a set of points (and its uncertainties) + // compute chi squared + float c = g * g + f * f - radius * radius; + float chiSquared = 0.f; + float absArctanSlope, angleM, xPrime, yPrime, sigma2; + for (size_t i = 0; i < nPoints; i++) { + absArctanSlope = ((slopes[i] != kVerticalModuleSlope) ? alpaka::math::abs(acc, alpaka::math::atan(acc, slopes[i])) + : kPi / 2.f); + if (xs[i] > 0 and ys[i] > 0) { + angleM = kPi / 2.f - absArctanSlope; + } else if (xs[i] < 0 and ys[i] > 0) { + angleM = absArctanSlope + kPi / 2.f; + } else if (xs[i] < 0 and ys[i] < 0) { + angleM = -(absArctanSlope + kPi / 2.f); + } else if (xs[i] > 0 and ys[i] < 0) { + angleM = -(kPi / 2.f - absArctanSlope); + } else { + angleM = 0; + } + + if (not isFlat[i]) { + xPrime = xs[i] * alpaka::math::cos(acc, angleM) + ys[i] * alpaka::math::sin(acc, angleM); + yPrime = ys[i] * alpaka::math::cos(acc, angleM) - xs[i] * alpaka::math::sin(acc, angleM); + } else { + xPrime = xs[i]; + yPrime = ys[i]; + } + sigma2 = 4 * ((xPrime * delta1[i]) * (xPrime * delta1[i]) + (yPrime * delta2[i]) * (yPrime * delta2[i])); + chiSquared += (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) * + (xs[i] * xs[i] + ys[i] * ys[i] - 2 * g * xs[i] - 2 * f * ys[i] + c) / sigma2; + } + return chiSquared; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void
runDeltaBetaIterationsT5(TAcc const& acc, + float& betaIn, + float& betaOut, + float betaAv, + float& pt_beta, + float sdIn_dr, + float sdOut_dr, + float dr, + float lIn) { /* Refines betaIn, betaOut and pt_beta in place; betaAv is taken by value, so assignments to it stay local. lIn == 0: only betaOut is corrected, then return. Otherwise one of the two branches below computes trial corrections to get an improved pt estimate, corrects betaIn and betaOut, and recomputes pt_beta from their average; if neither branch condition holds nothing is modified. */ + if (lIn == 0) { + betaOut += alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); + return; + } + + if (betaIn * betaOut > 0.f and + (alpaka::math::abs(acc, pt_beta) < 4.f * kPt_betaMax or + (lIn >= 11 and alpaka::math::abs(acc, pt_beta) < + 8.f * kPt_betaMax))) //and the pt_beta is well-defined; less strict for endcap-endcap + { + const float betaInUpd = + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + const float betaOutUpd = + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaOut); //FIXME: need a faster version + betaAv = 0.5f * (betaInUpd + betaOutUpd); + + //1st update + const float pt_beta_inv = + 1.f / alpaka::math::abs(acc, dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv)); //get a better pt estimate + + betaIn += alpaka::math::copysign( + acc, + alpaka::math::asin(acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), + betaIn); //FIXME: need a faster version + betaOut += alpaka::math::copysign( + acc, + alpaka::math::asin(acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf * pt_beta_inv, kSinAlphaMax)), + betaOut); //FIXME: need a faster version + //update the av and pt + betaAv = 0.5f * (betaIn + betaOut); + //2nd update + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + } else if (lIn < 11 && alpaka::math::abs(acc, betaOut) < 0.2f * alpaka::math::abs(acc, betaIn) && + alpaka::math::abs(acc, pt_beta) < 12.f *
kPt_betaMax) //use betaIn sign as ref + { + const float pt_betaIn = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaIn); + + const float betaInUpd = + betaIn + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + const float betaOutUpd = + betaOut + + alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, + alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_betaIn), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + betaAv = (alpaka::math::abs(acc, betaOut) > 0.2f * alpaka::math::abs(acc, betaIn)) + ? (0.5f * (betaInUpd + betaOutUpd)) + : betaInUpd; + /* NOTE(review): this branch is entered only when abs(betaOut) < 0.2f * abs(betaIn), so the condition above is false here and betaAv is always betaInUpd */ + //1st update + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + betaIn += alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdIn_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + betaOut += alpaka::math::copysign( + acc, + alpaka::math::asin( + acc, alpaka::math::min(acc, sdOut_dr * k2Rinv1GeVf / alpaka::math::abs(acc, pt_beta), kSinAlphaMax)), + betaIn); //FIXME: need a faster version + //update the av and pt + betaAv = 0.5f * (betaIn + betaOut); + //2nd update + pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); //get a better pt estimate + } + } + /* runQuintupletDefaultAlgoBBBB: applies a sequence of cuts to the anchor positions of the four mini-doublets and the dPhiChanges of the inner and outer segments, returning false at the first failed cut (z compatibility, pointed z, deltaPhiPos, deltaPhiChange, betaIn, betaOut); otherwise returns whether (betaIn - betaOut)^2 <= dBetaCut2. innerOuterLowerModuleIndex is not read. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBBB(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + bool isPS_InLo =
(modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); + /* anchor rt and z of the first three mini-doublets (inner-lower, inner-outer, outer-lower) */ + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float rtRatio_OutLoInLo = rt_OutLo / rt_InLo; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ? kPixelPSZpitch : kStrip2SZpitch); + + float zHi = z_InLo + (z_InLo + kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo < 0.f ? 1.f : dzDrtScale) + + (zpitch_InLo + zpitch_OutLo); + float zLo = z_InLo + (z_InLo - kDeltaZLum) * (rtRatio_OutLoInLo - 1.f) * (z_InLo > 0.f ?
1.f : dzDrtScale) - + (zpitch_InLo + zpitch_OutLo); + + //Cut 1 - z compatibility + if ((z_OutLo < zLo) || (z_OutLo > zHi)) + return false; + + float drt_OutLo_InLo = (rt_OutLo - rt_InLo); + float r3_InLo = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo); + float drt_InSeg = rt_InOut - rt_InLo; + float dz_InSeg = z_InOut - z_InLo; + float dr3_InSeg = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - + alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + + float coshEta = dr3_InSeg / drt_InSeg; + float dzErr = (zpitch_InLo + zpitch_OutLo) * (zpitch_InLo + zpitch_OutLo) * 2.f; + + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (r3_InLo / rt_InLo); + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + dzErr += muls2 * drt_OutLo_InLo * drt_OutLo_InLo / 3.f * coshEta * coshEta; + dzErr = alpaka::math::sqrt(acc, dzErr); + + // Constructing upper and lower bound + const float dzMean = dz_InSeg / drt_InSeg * drt_OutLo_InLo; + const float zWindow = + dzErr / drt_InSeg * drt_OutLo_InLo + + (zpitch_InLo + zpitch_OutLo); //FIXME for ptCut lower than ~0.8 need to add curv path correction + float zLoPointed = z_InLo + dzMean * (z_InLo > 0.f ? 1.f : dzDrtScale) - zWindow; + float zHiPointed = z_InLo + dzMean * (z_InLo < 0.f ?
1.f : dzDrtScale) + zWindow; + + // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) + if ((z_OutLo < zLoPointed) || (z_OutLo > zHiPointed)) + return false; + + float pvOffset = 0.1f / rt_OutLo; + float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float deltaPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); + // Cut #3: FIXME:deltaPhiPos can be tighter + if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) + return false; + + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + // Cut #4: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + // First obtaining the raw betaIn and betaOut values without any correction and just purely based on the mini-doublet hit positions + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float alpha_OutLo = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + + bool isEC_lastLayer = modules.subdets()[outerOuterLowerModuleIndex] == Endcap and + modules.moduleType()[outerOuterLowerModuleIndex] == TwoS; + + float alpha_OutUp, alpha_OutUp_highEdge, alpha_OutUp_lowEdge; + + alpha_OutUp = phi_mpi_pi(acc, + phi(acc, + mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorPhi()[fourthMDIndex]); + + alpha_OutUp_highEdge = alpha_OutUp; + alpha_OutUp_lowEdge = alpha_OutUp; + + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + float tl_axis_highEdge_x = tl_axis_x; +
float tl_axis_highEdge_y = tl_axis_y; + float tl_axis_lowEdge_x = tl_axis_x; + float tl_axis_lowEdge_y = tl_axis_y; + + float betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + float betaOut = -alpha_OutUp + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[fourthMDIndex]); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + /* only when the last layer is an endcap 2S module are the edge quantities and betaOutRHmin / betaOutRHmax recomputed from the high-edge and low-edge positions of the fourth mini-doublet */ + if (isEC_lastLayer) { + alpha_OutUp_highEdge = phi_mpi_pi(acc, + phi(acc, + mds.anchorHighEdgeX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorHighEdgeY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorHighEdgePhi()[fourthMDIndex]); + alpha_OutUp_lowEdge = phi_mpi_pi(acc, + phi(acc, + mds.anchorLowEdgeX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorLowEdgeY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorLowEdgePhi()[fourthMDIndex]); + + tl_axis_highEdge_x = mds.anchorHighEdgeX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + tl_axis_highEdge_y = mds.anchorHighEdgeY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + tl_axis_lowEdge_x = mds.anchorLowEdgeX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + tl_axis_lowEdge_y = mds.anchorLowEdgeY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + + betaOutRHmin = + -alpha_OutUp_highEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_highEdge_x, tl_axis_highEdge_y) - mds.anchorHighEdgePhi()[fourthMDIndex]); + betaOutRHmax = + -alpha_OutUp_lowEdge + + phi_mpi_pi(acc, phi(acc, tl_axis_lowEdge_x, tl_axis_lowEdge_y) - mds.anchorLowEdgePhi()[fourthMDIndex]); + } + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + float corrF = 1.f; + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + +
(mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float betaInCut = + alpaka::math::asin( + acc, alpaka::math::min(acc, (-rt_InSeg * corrF + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / drt_InSeg); + + //Cut #5: first beta cut + if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut) + return false; + + float betaAv = 0.5f * (betaIn + betaOut); + float pt_beta = drt_tl_axis * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + int lIn = 5; + int lOut = isEC_lastLayer ? 11 : 5; + float sdOut_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; + /* betaIn, betaOut and pt_beta are updated in place by this call; betaAv is passed by value */ + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, rt_InSeg, sdOut_dr, drt_tl_axis, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.f; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ?
(2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.f; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_maxPtBeta = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_InLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, alpha_OutLo), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi = alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + float dBetaROut = 0; + if (isEC_lastLayer) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / drt_tl_axis; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + + float betaOutCut = + alpaka::math::asin(acc, alpaka::math::min(acc, drt_tl_axis * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if (alpaka::math::abs(acc,
betaOut) >= betaOutCut) + return false; + + float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, drt_InSeg); + float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + + float dBeta = betaIn - betaOut; + return dBeta * dBeta <= dBetaCut2; + } + /* runQuintupletDefaultAlgoBBEE: cut sequence that returns false at the first failed cut: same-sign z of the first and third mini-doublet, rt window [rtLo, rtHi] together with kZ >= 0, deltaPhiPos, deltaPhiChange, betaIn and betaOut; otherwise returns whether (betaIn - betaOut)^2 <= dBetaCut2. */ + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoBBEE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + bool isPS_InLo = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPS_OutLo = (modules.moduleType()[outerInnerLowerModuleIndex] == PS); + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitch_InLo = (isPS_InLo ? kPixelPSZpitch : kStrip2SZpitch); + float zpitch_OutLo = (isPS_OutLo ?
kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitch_InLo + zpitch_OutLo; + + // Cut #0: Preliminary (Only here in endcap case) + if (z_InLo * z_OutLo <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; + float rtGeom1 = isOutSgInnerMDPS ? kPixelPSZpitch : kStrip2SZpitch; + float zGeom1 = alpaka::math::copysign(acc, zGeom, z_InLo); + float rtLo = rt_InLo * (1.f + (z_OutLo - z_InLo - zGeom1) / (z_InLo + zGeom1 + dLum) / dzDrtScale) - + rtGeom1; //slope correction only on the lower end + float rtOut = rt_OutLo; + + //Cut #1: rt condition + if (rtOut < rtLo) + return false; + + float zInForHi = z_InLo - zGeom1 - dLum; + if (zInForHi * z_InLo < 0) { + zInForHi = alpaka::math::copysign(acc, 0.1f, z_InLo); + } + float rtHi = rt_InLo * (1.f + (z_OutLo - z_InLo + zGeom1) / zInForHi) + rtGeom1; + /* rtOut is a copy of rt_OutLo, so the lower-bound test below repeats Cut #1; the upper bound rtHi is the new condition */ + //Cut #2: rt condition + if ((rt_OutLo < rtLo) || (rt_OutLo > rtHi)) + return false; + + float rIn = alpaka::math::sqrt(acc, z_InLo * z_InLo + rt_InLo * rt_InLo); + const float drtSDIn = rt_InOut - rt_InLo; + const float dzSDIn = z_InOut - z_InLo; + const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - + alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + + const float coshEta = dr3SDIn / drtSDIn; //direction estimate + const float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); + const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + const float zGeom1_another = kPixelPSZpitch; + float kZ = (z_OutLo - z_InLo) / dzSDIn; + float drtErr = + zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); + const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f) * (rIn / rt_InLo); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta;
+ drtErr = alpaka::math::sqrt(acc, drtErr); /* NOTE(review): drtErr is not read again in this function - confirm whether it was meant to enter Cut #3 */ + + //Cut #3: rt-z pointed + if ((kZ < 0) || (rtOut < rtLo) || (rtOut > rtHi)) + return false; + + const float pvOffset = 0.1f / rt_OutLo; + float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float deltaPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); + + //Cut #4: deltaPhiPos can be tighter + if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) + return false; + + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + // Cut #5: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float sdIn_alpha_min = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alpha_max = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + float sdOut_alpha = sdIn_alpha; /* NOTE(review): the outer alpha is a copy of the inner segment's value here rather than being read at outerSegmentIndex - confirm this is intended */ + + float sdOut_alphaOut = phi_mpi_pi(acc, + phi(acc, + mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex], + mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) - + mds.anchorPhi()[fourthMDIndex]); + + float sdOut_alphaOut_min = phi_mpi_pi( + acc, __H2F(segments.dPhiChangeMins()[outerSegmentIndex]) - __H2F(segments.dPhiMins()[outerSegmentIndex])); + float sdOut_alphaOut_max = phi_mpi_pi( + acc, __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]) - __H2F(segments.dPhiMaxs()[outerSegmentIndex])); + + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) -
mds.anchorPhi()[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + float betaOut = -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[fourthMDIndex]); + + float betaOutRHmin = betaOut; + float betaOutRHmax = betaOut; + + bool isEC_secondLayer = (modules.subdets()[innerOuterLowerModuleIndex] == Endcap) and + (modules.moduleType()[innerOuterLowerModuleIndex] == TwoS); + + if (isEC_secondLayer) { + betaInRHmin = betaIn - sdIn_alpha_min + sdIn_alpha; + betaInRHmax = betaIn - sdIn_alpha_max + sdIn_alpha; + } + + betaOutRHmin = betaOut - sdOut_alphaOut_min + sdOut_alphaOut; + betaOutRHmax = betaOut - sdOut_alphaOut_max + sdOut_alphaOut; + /* order each (min, max) pair so that the min entry has the smaller absolute value */ + float swapTemp; + if (alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) { + swapTemp = betaOutRHmin; + betaOutRHmin = betaOutRHmax; + betaOutRHmax = swapTemp; + } + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + const float corrF = 1.f; + float betaInCut = + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #6: first beta cut + if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut) + return false; + + float betaAv = 0.5f * (betaIn + betaOut); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + + float lIn = 5; + float lOut = 11; + + float sdOut_dr = alpaka::math::sqrt(acc, +
(mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; + /* betaIn, betaOut and pt_beta are updated in place by this call; betaAv is passed by value */ + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? (2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? (2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_maxPtBeta = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ?
0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + const float sinDPhi = alpaka::math::sin(acc, dPhi); + + const float dBetaRIn2 = 0; // TODO-RH + float dBetaROut = 0; + if (modules.moduleType()[outerOuterLowerModuleIndex] == TwoS) { + dBetaROut = (alpaka::math::sqrt(acc, + mds.anchorHighEdgeX()[fourthMDIndex] * mds.anchorHighEdgeX()[fourthMDIndex] + + mds.anchorHighEdgeY()[fourthMDIndex] * mds.anchorHighEdgeY()[fourthMDIndex]) - + alpaka::math::sqrt(acc, + mds.anchorLowEdgeX()[fourthMDIndex] * mds.anchorLowEdgeX()[fourthMDIndex] + + mds.anchorLowEdgeY()[fourthMDIndex] * mds.anchorLowEdgeY()[fourthMDIndex])) * + sinDPhi / dr; + } + + const float dBetaROut2 = dBetaROut * dBetaROut; + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #7: The real beta cut + if (alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + + float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, sdIn_d); + float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + //Cut #8: Cut on dBeta + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgoEEEE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int
firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float rt_OutLo = mds.anchorRt()[thirdMDIndex]; + + float z_InLo = mds.anchorZ()[firstMDIndex]; + float z_InOut = mds.anchorZ()[secondMDIndex]; + float z_OutLo = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + + // Cut #0: Preliminary (Only here in endcap case) + if ((z_InLo * z_OutLo) <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, z_InLo); + bool isOutSgInnerMDPS = modules.moduleType()[outerInnerLowerModuleIndex] == PS; + bool isInSgInnerMDPS = modules.moduleType()[innerInnerLowerModuleIndex] == PS; + + float rtGeom = (isInSgInnerMDPS and isOutSgInnerMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgInnerMDPS) ? 
kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; + + float dz = z_OutLo - z_InLo; + float rtLo = rt_InLo * (1.f + dz / (z_InLo + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end + + float rtOut = rt_OutLo; + + //Cut #1: rt condition + + float rtHi = rt_InLo * (1.f + dz / (z_InLo - dLum)) + rtGeom; + + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + bool isInSgOuterMDPS = modules.moduleType()[innerOuterLowerModuleIndex] == PS; + + const float drtSDIn = rt_InOut - rt_InLo; + const float dzSDIn = z_InOut - z_InLo; + const float dr3SDIn = alpaka::math::sqrt(acc, rt_InOut * rt_InOut + z_InOut * z_InOut) - + alpaka::math::sqrt(acc, rt_InLo * rt_InLo + z_InLo * z_InLo); + float coshEta = dr3SDIn / drtSDIn; //direction estimate + float dzOutInAbs = alpaka::math::abs(acc, z_OutLo - z_InLo); + float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + + float kZ = (z_OutLo - z_InLo) / dzSDIn; + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rt_OutLo - rt_InLo) / 50.f); + + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + + float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); + float rtWindow = drtErr + rtGeom; + float rtLo_point = rt_InLo + drtMean / dzDrtScale - rtWindow; + float rtHi_point = rt_InLo + drtMean + rtWindow; + + // Cut #3: rt-z pointed + // https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765 + + if (isInSgInnerMDPS and isInSgOuterMDPS) // If both PS then we can point + { + if (kZ < 0 || rtOut < rtLo_point || rtOut > rtHi_point) + return false; + } + + float pvOffset = 0.1f / rtOut; + float dPhiCut = alpha1GeV_OutLo + alpaka::math::sqrt(acc, muls2 + pvOffset * pvOffset); + + float deltaPhiPos = phi_mpi_pi(acc, 
mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[secondMDIndex]); + + if (alpaka::math::abs(acc, deltaPhiPos) > dPhiCut) + return false; + + float midPointX = 0.5f * (mds.anchorX()[firstMDIndex] + mds.anchorX()[thirdMDIndex]); + float midPointY = 0.5f * (mds.anchorY()[firstMDIndex] + mds.anchorY()[thirdMDIndex]); + float diffX = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float diffY = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + float dPhi = deltaPhi(acc, midPointX, midPointY, diffX, diffY); + + // Cut #5: deltaPhiChange + if (alpaka::math::abs(acc, dPhi) > dPhiCut) + return false; + + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float sdOut_alpha = sdIn_alpha; //weird + float sdOut_dPhiPos = phi_mpi_pi(acc, mds.anchorPhi()[fourthMDIndex] - mds.anchorPhi()[thirdMDIndex]); + + float sdOut_dPhiChange = __H2F(segments.dPhiChanges()[outerSegmentIndex]); + float sdOut_dPhiChange_min = __H2F(segments.dPhiChangeMins()[outerSegmentIndex]); + float sdOut_dPhiChange_max = __H2F(segments.dPhiChangeMaxs()[outerSegmentIndex]); + + float sdOut_alphaOutRHmin = phi_mpi_pi(acc, sdOut_dPhiChange_min - sdOut_dPhiPos); + float sdOut_alphaOutRHmax = phi_mpi_pi(acc, sdOut_dPhiChange_max - sdOut_dPhiPos); + float sdOut_alphaOut = phi_mpi_pi(acc, sdOut_dPhiChange - sdOut_dPhiPos); + + float tl_axis_x = mds.anchorX()[fourthMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[fourthMDIndex] - mds.anchorY()[firstMDIndex]; + + float betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; + float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; + + float betaOut = -sdOut_alphaOut + phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - 
mds.anchorPhi()[fourthMDIndex]); + + float betaOutRHmin = betaOut - sdOut_alphaOutRHmin + sdOut_alphaOut; + float betaOutRHmax = betaOut - sdOut_alphaOutRHmax + sdOut_alphaOut; + + float swapTemp; + if (alpaka::math::abs(acc, betaOutRHmin) > alpaka::math::abs(acc, betaOutRHmax)) { + swapTemp = betaOutRHmin; + betaOutRHmin = betaOutRHmax; + betaOutRHmax = swapTemp; + } + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + const float corrF = 1.f; + float betaInCut = + alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr * corrF + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #6: first beta cut + if (alpaka::math::abs(acc, betaInRHmin) >= betaInCut) + return false; + + float betaAv = 0.5f * (betaIn + betaOut); + float pt_beta = dr * k2Rinv1GeVf / alpaka::math::sin(acc, betaAv); + + int lIn = 11; //endcap + int lOut = 13; //endcap + + float sdOut_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) * + (mds.anchorX()[fourthMDIndex] - mds.anchorX()[thirdMDIndex]) + + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex]) * + (mds.anchorY()[fourthMDIndex] - mds.anchorY()[thirdMDIndex])); + float sdOut_d = mds.anchorRt()[fourthMDIndex] - mds.anchorRt()[thirdMDIndex]; + + runDeltaBetaIterationsT5(acc, betaIn, betaOut, betaAv, pt_beta, sdIn_dr, sdOut_dr, dr, lIn); + + const float betaInMMSF = (alpaka::math::abs(acc, betaInRHmin + betaInRHmax) > 0) + ? 
(2.f * betaIn / alpaka::math::abs(acc, betaInRHmin + betaInRHmax)) + : 0.; //mean value of min,max is the old betaIn + const float betaOutMMSF = (alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax) > 0) + ? (2.f * betaOut / alpaka::math::abs(acc, betaOutRHmin + betaOutRHmax)) + : 0.; + betaInRHmin *= betaInMMSF; + betaInRHmax *= betaInMMSF; + betaOutRHmin *= betaOutMMSF; + betaOutRHmax *= betaOutMMSF; + + float min_ptBeta_maxPtBeta = alpaka::math::min( + acc, alpaka::math::abs(acc, pt_beta), kPt_betaMax); //need to confirm the range-out value of 7 GeV + const float dBetaMuls2 = thetaMuls2 * 16.f / (min_ptBeta_maxPtBeta * min_ptBeta_maxPtBeta); + + const float alphaInAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdIn_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_InLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float alphaOutAbsReg = + alpaka::math::max(acc, + alpaka::math::abs(acc, sdOut_alpha), + alpaka::math::asin(acc, alpaka::math::min(acc, rt_OutLo * k2Rinv1GeVf / 3.0f, kSinAlphaMax))); + const float dBetaInLum = lIn < 11 ? 0.0f : alpaka::math::abs(acc, alphaInAbsReg * kDeltaZLum / z_InLo); + const float dBetaOutLum = lOut < 11 ? 
0.0f : alpaka::math::abs(acc, alphaOutAbsReg * kDeltaZLum / z_OutLo); + const float dBetaLum2 = (dBetaInLum + dBetaOutLum) * (dBetaInLum + dBetaOutLum); + + const float dBetaRIn2 = 0; // TODO-RH + + float dBetaROut2 = 0; //TODO-RH + float betaOutCut = alpaka::math::asin(acc, alpaka::math::min(acc, dr * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdOut_d) + alpaka::math::sqrt(acc, dBetaLum2 + dBetaMuls2); + + //Cut #6: The real beta cut + if (alpaka::math::abs(acc, betaOut) >= betaOutCut) + return false; + + float dBetaRes = 0.02f / alpaka::math::min(acc, sdOut_d, sdIn_d); + float dBetaCut2 = + (dBetaRes * dBetaRes * 2.0f + dBetaMuls2 + dBetaLum2 + dBetaRIn2 + dBetaROut2 + + 0.25f * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax)) * + (alpaka::math::abs(acc, betaInRHmin - betaInRHmax) + alpaka::math::abs(acc, betaOutRHmin - betaOutRHmax))); + float dBeta = betaIn - betaOut; + //Cut #7: Cut on dBeta + return dBeta * dBeta <= dBetaCut2; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletAlgoSelector(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t innerOuterLowerModuleIndex, + uint16_t outerInnerLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + unsigned int fourthMDIndex) { + short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + short innerOuterLowerModuleSubdet = modules.subdets()[innerOuterLowerModuleIndex]; + short outerInnerLowerModuleSubdet = modules.subdets()[outerInnerLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; + + if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == 
Barrel and outerOuterLowerModuleSubdet == Barrel) { + return runQuintupletDefaultAlgoBBBB(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBEE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Barrel and + outerInnerLowerModuleSubdet == Barrel and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBBB(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Barrel and innerOuterLowerModuleSubdet == Endcap and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return runQuintupletDefaultAlgoBBEE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } else if (innerInnerLowerModuleSubdet == Endcap and innerOuterLowerModuleSubdet == Endcap and + outerInnerLowerModuleSubdet == Endcap and outerOuterLowerModuleSubdet == Endcap) { + return 
runQuintupletDefaultAlgoEEEE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + innerOuterLowerModuleIndex, + outerInnerLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex); + } + + return false; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runQuintupletDefaultAlgo(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + TripletsConst triplets, + uint16_t lowerModuleIndex1, + uint16_t lowerModuleIndex2, + uint16_t lowerModuleIndex3, + uint16_t lowerModuleIndex4, + uint16_t lowerModuleIndex5, + unsigned int innerTripletIndex, + unsigned int outerTripletIndex, + float& innerRadius, + float& outerRadius, + float& bridgeRadius, + float& regressionG, + float& regressionF, + float& regressionRadius, + float& rzChiSquared, + float& chiSquared, + float& nonAnchorChiSquared, + bool& tightCutFlag) { + unsigned int firstSegmentIndex = triplets.segmentIndices()[innerTripletIndex][0]; + unsigned int secondSegmentIndex = triplets.segmentIndices()[innerTripletIndex][1]; + unsigned int thirdSegmentIndex = triplets.segmentIndices()[outerTripletIndex][0]; + unsigned int fourthSegmentIndex = triplets.segmentIndices()[outerTripletIndex][1]; + + unsigned int innerOuterOuterMiniDoubletIndex = + segments.mdIndices()[secondSegmentIndex][1]; //inner triplet outer segment outer MD index + unsigned int outerInnerInnerMiniDoubletIndex = + segments.mdIndices()[thirdSegmentIndex][0]; //outer triplet inner segment inner MD index + + //this cut reduces the number of candidates by a factor of 3, i.e., 2 out of 3 warps can end right here! 
+ if (innerOuterOuterMiniDoubletIndex != outerInnerInnerMiniDoubletIndex) + return false; + + unsigned int firstMDIndex = segments.mdIndices()[firstSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[secondSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[secondSegmentIndex][1]; + unsigned int fourthMDIndex = segments.mdIndices()[thirdSegmentIndex][1]; + unsigned int fifthMDIndex = segments.mdIndices()[fourthSegmentIndex][1]; + + if (not runQuintupletAlgoSelector(acc, + modules, + mds, + segments, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + firstSegmentIndex, + thirdSegmentIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex)) + return false; + + if (not runQuintupletAlgoSelector(acc, + modules, + mds, + segments, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex4, + lowerModuleIndex5, + firstSegmentIndex, + fourthSegmentIndex, + firstMDIndex, + secondMDIndex, + fourthMDIndex, + fifthMDIndex)) + return false; + + float x1 = mds.anchorX()[firstMDIndex]; + float x2 = mds.anchorX()[secondMDIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float x4 = mds.anchorX()[fourthMDIndex]; + float x5 = mds.anchorX()[fifthMDIndex]; + + float y1 = mds.anchorY()[firstMDIndex]; + float y2 = mds.anchorY()[secondMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + float y4 = mds.anchorY()[fourthMDIndex]; + float y5 = mds.anchorY()[fifthMDIndex]; + + //construct the arrays + float x1Vec[] = {x1, x1, x1}; + float y1Vec[] = {y1, y1, y1}; + float x2Vec[] = {x2, x2, x2}; + float y2Vec[] = {y2, y2, y2}; + float x3Vec[] = {x3, x3, x3}; + float y3Vec[] = {y3, y3, y3}; + + if (modules.subdets()[lowerModuleIndex1] == Endcap and modules.moduleType()[lowerModuleIndex1] == TwoS) { + x1Vec[1] = mds.anchorLowEdgeX()[firstMDIndex]; + x1Vec[2] = mds.anchorHighEdgeX()[firstMDIndex]; + + y1Vec[1] = mds.anchorLowEdgeY()[firstMDIndex]; + y1Vec[2] = mds.anchorHighEdgeY()[firstMDIndex]; + } + if 
(modules.subdets()[lowerModuleIndex2] == Endcap and modules.moduleType()[lowerModuleIndex2] == TwoS) { + x2Vec[1] = mds.anchorLowEdgeX()[secondMDIndex]; + x2Vec[2] = mds.anchorHighEdgeX()[secondMDIndex]; + + y2Vec[1] = mds.anchorLowEdgeY()[secondMDIndex]; + y2Vec[2] = mds.anchorHighEdgeY()[secondMDIndex]; + } + if (modules.subdets()[lowerModuleIndex3] == Endcap and modules.moduleType()[lowerModuleIndex3] == TwoS) { + x3Vec[1] = mds.anchorLowEdgeX()[thirdMDIndex]; + x3Vec[2] = mds.anchorHighEdgeX()[thirdMDIndex]; + + y3Vec[1] = mds.anchorLowEdgeY()[thirdMDIndex]; + y3Vec[2] = mds.anchorHighEdgeY()[thirdMDIndex]; + } + + float innerRadiusMin2S, innerRadiusMax2S; + computeErrorInRadius(acc, x1Vec, y1Vec, x2Vec, y2Vec, x3Vec, y3Vec, innerRadiusMin2S, innerRadiusMax2S); + + for (int i = 0; i < 3; i++) { + x1Vec[i] = x4; + y1Vec[i] = y4; + } + if (modules.subdets()[lowerModuleIndex4] == Endcap and modules.moduleType()[lowerModuleIndex4] == TwoS) { + x1Vec[1] = mds.anchorLowEdgeX()[fourthMDIndex]; + x1Vec[2] = mds.anchorHighEdgeX()[fourthMDIndex]; + + y1Vec[1] = mds.anchorLowEdgeY()[fourthMDIndex]; + y1Vec[2] = mds.anchorHighEdgeY()[fourthMDIndex]; + } + + float bridgeRadiusMin2S, bridgeRadiusMax2S; + computeErrorInRadius(acc, x2Vec, y2Vec, x3Vec, y3Vec, x1Vec, y1Vec, bridgeRadiusMin2S, bridgeRadiusMax2S); + + for (int i = 0; i < 3; i++) { + x2Vec[i] = x5; + y2Vec[i] = y5; + } + if (modules.subdets()[lowerModuleIndex5] == Endcap and modules.moduleType()[lowerModuleIndex5] == TwoS) { + x2Vec[1] = mds.anchorLowEdgeX()[fifthMDIndex]; + x2Vec[2] = mds.anchorHighEdgeX()[fifthMDIndex]; + + y2Vec[1] = mds.anchorLowEdgeY()[fifthMDIndex]; + y2Vec[2] = mds.anchorHighEdgeY()[fifthMDIndex]; + } + + float outerRadiusMin2S, outerRadiusMax2S; + computeErrorInRadius(acc, x3Vec, y3Vec, x1Vec, y1Vec, x2Vec, y2Vec, outerRadiusMin2S, outerRadiusMax2S); + + float g, f; + outerRadius = triplets.radius()[outerTripletIndex]; + bridgeRadius = computeRadiusFromThreeAnchorHits(acc, x2, y2, x3, y3, 
x4, y4, g, f); + innerRadius = triplets.radius()[innerTripletIndex]; + g = triplets.centerX()[innerTripletIndex]; + f = triplets.centerY()[innerTripletIndex]; + + float inner_pt = 2 * k2Rinv1GeVf * innerRadius; + + if (not passT5RZConstraint(acc, + modules, + mds, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + fourthMDIndex, + fifthMDIndex, + lowerModuleIndex1, + lowerModuleIndex2, + lowerModuleIndex3, + lowerModuleIndex4, + lowerModuleIndex5, + rzChiSquared, + inner_pt, + innerRadius, + g, + f, + tightCutFlag)) + return false; + + if (innerRadius < 0.95f * ptCut / (2.f * k2Rinv1GeVf)) + return false; + + //split by category + bool matchedRadii; + if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Barrel and + modules.subdets()[lowerModuleIndex5] == Barrel) { + matchedRadii = matchRadiiBBBBB(acc, innerRadius, bridgeRadius, outerRadius); + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Barrel and + modules.subdets()[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBBBBE(acc, innerRadius, bridgeRadius, outerRadius); + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Barrel and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { + if (modules.layers()[lowerModuleIndex1] == 1) { + matchedRadii = + matchRadiiBBBEE12378(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else if (modules.layers()[lowerModuleIndex1] == 2) { + matchedRadii = + matchRadiiBBBEE23478(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else { + 
matchedRadii = + matchRadiiBBBEE34578(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } + } + + else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Barrel and + modules.subdets()[lowerModuleIndex3] == Endcap and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBBEEE(acc, innerRadius, bridgeRadius, outerRadius, bridgeRadiusMin2S, bridgeRadiusMax2S); + } else if (modules.subdets()[lowerModuleIndex1] == Barrel and modules.subdets()[lowerModuleIndex2] == Endcap and + modules.subdets()[lowerModuleIndex3] == Endcap and modules.subdets()[lowerModuleIndex4] == Endcap and + modules.subdets()[lowerModuleIndex5] == Endcap) { + matchedRadii = matchRadiiBEEEE(acc, + innerRadius, + bridgeRadius, + outerRadius, + innerRadiusMin2S, + innerRadiusMax2S, + bridgeRadiusMin2S, + bridgeRadiusMax2S); + } else { + matchedRadii = matchRadiiEEEEE(acc, + innerRadius, + bridgeRadius, + outerRadius, + innerRadiusMin2S, + innerRadiusMax2S, + bridgeRadiusMin2S, + bridgeRadiusMax2S); + } + + //compute regression radius right here - this computation is expensive!!! 
+ if (not matchedRadii) + return false; + + float xVec[] = {x1, x2, x3, x4, x5}; + float yVec[] = {y1, y2, y3, y4, y5}; + const uint16_t lowerModuleIndices[] = { + lowerModuleIndex1, lowerModuleIndex2, lowerModuleIndex3, lowerModuleIndex4, lowerModuleIndex5}; + + // 5 categories for sigmas + float sigmas2[5], delta1[5], delta2[5], slopes[5]; + bool isFlat[5]; + + computeSigmasForRegression(acc, modules, lowerModuleIndices, delta1, delta2, slopes, isFlat); + regressionRadius = computeRadiusUsingRegression(acc, + Params_T5::kLayers, + xVec, + yVec, + delta1, + delta2, + slopes, + isFlat, + regressionG, + regressionF, + sigmas2, + chiSquared); + + unsigned int mdIndices[] = {firstMDIndex, secondMDIndex, thirdMDIndex, fourthMDIndex, fifthMDIndex}; + float inference = t5dnn::runInference(acc, + modules, + mds, + segments, + triplets, + xVec, + yVec, + mdIndices, + lowerModuleIndices, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius); + tightCutFlag = tightCutFlag and (inference > t5dnn::kLSTWp2); // T5-in-TC cut + if (inference <= t5dnn::kLSTWp2) // T5-building cut + return false; + + //compute the other chisquared + //non anchor is always shifted for tilted and endcap! 
+ float nonAnchorDelta1[Params_T5::kLayers], nonAnchorDelta2[Params_T5::kLayers], nonAnchorSlopes[Params_T5::kLayers]; + float nonAnchorxs[] = {mds.outerX()[firstMDIndex], + mds.outerX()[secondMDIndex], + mds.outerX()[thirdMDIndex], + mds.outerX()[fourthMDIndex], + mds.outerX()[fifthMDIndex]}; + float nonAnchorys[] = {mds.outerY()[firstMDIndex], + mds.outerY()[secondMDIndex], + mds.outerY()[thirdMDIndex], + mds.outerY()[fourthMDIndex], + mds.outerY()[fifthMDIndex]}; + + computeSigmasForRegression(acc, + modules, + lowerModuleIndices, + nonAnchorDelta1, + nonAnchorDelta2, + nonAnchorSlopes, + isFlat, + Params_T5::kLayers, + false); + nonAnchorChiSquared = computeChiSquared(acc, + Params_T5::kLayers, + nonAnchorxs, + nonAnchorys, + nonAnchorDelta1, + nonAnchorDelta2, + nonAnchorSlopes, + isFlat, + regressionG, + regressionF, + regressionRadius); + return true; + } + + struct CreateQuintuplets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + Triplets triplets, + TripletsOccupancyConst tripletsOccupancy, + Quintuplets quintuplets, + QuintupletsOccupancy quintupletsOccupancy, + ObjectRangesConst ranges, + uint16_t nEligibleT5Modules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int iter = globalThreadIdx[0]; iter < nEligibleT5Modules; iter += gridThreadExtent[0]) { + uint16_t lowerModule1 = ranges.indicesOfEligibleT5Modules()[iter]; + short layer2_adjustment; + int layer = modules.layers()[lowerModule1]; + if (layer == 1) { + layer2_adjustment = 1; + } // get upper segment to be in second layer + else if (layer == 2) { + layer2_adjustment = 0; + } // get lower segment to be in second layer + else { + continue; + } + unsigned int nInnerTriplets = tripletsOccupancy.nTriplets()[lowerModule1]; + for (unsigned int innerTripletArrayIndex = globalThreadIdx[1]; innerTripletArrayIndex < nInnerTriplets; + 
innerTripletArrayIndex += gridThreadExtent[1]) { + unsigned int innerTripletIndex = ranges.tripletModuleIndices()[lowerModule1] + innerTripletArrayIndex; + uint16_t lowerModule2 = triplets.lowerModuleIndices()[innerTripletIndex][1]; + uint16_t lowerModule3 = triplets.lowerModuleIndices()[innerTripletIndex][2]; + unsigned int nOuterTriplets = tripletsOccupancy.nTriplets()[lowerModule3]; + for (unsigned int outerTripletArrayIndex = globalThreadIdx[2]; outerTripletArrayIndex < nOuterTriplets; + outerTripletArrayIndex += gridThreadExtent[2]) { + unsigned int outerTripletIndex = ranges.tripletModuleIndices()[lowerModule3] + outerTripletArrayIndex; + uint16_t lowerModule4 = triplets.lowerModuleIndices()[outerTripletIndex][1]; + uint16_t lowerModule5 = triplets.lowerModuleIndices()[outerTripletIndex][2]; + + float innerRadius, outerRadius, bridgeRadius, regressionG, regressionF, regressionRadius, rzChiSquared, + chiSquared, nonAnchorChiSquared; //required for making distributions + + bool tightCutFlag = false; + bool success = runQuintupletDefaultAlgo(acc, + modules, + mds, + segments, + triplets, + lowerModule1, + lowerModule2, + lowerModule3, + lowerModule4, + lowerModule5, + innerTripletIndex, + outerTripletIndex, + innerRadius, + outerRadius, + bridgeRadius, + regressionG, + regressionF, + regressionRadius, + rzChiSquared, + chiSquared, + nonAnchorChiSquared, + tightCutFlag); + + if (success) { + int totOccupancyQuintuplets = alpaka::atomicAdd( + acc, &quintupletsOccupancy.totOccupancyQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + if (totOccupancyQuintuplets >= ranges.quintupletModuleOccupancy()[lowerModule1]) { +#ifdef WARNINGS + printf("Quintuplet excess alert! Module index = %d\n", lowerModule1); +#endif + } else { + int quintupletModuleIndex = alpaka::atomicAdd( + acc, &quintupletsOccupancy.nQuintuplets()[lowerModule1], 1u, alpaka::hierarchy::Threads{}); + //this if statement should never get executed! 
+ if (ranges.quintupletModuleIndices()[lowerModule1] == -1) { +#ifdef WARNINGS + printf("Quintuplets : no memory for module at module index = %d\n", lowerModule1); +#endif + } else { + unsigned int quintupletIndex = ranges.quintupletModuleIndices()[lowerModule1] + quintupletModuleIndex; + float phi = mds.anchorPhi()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] + [layer2_adjustment]]; + float eta = mds.anchorEta()[segments.mdIndices()[triplets.segmentIndices()[innerTripletIndex][0]] + [layer2_adjustment]]; + float pt = (innerRadius + outerRadius) * k2Rinv1GeVf; + float scores = chiSquared + nonAnchorChiSquared; + addQuintupletToMemory(triplets, + quintuplets, + innerTripletIndex, + outerTripletIndex, + lowerModule1, + lowerModule2, + lowerModule3, + lowerModule4, + lowerModule5, + innerRadius, + bridgeRadius, + outerRadius, + regressionG, + regressionF, + regressionRadius, + rzChiSquared, + chiSquared, + nonAnchorChiSquared, + pt, + eta, + phi, + scores, + layer, + quintupletIndex, + tightCutFlag); + + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][0]] = true; + triplets.partOfT5()[quintuplets.tripletIndices()[quintupletIndex][1]] = true; + } + } + } + } + } + } + } + }; + + struct CreateEligibleModulesListForQuintuplets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + TripletsOccupancyConst tripletsOccupancy, + ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nEligibleT5Modulesx = alpaka::declareSharedVar(acc); + int& nTotalQuintupletsx = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalQuintupletsx = 0; + nEligibleT5Modulesx = 0; + } + 
alpaka::syncBlockThreads(acc); + + for (int i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + // Condition for a quintuple to exist for a module + // TCs don't exist for layers 5 and 6 barrel, and layers 2,3,4,5 endcap + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + if (tripletsOccupancy.nTriplets()[i] == 0) + continue; + if (module_subdets == Barrel and module_layers >= 3) + continue; + if (module_subdets == Endcap and module_layers > 1) + continue; + + int nEligibleT5Modules = alpaka::atomicAdd(acc, &nEligibleT5Modulesx, 1, alpaka::hierarchy::Threads{}); + + int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 336; + else if (category_number == 0 && eta_number == 1) + occupancy = 414; + else if (category_number == 0 && eta_number == 2) + occupancy = 231; + else if (category_number == 0 && eta_number == 3) + occupancy = 146; + else if (category_number == 3 && eta_number == 1) + occupancy = 0; + else if (category_number == 3 && eta_number == 
2) + occupancy = 191; + else if (category_number == 3 && eta_number == 3) + occupancy = 106; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createEligibleModulesListForQuintupletsGPU! Module index = %i\n", i); +#endif + } + + int nTotQ = alpaka::atomicAdd(acc, &nTotalQuintupletsx, occupancy, alpaka::hierarchy::Threads{}); + ranges.quintupletModuleIndices()[i] = nTotQ; + ranges.indicesOfEligibleT5Modules()[nEligibleT5Modules] = i; + ranges.quintupletModuleOccupancy()[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + ranges.nEligibleT5Modules() = static_cast(nEligibleT5Modulesx); + ranges.nTotalQuints() = static_cast(nTotalQuintupletsx); + } + } + }; + + struct AddQuintupletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + QuintupletsOccupancyConst quintupletsOccupancy, + ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (quintupletsOccupancy.nQuintuplets()[i] == 0 or ranges.quintupletModuleIndices()[i] == -1) { + ranges.quintupletRanges()[i][0] = -1; + ranges.quintupletRanges()[i][1] = -1; + } else { + ranges.quintupletRanges()[i][0] = ranges.quintupletModuleIndices()[i]; + ranges.quintupletRanges()[i][1] = + ranges.quintupletModuleIndices()[i] + quintupletsOccupancy.nQuintuplets()[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Segment.h b/RecoTracker/LSTCore/src/alpaka/Segment.h new file mode 100644 index 0000000000000..fc885e9d66afe --- 
/dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Segment.h @@ -0,0 +1,853 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Segment_h +#define RecoTracker_LSTCore_src_alpaka_Segment_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/alpaka/SegmentsDeviceCollection.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/EndcapGeometry.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" + +#include "MiniDoublet.h" +#include "Hit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules_seg(ModulesConst modules, unsigned int moduleIndex) { + // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + short subdet = modules.subdets()[moduleIndex]; + short layer = modules.layers()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + short rod = modules.rods()[moduleIndex]; + + return (subdet == Barrel) && (((side != Center) && (layer == 3)) || + ((side == NegZ) && (((layer == 2) && (rod > 5)) || ((layer == 1) && (rod > 9)))) || + ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool isTighterTiltedModules_seg(short subdet, short layer, short side, short rod) { + // The "tighter" tilted modules are the subset of tilted modules that have smaller spacing + // This is the same as what was previously considered as"isNormalTiltedModules" + // See Figure 9.1 of https://cds.cern.ch/record/2272264/files/CMS-TDR-014.pdf + return (subdet == Barrel) && (((side != Center) && (layer == 3)) || + ((side == NegZ) && (((layer == 2) && (rod 
> 5)) || ((layer == 1) && (rod > 9)))) || + ((side == PosZ) && (((layer == 2) && (rod < 8)) || ((layer == 1) && (rod < 4))))); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(short layer, short ring, short subdet, short side, short rod) { + static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + static constexpr float miniDeltaEndcap[5][15] = { + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}}; + + unsigned int iL = layer - 1; + unsigned int iR = ring - 1; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules_seg(subdet, layer, side, rod)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE float moduleGapSize_seg(ModulesConst modules, unsigned int moduleIndex) { + static constexpr float miniDeltaTilted[3] = {0.26f, 0.26f, 0.26f}; + static constexpr float miniDeltaFlat[6] = {0.26f, 0.16f, 0.16f, 0.18f, 0.18f, 0.18f}; + static constexpr float miniDeltaLooseTilted[3] = {0.4f, 0.4f, 0.4f}; + static constexpr float miniDeltaEndcap[5][15] = { + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 
0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}, + {0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.4f, 0.18f, /*10*/ 0.18f, 0.18f, 0.18f, 0.18f, 0.18f}}; + + unsigned int iL = modules.layers()[moduleIndex] - 1; + unsigned int iR = modules.rings()[moduleIndex] - 1; + short subdet = modules.subdets()[moduleIndex]; + short side = modules.sides()[moduleIndex]; + + float moduleSeparation = 0; + + if (subdet == Barrel and side == Center) { + moduleSeparation = miniDeltaFlat[iL]; + } else if (isTighterTiltedModules_seg(modules, moduleIndex)) { + moduleSeparation = miniDeltaTilted[iL]; + } else if (subdet == Endcap) { + moduleSeparation = miniDeltaEndcap[iL][iR]; + } else //Loose tilted modules + { + moduleSeparation = miniDeltaLooseTilted[iL]; + } + + return moduleSeparation; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void dAlphaThreshold(TAcc const& acc, + float* dAlphaThresholdValues, + ModulesConst modules, + MiniDoubletsConst mds, + float xIn, + float yIn, + float zIn, + float rtIn, + float xOut, + float yOut, + float zOut, + float rtOut, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex) { + float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) + ? 
kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; + + //more accurate then outer rt - inner rt + float segmentDr = alpaka::math::sqrt(acc, (yOut - yIn) * (yOut - yIn) + (xOut - xIn) * (xOut - xIn)); + + const float dAlpha_Bfield = + alpaka::math::asin(acc, alpaka::math::min(acc, segmentDr * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + bool isInnerTilted = + modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] != Center; + bool isOuterTilted = + modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] != Center; + + float drdzInner = modules.drdzs()[innerLowerModuleIndex]; + float drdzOuter = modules.drdzs()[outerLowerModuleIndex]; + float innerModuleGapSize = moduleGapSize_seg(modules, innerLowerModuleIndex); + float outerModuleGapSize = moduleGapSize_seg(modules, outerLowerModuleIndex); + const float innerminiTilt2 = isInnerTilted + ? ((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzInner * drdzInner) / + (1.f + drdzInner * drdzInner) / (innerModuleGapSize * innerModuleGapSize)) + : 0; + + const float outerminiTilt2 = isOuterTilted + ? 
((0.5f * 0.5f) * (kPixelPSZpitch * kPixelPSZpitch) * (drdzOuter * drdzOuter) / + (1.f + drdzOuter * drdzOuter) / (outerModuleGapSize * outerModuleGapSize)) + : 0; + + float miniDelta = innerModuleGapSize; + + float sdLumForInnerMini2; + float sdLumForOuterMini2; + + if (modules.subdets()[innerLowerModuleIndex] == Barrel) { + sdLumForInnerMini2 = innerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + } else { + sdLumForInnerMini2 = (mds.dphis()[innerMDIndex] * mds.dphis()[innerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mds.dzs()[innerMDIndex] * mds.dzs()[innerMDIndex]); + } + + if (modules.subdets()[outerLowerModuleIndex] == Barrel) { + sdLumForOuterMini2 = outerminiTilt2 * (dAlpha_Bfield * dAlpha_Bfield); + } else { + sdLumForOuterMini2 = (mds.dphis()[outerMDIndex] * mds.dphis()[outerMDIndex]) * (kDeltaZLum * kDeltaZLum) / + (mds.dzs()[outerMDIndex] * mds.dzs()[outerMDIndex]); + } + + // Unique stuff for the segment dudes alone + float dAlpha_res_inner = + 0.02f / miniDelta * + (modules.subdets()[innerLowerModuleIndex] == Barrel ? 1.0f : alpaka::math::abs(acc, zIn) / rtIn); + float dAlpha_res_outer = + 0.02f / miniDelta * + (modules.subdets()[outerLowerModuleIndex] == Barrel ? 
1.0f : alpaka::math::abs(acc, zOut) / rtOut); + + float dAlpha_res = dAlpha_res_inner + dAlpha_res_outer; + + if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.sides()[innerLowerModuleIndex] == Center) { + dAlphaThresholdValues[0] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } else { + dAlphaThresholdValues[0] = + dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForInnerMini2); + } + + if (modules.subdets()[outerLowerModuleIndex] == Barrel and modules.sides()[outerLowerModuleIndex] == Center) { + dAlphaThresholdValues[1] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } else { + dAlphaThresholdValues[1] = + dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls + sdLumForOuterMini2); + } + + //Inner to outer + dAlphaThresholdValues[2] = dAlpha_Bfield + alpaka::math::sqrt(acc, dAlpha_res * dAlpha_res + sdMuls * sdMuls); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addSegmentToMemory(Segments segments, + unsigned int lowerMDIndex, + unsigned int upperMDIndex, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDAnchorHitIndex, + unsigned int outerMDAnchorHitIndex, + float dPhi, + float dPhiMin, + float dPhiMax, + float dPhiChange, + float dPhiChangeMin, + float dPhiChangeMax, + unsigned int idx) { + segments.mdIndices()[idx][0] = lowerMDIndex; + segments.mdIndices()[idx][1] = upperMDIndex; + segments.innerLowerModuleIndices()[idx] = innerLowerModuleIndex; + segments.outerLowerModuleIndices()[idx] = outerLowerModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerMDAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerMDAnchorHitIndex; + + segments.dPhis()[idx] = __F2H(dPhi); + segments.dPhiMins()[idx] = __F2H(dPhiMin); + segments.dPhiMaxs()[idx] = __F2H(dPhiMax); + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + 
segments.dPhiChangeMins()[idx] = __F2H(dPhiChangeMin); + segments.dPhiChangeMaxs()[idx] = __F2H(dPhiChangeMax); + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addPixelSegmentToMemory(TAcc const& acc, + Segments segments, + SegmentsPixel segmentsPixel, + MiniDoubletsConst mds, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + uint16_t pixelModuleIndex, + unsigned int hitIdxs[4], + unsigned int innerAnchorHitIndex, + unsigned int outerAnchorHitIndex, + float dPhiChange, + unsigned int idx, + unsigned int pixelSegmentArrayIndex, + float score) { + segments.mdIndices()[idx][0] = innerMDIndex; + segments.mdIndices()[idx][1] = outerMDIndex; + segments.innerLowerModuleIndices()[idx] = pixelModuleIndex; + segments.outerLowerModuleIndices()[idx] = pixelModuleIndex; + segments.innerMiniDoubletAnchorHitIndices()[idx] = innerAnchorHitIndex; + segments.outerMiniDoubletAnchorHitIndices()[idx] = outerAnchorHitIndex; + segments.dPhiChanges()[idx] = __F2H(dPhiChange); + + segmentsPixel.isDup()[pixelSegmentArrayIndex] = false; + segmentsPixel.partOfPT5()[pixelSegmentArrayIndex] = false; + segmentsPixel.score()[pixelSegmentArrayIndex] = score; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].x = hitIdxs[0]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].y = hitIdxs[1]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].z = hitIdxs[2]; + segmentsPixel.pLSHitsIdxs()[pixelSegmentArrayIndex].w = hitIdxs[3]; + + //computing circle parameters + /* + The two anchor hits are r3PCA and r3LH. 
p3PCA pt, eta, phi is hitIndex1 x, y, z + */ + float circleRadius = mds.outerX()[innerMDIndex] / (2 * k2Rinv1GeVf); + float circlePhi = mds.outerZ()[innerMDIndex]; + float candidateCenterXs[] = {mds.anchorX()[innerMDIndex] + circleRadius * alpaka::math::sin(acc, circlePhi), + mds.anchorX()[innerMDIndex] - circleRadius * alpaka::math::sin(acc, circlePhi)}; + float candidateCenterYs[] = {mds.anchorY()[innerMDIndex] - circleRadius * alpaka::math::cos(acc, circlePhi), + mds.anchorY()[innerMDIndex] + circleRadius * alpaka::math::cos(acc, circlePhi)}; + + //check which of the circles can accommodate r3LH better (we won't get perfect agreement) + float bestChiSquared = kVerticalModuleSlope; + float chiSquared; + size_t bestIndex; + for (size_t i = 0; i < 2; i++) { + chiSquared = alpaka::math::abs(acc, + alpaka::math::sqrt(acc, + (mds.anchorX()[outerMDIndex] - candidateCenterXs[i]) * + (mds.anchorX()[outerMDIndex] - candidateCenterXs[i]) + + (mds.anchorY()[outerMDIndex] - candidateCenterYs[i]) * + (mds.anchorY()[outerMDIndex] - candidateCenterYs[i])) - + circleRadius); + if (chiSquared < bestChiSquared) { + bestChiSquared = chiSquared; + bestIndex = i; + } + } + segmentsPixel.circleCenterX()[pixelSegmentArrayIndex] = candidateCenterXs[bestIndex]; + segmentsPixel.circleCenterY()[pixelSegmentArrayIndex] = candidateCenterYs[bestIndex]; + segmentsPixel.circleRadius()[pixelSegmentArrayIndex] = circleRadius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoBarrel(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + float& dPhi, + float& dPhiMin, + float& dPhiMax, + float& dPhiChange, + float& dPhiChangeMin, + float& dPhiChangeMax) { + float sdMuls = (modules.subdets()[innerLowerModuleIndex] == Barrel) + ? 
kMiniMulsPtScaleBarrel[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut + : kMiniMulsPtScaleEndcap[modules.layers()[innerLowerModuleIndex] - 1] * 3.f / ptCut; + + float xIn, yIn, zIn, rtIn, xOut, yOut, zOut, rtOut; + + xIn = mds.anchorX()[innerMDIndex]; + yIn = mds.anchorY()[innerMDIndex]; + zIn = mds.anchorZ()[innerMDIndex]; + rtIn = mds.anchorRt()[innerMDIndex]; + + xOut = mds.anchorX()[outerMDIndex]; + yOut = mds.anchorY()[outerMDIndex]; + zOut = mds.anchorZ()[outerMDIndex]; + rtOut = mds.anchorRt()[outerMDIndex]; + + float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + float sdPVoff = 0.1f / rtOut; + float dzDrtScale = alpaka::math::tan(acc, sdSlope) / sdSlope; //FIXME: need appropriate value + + const float zGeom = modules.layers()[innerLowerModuleIndex] <= 2 ? 2.f * kPixelPSZpitch : 2.f * kStrip2SZpitch; + + float zLo = zIn + (zIn - kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - + zGeom; //slope-correction only on outer end + float zHi = zIn + (zIn + kDeltaZLum) * (rtOut / rtIn - 1.f) * (zIn < 0.f ? 
1.f : dzDrtScale) + zGeom; + + if ((zOut < zLo) || (zOut > zHi)) + return false; + + float sdCut = sdSlope + alpaka::math::sqrt(acc, sdMuls * sdMuls + sdPVoff * sdPVoff); + + dPhi = phi_mpi_pi(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); + + if (alpaka::math::abs(acc, dPhi) > sdCut) + return false; + + dPhiChange = phi_mpi_pi(acc, phi(acc, xOut - xIn, yOut - yIn) - mds.anchorPhi()[innerMDIndex]); + + if (alpaka::math::abs(acc, dPhiChange) > sdCut) + return false; + + float dAlphaThresholdValues[3]; + dAlphaThreshold(acc, + dAlphaThresholdValues, + modules, + mds, + xIn, + yIn, + zIn, + rtIn, + xOut, + yOut, + zOut, + rtOut, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex); + + float innerMDAlpha = mds.dphichanges()[innerMDIndex]; + float outerMDAlpha = mds.dphichanges()[outerMDIndex]; + float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange; + float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange; + float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha; + + float dAlphaInnerMDSegmentThreshold = dAlphaThresholdValues[0]; + float dAlphaOuterMDSegmentThreshold = dAlphaThresholdValues[1]; + float dAlphaInnerMDOuterMDThreshold = dAlphaThresholdValues[2]; + + if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold) + return false; + if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) + return false; + return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgoEndcap(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + float& dPhi, + float& dPhiMin, + float& dPhiMax, + float& dPhiChange, + float& dPhiChangeMin, + float& dPhiChangeMax) { + float xIn, yIn, zIn, rtIn, xOut, yOut, zOut, rtOut; + + xIn = 
mds.anchorX()[innerMDIndex]; + yIn = mds.anchorY()[innerMDIndex]; + zIn = mds.anchorZ()[innerMDIndex]; + rtIn = mds.anchorRt()[innerMDIndex]; + + xOut = mds.anchorX()[outerMDIndex]; + yOut = mds.anchorY()[outerMDIndex]; + zOut = mds.anchorZ()[outerMDIndex]; + rtOut = mds.anchorRt()[outerMDIndex]; + + bool outerLayerEndcapTwoS = + (modules.subdets()[outerLowerModuleIndex] == Endcap) && (modules.moduleType()[outerLowerModuleIndex] == TwoS); + + float sdSlope = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + float disks2SMinRadius = 60.f; + + float rtGeom = ((rtIn < disks2SMinRadius && rtOut < disks2SMinRadius) + ? (2.f * kPixelPSZpitch) + : ((rtIn < disks2SMinRadius || rtOut < disks2SMinRadius) ? (kPixelPSZpitch + kStrip2SZpitch) + : (2.f * kStrip2SZpitch))); + + //cut 0 - z compatibility + if (zIn * zOut < 0) + return false; + + float dz = zOut - zIn; + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + float drtDzScale = sdSlope / alpaka::math::tan(acc, sdSlope); + + float rtLo = alpaka::math::max( + acc, rtIn * (1.f + dz / (zIn + dLum) * drtDzScale) - rtGeom, rtIn - 0.5f * rtGeom); //rt should increase + float rtHi = rtIn * (zOut - dLum) / (zIn - dLum) + + rtGeom; //dLum for luminous; rGeom for measurement size; no tanTheta_loc(pt) correction + + // Completeness + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + dPhi = phi_mpi_pi(acc, mds.anchorPhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); + + float sdCut = sdSlope; + if (outerLayerEndcapTwoS) { + float dPhiPos_high = phi_mpi_pi(acc, mds.anchorHighEdgePhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); + float dPhiPos_low = phi_mpi_pi(acc, mds.anchorLowEdgePhi()[outerMDIndex] - mds.anchorPhi()[innerMDIndex]); + + dPhiMax = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? dPhiPos_high : dPhiPos_low; + dPhiMin = alpaka::math::abs(acc, dPhiPos_high) > alpaka::math::abs(acc, dPhiPos_low) ? 
dPhiPos_low : dPhiPos_high; + } else { + dPhiMax = dPhi; + dPhiMin = dPhi; + } + if (alpaka::math::abs(acc, dPhi) > sdCut) + return false; + + float dzFrac = dz / zIn; + dPhiChange = dPhi / dzFrac * (1.f + dzFrac); + dPhiChangeMin = dPhiMin / dzFrac * (1.f + dzFrac); + dPhiChangeMax = dPhiMax / dzFrac * (1.f + dzFrac); + + if (alpaka::math::abs(acc, dPhiChange) > sdCut) + return false; + + float dAlphaThresholdValues[3]; + dAlphaThreshold(acc, + dAlphaThresholdValues, + modules, + mds, + xIn, + yIn, + zIn, + rtIn, + xOut, + yOut, + zOut, + rtOut, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex); + + float innerMDAlpha = mds.dphichanges()[innerMDIndex]; + float outerMDAlpha = mds.dphichanges()[outerMDIndex]; + float dAlphaInnerMDSegment = innerMDAlpha - dPhiChange; + float dAlphaOuterMDSegment = outerMDAlpha - dPhiChange; + float dAlphaInnerMDOuterMD = innerMDAlpha - outerMDAlpha; + + float dAlphaInnerMDSegmentThreshold = dAlphaThresholdValues[0]; + float dAlphaOuterMDSegmentThreshold = dAlphaThresholdValues[1]; + float dAlphaInnerMDOuterMDThreshold = dAlphaThresholdValues[2]; + + if (alpaka::math::abs(acc, dAlphaInnerMDSegment) >= dAlphaInnerMDSegmentThreshold) + return false; + if (alpaka::math::abs(acc, dAlphaOuterMDSegment) >= dAlphaOuterMDSegmentThreshold) + return false; + return alpaka::math::abs(acc, dAlphaInnerMDOuterMD) < dAlphaInnerMDOuterMDThreshold; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runSegmentDefaultAlgo(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + uint16_t innerLowerModuleIndex, + uint16_t outerLowerModuleIndex, + unsigned int innerMDIndex, + unsigned int outerMDIndex, + float& dPhi, + float& dPhiMin, + float& dPhiMax, + float& dPhiChange, + float& dPhiChangeMin, + float& dPhiChangeMax) { + if (modules.subdets()[innerLowerModuleIndex] == Barrel and modules.subdets()[outerLowerModuleIndex] == Barrel) { + return runSegmentDefaultAlgoBarrel(acc, + modules, + mds, + 
innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax); + } else { + return runSegmentDefaultAlgoEndcap(acc, + modules, + mds, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax); + } + } + + struct CreateSegments { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + MiniDoubletsOccupancyConst mdsOccupancy, + Segments segments, + SegmentsOccupancy segmentsOccupancy, + ObjectRangesConst ranges) const { + auto const globalBlockIdx = alpaka::getIdx(acc); + auto const blockThreadIdx = alpaka::getIdx(acc); + auto const gridBlockExtent = alpaka::getWorkDiv(acc); + auto const blockThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t innerLowerModuleIndex = globalBlockIdx[2]; innerLowerModuleIndex < modules.nLowerModules(); + innerLowerModuleIndex += gridBlockExtent[2]) { + unsigned int nInnerMDs = mdsOccupancy.nMDs()[innerLowerModuleIndex]; + if (nInnerMDs == 0) + continue; + + unsigned int nConnectedModules = modules.nConnectedModules()[innerLowerModuleIndex]; + + for (uint16_t outerLowerModuleArrayIdx = blockThreadIdx[1]; outerLowerModuleArrayIdx < nConnectedModules; + outerLowerModuleArrayIdx += blockThreadExtent[1]) { + uint16_t outerLowerModuleIndex = modules.moduleMap()[innerLowerModuleIndex][outerLowerModuleArrayIdx]; + + unsigned int nOuterMDs = mdsOccupancy.nMDs()[outerLowerModuleIndex]; + + unsigned int limit = nInnerMDs * nOuterMDs; + + if (limit == 0) + continue; + for (unsigned int hitIndex = blockThreadIdx[2]; hitIndex < limit; hitIndex += blockThreadExtent[2]) { + unsigned int innerMDArrayIdx = hitIndex / nOuterMDs; + unsigned int outerMDArrayIdx = hitIndex % nOuterMDs; + if (outerMDArrayIdx >= nOuterMDs) + continue; + + unsigned int innerMDIndex = 
ranges.mdRanges()[innerLowerModuleIndex][0] + innerMDArrayIdx; + unsigned int outerMDIndex = ranges.mdRanges()[outerLowerModuleIndex][0] + outerMDArrayIdx; + + float dPhi, dPhiMin, dPhiMax, dPhiChange, dPhiChangeMin, dPhiChangeMax; + + unsigned int innerMiniDoubletAnchorHitIndex = mds.anchorHitIndices()[innerMDIndex]; + unsigned int outerMiniDoubletAnchorHitIndex = mds.anchorHitIndices()[outerMDIndex]; + dPhiMin = 0; + dPhiMax = 0; + dPhiChangeMin = 0; + dPhiChangeMax = 0; + if (runSegmentDefaultAlgo(acc, + modules, + mds, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMDIndex, + outerMDIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax)) { + unsigned int totOccupancySegments = + alpaka::atomicAdd(acc, + &segmentsOccupancy.totOccupancySegments()[innerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); + if (static_cast(totOccupancySegments) >= ranges.segmentModuleOccupancy()[innerLowerModuleIndex]) { +#ifdef WARNINGS + printf("Segment excess alert! 
Module index = %d\n", innerLowerModuleIndex); +#endif + } else { + unsigned int segmentModuleIdx = alpaka::atomicAdd( + acc, &segmentsOccupancy.nSegments()[innerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int segmentIdx = ranges.segmentModuleIndices()[innerLowerModuleIndex] + segmentModuleIdx; + + addSegmentToMemory(segments, + innerMDIndex, + outerMDIndex, + innerLowerModuleIndex, + outerLowerModuleIndex, + innerMiniDoubletAnchorHitIndex, + outerMiniDoubletAnchorHitIndex, + dPhi, + dPhiMin, + dPhiMax, + dPhiChange, + dPhiChangeMin, + dPhiChangeMax, + segmentIdx); + } + } + } + } + } + } + }; + + struct CreateSegmentArrayRanges { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ObjectRanges ranges, + MiniDoubletsConst mds) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nTotalSegments = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalSegments = 0; + } + alpaka::syncBlockThreads(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (modules.nConnectedModules()[i] == 0) { + ranges.segmentModuleIndices()[i] = nTotalSegments; + ranges.segmentModuleOccupancy()[i] = 0; + continue; + } + + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + 
category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 572; + else if (category_number == 0 && eta_number == 1) + occupancy = 300; + else if (category_number == 0 && eta_number == 2) + occupancy = 183; + else if (category_number == 0 && eta_number == 3) + occupancy = 62; + else if (category_number == 1 && eta_number == 0) + occupancy = 191; + else if (category_number == 1 && eta_number == 1) + occupancy = 128; + else if (category_number == 2 && eta_number == 1) + occupancy = 107; + else if (category_number == 2 && eta_number == 2) + occupancy = 102; + else if (category_number == 3 && eta_number == 1) + occupancy = 64; + else if (category_number == 3 && eta_number == 2) + occupancy = 79; + else if (category_number == 3 && eta_number == 3) + occupancy = 85; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createSegmentArrayRanges! 
Module index = %i\n", i); +#endif + } + + int nTotSegs = alpaka::atomicAdd(acc, &nTotalSegments, occupancy, alpaka::hierarchy::Threads{}); + ranges.segmentModuleIndices()[i] = nTotSegs; + ranges.segmentModuleOccupancy()[i] = occupancy; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + ranges.segmentModuleIndices()[modules.nLowerModules()] = nTotalSegments; + ranges.nTotalSegs() = nTotalSegments; + } + } + }; + + struct AddSegmentRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + SegmentsOccupancyConst segmentsOccupancy, + ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (segmentsOccupancy.nSegments()[i] == 0) { + ranges.segmentRanges()[i][0] = -1; + ranges.segmentRanges()[i][1] = -1; + } else { + ranges.segmentRanges()[i][0] = ranges.segmentModuleIndices()[i]; + ranges.segmentRanges()[i][1] = ranges.segmentModuleIndices()[i] + segmentsOccupancy.nSegments()[i] - 1; + } + } + } + }; + + struct AddPixelSegmentToEventKernel { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + HitsConst hits, + MiniDoublets mds, + Segments segments, + SegmentsPixel segmentsPixel, + unsigned int* hitIndices0, + unsigned int* hitIndices1, + unsigned int* hitIndices2, + unsigned int* hitIndices3, + float* dPhiChange, + uint16_t pixelModuleIndex, + int size) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int tid = globalThreadIdx[2]; tid < size; tid 
+= gridThreadExtent[2]) { + unsigned int innerMDIndex = ranges.miniDoubletModuleIndices()[pixelModuleIndex] + 2 * (tid); + unsigned int outerMDIndex = ranges.miniDoubletModuleIndices()[pixelModuleIndex] + 2 * (tid) + 1; + unsigned int pixelSegmentIndex = ranges.segmentModuleIndices()[pixelModuleIndex] + tid; + + addMDToMemory(acc, + mds, + hits, + modules, + hitIndices0[tid], + hitIndices1[tid], + pixelModuleIndex, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + innerMDIndex); + addMDToMemory(acc, + mds, + hits, + modules, + hitIndices2[tid], + hitIndices3[tid], + pixelModuleIndex, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + outerMDIndex); + + //in outer hits - pt, eta, phi + float slope = alpaka::math::sinh(acc, hits.ys()[mds.outerHitIndices()[innerMDIndex]]); + float intercept = + hits.zs()[mds.anchorHitIndices()[innerMDIndex]] - slope * hits.rts()[mds.anchorHitIndices()[innerMDIndex]]; + float score_lsq = (hits.rts()[mds.anchorHitIndices()[outerMDIndex]] * slope + intercept) - + (hits.zs()[mds.anchorHitIndices()[outerMDIndex]]); + score_lsq = score_lsq * score_lsq; + + unsigned int hits1[Params_pLS::kHits]; + hits1[0] = hits.idxs()[mds.anchorHitIndices()[innerMDIndex]]; + hits1[1] = hits.idxs()[mds.anchorHitIndices()[outerMDIndex]]; + hits1[2] = hits.idxs()[mds.outerHitIndices()[innerMDIndex]]; + hits1[3] = hits.idxs()[mds.outerHitIndices()[outerMDIndex]]; + addPixelSegmentToMemory(acc, + segments, + segmentsPixel, + mds, + innerMDIndex, + outerMDIndex, + pixelModuleIndex, + hits1, + hitIndices0[tid], + hitIndices2[tid], + dPhiChange[tid], + pixelSegmentIndex, + tid, + score_lsq); + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h new file mode 100644 index 0000000000000..1863f262ffd7d --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/TrackCandidate.h @@ -0,0 +1,493 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_TrackCandidate_h +#define 
RecoTracker_LSTCore_src_alpaka_TrackCandidate_h + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/MiniDoubletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelQuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/PixelTripletsSoA.h" +#include "RecoTracker/LSTCore/interface/QuintupletsSoA.h" +#include "RecoTracker/LSTCore/interface/SegmentsSoA.h" +#include "RecoTracker/LSTCore/interface/TrackCandidatesSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +#include "Hit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addpLSTrackCandidateToMemory(TrackCandidates& cands, + unsigned int trackletIndex, + unsigned int trackCandidateIndex, + uint4 hitIndices, + int pixelSeedIndex) { + cands.trackCandidateType()[trackCandidateIndex] = LSTObjType::pLS; + cands.directObjectIndices()[trackCandidateIndex] = trackletIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; + + cands.objectIndices()[trackCandidateIndex][0] = trackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = trackletIndex; + + cands.hitIndices()[trackCandidateIndex][0] = + hitIndices.x; // Order explanation in https://github.com/SegmentLinking/TrackLooper/issues/267 + cands.hitIndices()[trackCandidateIndex][1] = hitIndices.z; + cands.hitIndices()[trackCandidateIndex][2] = hitIndices.y; + cands.hitIndices()[trackCandidateIndex][3] = hitIndices.w; + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE void addTrackCandidateToMemory(TrackCandidates& cands, + short trackCandidateType, + unsigned int innerTrackletIndex, + unsigned int outerTrackletIndex, + const uint8_t* logicalLayerIndices, + const uint16_t* lowerModuleIndices, + const unsigned int* hitIndices, + int pixelSeedIndex, + float centerX, + float centerY, + float radius, + unsigned int trackCandidateIndex, + unsigned int directObjectIndex) { + 
cands.trackCandidateType()[trackCandidateIndex] = trackCandidateType; + cands.directObjectIndices()[trackCandidateIndex] = directObjectIndex; + cands.pixelSeedIndex()[trackCandidateIndex] = pixelSeedIndex; + + cands.objectIndices()[trackCandidateIndex][0] = innerTrackletIndex; + cands.objectIndices()[trackCandidateIndex][1] = outerTrackletIndex; + + size_t limits = trackCandidateType == LSTObjType::pT5 ? Params_pT5::kLayers : Params_pT3::kLayers; + + //send the starting pointer to the logicalLayer and hitIndices + for (size_t i = 0; i < limits; i++) { + cands.logicalLayers()[trackCandidateIndex][i] = logicalLayerIndices[i]; + cands.lowerModuleIndices()[trackCandidateIndex][i] = lowerModuleIndices[i]; + } + for (size_t i = 0; i < 2 * limits; i++) { + cands.hitIndices()[trackCandidateIndex][i] = hitIndices[i]; + } + cands.centerX()[trackCandidateIndex] = __F2H(centerX); + cands.centerY()[trackCandidateIndex] = __F2H(centerY); + cands.radius()[trackCandidateIndex] = __F2H(radius); + } + + ALPAKA_FN_ACC ALPAKA_FN_INLINE int checkPixelHits( + unsigned int ix, unsigned int jx, MiniDoubletsConst mds, SegmentsConst segments, HitsConst hits) { + int phits1[Params_pLS::kHits]; + int phits2[Params_pLS::kHits]; + + phits1[0] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[1] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[ix][1]]]; + phits1[2] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[ix][0]]]; + phits1[3] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[ix][1]]]; + + phits2[0] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[1] = hits.idxs()[mds.anchorHitIndices()[segments.mdIndices()[jx][1]]]; + phits2[2] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[jx][0]]]; + phits2[3] = hits.idxs()[mds.outerHitIndices()[segments.mdIndices()[jx][1]]]; + + int npMatched = 0; + + for (int i = 0; i < Params_pLS::kHits; i++) { + bool pmatched = false; + if (phits1[i] == -1) + continue; + + for 
(int j = 0; j < Params_pLS::kHits; j++) { + if (phits2[j] == -1) + continue; + + if (phits1[i] == phits2[j]) { + pmatched = true; + break; + } + } + if (pmatched) + npMatched++; + } + return npMatched; + } + + struct CrossCleanpT3 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + PixelTriplets pixelTriplets, + SegmentsPixelConst segmentsPixel, + PixelQuintupletsConst pixelQuintuplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); + for (unsigned int pixelTripletIndex = globalThreadIdx[2]; pixelTripletIndex < nPixelTriplets; + pixelTripletIndex += gridThreadExtent[2]) { + if (pixelTriplets.isDup()[pixelTripletIndex]) + continue; + + // Cross cleaning step + float eta1 = __H2F(pixelTriplets.eta_pix()[pixelTripletIndex]); + float phi1 = __H2F(pixelTriplets.phi_pix()[pixelTripletIndex]); + + int pixelModuleIndex = modules.nLowerModules(); + unsigned int prefix = ranges.segmentModuleIndices()[pixelModuleIndex]; + + unsigned int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); + for (unsigned int pixelQuintupletIndex = globalThreadIdx[1]; pixelQuintupletIndex < nPixelQuintuplets; + pixelQuintupletIndex += gridThreadExtent[1]) { + unsigned int pLS_jx = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex]; + float eta2 = segmentsPixel.eta()[pLS_jx - prefix]; + float phi2 = segmentsPixel.phi()[pLS_jx - prefix]; + float dEta = alpaka::math::abs(acc, (eta1 - eta2)); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-5f) + pixelTriplets.isDup()[pixelTripletIndex] = true; + } + } + } + }; + + struct CrossCleanT5 { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + Quintuplets quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + PixelQuintupletsConst pixelQuintuplets, 
+ PixelTripletsConst pixelTriplets, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int innerInnerInnerLowerModuleArrayIndex = globalThreadIdx[0]; + innerInnerInnerLowerModuleArrayIndex < modules.nLowerModules(); + innerInnerInnerLowerModuleArrayIndex += gridThreadExtent[0]) { + if (ranges.quintupletModuleIndices()[innerInnerInnerLowerModuleArrayIndex] == -1) + continue; + + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[innerInnerInnerLowerModuleArrayIndex]; + for (unsigned int innerObjectArrayIndex = globalThreadIdx[1]; innerObjectArrayIndex < nQuints; + innerObjectArrayIndex += gridThreadExtent[1]) { + unsigned int quintupletIndex = + ranges.quintupletModuleIndices()[innerInnerInnerLowerModuleArrayIndex] + innerObjectArrayIndex; + + // Don't add duplicate T5s or T5s that are accounted in pT5s + if (quintuplets.isDup()[quintupletIndex] or quintuplets.partOfPT5()[quintupletIndex]) + continue; + unsigned int loop_bound = pixelQuintuplets.nPixelQuintuplets() + pixelTriplets.nPixelTriplets(); + // Cross cleaning step + float eta1 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi1 = __H2F(quintuplets.phi()[quintupletIndex]); + + for (unsigned int jx = globalThreadIdx[2]; jx < loop_bound; jx += gridThreadExtent[2]) { + float eta2, phi2; + if (jx < pixelQuintuplets.nPixelQuintuplets()) { + eta2 = __H2F(pixelQuintuplets.eta()[jx]); + phi2 = __H2F(pixelQuintuplets.phi()[jx]); + } else { + eta2 = __H2F(pixelTriplets.eta()[jx - pixelQuintuplets.nPixelQuintuplets()]); + phi2 = __H2F(pixelTriplets.phi()[jx - pixelQuintuplets.nPixelQuintuplets()]); + } + + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-3f) + quintuplets.isDup()[quintupletIndex] = true; + } + } + } + } + }; + + struct CrossCleanpLS { + template + ALPAKA_FN_ACC void operator()(TAcc 
const& acc, + ModulesConst modules, + ObjectRangesConst ranges, + PixelTripletsConst pixelTriplets, + TrackCandidates cands, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixel segmentsPixel, + MiniDoubletsConst mds, + HitsConst hits, + QuintupletsConst quintuplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int pixelModuleIndex = modules.nLowerModules(); + unsigned int nPixels = segmentsOccupancy.nSegments()[pixelModuleIndex]; + for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; + pixelArrayIndex += gridThreadExtent[2]) { + if (!segmentsPixel.isQuad()[pixelArrayIndex] || segmentsPixel.isDup()[pixelArrayIndex]) + continue; + + float eta1 = segmentsPixel.eta()[pixelArrayIndex]; + float phi1 = segmentsPixel.phi()[pixelArrayIndex]; + unsigned int prefix = ranges.segmentModuleIndices()[pixelModuleIndex]; + + unsigned int nTrackCandidates = cands.nTrackCandidates(); + for (unsigned int trackCandidateIndex = globalThreadIdx[1]; trackCandidateIndex < nTrackCandidates; + trackCandidateIndex += gridThreadExtent[1]) { + short type = cands.trackCandidateType()[trackCandidateIndex]; + unsigned int innerTrackletIdx = cands.objectIndices()[trackCandidateIndex][0]; + if (type == LSTObjType::T5) { + unsigned int quintupletIndex = innerTrackletIdx; // T5 index + float eta2 = __H2F(quintuplets.eta()[quintupletIndex]); + float phi2 = __H2F(quintuplets.phi()[quintupletIndex]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 1e-3f) + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + if (type == LSTObjType::pT3) { + int pLSIndex = pixelTriplets.pixelSegmentIndices()[innerTrackletIdx]; + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hits); + if (npMatched > 0) + segmentsPixel.isDup()[pixelArrayIndex] = true; 
+ + int pT3Index = innerTrackletIdx; + float eta2 = __H2F(pixelTriplets.eta_pix()[pT3Index]); + float phi2 = __H2F(pixelTriplets.phi_pix()[pT3Index]); + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 0.000001f) + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + if (type == LSTObjType::pT5) { + unsigned int pLSIndex = innerTrackletIdx; + int npMatched = checkPixelHits(prefix + pixelArrayIndex, pLSIndex, mds, segments, hits); + if (npMatched > 0) { + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + + float eta2 = segmentsPixel.eta()[pLSIndex - prefix]; + float phi2 = segmentsPixel.phi()[pLSIndex - prefix]; + float dEta = alpaka::math::abs(acc, eta1 - eta2); + float dPhi = calculate_dPhi(phi1, phi2); + + float dR2 = dEta * dEta + dPhi * dPhi; + if (dR2 < 0.000001f) + segmentsPixel.isDup()[pixelArrayIndex] = true; + } + } + } + } + }; + + struct AddpT3asTrackCandidates { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + PixelTripletsConst pixelTriplets, + TrackCandidates cands, + SegmentsPixelConst segmentsPixel, + ObjectRangesConst ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixelTriplets = pixelTriplets.nPixelTriplets(); + unsigned int pLS_offset = ranges.segmentModuleIndices()[nLowerModules]; + for (unsigned int pixelTripletIndex = globalThreadIdx[0]; pixelTripletIndex < nPixelTriplets; + pixelTripletIndex += gridThreadExtent[0]) { + if ((pixelTriplets.isDup()[pixelTripletIndex])) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx >= n_max_pixel_track_candidates) 
// This is done before any non-pixel TCs are added + { +#ifdef WARNINGS + printf("Track Candidate excess alert! Type = pT3"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT3(), 1u, alpaka::hierarchy::Threads{}); + + float radius = 0.5f * (__H2F(pixelTriplets.pixelRadius()[pixelTripletIndex]) + + __H2F(pixelTriplets.tripletRadius()[pixelTripletIndex])); + unsigned int pT3PixelIndex = pixelTriplets.pixelSegmentIndices()[pixelTripletIndex]; + addTrackCandidateToMemory(cands, + LSTObjType::pT3, + pixelTripletIndex, + pixelTripletIndex, + pixelTriplets.logicalLayers()[pixelTripletIndex].data(), + pixelTriplets.lowerModuleIndices()[pixelTripletIndex].data(), + pixelTriplets.hitIndices()[pixelTripletIndex].data(), + segmentsPixel.seedIdx()[pT3PixelIndex - pLS_offset], + __H2F(pixelTriplets.centerX()[pixelTripletIndex]), + __H2F(pixelTriplets.centerY()[pixelTripletIndex]), + radius, + trackCandidateIdx, + pixelTripletIndex); + } + } + } + }; + + struct AddT5asTrackCandidate { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + QuintupletsConst quintuplets, + QuintupletsOccupancyConst quintupletsOccupancy, + TrackCandidates cands, + ObjectRangesConst ranges) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (int idx = globalThreadIdx[1]; idx < nLowerModules; idx += gridThreadExtent[1]) { + if (ranges.quintupletModuleIndices()[idx] == -1) + continue; + + unsigned int nQuints = quintupletsOccupancy.nQuintuplets()[idx]; + for (unsigned int jdx = globalThreadIdx[2]; jdx < nQuints; jdx += gridThreadExtent[2]) { + unsigned int quintupletIndex = ranges.quintupletModuleIndices()[idx] + jdx; + if (quintuplets.isDup()[quintupletIndex] or quintuplets.partOfPT5()[quintupletIndex]) + continue; + if (!(quintuplets.tightCutFlag()[quintupletIndex])) + continue; 
+ + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatespT5() - cands.nTrackCandidatespT3() >= + n_max_nonpixel_track_candidates) // pT5 and pT3 TCs have been added, but not pLS TCs + { +#ifdef WARNINGS + printf("Track Candidate excess alert! Type = T5"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatesT5(), 1u, alpaka::hierarchy::Threads{}); + addTrackCandidateToMemory(cands, + LSTObjType::T5, + quintupletIndex, + quintupletIndex, + quintuplets.logicalLayers()[quintupletIndex].data(), + quintuplets.lowerModuleIndices()[quintupletIndex].data(), + quintuplets.hitIndices()[quintupletIndex].data(), + -1 /*no pixel seed index for T5s*/, + quintuplets.regressionG()[quintupletIndex], + quintuplets.regressionF()[quintupletIndex], + quintuplets.regressionRadius()[quintupletIndex], + trackCandidateIdx, + quintupletIndex); + } + } + } + } + }; + + struct AddpLSasTrackCandidate { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + TrackCandidates cands, + SegmentsOccupancyConst segmentsOccupancy, + SegmentsPixelConst segmentsPixel, + bool tc_pls_triplets) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + unsigned int nPixels = segmentsOccupancy.nSegments()[nLowerModules]; + for (unsigned int pixelArrayIndex = globalThreadIdx[2]; pixelArrayIndex < nPixels; + pixelArrayIndex += gridThreadExtent[2]) { + if ((tc_pls_triplets ? 
0 : !segmentsPixel.isQuad()[pixelArrayIndex]) || + (segmentsPixel.isDup()[pixelArrayIndex])) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx - cands.nTrackCandidatesT5() >= + n_max_pixel_track_candidates) // T5 TCs have already been added + { +#ifdef WARNINGS + printf("Track Candidate excess alert! Type = pLS"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatespLS(), 1u, alpaka::hierarchy::Threads{}); + addpLSTrackCandidateToMemory(cands, + pixelArrayIndex, + trackCandidateIdx, + segmentsPixel.pLSHitsIdxs()[pixelArrayIndex], + segmentsPixel.seedIdx()[pixelArrayIndex]); + } + } + } + }; + + struct AddpT5asTrackCandidate { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + uint16_t nLowerModules, + PixelQuintupletsConst pixelQuintuplets, + TrackCandidates cands, + SegmentsPixelConst segmentsPixel, + ObjectRangesConst ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + int nPixelQuintuplets = pixelQuintuplets.nPixelQuintuplets(); + unsigned int pLS_offset = ranges.segmentModuleIndices()[nLowerModules]; + for (int pixelQuintupletIndex = globalThreadIdx[0]; pixelQuintupletIndex < nPixelQuintuplets; + pixelQuintupletIndex += gridThreadExtent[0]) { + if (pixelQuintuplets.isDup()[pixelQuintupletIndex]) + continue; + + unsigned int trackCandidateIdx = + alpaka::atomicAdd(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + if (trackCandidateIdx >= n_max_pixel_track_candidates) // No other TCs have been added yet + { +#ifdef WARNINGS + printf("Track Candidate excess alert! 
Type = pT5"); +#endif + alpaka::atomicSub(acc, &cands.nTrackCandidates(), 1u, alpaka::hierarchy::Threads{}); + break; + + } else { + alpaka::atomicAdd(acc, &cands.nTrackCandidatespT5(), 1u, alpaka::hierarchy::Threads{}); + + float radius = 0.5f * (__H2F(pixelQuintuplets.pixelRadius()[pixelQuintupletIndex]) + + __H2F(pixelQuintuplets.quintupletRadius()[pixelQuintupletIndex])); + unsigned int pT5PixelIndex = pixelQuintuplets.pixelSegmentIndices()[pixelQuintupletIndex]; + addTrackCandidateToMemory(cands, + LSTObjType::pT5, + pT5PixelIndex, + pixelQuintuplets.quintupletIndices()[pixelQuintupletIndex], + pixelQuintuplets.logicalLayers()[pixelQuintupletIndex].data(), + pixelQuintuplets.lowerModuleIndices()[pixelQuintupletIndex].data(), + pixelQuintuplets.hitIndices()[pixelQuintupletIndex].data(), + segmentsPixel.seedIdx()[pT5PixelIndex - pLS_offset], + __H2F(pixelQuintuplets.centerX()[pixelQuintupletIndex]), + __H2F(pixelQuintuplets.centerY()[pixelQuintupletIndex]), + radius, + trackCandidateIdx, + pixelQuintupletIndex); + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst + +ASSERT_DEVICE_MATCHES_HOST_COLLECTION(lst::TrackCandidatesDeviceCollection, lst::TrackCandidatesHostCollection); + +#endif diff --git a/RecoTracker/LSTCore/src/alpaka/Triplet.h b/RecoTracker/LSTCore/src/alpaka/Triplet.h new file mode 100644 index 0000000000000..9192edbd9a186 --- /dev/null +++ b/RecoTracker/LSTCore/src/alpaka/Triplet.h @@ -0,0 +1,895 @@ +#ifndef RecoTracker_LSTCore_src_alpaka_Triplet_h +#define RecoTracker_LSTCore_src_alpaka_Triplet_h + +#include "HeterogeneousCore/AlpakaInterface/interface/workdivision.h" + +#include "RecoTracker/LSTCore/interface/alpaka/Common.h" +#include "RecoTracker/LSTCore/interface/ModulesSoA.h" +#include "RecoTracker/LSTCore/interface/ObjectRangesSoA.h" +#include "RecoTracker/LSTCore/interface/TripletsSoA.h" + +#include "Segment.h" +#include "MiniDoublet.h" +#include "Hit.h" + +namespace ALPAKA_ACCELERATOR_NAMESPACE::lst { + + ALPAKA_FN_ACC 
ALPAKA_FN_INLINE void addTripletToMemory(ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + Triplets& triplets, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG + float zOut, + float rtOut, +#endif + float betaIn, + float betaInCut, + float circleRadius, + float circleCenterX, + float circleCenterY, + unsigned int tripletIndex) { + triplets.segmentIndices()[tripletIndex][0] = innerSegmentIndex; + triplets.segmentIndices()[tripletIndex][1] = outerSegmentIndex; + triplets.lowerModuleIndices()[tripletIndex][0] = innerInnerLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][1] = middleLowerModuleIndex; + triplets.lowerModuleIndices()[tripletIndex][2] = outerOuterLowerModuleIndex; + + triplets.betaIn()[tripletIndex] = __F2H(betaIn); + triplets.radius()[tripletIndex] = circleRadius; + triplets.centerX()[tripletIndex] = circleCenterX; + triplets.centerY()[tripletIndex] = circleCenterY; + triplets.logicalLayers()[tripletIndex][0] = + modules.layers()[innerInnerLowerModuleIndex] + (modules.subdets()[innerInnerLowerModuleIndex] == 4) * 6; + triplets.logicalLayers()[tripletIndex][1] = + modules.layers()[middleLowerModuleIndex] + (modules.subdets()[middleLowerModuleIndex] == 4) * 6; + triplets.logicalLayers()[tripletIndex][2] = + modules.layers()[outerOuterLowerModuleIndex] + (modules.subdets()[outerOuterLowerModuleIndex] == 4) * 6; + //get the hits + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[innerSegmentIndex][1]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + + triplets.hitIndices()[tripletIndex][0] = mds.anchorHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][1] = mds.outerHitIndices()[firstMDIndex]; + triplets.hitIndices()[tripletIndex][2] = 
mds.anchorHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][3] = mds.outerHitIndices()[secondMDIndex]; + triplets.hitIndices()[tripletIndex][4] = mds.anchorHitIndices()[thirdMDIndex]; + triplets.hitIndices()[tripletIndex][5] = mds.outerHitIndices()[thirdMDIndex]; +#ifdef CUT_VALUE_DEBUG + triplets.zOut()[tripletIndex] = zOut; + triplets.rtOut()[tripletIndex] = rtOut; + triplets.betaInCut()[tripletIndex] = betaInCut; +#endif + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passRZConstraint(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex) { + //get the rt and z + const float& r1 = mds.anchorRt()[firstMDIndex]; + const float& r2 = mds.anchorRt()[secondMDIndex]; + const float& r3 = mds.anchorRt()[thirdMDIndex]; + + const float& z1 = mds.anchorZ()[firstMDIndex]; + const float& z2 = mds.anchorZ()[secondMDIndex]; + const float& z3 = mds.anchorZ()[thirdMDIndex]; + + // Using lst_layer numbering convention defined in ModuleMethods.h + const int layer1 = modules.lstLayers()[innerInnerLowerModuleIndex]; + const int layer2 = modules.lstLayers()[middleLowerModuleIndex]; + const int layer3 = modules.lstLayers()[outerOuterLowerModuleIndex]; + + const float residual = z2 - ((z3 - z1) / (r3 - r1) * (r2 - r1) + z1); + + if (layer1 == 12 and layer2 == 13 and layer3 == 14) { + return false; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 3) { + return alpaka::math::abs(acc, residual) < 0.53f; + } else if (layer1 == 1 and layer2 == 2 and layer3 == 7) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 13 and layer2 == 14 and layer3 == 15) { + return false; + } else if (layer1 == 14 and layer2 == 15 and layer3 == 16) { + return false; + } else if (layer1 == 1 and layer2 == 7 and layer3 == 8) { + return 
alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 4) { + return alpaka::math::abs(acc, residual) < 1.21f; + } else if (layer1 == 2 and layer2 == 3 and layer3 == 7) { + return alpaka::math::abs(acc, residual) < 1.f; + } else if (layer1 == 2 and layer2 == 7 and layer3 == 8) { + return alpaka::math::abs(acc, residual) < 1.f; + } else if (layer1 == 3 and layer2 == 4 and layer3 == 5) { + return alpaka::math::abs(acc, residual) < 2.7f; + } else if (layer1 == 4 and layer2 == 5 and layer3 == 6) { + return alpaka::math::abs(acc, residual) < 3.06f; + } else if (layer1 == 7 and layer2 == 8 and layer3 == 9) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 8 and layer2 == 9 and layer3 == 10) { + return alpaka::math::abs(acc, residual) < 1; + } else if (layer1 == 9 and layer2 == 10 and layer3 == 11) { + return alpaka::math::abs(acc, residual) < 1; + } else { + return alpaka::math::abs(acc, residual) < 5; + } + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBB(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + unsigned int innerSegmentIndex, + float& betaIn, + float& betaInCut) { + bool isPSIn = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPSOut = (modules.moduleType()[outerOuterLowerModuleIndex] == PS); + + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; + + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeVOut = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float 
rtRatio_OutIn = rtOut / rtIn; // Outer segment beginning rt divided by inner segment beginning rt; + float dzDrtScale = alpaka::math::tan(acc, alpha1GeVOut) / alpha1GeVOut; // The track can bend in r-z plane slightly + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); + + const float zHi = + zIn + (zIn + kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn < 0.f ? 1.f : dzDrtScale) + (zpitchIn + zpitchOut); + const float zLo = zIn + (zIn - kDeltaZLum) * (rtRatio_OutIn - 1.f) * (zIn > 0.f ? 1.f : dzDrtScale) - + (zpitchIn + zpitchOut); //slope-correction only on outer end + + //Cut 1 - z compatibility + if ((zOut < zLo) || (zOut > zHi)) + return false; + + float drt_OutIn = (rtOut - rtIn); + + float r3In = alpaka::math::sqrt(acc, zIn * zIn + rtIn * rtIn); + float drt_InSeg = rtMid - rtIn; + float dz_InSeg = zMid - zIn; + float dr3_InSeg = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + float coshEta = dr3_InSeg / drt_InSeg; + float dzErr = (zpitchIn + zpitchOut) * (zpitchIn + zpitchOut) * 2.f; + + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f) * (r3In / rtIn); + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + dzErr += muls2 * drt_OutIn * drt_OutIn / 3.f * coshEta * coshEta; + dzErr = alpaka::math::sqrt(acc, dzErr); + + // Constructing upper and lower bound + const float dzMean = dz_InSeg / drt_InSeg * drt_OutIn; + const float zWindow = dzErr / drt_InSeg * drt_OutIn + + (zpitchIn + zpitchOut); //FIXME for ptCut lower than ~0.8 need to add curv path correction + const float zLoPointed = zIn + dzMean * (zIn > 0.f ? 1.f : dzDrtScale) - zWindow; + const float zHiPointed = zIn + dzMean * (zIn < 0.f ? 
1.f : dzDrtScale) + zWindow; + + // Constructing upper and lower bound + + // Cut #2: Pointed Z (Inner segment two MD points to outer segment inner MD) + if ((zOut < zLoPointed) || (zOut > zHiPointed)) + return false; + + // raw betaIn value without any correction, based on the mini-doublet hit positions + float alpha_InLo = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + betaIn = alpha_InLo - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + //beta computation + float drt_tl_axis = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + + //innerOuterAnchor - innerInnerAnchor + const float rt_InSeg = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + betaInCut = + alpaka::math::asin(acc, alpaka::math::min(acc, (-rt_InSeg + drt_tl_axis) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / drt_InSeg); + + //Cut #3: first beta cut + return alpaka::math::abs(acc, betaIn) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintBBE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + uint16_t innerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + bool isPSIn = (modules.moduleType()[innerInnerLowerModuleIndex] == PS); + bool isPSOut = 
(modules.moduleType()[outerOuterLowerModuleIndex] == PS); + + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; + + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_OutLo = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_OutLo) / alpha1GeV_OutLo; // The track can bend in r-z plane slightly + float zpitchIn = (isPSIn ? kPixelPSZpitch : kStrip2SZpitch); + float zpitchOut = (isPSOut ? kPixelPSZpitch : kStrip2SZpitch); + float zGeom = zpitchIn + zpitchOut; + + // Cut #0: Preliminary (Only here in endcap case) + if (zIn * zOut <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgInnerMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; + float rtGeom1 = isOutSgInnerMDPS ? 
kPixelPSZpitch : kStrip2SZpitch; + float zGeom1 = alpaka::math::copysign(acc, zGeom, zIn); + float rtLo = rtIn * (1.f + (zOut - zIn - zGeom1) / (zIn + zGeom1 + dLum) / dzDrtScale) - + rtGeom1; //slope correction only on the lower end + + //Cut #1: rt condition + float zInForHi = zIn - zGeom1 - dLum; + if (zInForHi * zIn < 0) { + zInForHi = alpaka::math::copysign(acc, 0.1f, zIn); + } + float rtHi = rtIn * (1.f + (zOut - zIn + zGeom1) / zInForHi) + rtGeom1; + + //Cut #2: rt condition + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + float rIn = alpaka::math::sqrt(acc, zIn * zIn + rtIn * rtIn); + + const float drtSDIn = rtMid - rtIn; + const float dzSDIn = zMid - zIn; + const float dr3SDIn = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + const float coshEta = dr3SDIn / drtSDIn; //direction estimate + const float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); + const float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + const float zGeom1_another = kPixelPSZpitch; + const float kZ = (zOut - zIn) / dzSDIn; + float drtErr = + zGeom1_another * zGeom1_another * drtSDIn * drtSDIn / dzSDIn / dzSDIn * (1.f - 2.f * kZ + 2.f * kZ * kZ); + const float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2 * (rtOut - rtIn) / 50.f) * (rIn / rtIn); + const float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + drtErr += muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta; + drtErr = alpaka::math::sqrt(acc, drtErr); + + //Cut #3: rt-z pointed + + if ((kZ < 0) || (rtOut < rtLo) || (rtOut > rtHi)) + return false; + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, 
tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float betaInRHmin = betaIn; + float betaInRHmax = betaIn; + + float swapTemp; + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #4: first beta cut + return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraintEEE(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + float rtIn = mds.anchorRt()[firstMDIndex]; + float rtMid = mds.anchorRt()[secondMDIndex]; + rtOut = mds.anchorRt()[thirdMDIndex]; + + float zIn = mds.anchorZ()[firstMDIndex]; + float zMid = mds.anchorZ()[secondMDIndex]; + zOut = mds.anchorZ()[thirdMDIndex]; + + float alpha1GeV_Out = alpaka::math::asin(acc, alpaka::math::min(acc, rtOut * k2Rinv1GeVf / ptCut, kSinAlphaMax)); + + float dzDrtScale = + alpaka::math::tan(acc, alpha1GeV_Out) / alpha1GeV_Out; // The track can bend in r-z plane slightly + + // Cut #0: 
Preliminary (Only here in endcap case) + if (zIn * zOut <= 0) + return false; + + float dLum = alpaka::math::copysign(acc, kDeltaZLum, zIn); + bool isOutSgOuterMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; + bool isInSgInnerMDPS = modules.moduleType()[innerInnerLowerModuleIndex] == PS; + + float rtGeom = (isInSgInnerMDPS and isOutSgOuterMDPS) ? 2.f * kPixelPSZpitch + : (isInSgInnerMDPS or isOutSgOuterMDPS) ? kPixelPSZpitch + kStrip2SZpitch + : 2.f * kStrip2SZpitch; + + float dz = zOut - zIn; + const float rtLo = rtIn * (1.f + dz / (zIn + dLum) / dzDrtScale) - rtGeom; //slope correction only on the lower end + const float rtHi = rtIn * (1.f + dz / (zIn - dLum)) + rtGeom; + + //Cut #1: rt condition + if ((rtOut < rtLo) || (rtOut > rtHi)) + return false; + + bool isInSgOuterMDPS = modules.moduleType()[outerOuterLowerModuleIndex] == PS; + + float drtSDIn = rtMid - rtIn; + float dzSDIn = zMid - zIn; + float dr3SDIn = + alpaka::math::sqrt(acc, rtMid * rtMid + zMid * zMid) - alpaka::math::sqrt(acc, rtIn * rtIn + zIn * zIn); + + float coshEta = dr3SDIn / drtSDIn; //direction estimate + float dzOutInAbs = alpaka::math::abs(acc, zOut - zIn); + float multDzDr = dzOutInAbs * coshEta / (coshEta * coshEta - 1.f); + + float kZ = (zOut - zIn) / dzSDIn; + float thetaMuls2 = (kMulsInGeV * kMulsInGeV) * (0.1f + 0.2f * (rtOut - rtIn) / 50.f); + + float muls2 = thetaMuls2 * 9.f / (ptCut * ptCut) * 16.f; + + float drtErr = + alpaka::math::sqrt(acc, + kPixelPSZpitch * kPixelPSZpitch * 2.f / (dzSDIn * dzSDIn) * (dzOutInAbs * dzOutInAbs) + + muls2 * multDzDr * multDzDr / 3.f * coshEta * coshEta); + + float drtMean = drtSDIn * dzOutInAbs / alpaka::math::abs(acc, dzSDIn); + float rtWindow = drtErr + rtGeom; + float rtLo_point = rtIn + drtMean / dzDrtScale - rtWindow; + float rtHi_point = rtIn + drtMean + rtWindow; + + // Cut #3: rt-z pointed + // https://github.com/slava77/cms-tkph2-ntuple/blob/superDoubletLinked-91X-noMock/doubletAnalysis.C#L3765 + + if (isInSgInnerMDPS and 
isInSgOuterMDPS) // If both PS then we can point + { + if ((kZ < 0) || (rtOut < rtLo_point) || (rtOut > rtHi_point)) + return false; + } + + float rt_InLo = mds.anchorRt()[firstMDIndex]; + float rt_InOut = mds.anchorRt()[secondMDIndex]; + float sdIn_alpha = __H2F(segments.dPhiChanges()[innerSegmentIndex]); + + float tl_axis_x = mds.anchorX()[thirdMDIndex] - mds.anchorX()[firstMDIndex]; + float tl_axis_y = mds.anchorY()[thirdMDIndex] - mds.anchorY()[firstMDIndex]; + + betaIn = sdIn_alpha - phi_mpi_pi(acc, phi(acc, tl_axis_x, tl_axis_y) - mds.anchorPhi()[firstMDIndex]); + + float sdIn_alphaRHmin = __H2F(segments.dPhiChangeMins()[innerSegmentIndex]); + float sdIn_alphaRHmax = __H2F(segments.dPhiChangeMaxs()[innerSegmentIndex]); + float betaInRHmin = betaIn + sdIn_alphaRHmin - sdIn_alpha; + float betaInRHmax = betaIn + sdIn_alphaRHmax - sdIn_alpha; + + float swapTemp; + + if (alpaka::math::abs(acc, betaInRHmin) > alpaka::math::abs(acc, betaInRHmax)) { + swapTemp = betaInRHmin; + betaInRHmin = betaInRHmax; + betaInRHmax = swapTemp; + } + float sdIn_dr = alpaka::math::sqrt(acc, + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) * + (mds.anchorX()[secondMDIndex] - mds.anchorX()[firstMDIndex]) + + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex]) * + (mds.anchorY()[secondMDIndex] - mds.anchorY()[firstMDIndex])); + float sdIn_d = rt_InOut - rt_InLo; + + float dr = alpaka::math::sqrt(acc, tl_axis_x * tl_axis_x + tl_axis_y * tl_axis_y); + betaInCut = alpaka::math::asin(acc, alpaka::math::min(acc, (-sdIn_dr + dr) * k2Rinv1GeVf / ptCut, kSinAlphaMax)) + + (0.02f / sdIn_d); + + //Cut #4: first beta cut + return alpaka::math::abs(acc, betaInRHmin) < betaInCut; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool passPointingConstraint(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int 
firstMDIndex, + unsigned int secondMDIndex, + unsigned int thirdMDIndex, + float& zOut, + float& rtOut, + uint16_t innerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& betaIn, + float& betaInCut) { + short innerInnerLowerModuleSubdet = modules.subdets()[innerInnerLowerModuleIndex]; + short middleLowerModuleSubdet = modules.subdets()[middleLowerModuleIndex]; + short outerOuterLowerModuleSubdet = modules.subdets()[outerOuterLowerModuleIndex]; + + if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and + outerOuterLowerModuleSubdet == Barrel) { + return passPointingConstraintBBB(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerSegmentIndex, + betaIn, + betaInCut); + } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Barrel and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintBBE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + } else if (innerInnerLowerModuleSubdet == Barrel and middleLowerModuleSubdet == Endcap and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintBBE(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + + } + + else if (innerInnerLowerModuleSubdet == Endcap and middleLowerModuleSubdet == Endcap and + outerOuterLowerModuleSubdet == Endcap) { + return passPointingConstraintEEE(acc, + modules, + 
mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut); + } + return false; // failsafe + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE float computeRadiusFromThreeAnchorHits( + TAcc const& acc, float x1, float y1, float x2, float y2, float x3, float y3, float& g, float& f) { + float radius = 0.f; + + //(g,f) -> center + //first anchor hit - (x1,y1), second anchor hit - (x2,y2), third anchor hit - (x3, y3) + + float denomInv = 1.0f / ((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3)); + + float xy1sqr = x1 * x1 + y1 * y1; + + float xy2sqr = x2 * x2 + y2 * y2; + + float xy3sqr = x3 * x3 + y3 * y3; + + g = 0.5f * ((y3 - y2) * xy1sqr + (y1 - y3) * xy2sqr + (y2 - y1) * xy3sqr) * denomInv; + + f = 0.5f * ((x2 - x3) * xy1sqr + (x3 - x1) * xy2sqr + (x1 - x2) * xy3sqr) * denomInv; + + float c = ((x2 * y3 - x3 * y2) * xy1sqr + (x3 * y1 - x1 * y3) * xy2sqr + (x1 * y2 - x2 * y1) * xy3sqr) * denomInv; + + if (((y1 - y3) * (x2 - x3) - (x1 - x3) * (y2 - y3) == 0) || (g * g + f * f - c < 0)) { +#ifdef WARNINGS + printf("three collinear points or FATAL! r^2 < 0!\n"); +#endif + radius = -1.f; + } else + radius = alpaka::math::sqrt(acc, g * g + f * f - c); + + return radius; + } + + template + ALPAKA_FN_ACC ALPAKA_FN_INLINE bool runTripletConstraintsAndAlgo(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + uint16_t innerInnerLowerModuleIndex, + uint16_t middleLowerModuleIndex, + uint16_t outerOuterLowerModuleIndex, + unsigned int innerSegmentIndex, + unsigned int outerSegmentIndex, + float& zOut, + float& rtOut, + float& betaIn, + float& betaInCut, + float& circleRadius, + float& circleCenterX, + float& circleCenterY) { + //this cut reduces the number of candidates by a factor of 4, i.e., 3 out of 4 warps can end right here! 
+ if (segments.mdIndices()[innerSegmentIndex][1] != segments.mdIndices()[outerSegmentIndex][0]) + return false; + + unsigned int firstMDIndex = segments.mdIndices()[innerSegmentIndex][0]; + unsigned int secondMDIndex = segments.mdIndices()[outerSegmentIndex][0]; + unsigned int thirdMDIndex = segments.mdIndices()[outerSegmentIndex][1]; + + if (not passRZConstraint(acc, + modules, + mds, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex)) + return false; + if (not passPointingConstraint(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + firstMDIndex, + secondMDIndex, + thirdMDIndex, + zOut, + rtOut, + middleLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + betaIn, + betaInCut)) + return false; + + float x1 = mds.anchorX()[firstMDIndex]; + float x2 = mds.anchorX()[secondMDIndex]; + float x3 = mds.anchorX()[thirdMDIndex]; + float y1 = mds.anchorY()[firstMDIndex]; + float y2 = mds.anchorY()[secondMDIndex]; + float y3 = mds.anchorY()[thirdMDIndex]; + + circleRadius = computeRadiusFromThreeAnchorHits(acc, x1, y1, x2, y2, x3, y3, circleCenterX, circleCenterY); + return true; + } + + struct CreateTriplets { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + MiniDoubletsConst mds, + SegmentsConst segments, + SegmentsOccupancyConst segmentsOccupancy, + Triplets triplets, + TripletsOccupancy tripletsOccupancy, + ObjectRangesConst ranges, + uint16_t* index_gpu, + uint16_t nonZeroModules) const { + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t innerLowerModuleArrayIdx = globalThreadIdx[0]; innerLowerModuleArrayIdx < nonZeroModules; + innerLowerModuleArrayIdx += gridThreadExtent[0]) { + uint16_t innerInnerLowerModuleIndex = index_gpu[innerLowerModuleArrayIdx]; + if (innerInnerLowerModuleIndex >= 
modules.nLowerModules()) + continue; + + uint16_t nConnectedModules = modules.nConnectedModules()[innerInnerLowerModuleIndex]; + if (nConnectedModules == 0) + continue; + + unsigned int nInnerSegments = segmentsOccupancy.nSegments()[innerInnerLowerModuleIndex]; + for (unsigned int innerSegmentArrayIndex = globalThreadIdx[1]; innerSegmentArrayIndex < nInnerSegments; + innerSegmentArrayIndex += gridThreadExtent[1]) { + unsigned int innerSegmentIndex = + ranges.segmentRanges()[innerInnerLowerModuleIndex][0] + innerSegmentArrayIndex; + + // middle lower module - outer lower module of inner segment + uint16_t middleLowerModuleIndex = segments.outerLowerModuleIndices()[innerSegmentIndex]; + + unsigned int nOuterSegments = segmentsOccupancy.nSegments()[middleLowerModuleIndex]; + for (unsigned int outerSegmentArrayIndex = globalThreadIdx[2]; outerSegmentArrayIndex < nOuterSegments; + outerSegmentArrayIndex += gridThreadExtent[2]) { + unsigned int outerSegmentIndex = ranges.segmentRanges()[middleLowerModuleIndex][0] + outerSegmentArrayIndex; + + uint16_t outerOuterLowerModuleIndex = segments.outerLowerModuleIndices()[outerSegmentIndex]; + + float zOut, rtOut, betaIn, betaInCut, circleRadius, circleCenterX, circleCenterY; + + bool success = runTripletConstraintsAndAlgo(acc, + modules, + mds, + segments, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, + innerSegmentIndex, + outerSegmentIndex, + zOut, + rtOut, + betaIn, + betaInCut, + circleRadius, + circleCenterX, + circleCenterY); + + if (success) { + unsigned int totOccupancyTriplets = + alpaka::atomicAdd(acc, + &tripletsOccupancy.totOccupancyTriplets()[innerInnerLowerModuleIndex], + 1u, + alpaka::hierarchy::Threads{}); + if (static_cast(totOccupancyTriplets) >= + ranges.tripletModuleOccupancy()[innerInnerLowerModuleIndex]) { +#ifdef WARNINGS + printf("Triplet excess alert! 
Module index = %d\n", innerInnerLowerModuleIndex); +#endif + } else { + unsigned int tripletModuleIndex = alpaka::atomicAdd( + acc, &tripletsOccupancy.nTriplets()[innerInnerLowerModuleIndex], 1u, alpaka::hierarchy::Threads{}); + unsigned int tripletIndex = + ranges.tripletModuleIndices()[innerInnerLowerModuleIndex] + tripletModuleIndex; + addTripletToMemory(modules, + mds, + segments, + triplets, + innerSegmentIndex, + outerSegmentIndex, + innerInnerLowerModuleIndex, + middleLowerModuleIndex, + outerOuterLowerModuleIndex, +#ifdef CUT_VALUE_DEBUG + zOut, + rtOut, +#endif + betaIn, + betaInCut, + circleRadius, + circleCenterX, + circleCenterY, + tripletIndex); + } + } + } + } + } + } + }; + + struct CreateTripletArrayRanges { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + ObjectRanges ranges, + SegmentsOccupancyConst segmentsOccupancy) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + // Initialize variables in shared memory and set to 0 + int& nTotalTriplets = alpaka::declareSharedVar(acc); + if (cms::alpakatools::once_per_block(acc)) { + nTotalTriplets = 0; + } + alpaka::syncBlockThreads(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (segmentsOccupancy.nSegments()[i] == 0) { + ranges.tripletModuleIndices()[i] = nTotalTriplets; + ranges.tripletModuleOccupancy()[i] = 0; + continue; + } + + short module_rings = modules.rings()[i]; + short module_layers = modules.layers()[i]; + short module_subdets = modules.subdets()[i]; + float module_eta = alpaka::math::abs(acc, modules.eta()[i]); + + int category_number; + if (module_layers <= 3 && module_subdets == 5) + category_number = 0; + else if (module_layers >= 4 && module_subdets == 5) + 
category_number = 1; + else if (module_layers <= 2 && module_subdets == 4 && module_rings >= 11) + category_number = 2; + else if (module_layers >= 3 && module_subdets == 4 && module_rings >= 8) + category_number = 2; + else if (module_layers <= 2 && module_subdets == 4 && module_rings <= 10) + category_number = 3; + else if (module_layers >= 3 && module_subdets == 4 && module_rings <= 7) + category_number = 3; + else + category_number = -1; + + int eta_number; + if (module_eta < 0.75f) + eta_number = 0; + else if (module_eta < 1.5f) + eta_number = 1; + else if (module_eta < 2.25f) + eta_number = 2; + else if (module_eta < 3.0f) + eta_number = 3; + else + eta_number = -1; + + int occupancy; + if (category_number == 0 && eta_number == 0) + occupancy = 543; + else if (category_number == 0 && eta_number == 1) + occupancy = 235; + else if (category_number == 0 && eta_number == 2) + occupancy = 88; + else if (category_number == 0 && eta_number == 3) + occupancy = 46; + else if (category_number == 1 && eta_number == 0) + occupancy = 755; + else if (category_number == 1 && eta_number == 1) + occupancy = 347; + else if (category_number == 2 && eta_number == 1) + occupancy = 0; + else if (category_number == 2 && eta_number == 2) + occupancy = 0; + else if (category_number == 3 && eta_number == 1) + occupancy = 38; + else if (category_number == 3 && eta_number == 2) + occupancy = 46; + else if (category_number == 3 && eta_number == 3) + occupancy = 39; + else { + occupancy = 0; +#ifdef WARNINGS + printf("Unhandled case in createTripletArrayRanges! 
Module index = %i\n", i); +#endif + } + + ranges.tripletModuleOccupancy()[i] = occupancy; + unsigned int nTotT = alpaka::atomicAdd(acc, &nTotalTriplets, occupancy, alpaka::hierarchy::Threads{}); + ranges.tripletModuleIndices()[i] = nTotT; + } + + // Wait for all threads to finish before reporting final values + alpaka::syncBlockThreads(acc); + if (cms::alpakatools::once_per_block(acc)) { + ranges.nTotalTrips() = nTotalTriplets; + } + } + }; + + struct AddTripletRangesToEventExplicit { + template + ALPAKA_FN_ACC void operator()(TAcc const& acc, + ModulesConst modules, + TripletsOccupancyConst tripletsOccupancy, + ObjectRanges ranges) const { + // implementation is 1D with a single block + static_assert(std::is_same_v, "Should be Acc1D"); + ALPAKA_ASSERT_ACC((alpaka::getWorkDiv(acc)[0] == 1)); + + auto const globalThreadIdx = alpaka::getIdx(acc); + auto const gridThreadExtent = alpaka::getWorkDiv(acc); + + for (uint16_t i = globalThreadIdx[0]; i < modules.nLowerModules(); i += gridThreadExtent[0]) { + if (tripletsOccupancy.nTriplets()[i] == 0) { + ranges.tripletRanges()[i][0] = -1; + ranges.tripletRanges()[i][1] = -1; + } else { + ranges.tripletRanges()[i][0] = ranges.tripletModuleIndices()[i]; + ranges.tripletRanges()[i][1] = ranges.tripletModuleIndices()[i] + tripletsOccupancy.nTriplets()[i] - 1; + } + } + } + }; +} // namespace ALPAKA_ACCELERATOR_NAMESPACE::lst +#endif diff --git a/RecoTracker/LSTCore/standalone/.gitignore b/RecoTracker/LSTCore/standalone/.gitignore new file mode 100644 index 0000000000000..29e86cb6b932a --- /dev/null +++ b/RecoTracker/LSTCore/standalone/.gitignore @@ -0,0 +1,43 @@ +mtv +*~ +results/ +*.o +debug.root +*.pdf +plots/ +plots_*/ +scripts/moduleconnection*.txt +*.root +.make.log* +bin/doAnalysis +bin/lst +bin/lst_cuda +bin/lst_cpu +bin/lst_rocm +code/rooutil/librooutil.so +code/rooutil/rooutil.so +.gitversion.txt +efficiency/doAnalysis +.jobs.txt +efficiency/results* +efficiencies/ +efficiency/bin/createEffNumDenPlots 
+efficiency/bin/createPerfNumDenHists +efficiency/compare +efficiency/summary +*.txt +*.pyc +output* +movetoweb.sh +*.nvvp +*.ipynb +*.log +*.nsys-rep +*.sqlite +*.ncu-rep +*.swp + +*.nfs* +.directoryhash +performance/ +notebooks/ diff --git a/RecoTracker/LSTCore/standalone/LST/.gitignore b/RecoTracker/LSTCore/standalone/LST/.gitignore new file mode 100644 index 0000000000000..32429d8358fb5 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/LST/.gitignore @@ -0,0 +1,3 @@ +*.o +*.so +.vscode/ diff --git a/RecoTracker/LSTCore/standalone/LST/Makefile b/RecoTracker/LSTCore/standalone/LST/Makefile new file mode 100644 index 0000000000000..ee6f82ecccde1 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/LST/Makefile @@ -0,0 +1,151 @@ +# +# stuff to make +# + +CCSOURCES=$(wildcard ../../src/*.cc) +ALPAKACCSOURCES=$(wildcard ../../src/alpaka/*.dev.cc) +CCOBJECTS_CPU=$(patsubst ../../src/alpaka/%.dev.cc, %_cpu.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_cpu.o, $(CCSOURCES)) +CCOBJECTS_CUDA=$(patsubst ../../src/alpaka/%.dev.cc, %_cuda.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_cuda.o, $(CCSOURCES)) +CCOBJECTS_ROCM=$(patsubst ../../src/alpaka/%.dev.cc, %_rocm.o, $(ALPAKACCSOURCES)) $(patsubst ../../src/%.cc, %_rocm.o, $(CCSOURCES)) + +LSTSOURCES=../../src/alpaka/LST.cc +LSTOBJECTS_CPU=$(patsubst ../../src/alpaka/%.cc, %_cpu.o, $(LSTSOURCES)) +LSTOBJECTS_CUDA=$(patsubst ../../src/alpaka/%.cc, %_cuda.o, $(LSTSOURCES)) +LSTOBJECTS_ROCM=$(patsubst ../../src/alpaka/%.cc, %_rocm.o, $(LSTSOURCES)) + +# Default to CPU and CUDA backends +ifeq ($(BACKEND),) + LIB_CPU=liblst_cpu.so + LIB_CUDA=liblst_cuda.so +endif + +ifneq ($(findstring cpu,$(BACKEND)),) + LIB_CPU=liblst_cpu.so +endif +ifneq ($(findstring cuda,$(BACKEND)),) + LIB_CUDA=liblst_cuda.so +endif +ifneq ($(findstring rocm,$(BACKEND)),) + LIB_ROCM=liblst_rocm.so +endif +ifneq ($(findstring all,$(BACKEND)),) + LIB_CPU=liblst_cpu.so + LIB_CUDA=liblst_cuda.so + LIB_ROCM=liblst_rocm.so +endif + +LIBS=$(LIB_CPU) 
$(LIB_CUDA) $(LIB_ROCM) + +# +# flags to keep track of +# + +# Different architectures to optimize for +GENCODE_CUDA := -gencode arch=compute_70,code=[sm_70,compute_70] -gencode arch=compute_89,code=[sm_89,compute_89] + +CXX = g++ +CXXFLAGS_CPU = -march=native -mtune=native -Ofast -fno-reciprocal-math -fopenmp-simd -g -Wall -Woverloaded-virtual -fPIC -fopenmp -I.. +CXXFLAGS_CUDA = -O3 -g --compiler-options -Wall --compiler-options -Woverloaded-virtual --compiler-options -fPIC --compiler-options -fopenmp -dc -lineinfo --ptxas-options=-v --cudart shared $(GENCODE_CUDA) --use_fast_math --default-stream per-thread -I.. +CXXFLAGS_ROCM = -O3 -g -Wall -Woverloaded-virtual -fPIC -I${ROCM_ROOT}/include -I.. +CMSSWINCLUDE := -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src +ifdef CMSSW_RELEASE_BASE +CMSSWINCLUDE := ${CMSSWINCLUDE} -I${CMSSW_RELEASE_BASE}/src +endif +ALPAKAINCLUDE = -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include -std=c++20 ${CMSSWINCLUDE} +ALPAKASERIAL = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKACUDA = -DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_ACC_GPU_CUDA_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 --expt-relaxed-constexpr +ALPAKAROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_ACC_GPU_HIP_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 -DGNU_GCC -D_GNU_SOURCE --target=x86_64-redhat-linux-gnu --gcc-toolchain=$(patsubst %/bin/gcc,%,$(shell command -v gcc)) +ROOTINCLUDE = -I$(ROOT_ROOT)/include +ROOTCFLAGS = -pthread -m64 $(ROOTINCLUDE) +PTCUTFLAG = +LSTWARNINGSFLAG = +CMSSW_WERRORS_CPU = -Werror=pointer-arith -Werror=overlength-strings -Werror=return-type -Werror=missing-braces -Werror=unused-value -Werror=unused-label \ + -Werror=address -Werror=format -Werror=sign-compare -Werror=write-strings -Werror=delete-non-virtual-dtor -Werror=strict-aliasing -Werror=narrowing \ + -Werror=unused-but-set-variable -Werror=reorder 
-Werror=unused-variable -Werror=conversion-null -Werror=return-local-addr -Wnon-virtual-dtor -Werror=switch \ + -Werror=main -Werror=overflow -Werror=format-contains-nul -Werror=type-limits -Wreturn-type -Wextra -Wpessimizing-move -Wclass-memaccess -Wunused \ + -Wparentheses -Wno-vla -Wno-non-template-friend -Wno-long-long -Wno-cast-function-type -Wno-unused-but-set-parameter -Wno-ignored-qualifiers \ + -Wno-unused-parameter -Wno-unused-local-typedefs -Wno-attributes +CMSSW_WERRORS_CUDA = $(patsubst %,-Xcompiler %,$(CMSSW_WERRORS_CPU)) +CMSSW_WERRORS_ROCM = $(CMSSW_WERRORS_CPU) +T5CUTFLAGS = $(T5DNNFLAG) $(T5RZCHI2FLAG) $(T5RPHICHI2FLAG) + +LD_CPU = g++ +SOFLAGS_CPU = -g -shared -fPIC +ALPAKABACKEND_CPU = $(ALPAKASERIAL) +COMPILE_CMD_CPU = $(LD_CPU) -c + +LD_CUDA = nvcc +SOFLAGS_CUDA = -g -shared --compiler-options -fPIC --cudart shared $(GENCODE_CUDA) +ALPAKABACKEND_CUDA = $(ALPAKACUDA) +COMPILE_CMD_CUDA = $(LD_CUDA) -x cu + +LD_ROCM = hipcc +SOFLAGS_ROCM = -g -shared -fPIC +ALPAKABACKEND_ROCM = $(ALPAKAROCM) +COMPILE_CMD_ROCM = $(LD_ROCM) -c + +CUTVALUEFLAG = +CUTVALUEFLAG_FLAGS = -DCUT_VALUE_DEBUG + +%_cpu.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< -o $@ + +%_cuda.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/alpaka/%.dev.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +%_cpu.o: ../../src/alpaka/%.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< 
-o $@ + +%_cuda.o: ../../src/alpaka/%.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/alpaka/%.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +%_cpu.o: ../../src/%.cc + $(COMPILE_CMD_CPU) $(CXXFLAGS_CPU) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CPU) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CPU) $< -o $@ + +%_cuda.o: ../../src/%.cc + $(COMPILE_CMD_CUDA) $(CXXFLAGS_CUDA) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_CUDA) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_CUDA) $< -o $@ + +%_rocm.o: ../../src/%.cc + $(COMPILE_CMD_ROCM) $(CXXFLAGS_ROCM) $(ROOTINCLUDE) $(CUTVALUEFLAG) $(LSTWARNINGSFLAG) $(CMSSW_WERRORS_ROCM) $(T5CUTFLAGS) $(PTCUTFLAG) $(ALPAKAINCLUDE) $(ALPAKABACKEND_ROCM) $< -o $@ + +$(LIB_CPU): $(CCOBJECTS_CPU) $(LSTOBJECTS_CPU) + $(LD_CPU) $(SOFLAGS_CPU) $^ -o $@ + +$(LIB_CUDA): $(CCOBJECTS_CUDA) $(LSTOBJECTS_CUDA) + $(LD_CUDA) $(SOFLAGS_CUDA) $^ -o $@ + +$(LIB_ROCM): $(CCOBJECTS_ROCM) $(LSTOBJECTS_ROCM) + $(LD_ROCM) $(SOFLAGS_ROCM) $^ -o $@ + +explicit: $(LIBS) + +explicit_cutvalue: CUTVALUEFLAG = $(CUTVALUEFLAG_FLAGS) +explicit_cutvalue: $(LIBS) + +clean: + rm -f *.opp + rm -f *.o + rm -f *.d + rm -f *.so + +.PHONY: clean explicit explicit_cutvalue format check check-fix + +format: + clang-format --style=file:../.clang-format -i *.cc *.h + +# Collect all the include paths from the compiler. +# The .../gcc/x86_64-redhat-linux-gnu/*/include path is excluded since .../gcc/x86_64-redhat-linux-gnu/*/include-fixed should be used instead. 
+TIDYINCLUDEFLAGS := $(shell g++ -E -x c++ - -v < /dev/null 2>&1 | awk '/#include <...>/,/^End of search/{if (/^ / && !/x86_64-redhat-linux-gnu\/[0-9.]+\/include$$/) print "-I"$$1}' | tr '\n' ' ') +TIDYFLAGS := --language=c++ $(CXXFLAGS_CPU) $(ALPAKAINCLUDE) $(ALPAKASERIAL) $(ROOTCFLAGS) $(DUPLICATED) $(TIDYINCLUDEFLAGS) + +check: + clang-tidy --config-file=../.clang-tidy *.cc *.h -- $(TIDYFLAGS) + +check-fix: + clang-tidy --config-file=../.clang-tidy --format-style=file:../.clang-format --fix --fix-errors --fix-notes *.cc *.h -- $(TIDYFLAGS) diff --git a/RecoTracker/LSTCore/standalone/Makefile b/RecoTracker/LSTCore/standalone/Makefile new file mode 100644 index 0000000000000..b98df31df1b5e --- /dev/null +++ b/RecoTracker/LSTCore/standalone/Makefile @@ -0,0 +1,78 @@ +# Simple makefile + +EXES := bin/lst_cpu bin/lst_cuda + +SOURCES=$(wildcard code/core/*.cc) +OBJECTS_CPU=$(SOURCES:.cc=_cpu.o) +OBJECTS_CUDA=$(SOURCES:.cc=_cuda.o) +OBJECTS_ROCM=$(SOURCES:.cc=_rocm.o) +OBJECTS=$(OBJECTS_CPU) $(OBJECTS_CUDA) $(OBJECTS_ROCM) + +CXX = g++ +CXXFLAGS = -g -O2 -Wall -fPIC -Woverloaded-virtual -Wno-unused-function -fno-var-tracking -std=c++20 +INCLUDEFLAGS= -ILST -I$(shell pwd) -Icode -Icode/core -I${ALPAKA_ROOT}/include -I/${BOOST_ROOT}/include $(shell rooutil-config --include) -I$(shell root-config --incdir) -I${TRACKLOOPERDIR}/../../../ -I${CMSSW_BASE}/src -I../interface/ -I../interface/alpaka/ -I../src/ -I../src/alpaka/ +ifdef CMSSW_RELEASE_BASE +INCLUDEFLAGS:= ${INCLUDEFLAGS} -I${CMSSW_RELEASE_BASE}/src +endif +LDFLAGS = -g -O2 $(LSTLIB) -L${TRACKLOOPERDIR}/LST $(shell rooutil-config --libs) $(shell root-config --libs) +LDFLAGS_CUDA= -L${CUDA_HOME}/lib64 -lcudart +LDFLAGS_ROCM= -L${ROCM_ROOT}/lib -lamdhip64 +ALPAKAFLAGS = -DALPAKA_DEBUG=0 +CUDAINCLUDE = -I${CUDA_HOME}/include +ROCMINCLUDE = -I${ROCM_ROOT}/include +ALPAKA_CPU = -DALPAKA_ACC_CPU_B_SEQ_T_SEQ_ENABLED -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_CUDA = 
-DALPAKA_ACC_GPU_CUDA_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +ALPAKA_ROCM = -DALPAKA_ACC_GPU_HIP_ENABLED -DALPAKA_HOST_ONLY -DALPAKA_DISABLE_VENDOR_RNG -D__HIP_PLATFORM_HCC__ -D__HIP_PLATFORM_AMD__ -DALPAKA_DEFAULT_HOST_MEMORY_ALIGNMENT=128 +EXTRAFLAGS = -ITMultiDrawTreePlayer -Wunused-variable -lTMVA -lEG -lGenVector -lXMLIO -lMLP -lTreePlayer -fopenmp +DOQUINTUPLET = +PTCUTFLAG = +CUTVALUEFLAG = +CUTVALUEFLAG_FLAGS = -DCUT_VALUE_DEBUG + +PRIMITIVEFLAG = +PRIMITIVEFLAG_FLAGS = -DPRIMITIVE_STUDY + +all: rooutil efficiency $(EXES) + +cutvalue: CUTVALUEFLAG = ${CUTVALUEFLAG_FLAGS} +cutvalue: rooutil efficiency $(EXES) + +primitive: PRIMITIVEFLAG = ${PRIMITIVEFLAG_FLAGS} +primitive: rooutil efficiency $(EXES) + +cutvalue_primitive: CUTVALUEFLAG = ${CUTVALUEFLAG_FLAGS} +cutvalue_primitive: PRIMITIVEFLAG = ${PRIMITIVEFLAG_FLAGS} +cutvalue_primitive: rooutil efficiency $(EXES) + + +bin/lst_cpu: LSTLIB=-llst_cpu +bin/lst_cpu: bin/lst_cpu.o $(OBJECTS_CPU) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CPU) -o $@ +bin/lst_cuda: LSTLIB=-llst_cuda +bin/lst_cuda: bin/lst_cuda.o $(OBJECTS_CUDA) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CUDA) $(LDFLAGS_CUDA) -o $@ +bin/lst_rocm: LSTLIB=-llst_rocm +bin/lst_rocm: bin/lst_rocm.o $(OBJECTS_ROCM) + $(CXX) $(LDFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $^ $(ROOTLIBS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_ROCM) $(LDFLAGS_ROCM) -o $@ + +%_cpu.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CPU) $< -c -o $@ +%_cuda.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) 
$(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_CUDA) $(CUDAINCLUDE) $< -c -o $@ +%_rocm.o: %.cc rooutil + $(CXX) $(CXXFLAGS) $(EXTRAFLAGS) $(INCLUDEFLAGS) $(ALPAKAFLAGS) $(PTCUTFLAG) $(CUTVALUEFLAG) $(PRIMITIVEFLAG) $(DOQUINTUPLET) $(ALPAKA_ROCM) $(ROCMINCLUDE) $< -c -o $@ + +rooutil: + $(MAKE) -C code/rooutil/ + +efficiency: rooutil + $(MAKE) -C efficiency/ + +clean: + rm -f $(OBJECTS) bin/*.o $(EXES) bin/lst + rm -f code/rooutil/*.so code/rooutil/*.o + rm -f bin/lst.o + rm -f LST/*.o + cd efficiency/ && make clean + +.PHONY: rooutil efficiency diff --git a/RecoTracker/LSTCore/standalone/README.md b/RecoTracker/LSTCore/standalone/README.md new file mode 100644 index 0000000000000..02fbef943f697 --- /dev/null +++ b/RecoTracker/LSTCore/standalone/README.md @@ -0,0 +1,291 @@ +# TrackLooper + + +## Quick Start + + +### Setting up LSTPerformanceWeb (only for lnx7188 and lnx4555) + +For lnx7188 and lnx4555 this needs to be done once + + cd /cdat/tem/${USER}/ + git clone git@github.com:SegmentLinking/LSTPerformanceWeb.git + +### Setting up container (only for lnx7188) + +For lnx7188 this needs to be done before compiling or running the code: + + singularity shell --nv --bind /mnt/data1:/data --bind /data2/segmentlinking/ --bind /opt --bind /nfs --bind /mnt --bind /usr/local/cuda/bin/ --bind /cvmfs /cvmfs/unpacked.cern.ch/registry.hub.docker.com/cmssw/el8:x86_64 + +### Setting up the code + + git clone git@github.com:SegmentLinking/TrackLooper.git + cd TrackLooper/ + # Source one of the commands below, depending on the site + source setup.sh # if on UCSD or Cornell + source setup_hpg.sh # if on Florida + +### Running the code + + sdl_make_tracklooper -mc + sdl_ -i PU200 -o LSTNtuple.root + createPerfNumDenHists -i LSTNtuple.root -o LSTNumDen.root + lst_plot_performance.py LSTNumDen.root -t "myTag" + # python3 efficiency/python/lst_plot_performance.py LSTNumDen.root -t "myTag" # if you are on cgpu-1 or Cornell + +The above can be even simplified + + sdl_run -f -mc -s PU200 -n 
-1 -t myTag + +The `-f` flag can be omitted when the code has already been compiled. If multiple backends were compiled, then the `-b` flag can be used to specify a backend. For example + + sdl_run -b cpu -s PU200 -n -1 -t myTag + +## Command explanations + +Compile the code with option flags. If none of `C,G,R,A` are used, then it defaults to compiling for CUDA and CPU. + + sdl_make_tracklooper -mc + -m: make clean binaries + -c: run with the cmssw caching allocator + -C: compile CPU backend + -G: compile CUDA backend + -R: compile ROCm backend + -A: compile all backends + -h: show help screen with all options + +Run the code + + sdl_ -n -v -w -s -i -o + + -i: PU200; muonGun, etc + -n: number of events; default: all + -v: 0-no printout; 1- timing printout only; 2- multiplicity printout; default: 0 + -s: number of streams/events in flight; default: 1 + -w: 0- no writeout; 1- minimum writeout; default: 1 + -o: provide an output root file name (e.g. LSTNtuple.root); default: debug.root + -l: add lower level object (pT3, pT5, T5, etc.) branches to the output + +Plotting numerators and denominators of performance plots + + createPerfNumDenHists -i -o [-g -n ] + + -i: Path to LSTNtuple.root + -o: provide an output root file name (e.g. num_den_hist.root) + -n: (optional) number of events + -g: (optional) comma separated pdgids to add more efficiency plots with different sim particle slices + +Plotting performance plots + + lst_plot_performance.py num_den_hist.root -t "mywork" + +There are several options you can provide to restrict the number of plots being produced. +And by default, it creates a certain set of objects. +One can specify the type, range, metric, etc. +To see the full information type + + lst_plot_performance.py --help + +To give an example of plotting efficiency, object type of lower level T5, for |eta| < 2.5 only.
+ + lst_plot_performance.py num_den_hist.root -t "mywork" -m eff -o T5_lower -s loweta + +NOTE: in order to plot lower level objects, the ```-l``` option must have been used during the ```sdl``` step! + +When running on ```cgpu-1``` remember to specify python3 as there is no python. +The shebang on the ```lst_plot_performance.py``` is not updated as ```lnx7188``` works with python2... + + python3 efficiency/python/lst_plot_performance.py num_den_hist.root -t "mywork" # If running on cgpu-1 + +Comparing two different runs + + lst_plot_performance.py \ + num_den_hist_1.root \ # Reference + num_den_hist_2.root \ # New work + -L BaseLine,MyNewWork \ # Labeling + -t "mywork" \ + --compare + +## CMSSW Integration +This is a complete set of instructions on how the TrackLooper code +can be linked as an external tool in CMSSW: + +### Build TrackLooper +```bash +git clone git@github.com:SegmentLinking/TrackLooper.git +cd TrackLooper/ +# Source one of the commands below, depending on the site +source setup.sh # if on UCSD or Cornell +source setup_hpg.sh # if on Florida +sdl_make_tracklooper -mc +cd .. +``` + +### Set up `TrackLooper` as an external +```bash +mkdir workingFolder # Create the folder you will be working in +cd workingFolder +cmsrel CMSSW_14_1_0_pre3 +cd CMSSW_14_1_0_pre3/src +cmsenv +git cms-init +git remote add SegLink git@github.com:SegmentLinking/cmssw.git +git fetch SegLink CMSSW_14_1_0_pre3_LST_X +git cms-addpkg RecoTracker Configuration +git checkout CMSSW_14_1_0_pre3_LST_X +#To include both the CPU library and GPU library into CMSSW, create 3 xml files (headers file has no library). +#Before writing the following xml file, check that libsdl_cpu.so and libsdl_gpu.so can be found under the ../../../TrackLooper/SDL/ folder.
+cat <lst_headers.xml + + + + + + + +EOF +cat <lst_cpu.xml + + + + + + + + + +EOF +cat <lst_cuda.xml + + + + + + + + + +EOF +scram setup lst_headers.xml +scram setup lst_cpu.xml +scram setup lst_cuda.xml +cmsenv +git cms-checkdeps -a -A +scram b -j 12 +``` + +### Run the LST reconstruction in CMSSW +A simple test configuration of the LST reconstruction can be run with the command: +```bash +cmsRun RecoTracker/LST/test/LSTAlpakaTester.py +``` + +For a more complete workflow, one can run a modified version of the 21034.1 workflow. +To get the commands of this workflow, one can run: +```bash +runTheMatrix.py -w upgrade -n -e -l 21034.1 +``` + +For convenience, the workflow has been run for 100 events and the output is stored here: +```bash +/data2/segmentlinking/CMSSW_14_1_0_pre0/step2_21034.1_100Events.root +``` + +For enabling the LST reconstruction in the CMSSW tracking workflow, a modified step3 needs to be run. +This is based on the step3 command of the 21034.1 workflow with the following changes: + - Remove the `--pileup_input` and `--pileup` flags. + - The number of threads and streams for the job can be optionally controlled by the `--nThreads` and `--nStreams` command line options respectively (`1` ends up being the actual default value for both, and more info can be found by running `cmsDriver.py --help`). + - Add at the end of the command: `--procModifiers gpu,trackingLST,trackingIters01 --no_exec` + +Run the command and modify the output configuration file with the following: + - If you want to run a CPU version, remove the ```gpu``` in the line defining the `process` object: + ```python + process = cms.Process('RECO',...,gpu,...)
+ ``` + - Add the following lines below the part where the import of the standard configurations happens: + ```python + process.load('Configuration.StandardSequences.Accelerators_cff') + process.load("HeterogeneousCore.AlpakaCore.ProcessAcceleratorAlpaka_cfi") + ``` + - Modify the input and output file names accordingly, as well as the number of events. + +Then, run the configuration file with `cmsRun`. + +To get the DQM files, one would have to run step4 of the 21034.1 workflow with the following modifications: + - Add `--no_exec` to the end of the command and then run it. + - Modify the output configuration file by changing the input file (the one containing `inDQM` from the previous step) and number of events accordingly. + +Running the configuration file with `cmsRun`, the output file will have a name starting with `DQM`. The name is the same every time this step runs, +so it is good practice to rename the file, e.g. to `tracking_Iters01LST.root`. +The MTV plots can be produced with the command: +```bash +makeTrackValidationPlots.py --extended tracking_Iters01LST.root +``` +Comparison plots can be made by including multiple ROOT files as arguments. + +**Note:** In case one wants to run step2 as well, similar modifications as in step4 (`--no_exec` flag and input file/number of events) need to be applied. Moreover, the PU files had better be modified to point to local ones.
This can be done by inserting a dummy file when running the command (set the argument of the `--pileup_input` flag to `file:file.root`), and then change the PU input files in the configuration to the following line (by means of replacing the corresponding line in the configuration): +```python +process.mix.input.fileNames = cms.untracked.vstring(['file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/066fc95d-1cef-4469-9e08-3913973cd4ce.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/07928a25-231b-450d-9d17-e20e751323a1.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/26bd8fb0-575e-4201-b657-94cdcb633045.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/4206a9c5-44c2-45a5-aab2-1a8a6043a08a.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/55a372bf-a234-4111-8ce0-ead6157a1810.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/59ad346c-f405-4288-96d7-795f81c43fe8.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/7280f5ec-b71d-4579-a730-7ce2de0ff906.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/b93adc85-715f-477a-afc9-65f3241933ee.root', 'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/c7a0aa46-f55c-4b01-977f-34a397b71fba.root', 
'file:/data2/segmentlinking/PUSamplesForCMSSW1263/CMSSW_12_3_0_pre5/RelValMinBias_14TeV/GEN-SIM/123X_mcRun4_realistic_v4_2026D88noPU-v1/e77fa467-97cb-4943-884f-6965b4eb0390.root']) +``` + +### Inclusion of LST in other CMSSW packages +Including the line +``` + +``` +in the relevant package `BuildFile.xml` allows for +including our headers in the code of that package. + +## Running LST in a CVMFS-less setup + +The setup scripts included in this repository assume that the [CernVM File System (CVMFS)](https://cernvm.cern.ch/fs/) is installed. This provides a convenient way to fetch the required dependencies, but it is not necessary to run LST in standalone mode. Here, we briefly describe how to build and run it when CVMFS is not available. + +The necessary dependencies are CUDA, ROOT, the Boost libraries, Alpaka, and some CMSSW headers. CUDA, ROOT, and Boost are fairly standard libraries and are available from multiple package managers. For the remaining necessary headers you will need to clone the [Alpaka](https://github.com/alpaka-group/alpaka) and [CMSSW](https://github.com/cms-sw/cmssw) repositories. The Alpaka repository is reasonably sized, but the CMSSW one is extremely large, especially considering that we only need a tiny fraction of its files to build LST. We can get only the Alpaka interface headers from CMSSW by running the following commands. + +``` bash +git clone --filter=blob:none --no-checkout --depth 1 --sparse --branch CMSSW_14_1_X https://github.com/cms-sw/cmssw.git +cd cmssw +git sparse-checkout add HeterogeneousCore/AlpakaInterface +git checkout +``` + +Then all that is left to do is set some environment variables. We give an example of how to do this in lnx7188/cgpu-1. + +```bash +# These two lines are only needed to set the right version of gcc and nvcc. They are not needed for standard installations.
+export PATH=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/bin:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/external/el8_amd64_gcc12/bin:$PATH +export LD_LIBRARY_PATH=/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/biglib/el8_amd64_gcc12:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/lib/el8_amd64_gcc12:/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3/external/el8_amd64_gcc12/lib:/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/lib64:/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/gcc/12.3.1-40d504be6370b5a30e3947a6e575ca28/lib:$LD_LIBRARY_PATH + +# These are the lines that you need to manually change for a CVMFS-less setup. +# In this example we use cvmfs paths since that is where the dependencies are in lnx7188/cgpu1, but they can point to local directories. +export BOOST_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/boost/1.80.0-60a217837b5db1cff00c7d88ec42f53a +export ALPAKA_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/alpaka/1.1.0-7d0324257db47fde2d27987e7ff98fb4 +export CUDA_HOME=/cvmfs/cms.cern.ch/el8_amd64_gcc12/external/cuda/12.4.1-06cde0cd9f95a73a1ea05c8535f60bde +export ROOT_ROOT=/cvmfs/cms.cern.ch/el8_amd64_gcc12/lcg/root/6.30.07-21947a33e64ceb827a089697ad72e468 +export CMSSW_BASE=/cvmfs/cms.cern.ch/el8_amd64_gcc12/cms/cmssw/CMSSW_14_1_0_pre3 + +# These lines are needed to account for some extra environment variables that are exported in the setup script. +export LD_LIBRARY_PATH=$PWD/SDL/cuda:$PWD/SDL/cpu:$PWD:$LD_LIBRARY_PATH +export PATH=$PWD/bin:$PATH +export PATH=$PWD/efficiency/bin:$PATH +export PATH=$PWD/efficiency/python:$PATH +export TRACKLOOPERDIR=$PWD +export TRACKINGNTUPLEDIR=/data2/segmentlinking/CMSSW_12_2_0_pre2/ +export LSTOUTPUTDIR=. +source $PWD/code/rooutil/thisrooutil.sh + +# After this, you can compile and run LST as usual. 
+sdl_run -f -mc -s PU200 -n -1 -t myTag +``` + +## Code formatting and checking + +The makefile in the `SDL` directory includes phony targets to run `clang-format` and `clang-tidy` on the code using the formatting and checks used in CMSSW. The following are the available commands. + +- `make format` + Formats the code in the `SDL` directory using `clang-format` following the rules specified in `.clang-format`. +- `make check` + Runs `clang-tidy` on the code in the `SDL` directory to performs the checks specified in `.clang-tidy`. +- `make check-fix` + Same as `make check`, but fixes the issues that it knows how to fix. + \ No newline at end of file diff --git a/RecoTracker/LSTCore/standalone/bin/lst.cc b/RecoTracker/LSTCore/standalone/bin/lst.cc new file mode 100644 index 0000000000000..369680bc4309e --- /dev/null +++ b/RecoTracker/LSTCore/standalone/bin/lst.cc @@ -0,0 +1,527 @@ +#include "lst.h" + +#include + +using LSTEvent = ALPAKA_ACCELERATOR_NAMESPACE::lst::LSTEvent; +using namespace ::lst; + +//___________________________________________________________________________________________________________________________________________________________________________________________ +int main(int argc, char **argv) { + //******************************************************************************** + // + // 0. Preliminary operations + // + //******************************************************************************** + + // Checking the TRACKLOOPERDIR is set + ana.track_looper_dir_path = gSystem->Getenv("TRACKLOOPERDIR"); + if (ana.track_looper_dir_path.IsNull()) { + RooUtil::error( + "TRACKLOOPERDIR is not set! Did you run $ source setup.sh from TrackLooper/ main repository directory?"); + } + RooUtil::print(TString::Format("TRACKLOOPERDIR=%s", ana.track_looper_dir_path.Data())); + + // Write the command line used to run it + // N.B. 
This needs to be before the argument parsing as it will change some values + std::vector allArgs(argv, argv + argc); + ana.full_cmd_line = ""; + for (auto &str : allArgs) { + ana.full_cmd_line += TString::Format(" %s", str.c_str()); + } + + //******************************************************************************** + // + // 1. Parsing options + // + //******************************************************************************** + + // cxxopts is just a tool to parse argc, and argv easily + + // Grand option setting + cxxopts::Options options("\n $ lst", + "\n **********************\n * *\n * " + "Looper *\n * *\n **********************\n"); + + // Read the options + options.add_options()("m,mode", "Run mode (NOT DEFINED)", cxxopts::value()->default_value("5"))( + "i,input", + "Comma separated input file list OR if just a directory is provided it will glob all in the directory BUT must " + "end with '/' for the path", + cxxopts::value()->default_value("muonGun"))( + "t,tree", + "Name of the tree in the root file to open and loop over", + cxxopts::value()->default_value("trackingNtuple/tree"))( + "o,output", "Output file name", cxxopts::value())( + "N,nmatch", "N match for MTV-like matching", cxxopts::value()->default_value("9"))( + "n,nevents", "N events to loop over", cxxopts::value()->default_value("-1"))( + "x,event_index", "specific event index to process", cxxopts::value()->default_value("-1"))( + "g,pdg_id", "The simhit pdgId match option", cxxopts::value()->default_value("0"))( + "v,verbose", + "Verbose mode (0: no print, 1: only final timing, 2: object multiplitcity", + cxxopts::value()->default_value("0"))( + "w,write_ntuple", "Write Ntuple", cxxopts::value()->default_value("1"))( + "s,streams", "Set number of streams", cxxopts::value()->default_value("1"))( + "d,debug", "Run debug job. i.e. 
overrides output option to 'debug.root' and 'recreate's the file.")( + "l,lower_level", "write lower level objects ntuple results")("G,gnn_ntuple", "write gnn input variable ntuple")( + "j,nsplit_jobs", "Enable splitting jobs by N blocks (--job_index must be set)", cxxopts::value())( + "I,job_index", + "job_index of split jobs (--nsplit_jobs must be set. index starts from 0. i.e. 0, 1, 2, 3, etc...)", + cxxopts::value())("3,tc_pls_triplets", "Allow triplet pLSs in TC collection")( + "2,no_pls_dupclean", "Disable pLS duplicate cleaning (both steps)")("h,help", "Print help"); + + auto result = options.parse(argc, argv); + + // NOTE: When an option was provided (e.g. -i or --input), then the result.count("