forked from weft/warp
-
Notifications
You must be signed in to change notification settings - Fork 0
/
check_pointers.cu
71 lines (59 loc) · 2.6 KB
/
check_pointers.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <cuda.h>
#include <stdio.h>
#include "datadef.h"
#include "warp_device.cuh"
#include "check_cuda.h"
#include "wfloat3.h"
/**
 * \brief Debug kernel that prints the scatter/energy distribution pointers for a run of indices.
 * \details Thread 0 of each block copies the dist_scatter and dist_energy
 * pointers out of d_xsdata[0] into shared memory.  After a block-wide barrier,
 * every thread whose global index tid is below N prints, for array index
 * dex0+tid, the base pointer of each array followed by the lower and upper
 * members of that entry.
 *
 * Only the .x components of threadIdx/blockIdx/blockDim are used, so a 1D
 * launch is expected.  Device printf is slow; this is a debugging aid only.
 *
 * @param[in]  N         - number of indices to print; threads with tid >= N return without printing
 * @param[in]  dex0      - first array index to print
 * @param[in]  dex1      - last array index; not read by the kernel (the range is fixed by dex0 and N)
 * @param[in]  d_xsdata  - cross section data array; only element 0 is read
 */
__global__ void check_pointers_kernel(unsigned N, unsigned dex0, unsigned dex1, cross_section_data* d_xsdata ){

	// the only two values the kernel needs, shared by the whole block
	__shared__ dist_container*	dist_scatter;
	__shared__ dist_container*	dist_energy;

	// have thread 0 of the block copy the pointers into shared memory
	if (threadIdx.x == 0){
		dist_scatter	= d_xsdata[0].dist_scatter;
		dist_energy	= d_xsdata[0].dist_energy;
	}

	// make sure shared loads happen before anything else; this barrier sits
	// before the early return so every thread of the block reaches it
	__syncthreads();

	// return immediately if out of bounds (unsigned to match N and the %u below)
	unsigned tid = threadIdx.x+blockIdx.x*blockDim.x;
	if (tid >= N){return;}

	// argument order follows the labels in the format string: lower first, then upper
	unsigned dex = dex0+tid;
	printf("INDEX %u -> energy pointer = %p -> lower %p upper %p\n",dex,dist_energy, dist_energy[ dex].lower,dist_energy[ dex].upper);
	printf("INDEX %u -> scatter pointer = %p -> lower %p upper %p\n",dex,dist_scatter,dist_scatter[dex].lower,dist_scatter[dex].upper);

}
/**
 * \brief Launches check_pointers_kernel over the inclusive index range [dex0, dex1].
 * \details Computes the number of indices N = dex1-dex0+1, launches enough
 * blocks of NUM_THREADS threads to cover them, then waits for the kernel to
 * finish so its device-side printf output is flushed and any error is
 * reported through check_cuda.  If the range is inverted or NUM_THREADS is
 * zero, a message is printed and nothing is launched.
 *
 * @param[in]    NUM_THREADS     - the number of threads to run per thread block
 * @param[in]    dex0            - starting index
 * @param[in]    dex1            - ending index (inclusive)
 * @param[in]    d_xsdata        - device pointer to cross section data pointer array
 */
void check_pointers(unsigned NUM_THREADS, unsigned dex0, unsigned dex1, cross_section_data* d_xsdata){

	// compare before subtracting: dex1-dex0 is unsigned and would wrap on an inverted range
	if (dex1 < dex0){
		printf("Negative range in check_pointers! dex0 %u dex1 %u -> N = %lld\n",dex0,dex1,(long long)dex1-(long long)dex0+1);
		return;
	}

	// a zero block size would divide by zero below and is not a valid launch
	if (NUM_THREADS == 0){
		printf("Zero threads per block in check_pointers! dex0 %u dex1 %u\n",dex0,dex1);
		return;
	}

	unsigned N = dex1-dex0+1;

	// ceiling division written so that the intermediate sum cannot overflow
	unsigned blks = N / NUM_THREADS + ( (N % NUM_THREADS) ? 1u : 0u );

	check_pointers_kernel <<< blks, NUM_THREADS >>> ( N, dex0, dex1, d_xsdata );

	// launch-configuration errors are only visible through cudaGetLastError
	check_cuda(cudaGetLastError());

	// cudaThreadSynchronize is deprecated; cudaDeviceSynchronize is its replacement
	check_cuda(cudaDeviceSynchronize());

}