-
Notifications
You must be signed in to change notification settings - Fork 0
/
energy_storms_cuda.cu
279 lines (233 loc) · 8.88 KB
/
energy_storms_cuda.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
/*
* Simplified simulation of high-energy particle storms
*
* Parallel computing (Degree in Computer Engineering)
* 2017/2018
*
* Version: 2.0
*
* Code prepared to be used with the Tablon on-line judge.
* The current Parallel Computing course includes contests using:
* OpenMP, MPI, and CUDA.
*
* (c) 2018 Arturo Gonzalez-Escribano, Eduardo Rodriguez-Gutiez
* Grupo Trasgo, Universidad de Valladolid (Spain)
*
* This work is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.
* https://creativecommons.org/licenses/by-sa/4.0/
*/
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<math.h>
#include<limits.h>
#include<sys/time.h>
/* Headers for the CUDA assignment versions */
#include<cuda.h>
/*
* Macros to show errors when calling a CUDA library function,
* or after launching a kernel
*/
#define CHECK_CUDA_CALL( a ) { \
cudaError_t ok = a; \
if ( ok != cudaSuccess ) \
fprintf(stderr, "-- Error CUDA call in line %d: %s\n", __LINE__, cudaGetErrorString( ok ) ); \
}
#define CHECK_CUDA_LAST() { \
cudaError_t ok = cudaGetLastError(); \
if ( ok != cudaSuccess ) \
fprintf(stderr, "-- Error CUDA last in line %d: %s\n", __LINE__, cudaGetErrorString( ok ) ); \
}
/* Use fopen function in local tests. The Tablon online judge software
substitutes it by a different function to run in its sandbox */
#ifdef CP_TABLON
#include "cputilstablon.h"
#else
#define cp_open_file(name) fopen(name,"r")
#endif
/* Function to get wall time */
double cp_Wtime(){
struct timeval tv;
gettimeofday(&tv, NULL);
return tv.tv_sec + 1.0e-6 * tv.tv_usec;
}
#define THRESHOLD 0.001f
/* Structure used to store data for one storm of particles */
typedef struct {
int size; // Number of particles
int *posval; // Positions and values
} Storm;
/* THIS FUNCTION CAN BE MODIFIED */
/* Function to update a single position of the layer */
void update( float *layer, int layer_size, int k, int pos, float energy ) {
/* 1. Compute the absolute value of the distance between the
impact position and the k-th position of the layer */
int distance = pos - k;
if ( distance < 0 ) distance = - distance;
/* 2. Impact cell has a distance value of 1 */
distance = distance + 1;
/* 3. Square root of the distance */
/* NOTE: Real world atenuation typically depends on the square of the distance.
We use here a tailored equation that affects a much wider range of cells */
float atenuacion = sqrtf( (float)distance );
/* 4. Compute attenuated energy */
float energy_k = energy / layer_size / atenuacion;
/* 5. Do not add if its absolute value is lower than the threshold */
if ( energy_k >= THRESHOLD / layer_size || energy_k <= -THRESHOLD / layer_size )
layer[k] = layer[k] + energy_k;
}
/* ANCILLARY FUNCTIONS: These are not called from the code section which is measured, leave untouched */
/* DEBUG function: Prints the layer status */
void debug_print(int layer_size, float *layer, int *positions, float *maximum, int num_storms ) {
int i,k;
/* Only print for array size up to 35 (change it for bigger sizes if needed) */
if ( layer_size <= 35 ) {
/* Traverse layer */
for( k=0; k<layer_size; k++ ) {
/* Print the energy value of the current cell */
printf("%10.4f |", layer[k] );
/* Compute the number of characters.
This number is normalized, the maximum level is depicted with 60 characters */
int ticks = (int)( 60 * layer[k] / maximum[num_storms-1] );
/* Print all characters except the last one */
for (i=0; i<ticks-1; i++ ) printf("o");
/* If the cell is a local maximum print a special trailing character */
if ( k>0 && k<layer_size-1 && layer[k] > layer[k-1] && layer[k] > layer[k+1] )
printf("x");
else
printf("o");
/* If the cell is the maximum of any storm, print the storm mark */
for (i=0; i<num_storms; i++)
if ( positions[i] == k ) printf(" M%d", i );
/* Line feed */
printf("\n");
}
}
}
/*
* Function: Read data of particle storms from a file
*/
Storm read_storm_file( char *fname ) {
FILE *fstorm = cp_open_file( fname );
if ( fstorm == NULL ) {
fprintf(stderr,"Error: Opening storm file %s\n", fname );
exit( EXIT_FAILURE );
}
Storm storm;
int ok = fscanf(fstorm, "%d", &(storm.size) );
if ( ok != 1 ) {
fprintf(stderr,"Error: Reading size of storm file %s\n", fname );
exit( EXIT_FAILURE );
}
storm.posval = (int *)malloc( sizeof(int) * storm.size * 2 );
if ( storm.posval == NULL ) {
fprintf(stderr,"Error: Allocating memory for storm file %s, with size %d\n", fname, storm.size );
exit( EXIT_FAILURE );
}
int elem;
for ( elem=0; elem<storm.size; elem++ ) {
ok = fscanf(fstorm, "%d %d\n",
&(storm.posval[elem*2]),
&(storm.posval[elem*2+1]) );
if ( ok != 2 ) {
fprintf(stderr,"Error: Reading element %d in storm file %s\n", elem, fname );
exit( EXIT_FAILURE );
}
}
fclose( fstorm );
return storm;
}
/*
* MAIN PROGRAM
*/
int main(int argc, char *argv[]) {
int i,j,k;
/* 1.1. Read arguments */
if (argc<3) {
fprintf(stderr,"Usage: %s <size> <storm_1_file> [ <storm_i_file> ] ... \n", argv[0] );
exit( EXIT_FAILURE );
}
int layer_size = atoi( argv[1] );
int num_storms = argc-2;
Storm storms[ num_storms ];
/* 1.2. Read storms information */
for( i=2; i<argc; i++ )
storms[i-2] = read_storm_file( argv[i] );
/* 1.3. Intialize maximum levels to zero */
float maximum[ num_storms ];
int positions[ num_storms ];
for (i=0; i<num_storms; i++) {
maximum[i] = 0.0f;
positions[i] = 0;
}
/* 2. Begin time measurement */
CHECK_CUDA_CALL( cudaSetDevice(0) );
CHECK_CUDA_CALL( cudaDeviceSynchronize() );
double ttotal = cp_Wtime();
/* START: Do NOT optimize/parallelize the code of the main program above this point */
/* 3. Allocate memory for the layer and initialize to zero */
float *layer = (float *)malloc( sizeof(float) * layer_size );
float *layer_copy = (float *)malloc( sizeof(float) * layer_size );
if ( layer == NULL || layer_copy == NULL ) {
fprintf(stderr,"Error: Allocating the layer memory\n");
exit( EXIT_FAILURE );
}
for( k=0; k<layer_size; k++ ) layer[k] = 0.0f;
for( k=0; k<layer_size; k++ ) layer_copy[k] = 0.0f;
/* 4. Storms simulation */
for( i=0; i<num_storms; i++) {
/* 4.1. Add impacts energies to layer cells */
/* For each particle */
for( j=0; j<storms[i].size; j++ ) {
/* Get impact energy (expressed in thousandths) */
float energy = (float)storms[i].posval[j*2+1] * 1000;
/* Get impact position */
int position = storms[i].posval[j*2];
/* For each cell in the layer */
for( k=0; k<layer_size; k++ ) {
/* Update the energy value for the cell */
update( layer, layer_size, k, position, energy );
}
}
/* 4.2. Energy relaxation between storms */
/* 4.2.1. Copy values to the ancillary array */
for( k=0; k<layer_size; k++ )
layer_copy[k] = layer[k];
/* 4.2.2. Update layer using the ancillary values.
Skip updating the first and last positions */
for( k=1; k<layer_size-1; k++ )
layer[k] = ( layer_copy[k-1] + layer_copy[k] + layer_copy[k+1] ) / 3;
/* 4.3. Locate the maximum value in the layer, and its position */
for( k=1; k<layer_size-1; k++ ) {
/* Check it only if it is a local maximum */
if ( layer[k] > layer[k-1] && layer[k] > layer[k+1] ) {
if ( layer[k] > maximum[i] ) {
maximum[i] = layer[k];
positions[i] = k;
}
}
}
}
/* END: Do NOT optimize/parallelize the code below this point */
/* 5. End time measurement */
CHECK_CUDA_CALL( cudaDeviceSynchronize() );
ttotal = cp_Wtime() - ttotal;
/* 6. DEBUG: Plot the result (only for layers up to 35 points) */
#ifdef DEBUG
debug_print( layer_size, layer, positions, maximum, num_storms );
#endif
/* 7. Results output, used by the Tablon online judge software */
printf("\n");
/* 7.1. Total computation time */
printf("Time: %lf\n", ttotal );
/* 7.2. Print the maximum levels */
printf("Result:");
for (i=0; i<num_storms; i++)
printf(" %d %f", positions[i], maximum[i] );
printf("\n");
/* 8. Free resources */
for( i=0; i<argc-2; i++ )
free( storms[i].posval );
/* 9. Program ended successfully */
return 0;
}