-
Notifications
You must be signed in to change notification settings - Fork 0
/
mpi.c
200 lines (164 loc) · 4.91 KB
/
mpi.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <mpi.h>
// f(x1, x2, x3, ..., xM) = theta0 * x0 + theta1 * x1 + theta2 * x2 + ... + thetaM * xM
#define M 10
#define N 1000
#define MAX_ITERATIONS 1000
#define ALPHA 0.5
#define ACCURACY_TORLERANCE 0.0001
/// @brief The function we are trying to find coefficients for
double f(double *x, int x_row, double *theta)
{
int row = x_row * M;
double result = 0;
for (int i = 0; i < M; i++)
{
result += theta[i] * x[row + i];
}
return result;
}
void init(double inputs[N * M], double outputs[N], double theta[M])
{
srand(time(NULL));
for (int i = 0; i < M; i++)
theta[i] = (double)rand() / RAND_MAX;
for (int i = 0; i < N; i++)
{
int upper = (i + 1) * M;
for (int k = i * M; k < upper; k++)
{
inputs[k] = (double)rand() / RAND_MAX;
}
outputs[i] = f(inputs, i, theta);
}
}
void checkThetaAccuracy(double *expectedTheta, double *theta)
{
int thetasAreAccurate = 1;
for (int i = 0; i < M; i++)
{
if (fabs(theta[i] - expectedTheta[i]) > ACCURACY_TORLERANCE)
{
thetasAreAccurate = 0;
break;
}
}
if (thetasAreAccurate)
printf("Thetas are accurate\n");
else
printf("Thetas are not accurate\n");
}
void printError(double inputs[N * M], double outputs[N], double *theta)
{
double error = 0;
for (int n = 0; n < N; n++)
{
double h = 0;
int inputRow = n * M;
for (int i = 0; i < M; i++)
{
h += inputs[inputRow + i] * theta[i];
}
error += abs(h - outputs[n]);
}
error /= N;
printf("error: %lf\n", error);
}
void printThetaMapping(double *expectedTheta, double *calculatedTheta)
{
puts("Expected Thetas vs Computed Thetas");
for (int i = 0; i < M; i++)
{
printf("%lf -> %lf\n", expectedTheta[i], calculatedTheta[i]);
}
}
int main()
{
int size, rank;
MPI_Init(NULL, NULL);
MPI_Comm_size(MPI_COMM_WORLD, &size);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
int localN = N / size;
double *inputs;
double *outputs;
double *actualTheta;
// theta are the coefficients we are trying to find
double *theta = malloc(sizeof(double) * M);
// init inputs, outputs and actual thetas in rank 0
if (rank == 0)
{
inputs = malloc(sizeof(double) * M * N); // a one dimentional two dimentional like array
outputs = malloc(sizeof(double) * N);
actualTheta = malloc(sizeof(double) * M);
init(inputs, outputs, actualTheta);
for (int i = 0; i < M; i++)
theta[i] = 0;
}
// dynamic arrays to store inputs and outputs in each rank
double *localInputs = malloc(sizeof(double) * localN * M); // a 2D array like 1D array
double *localOutputs = malloc(sizeof(double) * localN);
// time should be counted from here because data spread is part of MPI
// if it wasnt MPI, data doesnt need to be spread
double startTime = MPI_Wtime();
// distribute inputs
MPI_Scatter(inputs, localN * M, MPI_DOUBLE, localInputs, localN * M, MPI_DOUBLE, 0, MPI_COMM_WORLD);
// distribute outputs
MPI_Scatter(outputs, localN, MPI_DOUBLE, localOutputs, localN, MPI_DOUBLE, 0, MPI_COMM_WORLD);
for (int i = 0; i < MAX_ITERATIONS; i++)
{
// for iteration, thetas are updated. therefore distribute them
MPI_Bcast(theta, M, MPI_DOUBLE, 0, MPI_COMM_WORLD);
double newTheta[M];
for (int k = 0; k < M; k++)
{
double localT = 0;
for (int n = 0; n < localN; n++)
{
double h = 0;
int inputRow = n * M;
for (int i = 0; i < M; i++)
{
h += localInputs[inputRow + i] * theta[i];
}
localT += (h - localOutputs[n]) * localInputs[inputRow + k];
}
// reduce all totals into one
double t = 0;
MPI_Reduce(&localT, &t, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
if (rank == 0)
{
t = theta[k] - ALPHA * t / N;
newTheta[k] = t;
}
}
if (rank == 0)
{
for (int i = 0; i < M; i++)
theta[i] = newTheta[i];
}
}
double endTime = MPI_Wtime();
free(localInputs);
free(localOutputs);
if (rank == 0)
{
printf("Time taken = %lf\n", endTime - startTime);
// print mapping
printThetaMapping(actualTheta, theta);
// check if thetas are accurate
checkThetaAccuracy(actualTheta, theta);
// check error
printError(inputs, outputs, theta);
// clean up
free(inputs);
free(outputs);
free(actualTheta);
free(theta);
}
MPI_Finalize();
return 0;
}
// Time taken = 0.232665