-
Notifications
You must be signed in to change notification settings - Fork 0
/
mvm.c
277 lines (216 loc) · 7.13 KB
/
mvm.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
//#error Please comment out the next two lines under linux, then comment this error
//#include "stdafx.h" //Visual studio expects this line to be the first one, comment out if different compiler
//#include <windows.h> // Include if under windows
#ifndef WIN32
#include <sys/time.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include "tsc_x86.h"
#define NUM_RUNS 1
#define CYCLES_REQUIRED 1e8
#define FREQUENCY 1.4e9
#define CALIBRATE
/*
* Initialize the input
*/
void fill_matrix(double * A, int n) {
for(int i=0; i < n; i++) {
for(int j=0; j < n; j++) {
A[n*i+j] = (double) i / (j+1) + 1.0;
}
}
}
void fill_vector(double * x, int n) {
for(int i=0; i < n; i++) {
x[i] = (double) i + 1.0;
}
}
/*
* Straightforward implementation of matrix-vector multiplication
*/
void compute(double A[], double x[], double y[], int n) {
for(int i = 0; i < n; i++) {
double sum = 0.0;
for(int j = 0; j < n; j++) {
sum = sum + A[n*i+j]*x[j];
}
y[i] = sum;
}
}
/*
* Timing function based on the TimeStep Counter of the CPU.
*/
double rdtsc(double A[], double x[], double y[], int n) {
int i, num_runs;
myInt64 cycles;
myInt64 start;
num_runs = NUM_RUNS;
/*
* The CPUID instruction serializes the pipeline.
* Using it, we can create execution barriers around the code we want to time.
* The calibrate section is used to make the computation large enough so as to
* avoid measurements bias due to the timing overhead.
*/
#ifdef CALIBRATE
while(num_runs < (1 << 14)) {
start = start_tsc();
for (i = 0; i < num_runs; ++i) {
compute(A, x, y, n);
}
cycles = stop_tsc(start);
if(cycles >= CYCLES_REQUIRED) break;
num_runs *= 2;
}
#endif
start = start_tsc();
for (i = 0; i < num_runs; ++i) {
compute(A, x, y, n);
}
cycles = stop_tsc(start)/num_runs;
return (double) cycles;
}
double c_clock(double A[], double x[], double y[], int n) {
int i, num_runs;
double cycles;
clock_t start, end;
num_runs = NUM_RUNS;
#ifdef CALIBRATE
while(num_runs < (1 << 14)) {
start = clock();
for (i = 0; i < num_runs; ++i) {
compute(A, x, y, n);
}
end = clock();
cycles = (double)(end-start);
// Same as in c_clock: CYCLES_REQUIRED should be expressed accordingly to the order of magnitude of CLOCKS_PER_SEC
if(cycles >= CYCLES_REQUIRED/(FREQUENCY/CLOCKS_PER_SEC)) break;
num_runs *= 2;
}
#endif
start = clock();
for(i=0; i<num_runs; ++i) {
compute(A, x, y, n);
}
end = clock();
return (double)(end-start)/num_runs;
}
#ifndef WIN32
double timeofday(double A[], double x[], double y[], int n) {
int i, num_runs;
double cycles;
struct timeval start, end;
num_runs = NUM_RUNS;
#ifdef CALIBRATE
while(num_runs < (1 << 14)) {
gettimeofday(&start, NULL);
for (i = 0; i < num_runs; ++i) {
compute(A, x, y, n);
}
gettimeofday(&end, NULL);
cycles = (double)((end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)/1e6)*FREQUENCY;
if(cycles >= CYCLES_REQUIRED) break;
num_runs *= 2;
}
#endif
gettimeofday(&start, NULL);
for(i=0; i < num_runs; ++i) {
compute(A, x, y, n);
}
gettimeofday(&end, NULL);
return (double)((end.tv_sec - start.tv_sec) + (end.tv_usec - start.tv_usec)/1e6)/ num_runs;
}
#else
double gettickcount(double A[], double x[], double y[], int n) {
int i, num_runs;
double cycles, start, end;
num_runs = NUM_RUNS;
#ifdef CALIBRATE
while(num_runs < (1 << 14)) {
start = (double)GetTickCount();
for (i = 0; i < num_runs; ++i) {
compute(A, x, y, n);
}
end = (double)GetTickCount();
cycles = (end-start)*FREQUENCY/1e3; // end-start provides a measurement in the order of milliseconds
if(cycles >= CYCLES_REQUIRED) break;
num_runs *= 2;
}
#endif
start = (double)GetTickCount();
for(i=0; i < num_runs; ++i) {
compute(A, x, y, n);
}
end = (double)GetTickCount();
return (end-start)/num_runs;
}
double queryperfcounter(double A[], double x[], double y[], int n, LARGE_INTEGER f) {
int i, num_runs;
double cycles;
LARGE_INTEGER start, end;
num_runs = NUM_RUNS;
#ifdef CALIBRATE
while(num_runs < (1 << 14)) {
QueryPerformanceCounter(&start);
for (i = 0; i < num_runs; ++i) {
compute(A, x, y, n);
}
QueryPerformanceCounter(&end);
cycles = (double)(end.QuadPart - start.QuadPart);
// Same as in c_clock: CYCLES_REQUIRED should be expressed accordingly to the order of magnitude of f
if(cycles >= CYCLES_REQUIRED/(FREQUENCY/f.QuadPart)) break;
num_runs *= 2;
}
#endif
QueryPerformanceCounter(&start);
for(i=0; i < num_runs; ++i) {
compute(A, x, y, n);
}
QueryPerformanceCounter(&end);
return (double)(end.QuadPart - start.QuadPart)/num_runs;
}
#endif
int main(int argc, char **argv) {
// if (argc!=2) {printf("usage: FW <n>\n"); return -1;}
// int n = atoi(argv[1]);
FILE *fp;
fp=fopen("data.txt","w");
if(fp==NULL)
{
printf("failed to create file data.txt\n");
}
for (int n=200;n<=4000;n=n+200)
{
printf("n=%d and require %lf Kbytes\n",n,(n*n+n+n)*8/1024.);
double* A = (double *)malloc(n*n*sizeof(double));
double* x = (double *)malloc(n*sizeof(double));
double* y = (double *)calloc(n,sizeof(double));
fill_matrix(A, n);
fill_vector(x, n);
double r = rdtsc(A, x, y, n);
// printf("RDTSC instruction:\n %lf cycles measured => %lf seconds, assuming frequency is %lf MHz. (change in source file if different)\n\n", r, r/(FREQUENCY), (FREQUENCY)/1e6);
fprintf(fp,"%lf\n",(2*n*n)/r);
}
fclose(fp);
// }
// double c = c_clock(A, x, y, n);
// printf("C clock() function:\n %lf cycles measured. On some systems, this number
// seems to be actually computed from a timer in seconds then transformed into clock
// ticks using the variable CLOCKS_PER_SEC. Unfortunately, it appears that CLOCKS_PER_SEC is
// sometimes set improperly. (According to this variable, your computer should be running at %lf MHz).
// In any case, dividing by this value should give a correct timing: %lf seconds. \n\n"
// ,c, (double) CLOCKS_PER_SEC/1e6, c/CLOCKS_PER_SEC);
// #ifndef WIN32
// double t = timeofday(A, x, y, n);
// printf("C gettimeofday() function:\n %lf seconds measured\n\n",t);
// #else
// LARGE_INTEGER f;
// double t = gettickcount(A, x, y, n);
// printf("Windows getTickCount() function:\n %lf milliseconds measured\n\n",t);
// QueryPerformanceFrequency((LARGE_INTEGER *)&f);
// double p = queryperfcounter(A, x, y, n, f);
// printf("Windows QueryPerformanceCounter() function:\n %lf cycles measured => %lf seconds, with reported CPU frequency %lf MHz\n\n",p,p/f.QuadPart,(double)f.QuadPart/1000);
// #endif
return 0;
}