-
Notifications
You must be signed in to change notification settings - Fork 0
/
concurrency.c
395 lines (328 loc) · 12.3 KB
/
concurrency.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
#include <stdio.h>
// this custom header provides portable functions for Windows and POSIX threads
#include "thread_helper.h"
// Number of concurrent threads to synchronize. Some implementations below
// support only two threads; for those, main() caps the thread count at two
// when a larger number is configured here.
#define THREADS 2
// Upper limit (inclusive) of the sum of consecutive integers to calculate.
#define SUM_TO 1000000LLU
// Shared accumulator, read and written by all worker threads concurrently.
// Note: volatile only forces the compiler to emit every load and store; it
// does not make `res += i` an atomic read-modify-write.
volatile unsigned long long res = 0;
// Worker without any synchronization: adds every THREADS-th integer, starting
// at this thread's id, to the shared accumulator `res`. Because `res += i` is
// an unprotected read-modify-write, concurrent updates can overwrite each
// other, so the final sum typically ends up lower than expected.
//
// args points to an int holding this thread's id. Always returns 0.
thread_helper_return_t
sum_unguarded (void *args)
{
    const int id = *((int*)args);
    unsigned long i = id;

    while (i <= SUM_TO)
    {
        /* enter critical section: deliberately a no-op */
        res += i;
        /* leave critical section: deliberately a no-op */
        i += THREADS;
    }

    return 0;
}
// Strict alternation between exactly two threads (ids 0 and 1): a thread may
// only enter the critical section while `turn` equals its own id, and hands
// the turn to the other thread when it leaves.
//
// This usually produces correct results because mutual exclusion is
// guaranteed, but it leads to unreasonably long run times when the two
// threads run concurrently (not in parallel) on the same CPU: each thread
// executes only one iteration per scheduling quantum and spends the rest of
// the quantum spinning while the other thread, which is not scheduled, cannot
// take its turn.
//
// args points to an int holding this thread's id (0 or 1). Always returns 0.
thread_helper_return_t
sum_turns (void *args)
{
    int id = *((int*)args);
    static volatile int turn = 0;
    unsigned long i;

    // The stride is fixed at 2 rather than THREADS: the hand-over below
    // (id ^ 1) only works for two threads and main() caps this guard type at
    // two threads, so striding by a larger THREADS would skip values.
    for (i = id; i <= SUM_TO; i += 2)
    {
        /* enter critical section: spin until it is our turn */
        while (turn != id)
        {
        }

        res += i;

        /* leave critical section: pass the turn to the other thread */
        turn = id ^ 1;
    }

    return 0;
}
// Two-thread guard based on "raise your flag, then wait for the other flag to
// drop". A thread announces its intent by setting its own flag and then spins
// while the other thread's flag is set. If both threads raise their flags at
// the same time, each waits for the other forever and no further progress is
// made on the computation.
//
// args points to an int holding this thread's id (0 or 1). Always returns 0.
thread_helper_return_t
sum_flags (void *args)
{
    static volatile int flags[2] = { 0 };
    const int self = *((int*)args);
    const int other = self ^ 1;
    unsigned long n;

    for (n = self; n <= SUM_TO; n += 2)
    {
        /* enter critical section: raise our flag, wait for the other one */
        flags[self] = 1;
        while (flags[other] == 1)
        {
        }

        res += n;

        /* leave critical section: lower our flag */
        flags[self] = 0;
    }

    return 0;
}
// Peterson's Algorithm for two threads: a thread raises its flag, yields the
// turn to the other thread, and then waits while the other thread is
// interested AND it is the other thread's turn.
//
// The algorithm solves the critical section problem correctly in theory, but
// does not work reliably in practice due to hardware effects and the
// non-atomic nature of C statements.
//
// args points to an int holding this thread's id (0 or 1). Always returns 0.
thread_helper_return_t
sum_peterson (void *args)
{
    static volatile int flags[2] = { 0 };
    static volatile int turn = 0;
    const int self = *((int*)args);
    const int other = self ^ 1;
    unsigned long n;

    for (n = self; n <= SUM_TO; n += 2)
    {
        /* enter critical section */
        flags[self] = 1;
        turn = other;
        // NOTE: no memory barrier (e.g. __sync_synchronize()) separates the
        // two stores above from the loads in the wait loop below, so the
        // hardware may reorder them.
        while (flags[other] == 1 && turn == other)
        {
        }

        res += n;

        /* leave critical section */
        flags[self] = 0;
    }

    return 0;
}
// Dekker's Algorithm for two threads (ids 0 and 1). Each thread raises its
// flag to announce interest; on contention, the thread whose turn it is NOT
// backs off by lowering its flag until the turn is handed over.
//
// Similarly to Peterson's Algorithm, this approach solves the problem in
// theory, but does not perform correctly in practice.
//
// args points to an int holding this thread's id. Always returns 0.
thread_helper_return_t
sum_dekker (void *args)
{
int id = *((int*)args);
// flags[t] == 1 means thread t wants to enter (or is inside) the critical
// section; turn names the thread that has priority on contention.
static volatile int flags[2] = { 0 };
static volatile int turn = 0;
unsigned long i;
// each thread sums every second integer, starting at its own id
for (i = id; i <= SUM_TO; i += 2)
{
/* enter critical section *********************************************/
flags[id] = 1;
// as long as the other thread is interested as well ...
while (flags[id ^ 1] == 1)
// ... and it is the other thread's turn, withdraw our claim, wait for the
// turn to be handed over, then raise the flag again and re-check
if (turn == (id ^ 1))
{
flags[id] = 0;
while (turn == (id ^ 1));
flags[id] = 1;
}
/**********************************************************************/
res += i;
/* leave critical section *********************************************/
// hand priority to the other thread before dropping our own claim
turn = id ^ 1;
flags[id] = 0;
/**********************************************************************/
}
return 0;
}
// Helper for the Bakery algorithm: returns the largest value among the first
// n entries of v. The running maximum starts at 0, so the result is 0 when n
// is 0 or when no entry is positive.
//
// The maximum is tracked and returned as long long, matching the element type
// of v, so large ticket numbers are not truncated.
static long long
bakery_max (volatile long long int *v, size_t n)
{
    long long max = 0;

    for (size_t i = 0; i < n; ++i)
    {
        // read the volatile slot exactly once, so the comparison and the
        // assignment are guaranteed to use the same value
        long long cur = v[i];

        if (cur > max)
        {
            max = cur;
        }
    }

    return max;
}
// Lamport's Bakery algorithm for two or more threads. This is the first of
// the software approaches implemented here that supports more than two
// threads. However, it suffers from the same caveats as Peterson's algorithm
// and Dekker's algorithm and does not work correctly in practice.
//
// Additionally, it becomes clear that the runtime overhead of making decisions
// with these sorts of algorithms is infeasibly high, and does not scale well
// with the number of threads involved in the waiting queue.
//
// args points to an int holding this thread's id. Always returns 0.
thread_helper_return_t
sum_bakery (void *args)
{
int id = *((int*)args);
// choosing[t] == 1 while thread t is in the middle of picking a ticket;
// num[t] is thread t's current ticket number, 0 meaning "not waiting".
static volatile int choosing[THREADS] = { 0 };
static volatile long long int num[THREADS] = { 0 };
unsigned long i;
for (i = id; i <= SUM_TO; i += THREADS)
{
/* enter critical section *********************************************/
// take a ticket one higher than the highest ticket currently held
choosing[id] = 1;
num[id] = bakery_max(num, THREADS) + 1;
choosing[id] = 0;
int j;
for (j = 0; j < THREADS; ++j)
{
// wait until thread j has finished picking its ticket
while (choosing[j] == 1);
// wait while thread j holds a ticket that goes before ours: a smaller
// number, or the same number with a smaller thread id as tie-breaker
while ((num[j] != 0) && (num[j] < num[id] || (num[j] == num[id] && j < id)));
}
/**********************************************************************/
res += i;
/* leave critical section *********************************************/
// give up our ticket
num[id] = 0;
/**********************************************************************/
}
return 0;
}
// Guards the critical section with a spin lock built on the atomic hardware
// instruction test_and_set (via the thread_helper wrappers). By delegating
// the problem from software to hardware, this avoids the issues of the
// software-only approaches above and supports any number of threads.
//
// However, waiting for entrance to the critical section is still implemented
// through busy waiting, which wastes CPU resources when the other threads
// take a long time in their critical section.
//
// args points to an int holding this thread's id. Always returns 0.
thread_helper_return_t
sum_test_and_set (void *args)
{
    int id = *((int*)args);
    static int lock = 0;
    unsigned long i;

    for (i = id; i <= SUM_TO; i += THREADS)
    {
        /* enter critical section */
        // Retry the test-and-set until it reports success (returns zero;
        // presumably it returns the previous lock value - confirm against
        // thread_helper.h).
        while (thread_helper_test_and_set_lock(&lock))
        {
            // Between attempts, spin on plain reads until the lock looks
            // free. `lock` is not declared volatile, so the read goes through
            // a volatile-qualified lvalue; otherwise the compiler may load it
            // once, hoist the load out of the loop and spin forever.
            while (*(volatile int *)&lock)
            {
            }
        }

        res += i;

        /* leave critical section */
        thread_helper_test_and_set_unlock(&lock);
    }

    return 0;
}
// Mutex shared by all threads running sum_semaphore(). main() initializes it
// with thread_helper_mutex_init() before any thread is created.
thread_helper_mutex_t mutex;
// Synchronizes the critical section with the shared `mutex` through the
// thread_helper mutex API, i.e. by using operating system facilities. These
// APIs internally synchronize the participating threads using hardware
// constructs like test_and_set, but instead of busy waiting they make
// scheduling decisions: a thread blocked on a locked mutex is taken off the
// CPU to make room for other threads.
//
// This is usually the most efficient construct to use, and the easiest to get
// right without any caveats.
//
// args points to an int holding this thread's id. Always returns 0.
thread_helper_return_t
sum_semaphore (void *args)
{
    const int id = *((int*)args);
    unsigned long n = id;

    while (n <= SUM_TO)
    {
        /* enter critical section */
        thread_helper_mutex_lock(&mutex);

        res += n;

        /* leave critical section */
        thread_helper_mutex_unlock(&mutex);

        n += THREADS;
    }

    return 0;
}
// This function is a blank space for you to experiment with your own
// solutions. Be creative, but remember that solutions based only on software
// have been shown above to fail in non-trivial ways.
//
// As written, the critical section is unprotected: the enter and leave hooks
// are placeholders. args points to an int holding this thread's id. Always
// returns 0.
//
// NOTE(review): the loop strides by THREADS, while the guard table registers
// this function with a limit of two threads. If THREADS is raised above 2,
// main() still starts only two threads and part of the range is skipped -
// confirm which of the two is intended.
thread_helper_return_t
sum_custom (void *args)
{
int id = *((int*)args);
unsigned long i;
for (i = id; i <= SUM_TO; i += THREADS)
{
/* enter critical section *********************************************/
// TODO!
/**********************************************************************/
res += i;
/* leave critical section *********************************************/
// TODO!
/**********************************************************************/
}
return 0;
}
// the code below chooses the active guard type from the functions above by
// checking the preprocessor macros passed in the GNUMakefile.
// Describes one selectable guard implementation.
struct guard_type_t
{
// thread entry point that implements this guard
thread_func_t func;
// human-readable name, printed by main() when the experiment starts
const char *name;
// maximum number of threads the guard supports; 0 means no limit
size_t max_threads;
};
static const struct guard_type_t guard =
#if defined(HAVE_UNGUARDED)
{ sum_unguarded, "unguarded", 0 };
#elif defined(HAVE_TURNS)
{ sum_turns, "take turns", 2 };
#elif defined(HAVE_FLAGS)
{ sum_flags, "raise flags", 2 };
#elif defined(HAVE_PETERSON)
{ sum_peterson, "Peterson's Algorithm", 2 };
#elif defined(HAVE_DEKKER)
{ sum_dekker, "Dekker's Algorithm", 2 };
#elif defined(HAVE_BAKERY)
{ sum_bakery, "Bakery Algorithm (Lamport)", 0 };
#elif defined(HAVE_TEST_AND_SET)
{ sum_test_and_set, "test&set", 0 };
#elif defined(HAVE_SEMAPHORE)
{ sum_semaphore, "semaphore", 0 };
#elif defined(HAVE_CUSTOM)
{ sum_custom, "custom", 2 };
#endif
// Program entry point: starts the worker threads for the selected guard type,
// waits for them to finish, and prints the computed sum next to the expected
// one. Returns 0 on success and 1 if a thread could not be created or joined.
int
main (void)
{
    // thread objects and the per-thread id arguments handed to the workers
    thread_helper_t threads[THREADS] = { 0 };
    int args[THREADS] = { 0 };
    size_t nthreads = THREADS;
    size_t t;

    // the shared mutex must be ready before any worker can try to lock it
    thread_helper_mutex_init(&mutex);

    // cap the thread count at what the selected guard type supports
    // (max_threads == 0 means the guard imposes no limit)
    if (guard.max_threads > 0 && guard.max_threads < THREADS)
    {
        nthreads = guard.max_threads;
    }

    printf("starting experiment \"%s\" with %zu threads\n", guard.name, nthreads);

    // create the threads; each one starts executing immediately
    for (t = 0; t < nthreads; ++t)
    {
        args[t] = t;
        if (thread_helper_create(&threads[t], guard.func, &args[t]) != 0)
        {
            perror("thread_helper_create");
            return 1;
        }
    }

    // join the threads; this blocks until every worker has terminated
    for (t = 0; t < nthreads; ++t)
    {
        if (thread_helper_join(threads[t]) != 0)
        {
            perror("thread_helper_join");
            return 1;
        }
    }

    // Print the result. For most guard types the computed sum will usually
    // not add up to the expected value of n * (n + 1) / 2. The actual result
    // is unpredictable and appears random, even though it is not truly random.
    printf("sum is: %20llu\n", res);
    printf("sum should be: %20llu\n", (SUM_TO * (SUM_TO + 1)) / 2);

    return 0;
}