From 1148f74c1ca07d6ec13e058154cda23891193842 Mon Sep 17 00:00:00 2001 From: David Llewellyn-Jones Date: Sun, 19 May 2024 10:25:16 +0100 Subject: [PATCH] Align matmul-c tests with matmul-julia tests Updates the matmul-c square matrix scaling tests to align with mhauru's Julia tests. The benchmarks now perform 19 tests with increasingly large square matrices. Single-threaded and multi-threaded matrix multiplication tests are both performed. The results are stored in the same results.csv file so that everything can be plotted together. --- implementation/matmul-c/Makefile | 1 + implementation/matmul-c/include/benchmarks.h | 3 +- implementation/matmul-c/include/load.h | 2 +- implementation/matmul-c/include/matrix.h | 6 +- implementation/matmul-c/include/operations.h | 2 +- .../matmul-c/include/parse_header.h | 2 +- implementation/matmul-c/include/store.h | 30 +++ implementation/matmul-c/include/tests.h | 2 +- implementation/matmul-c/include/threadpool.h | 5 +- implementation/matmul-c/include/utils.h | 4 +- implementation/matmul-c/main.c | 16 +- implementation/matmul-c/src/benchmarks.c | 196 +++++++++++++++++- implementation/matmul-c/src/matrix.c | 11 +- implementation/matmul-c/src/store.c | 127 ++++++++++++ implementation/matmul-c/src/threadpool.c | 35 ++-- implementation/matmul-c/src/utils.c | 10 +- 16 files changed, 404 insertions(+), 48 deletions(-) create mode 100644 implementation/matmul-c/include/store.h create mode 100644 implementation/matmul-c/src/store.c diff --git a/implementation/matmul-c/Makefile b/implementation/matmul-c/Makefile index 4f0a759..51e55b1 100644 --- a/implementation/matmul-c/Makefile +++ b/implementation/matmul-c/Makefile @@ -29,6 +29,7 @@ matmul-c: \ src/threadpool.c \ src/utils.c \ src/benchmarks.c \ + src/store.c \ src/tests.c $(CC) $(CFLAGS) -o$@ $^ $(CLIBS) diff --git a/implementation/matmul-c/include/benchmarks.h b/implementation/matmul-c/include/benchmarks.h index 9d1ed87..e79cd38 100644 --- 
a/implementation/matmul-c/include/benchmarks.h +++ b/implementation/matmul-c/include/benchmarks.h @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ @@ -19,6 +19,7 @@ void benchmarks_end(Benchmark *benchmark); void benchmark_set_quiet(Benchmark *benchmark, bool quiet); void benchmarks_multiply_big(ThreadPool *pool); void benchmarks_multiply_small(Matrices *a, Matrices *b, Matrices *d); +void benchmark_multiply_square(ThreadPool *pool); #endif /* __MATRIX_BENCHMARKS_H */ diff --git a/implementation/matmul-c/include/load.h b/implementation/matmul-c/include/load.h index 0af72a4..8c38841 100644 --- a/implementation/matmul-c/include/load.h +++ b/implementation/matmul-c/include/load.h @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ diff --git a/implementation/matmul-c/include/matrix.h b/implementation/matmul-c/include/matrix.h index 4d15c69..0767622 100644 --- a/implementation/matmul-c/include/matrix.h +++ b/implementation/matmul-c/include/matrix.h @@ -1,10 +1,12 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ #include +#include "utils.h" + #ifndef __MATRIX_MATRIX_H #define __MATRIX_MATRIX_H (1) @@ -18,6 +20,6 @@ Matrix * new_matrix(uint16_t height, uint16_t width); Matrix * delete_matrix(Matrix *A); Matrix * new_matrix_identity(uint16_t height, uint16_t width); void matrix_print(Matrix *A); -void matrix_fill(Matrix *A, uint32_t seed); +void matrix_fill(Matrix *A, Rand * const rand); #endif /* __MATRIX_MATRIX_H */ diff --git a/implementation/matmul-c/include/operations.h b/implementation/matmul-c/include/operations.h index ecc01a2..a812aca 100644 --- a/implementation/matmul-c/include/operations.h +++ b/implementation/matmul-c/include/operations.h @@ 
-1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ diff --git a/implementation/matmul-c/include/parse_header.h b/implementation/matmul-c/include/parse_header.h index 94c1c7d..67deb74 100644 --- a/implementation/matmul-c/include/parse_header.h +++ b/implementation/matmul-c/include/parse_header.h @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ diff --git a/implementation/matmul-c/include/store.h b/implementation/matmul-c/include/store.h new file mode 100644 index 0000000..664178a --- /dev/null +++ b/implementation/matmul-c/include/store.h @@ -0,0 +1,30 @@ +/* vim: noet:ts=2:sts=2:sw=2 */ + +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2024 David Llewellyn-Jones */ + +#include +#include + +#ifndef __MATRIX_STORE_H +#define __MATRIX_STORE_H (1) + +typedef struct _Store { + // Pointer to the data + char * data; + // The allocated size + size_t size; + // The size of the data store in the allocation + size_t length; + // The chunk size to allocate blocks in + size_t chunk_size; +} Store; + +Store * new_store(size_t chunk_size); +Store * delete_store(Store *store); +bool store_append(Store * const store, char const * const data, size_t length); +bool store_setsize(Store * const store, size_t size); +size_t store_printf (Store * const store, char const * const format, ...); +size_t store_printf_append (Store * const store, char const * const format, ...); + +#endif /* __MATRIX_STORE_H */ diff --git a/implementation/matmul-c/include/tests.h b/implementation/matmul-c/include/tests.h index 6c3c38d..5573320 100644 --- a/implementation/matmul-c/include/tests.h +++ b/implementation/matmul-c/include/tests.h @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David 
Llewellyn-Jones */ diff --git a/implementation/matmul-c/include/threadpool.h b/implementation/matmul-c/include/threadpool.h index 21af958..ae5d6cf 100644 --- a/implementation/matmul-c/include/threadpool.h +++ b/implementation/matmul-c/include/threadpool.h @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ @@ -17,8 +17,9 @@ typedef struct _ThreadPool ThreadPool; -ThreadPool * new_threadpool(); +ThreadPool * new_threadpool(uint32_t threads); ThreadPool * delete_threadpool(ThreadPool *pool); bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B); +uint32_t threadpool_threads(ThreadPool *pool); #endif /* __MATRIX_THREADPOOL_H */ diff --git a/implementation/matmul-c/include/utils.h b/implementation/matmul-c/include/utils.h index 8517d70..e7223ff 100644 --- a/implementation/matmul-c/include/utils.h +++ b/implementation/matmul-c/include/utils.h @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ @@ -14,6 +14,6 @@ Rand * new_rand(); Rand * delete_rand(Rand *rand); void rand_seed(Rand *rand, uint32_t seed); double rand_next(Rand *rand); -double rand_digit(Rand *rand); +double rand_value(Rand *rand); #endif /* __MATRIX_UTILS_H */ diff --git a/implementation/matmul-c/main.c b/implementation/matmul-c/main.c index 17953cf..2a67714 100644 --- a/implementation/matmul-c/main.c +++ b/implementation/matmul-c/main.c @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ @@ -20,7 +20,7 @@ int main(int argc, char *argv[]) { bool result; uint32_t total; - ThreadPool *pool = new_threadpool(); + ThreadPool *pool = new_threadpool(10); // Play around with the API printf("Example matrix manipulation...\n"); @@ -55,11 +55,19 @@ int main(int argc, 
char *argv[]) { // Perform 512 multiplications and compare against the results from NumPy tests_compare(a, b, c, d, pool); + // Benchmark square matrix multiplications single-threaded + printf("Square matrix benchmark single-threaded\n"); + benchmark_multiply_square(NULL); + + // Benchmark square matrix multiplications using threads + printf("Square matrix benchmark multi-threaded\n"); + benchmark_multiply_square(pool); + // Benchmark large matrix multiplications - benchmarks_multiply_big(pool); + //benchmarks_multiply_big(pool); // Measure time taken to perform 16777216 multiplications - benchmarks_multiply_small(a, b, d); + //benchmarks_multiply_small(a, b, d); a = delete_matrices(a); b = delete_matrices(b); diff --git a/implementation/matmul-c/src/benchmarks.c b/implementation/matmul-c/src/benchmarks.c index bbd9446..db2df8b 100644 --- a/implementation/matmul-c/src/benchmarks.c +++ b/implementation/matmul-c/src/benchmarks.c @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ @@ -10,6 +10,7 @@ #include "load.h" #include "operations.h" #include "threadpool.h" +#include "store.h" #include "benchmarks.h" @@ -19,10 +20,12 @@ #define HEIGHT (2048) #define WIDTH (2048) +#define ALLOC_CHUNK (1024) + struct _Benchmark { struct timespec start_time; struct timespec end_time; - uint32_t operations; + uint64_t operations; double elapsed; double ops_per_sec; bool quiet; @@ -63,7 +66,7 @@ void benchmarks_end(Benchmark *benchmark) { benchmark->ops_per_sec = benchmark->operations / benchmark->elapsed; if (!benchmark->quiet) { - printf("Time taken to perform %u operations: %.02f seconds\n", benchmark->operations, benchmark->elapsed); + printf("Time taken to perform %" PRIu64 " operations: %.02f seconds\n", benchmark->operations, benchmark->elapsed); printf("Equivalent to %.02f operations per second\n", benchmark->ops_per_sec); } } @@ -74,12 +77,14 @@ void 
benchmarks_multiply_big(ThreadPool *pool) { Matrix *A; Matrix *B; Matrix *D; + Rand *rand; printf("\n"); printf("## Large matrix multiplication\n"); benchmark = new_benchmark(); benchmark_set_quiet(benchmark, true); + rand = new_rand(); for (uint32_t width = 128; width <= 2048; width += 128) { uint32_t diag = width; @@ -89,8 +94,10 @@ void benchmarks_multiply_big(ThreadPool *pool) { A = new_matrix(width, diag); B = new_matrix(diag, height); D = new_matrix(width, height); - matrix_fill(A, 8); - matrix_fill(B, 16); + rand_seed(rand, 8); + matrix_fill(A, rand); + rand_seed(rand, 16); + matrix_fill(B, rand); benchmarks_start(benchmark, repeat); for (uint32_t count = 0; count < repeat; ++count) { @@ -118,8 +125,10 @@ void benchmarks_multiply_big(ThreadPool *pool) { A = new_matrix(width, diag); B = new_matrix(diag, height); D = new_matrix(width, height); - matrix_fill(A, 8); - matrix_fill(B, 16); + rand_seed(rand, 8); + matrix_fill(A, rand); + rand_seed(rand, 16); + matrix_fill(B, rand); benchmarks_start(benchmark, repeat); for (uint32_t count = 0; count < repeat; ++count) { @@ -136,6 +145,7 @@ void benchmarks_multiply_big(ThreadPool *pool) { D = delete_matrix(D); } + rand = delete_rand(rand); benchmark = delete_benchmark(benchmark); } @@ -161,3 +171,175 @@ void benchmarks_multiply_small(Matrices *a, Matrices *b, Matrices *d) { benchmark = delete_benchmark(benchmark); } +char const * get_cell(char const * const line, size_t *length) { + char const * start = line; + char delimeter; + size_t pos; + + if (start[0] == '\"') { + delimeter = '\"'; + ++start; + } + else { + delimeter = ','; + } + pos = 0; + while (start[pos] != 0 && start[pos] != delimeter && start[pos] != '\n') { + ++pos; + } + + if (length) { + *length = pos; + } + return start; +} + +void export_data(char const * const filename, char const * const method, double const * const data, uint32_t const size) { + FILE * fh; + uint32_t pos; + char * line; + size_t len; + ssize_t read; + size_t length; + char const * 
line_method; + Store * store; + + store = new_store(ALLOC_CHUNK); + + // Read the existing data from file + fh = fopen(filename, "r"); + if (fh) { + line = NULL; + while (!feof(fh)) { + read = getline(&line, &len, fh); + if (read > 0) { + line_method = get_cell(line, &length); + if (strlen(method) != length || strncmp(method, line_method, length) != 0) { + store_append(store, line, read); + } + } + } + free(line); + fclose(fh); + } + + // Append the new data + store_printf_append(store, "\"%s\"", method); + for (pos = 0; pos < size; ++pos) { + store_printf_append(store, ",%g", data[pos]); + } + store_printf_append(store, "\n"); + + printf("Exporting results to: %s\n", filename); + fh = fopen(filename, "w"); + + if (fh) { + fwrite(store->data, sizeof(char), store->length, fh); + fclose(fh); + printf("Export complete\n"); + } + else { + printf("Export failed\n"); + } +} + +void benchmark_multiply_square(ThreadPool *pool) { + Benchmark *benchmark; + uint32_t const dims[] = {2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024}; + uint32_t const dim_num = sizeof(dims) / sizeof(dims[0]); + double times[dim_num]; + uint64_t const base_num_pairs = 2l << 31; + Rand * rand; + uint64_t pos; + uint64_t index; + Matrix * result; + uint32_t dim; + + rand = new_rand(); + rand_seed(rand, 42); + benchmark = new_benchmark(); + + // Loop through the matrix sizes + for (pos = 0; pos < dim_num; ++pos) { + dim = dims[pos]; + + printf("\nBenchmarking scaling with matrices of dimension %u\n", dim); + + // All square matrices + uint64_t const dim1 = dim; + uint64_t const dim2 = dim; + uint64_t const dim3 = dim; + + uint64_t num_pairs = base_num_pairs / (dim * dim * dim); + + // Use fewer pairs for small matrices + if (dim <= 2) { + num_pairs = num_pairs / 16; + } + else if (dim <= 8) { + num_pairs = num_pairs / 8; + } + else if (dim <= 2) { + num_pairs = num_pairs / 4; + } + printf("Using %" PRIu64 " pairs of matrices\n", num_pairs); + + // Create random matrices 
for our calculations + Matrix **as = calloc(num_pairs, sizeof(Matrix*)); + for (index = 0; index < num_pairs; ++index) { + as[index] = new_matrix(dim1, dim2); + matrix_fill(as[index], rand); + } + Matrix **bs = calloc(num_pairs, sizeof(Matrix*)); + for (index = 0; index < num_pairs; ++index) { + bs[index] = new_matrix(dim2, dim3); + matrix_fill(bs[index], rand); + } + printf("Matrices generated\n"); + + // Create a temporary array to store the result in + result = new_matrix(dim1, dim3); + + // Start benchmark + if (pool) { + benchmarks_start(benchmark, num_pairs); + for (index = 0; index < num_pairs; ++index) { + multiply_parallel(pool, result, as[index], bs[index]); + } + benchmarks_end(benchmark); + } + else { + for (index = 0; index < num_pairs; ++index) { + multiply(result, as[index], bs[index]); + } + benchmarks_end(benchmark); + } + // End benchmark + + double per_matrix_time = benchmark->elapsed / (double)benchmark->operations; + printf("Raw timing: %g\n", benchmark->elapsed); + printf("Per matrix time: %g\n", per_matrix_time); + times[pos] = per_matrix_time; + + // Clean up + result = delete_matrix(result); + for (index = 0; index < num_pairs; ++index) { + as[index] = delete_matrix(as[index]); + bs[index] = delete_matrix(bs[index]); + } + free(as); + free(bs); + } + benchmark = delete_benchmark(benchmark); + + Store *method = new_store(ALLOC_CHUNK); + if (pool) { + store_printf_append(method, "C naive, %u threads", threadpool_threads(pool)); + } + else { + store_printf_append(method, "C naive, single threaded"); + } + export_data("../results.csv", method->data, times, dim_num); + method = delete_store(method); +} + diff --git a/implementation/matmul-c/src/matrix.c b/implementation/matmul-c/src/matrix.c index b73907c..7144ed1 100644 --- a/implementation/matmul-c/src/matrix.c +++ b/implementation/matmul-c/src/matrix.c @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David 
Llewellyn-Jones */ @@ -58,17 +58,12 @@ void matrix_print(Matrix *A) { } } -void matrix_fill(Matrix *A, uint32_t seed) { - Rand * rand = new_rand(); - rand_seed(rand, seed); - +void matrix_fill(Matrix *A, Rand * const rand) { if (A) { uint32_t size = A->height * A->width; for (uint32_t index = 0; index < size; ++index) { - A->elements[index] = rand_digit(rand); + A->elements[index] = rand_value(rand); } } - - rand = delete_rand(rand); } diff --git a/implementation/matmul-c/src/store.c b/implementation/matmul-c/src/store.c new file mode 100644 index 0000000..ee86f4f --- /dev/null +++ b/implementation/matmul-c/src/store.c @@ -0,0 +1,127 @@ +/* vim: noet:ts=2:sts=2:sw=2 */ + +/* SPDX-License-Identifier: MIT */ +/* Copyright © 2024 David Llewellyn-Jones */ + +#include +#include +#include +#include + +#include "store.h" + +Store * new_store(size_t chunk_size) { + Store *store = malloc(sizeof(Store)); + + if (store) { + // Ensure the buffer is null terminated even for size 0 + store->data = calloc(sizeof(char), 1); + store->size = 1; + store->length = 0; + store->chunk_size = chunk_size; + } + return store; +} + +Store * delete_store(Store *store) { + if (store) { + if (store->data) { + free(store->data); + } + free(store); + } + return NULL; +} + +bool store_append(Store * const store, char const * const data, size_t length) { + bool success = false; + size_t size; + + size = store->length + length; + success = store_setsize(store, size); + if (success) { + memcpy(store->data + store->length, data, length); + store->length = size; + store->data[size] = 0; + } + return success; +} + +bool store_setsize(Store * const store, size_t size) { + bool success = false; + + if (store) { + // Ensure there's always space for a null terminator + size = size + 1; + // Quantize the size + size = ((size_t)(size / store->chunk_size) + 1) * store->chunk_size; + if (size != store->size) { + store->data = realloc(store->data, size); + if (store->data) { + store->size = size - 1; + if 
(store->length > store->size) { + store->length = store->size; + store->data[store->size] = 0; + printf("Set size to: %lu\n", store->size); + } + success = true; + } + } + else { + success = true; + } + } + return success; +} + +size_t store_printf (Store * const store, char const * const format, ...) { + int result; + size_t length; + + result = 0; + if (store) { + store->length = 0; + store->data[0] = 0; + va_list args; + + va_start (args, format); + length = vsnprintf(NULL, 0, format, args); + va_end (args); + + store_setsize(store, length); + + // This will be automatically null terminated + va_start (args, format); + result = vsnprintf(store->data, length + 1, format, args); + store->length += result; + va_end (args); + } + + return result; +} + +size_t store_printf_append (Store * const store, char const * const format, ...) { + int result; + size_t length; + + result = 0; + if (store) { + va_list args; + + va_start (args, format); + length = vsnprintf(NULL, 0, format, args); + va_end (args); + + store_setsize(store, store->length + length); + + // This will be automatically null terminated + va_start (args, format); + result = vsnprintf(store->data + store->length, length + 1, format, args); + store->length += result; + va_end (args); + } + + return result; +} + + diff --git a/implementation/matmul-c/src/threadpool.c b/implementation/matmul-c/src/threadpool.c index c1ecfa9..fbbce90 100644 --- a/implementation/matmul-c/src/threadpool.c +++ b/implementation/matmul-c/src/threadpool.c @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ @@ -10,8 +10,6 @@ #include "threadpool.h" -#define MAX_THREADS (8) - typedef struct _ThreadContext { pthread_mutex_t *working_mutex; pthread_cond_t *working_cond; @@ -28,13 +26,14 @@ typedef struct _ThreadContext { } ThreadContext; struct _ThreadPool { - pthread_t thread_id[MAX_THREADS]; - ThreadContext 
*context[MAX_THREADS]; + pthread_t *thread_id; + ThreadContext **context; pthread_mutex_t working_mutex; pthread_cond_t working_cond; pthread_mutex_t begin_mutex; pthread_cond_t begin_cond; + uint32_t threads; uint32_t working; }; @@ -65,19 +64,23 @@ inline void multiply_work(Matrix *result, Matrix *A, Matrix *B, uint32_t start, } } -ThreadPool * new_threadpool() { +ThreadPool * new_threadpool(uint32_t threads) { ThreadPool *pool = calloc(sizeof(ThreadPool), sizeof(char)); - if (pool) { + if (pool && threads > 0) { + pool->thread_id = calloc(sizeof(pthread_t), threads); + pool->context = calloc(sizeof(ThreadContext *), threads); + // Initialise the pool context pthread_mutex_init(&pool->working_mutex, NULL); pthread_cond_init(&pool->working_cond, NULL); pthread_mutex_init(&pool->begin_mutex, NULL); pthread_cond_init(&pool->begin_cond, NULL); - pool->working = MAX_THREADS; + pool->threads = threads; + pool->working = threads; // Initialise the threads - for (uint32_t thread = 0; thread < MAX_THREADS; ++thread) { + for (uint32_t thread = 0; thread < threads; ++thread) { pool->context[thread] = calloc(sizeof(ThreadContext), sizeof(char)); pool->context[thread]->working_mutex = &pool->working_mutex; pool->context[thread]->working_cond = &pool->working_cond; @@ -103,14 +106,14 @@ ThreadPool * delete_threadpool(ThreadPool *pool) { if (pool) { // Remove all work pthread_mutex_lock(&pool->begin_mutex); - for (uint32_t thread = 0; thread < MAX_THREADS; ++thread) { + for (uint32_t thread = 0; thread < pool->threads; ++thread) { pool->context[thread]->live = false; } pthread_cond_broadcast(&pool->begin_cond); pthread_mutex_unlock(&pool->begin_mutex); // Wait for the threads to complete - for (uint32_t thread = 0; thread < MAX_THREADS; ++thread) { + for (uint32_t thread = 0; thread < pool->threads; ++thread) { pthread_join(pool->thread_id[thread], NULL); free(pool->context[thread]); } @@ -120,6 +123,8 @@ ThreadPool * delete_threadpool(ThreadPool *pool) { 
pthread_mutex_destroy(&pool->begin_mutex); pthread_cond_destroy(&pool->begin_cond); + free(pool->thread_id); + free(pool->context); free(pool); } return NULL; @@ -155,7 +160,7 @@ void *thread_runner(void *vargp) { bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B) { uint32_t size = result->height * result->width; - uint32_t chunk = (size + (MAX_THREADS - 1)) / MAX_THREADS; + uint32_t chunk = (size + (pool->threads - 1)) / pool->threads; uint32_t allocated = 0; uint32_t thread = 0; @@ -175,7 +180,7 @@ bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B) { // Trigger the runners to work pthread_mutex_lock(&pool->begin_mutex); - pool->working = MAX_THREADS; + pool->working = pool->threads; pthread_cond_broadcast(&pool->begin_cond); pthread_mutex_unlock(&pool->begin_mutex); @@ -189,3 +194,7 @@ bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B) { return true; } +uint32_t threadpool_threads(ThreadPool *pool) { + return pool->threads; +} + diff --git a/implementation/matmul-c/src/utils.c b/implementation/matmul-c/src/utils.c index b157583..3df29a4 100644 --- a/implementation/matmul-c/src/utils.c +++ b/implementation/matmul-c/src/utils.c @@ -1,4 +1,4 @@ -/* vim: noet:ts=2:sts=2:sw=2 */ +/* vim: noet:ts=2:sts=2:sw=2 */ /* SPDX-License-Identifier: MIT */ /* Copyright © 2024 David Llewellyn-Jones */ @@ -6,6 +6,7 @@ #include #include "utils.h" +#include "matrix.h" #define A (16807) #define C (0) @@ -35,16 +36,15 @@ void rand_seed(Rand *rand, uint32_t seed) { // A decimal between 0.0 and 1.0 // See https://www.math.arizona.edu/~tgk/mc/book_chap3.pdf -double rand_next(Rand *rand) { +inline double rand_next(Rand *rand) { rand->state = ((A * rand->state) + C) % M; return (double)rand->state / (double)M; } -// A digit between 0.0 and 100.0 -double rand_digit(Rand *rand) { +// A value between 0.0 and 100.0 +inline double rand_value(Rand *rand) { double result; result = rand_next(rand); - result = 
(double)rand->state / (double)M; result = ((int)(result * 1000.0)) / 10.0; return result; }