From 1148f74c1ca07d6ec13e058154cda23891193842 Mon Sep 17 00:00:00 2001
From: David Llewellyn-Jones <dllewellyn-jones@turing.ac.uk>
Date: Sun, 19 May 2024 10:25:16 +0100
Subject: [PATCH] Align matmul-c tests with matmul-julia tests

Updates the matmul-c square matrix scaling tests to align with mhauru's
Julia tests.

The benchmarks now perform 19 tests with increasingly large square
matrices. Single-threaded and multi-threaded matrix multiplication tests
are both performed. The results are stored in the same results.csv file
so that everything can be plotted together.
---
 implementation/matmul-c/Makefile              |   1 +
 implementation/matmul-c/include/benchmarks.h  |   3 +-
 implementation/matmul-c/include/load.h        |   2 +-
 implementation/matmul-c/include/matrix.h      |   6 +-
 implementation/matmul-c/include/operations.h  |   2 +-
 .../matmul-c/include/parse_header.h           |   2 +-
 implementation/matmul-c/include/store.h       |  30 +++
 implementation/matmul-c/include/tests.h       |   2 +-
 implementation/matmul-c/include/threadpool.h  |   5 +-
 implementation/matmul-c/include/utils.h       |   4 +-
 implementation/matmul-c/main.c                |  16 +-
 implementation/matmul-c/src/benchmarks.c      | 196 +++++++++++++++++-
 implementation/matmul-c/src/matrix.c          |  11 +-
 implementation/matmul-c/src/store.c           | 127 ++++++++++++
 implementation/matmul-c/src/threadpool.c      |  35 ++--
 implementation/matmul-c/src/utils.c           |  10 +-
 16 files changed, 404 insertions(+), 48 deletions(-)
 create mode 100644 implementation/matmul-c/include/store.h
 create mode 100644 implementation/matmul-c/src/store.c

diff --git a/implementation/matmul-c/Makefile b/implementation/matmul-c/Makefile
index 4f0a759..51e55b1 100644
--- a/implementation/matmul-c/Makefile
+++ b/implementation/matmul-c/Makefile
@@ -29,6 +29,7 @@ matmul-c: \
 	src/threadpool.c \
 	src/utils.c \
 	src/benchmarks.c \
+	src/store.c \
 	src/tests.c
 	$(CC) $(CFLAGS) -o$@ $^ $(CLIBS)
 
diff --git a/implementation/matmul-c/include/benchmarks.h b/implementation/matmul-c/include/benchmarks.h
index 9d1ed87..e79cd38 100644
--- a/implementation/matmul-c/include/benchmarks.h
+++ b/implementation/matmul-c/include/benchmarks.h
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -19,6 +19,7 @@ void benchmarks_end(Benchmark *benchmark);
 void benchmark_set_quiet(Benchmark *benchmark, bool quiet);
 void benchmarks_multiply_big(ThreadPool *pool);
 void benchmarks_multiply_small(Matrices *a, Matrices *b, Matrices *d);
+void benchmark_multiply_square(ThreadPool *pool);
 
 #endif /* __MATRIX_BENCHMARKS_H */
 
diff --git a/implementation/matmul-c/include/load.h b/implementation/matmul-c/include/load.h
index 0af72a4..8c38841 100644
--- a/implementation/matmul-c/include/load.h
+++ b/implementation/matmul-c/include/load.h
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
diff --git a/implementation/matmul-c/include/matrix.h b/implementation/matmul-c/include/matrix.h
index 4d15c69..0767622 100644
--- a/implementation/matmul-c/include/matrix.h
+++ b/implementation/matmul-c/include/matrix.h
@@ -1,10 +1,12 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
 
 #include <stdint.h>
 
+#include "utils.h"
+
 #ifndef __MATRIX_MATRIX_H
 #define __MATRIX_MATRIX_H (1)
 
@@ -18,6 +20,6 @@ Matrix * new_matrix(uint16_t height, uint16_t width);
 Matrix * delete_matrix(Matrix *A);
 Matrix * new_matrix_identity(uint16_t height, uint16_t width);
 void matrix_print(Matrix *A);
-void matrix_fill(Matrix *A, uint32_t seed);
+void matrix_fill(Matrix *A, Rand * const rand);
 
 #endif /* __MATRIX_MATRIX_H */
diff --git a/implementation/matmul-c/include/operations.h b/implementation/matmul-c/include/operations.h
index ecc01a2..a812aca 100644
--- a/implementation/matmul-c/include/operations.h
+++ b/implementation/matmul-c/include/operations.h
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
diff --git a/implementation/matmul-c/include/parse_header.h b/implementation/matmul-c/include/parse_header.h
index 94c1c7d..67deb74 100644
--- a/implementation/matmul-c/include/parse_header.h
+++ b/implementation/matmul-c/include/parse_header.h
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
diff --git a/implementation/matmul-c/include/store.h b/implementation/matmul-c/include/store.h
new file mode 100644
index 0000000..664178a
--- /dev/null
+++ b/implementation/matmul-c/include/store.h
@@ -0,0 +1,30 @@
+/* vim: noet:ts=2:sts=2:sw=2 */
+
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2024 David Llewellyn-Jones */
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#ifndef __MATRIX_STORE_H
+#define __MATRIX_STORE_H (1)
+
+typedef struct _Store {
+	// Heap buffer holding the stored bytes; it is kept null terminated
+	char * data;
+	// The size recorded for the allocation behind data, in bytes
+	size_t size;
+	// Number of bytes of data held, not counting the null terminator
+	size_t length;
+	// Allocations are rounded up to a multiple of this many bytes
+	size_t chunk_size;
+} Store;
+
+Store * new_store(size_t chunk_size);
+Store * delete_store(Store *store);
+bool store_append(Store * const store, char const * const data, size_t length);
+bool store_setsize(Store * const store, size_t size);
+size_t store_printf (Store * const store, char const * const format, ...);
+size_t store_printf_append (Store * const store, char const * const format, ...);
+
+#endif /* __MATRIX_STORE_H */
diff --git a/implementation/matmul-c/include/tests.h b/implementation/matmul-c/include/tests.h
index 6c3c38d..5573320 100644
--- a/implementation/matmul-c/include/tests.h
+++ b/implementation/matmul-c/include/tests.h
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
diff --git a/implementation/matmul-c/include/threadpool.h b/implementation/matmul-c/include/threadpool.h
index 21af958..ae5d6cf 100644
--- a/implementation/matmul-c/include/threadpool.h
+++ b/implementation/matmul-c/include/threadpool.h
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -17,8 +17,9 @@
 
 typedef struct _ThreadPool ThreadPool;
 
-ThreadPool * new_threadpool();
+ThreadPool * new_threadpool(uint32_t threads);
 ThreadPool * delete_threadpool(ThreadPool *pool);
 bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B);
+uint32_t threadpool_threads(ThreadPool *pool);
 
 #endif /* __MATRIX_THREADPOOL_H */
diff --git a/implementation/matmul-c/include/utils.h b/implementation/matmul-c/include/utils.h
index 8517d70..e7223ff 100644
--- a/implementation/matmul-c/include/utils.h
+++ b/implementation/matmul-c/include/utils.h
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -14,6 +14,6 @@ Rand * new_rand();
 Rand * delete_rand(Rand *rand);
 void rand_seed(Rand *rand, uint32_t seed);
 double rand_next(Rand *rand);
-double rand_digit(Rand *rand);
+double rand_value(Rand *rand);
 
 #endif /* __MATRIX_UTILS_H */
diff --git a/implementation/matmul-c/main.c b/implementation/matmul-c/main.c
index 17953cf..2a67714 100644
--- a/implementation/matmul-c/main.c
+++ b/implementation/matmul-c/main.c
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -20,7 +20,7 @@ int main(int argc, char *argv[]) {
 	bool result;
 	uint32_t total;
 
-	ThreadPool *pool = new_threadpool();
+	ThreadPool *pool = new_threadpool(10);
 
 	// Play around with the API
 	printf("Example matrix manipulation...\n");	
@@ -55,11 +55,19 @@ int main(int argc, char *argv[]) {
 	// Perform 512 multiplications and compare against the results from NumPy
 	tests_compare(a, b, c, d, pool);
 
+	// Benchmark square matrix multiplications single-threaded
+	printf("Square matrix benchmark single-threaded\n");
+	benchmark_multiply_square(NULL);
+
+	// Benchmark square matrix multiplications using threads
+	printf("Square matrix benchmark multi-threaded\n");
+	benchmark_multiply_square(pool);
+
 	// Benchmark large matrix multiplications
-	benchmarks_multiply_big(pool);
+	//benchmarks_multiply_big(pool);
 
 	// Measure time taken to perform 16777216 multiplications
-	benchmarks_multiply_small(a, b, d);
+	//benchmarks_multiply_small(a, b, d);
 
 	a = delete_matrices(a);
 	b = delete_matrices(b);
diff --git a/implementation/matmul-c/src/benchmarks.c b/implementation/matmul-c/src/benchmarks.c
index bbd9446..db2df8b 100644
--- a/implementation/matmul-c/src/benchmarks.c
+++ b/implementation/matmul-c/src/benchmarks.c
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -10,6 +10,7 @@
 #include "load.h"
 #include "operations.h"
 #include "threadpool.h"
+#include "store.h"
 
 #include "benchmarks.h"
 
@@ -19,10 +20,12 @@
 #define HEIGHT (2048)
 #define WIDTH (2048)
 
+#define ALLOC_CHUNK (1024)
+
 struct _Benchmark {
 	struct timespec start_time;
 	struct timespec end_time;
-	uint32_t operations;
+	uint64_t operations;
 	double elapsed;
 	double ops_per_sec;
 	bool quiet;
@@ -63,7 +66,7 @@ void benchmarks_end(Benchmark *benchmark) {
 		benchmark->ops_per_sec = benchmark->operations / benchmark->elapsed;
 
 		if (!benchmark->quiet) {
-			printf("Time taken to perform %u operations: %.02f seconds\n", benchmark->operations, benchmark->elapsed);
+			printf("Time taken to perform %" PRIu64 " operations: %.02f seconds\n", benchmark->operations, benchmark->elapsed);
 			printf("Equivalent to %.02f operations per second\n", benchmark->ops_per_sec);
 		}
 	}
@@ -74,12 +77,14 @@ void benchmarks_multiply_big(ThreadPool *pool) {
 	Matrix *A;
 	Matrix *B;
 	Matrix *D;
+	Rand *rand;
 
 	printf("\n");
 	printf("## Large matrix multiplication\n");
 
 	benchmark = new_benchmark();
 	benchmark_set_quiet(benchmark, true);
+	rand = new_rand();
 
 	for (uint32_t width = 128; width <= 2048; width += 128) {
 		uint32_t diag = width;
@@ -89,8 +94,10 @@ void benchmarks_multiply_big(ThreadPool *pool) {
 		A = new_matrix(width, diag);
 		B = new_matrix(diag, height);
 		D = new_matrix(width, height);
-		matrix_fill(A, 8);
-		matrix_fill(B, 16);
+		rand_seed(rand, 8);
+		matrix_fill(A, rand);
+		rand_seed(rand, 16);
+		matrix_fill(B, rand);
 
 		benchmarks_start(benchmark, repeat);
 		for (uint32_t count = 0; count < repeat; ++count) {
@@ -118,8 +125,10 @@ void benchmarks_multiply_big(ThreadPool *pool) {
 		A = new_matrix(width, diag);
 		B = new_matrix(diag, height);
 		D = new_matrix(width, height);
-		matrix_fill(A, 8);
-		matrix_fill(B, 16);
+		rand_seed(rand, 8);
+		matrix_fill(A, rand);
+		rand_seed(rand, 16);
+		matrix_fill(B, rand);
 
 		benchmarks_start(benchmark, repeat);
 		for (uint32_t count = 0; count < repeat; ++count) {
@@ -136,6 +145,7 @@ void benchmarks_multiply_big(ThreadPool *pool) {
 		D = delete_matrix(D);
 	}
 
+	rand = delete_rand(rand);
 	benchmark = delete_benchmark(benchmark);
 }
 
@@ -161,3 +171,175 @@ void benchmarks_multiply_small(Matrices *a, Matrices *b, Matrices *d) {
 	benchmark = delete_benchmark(benchmark);
 }
 
+// Find the first cell of a CSV line. A cell opening with a double quote runs
+// to the closing quote, any other cell runs to the next comma; a newline or
+// the end of the string also ends the cell. Returns the start of the cell
+// (after any opening quote) and, if length is non-NULL, stores its size there.
+char const * get_cell(char const * const line, size_t *length) {
+	char const * const cell = (line[0] == '\"') ? line + 1 : line;
+	char const terminator = (cell != line) ? '\"' : ',';
+	size_t count = 0;
+
+	for (;;) {
+		char const current = cell[count];
+		if (current == 0 || current == terminator || current == '\n') {
+			break;
+		}
+		++count;
+	}
+
+	if (length) {
+		*length = count;
+	}
+	return cell;
+}
+
+// Write a row of results to the CSV file filename, labelled with method.
+// Rows already in the file are kept, apart from rows whose first cell equals
+// method, which are dropped so the new row replaces them. The new row is the
+// quoted method name followed by the size values of data, comma separated.
+void export_data(char const * const filename, char const * const method, double const * const data, uint32_t const size) {
+	FILE * fh;
+	char * line = NULL;
+	size_t len = 0;
+	ssize_t read;
+	size_t length;
+	size_t const method_length = strlen(method);
+	Store * store = new_store(ALLOC_CHUNK);
+
+	if (!store || !store->data) {
+		printf("Export failed\n");
+		store = delete_store(store);
+		return;
+	}
+
+	// Read the existing data from file; getline returns -1 at the end of the
+	// file and on error, so the loop cannot spin on a failing stream
+	fh = fopen(filename, "r");
+	if (fh) {
+		while ((read = getline(&line, &len, fh)) > 0) {
+			char const * const line_method = get_cell(line, &length);
+			if (method_length != length || strncmp(method, line_method, length) != 0) {
+				store_append(store, line, read);
+			}
+		}
+		free(line);
+		fclose(fh);
+	}
+
+	// Append the new data
+	store_printf_append(store, "\"%s\"", method);
+	for (uint32_t pos = 0; pos < size; ++pos) {
+		store_printf_append(store, ",%g", data[pos]);
+	}
+	store_printf_append(store, "\n");
+
+	printf("Exporting results to: %s\n", filename);
+	fh = fopen(filename, "w");
+	bool written = fh && fwrite(store->data, sizeof(char), store->length, fh) == store->length;
+	// fclose flushes the buffered output, so its result decides success too
+	written = (fh && fclose(fh) == 0) && written;
+	printf("%s\n", written ? "Export complete" : "Export failed");
+	store = delete_store(store);
+}
+
+// Benchmark multiplication of square matrices over a range of dimensions and
+// export the time per multiplication for each dimension to ../results.csv.
+// With a NULL pool the single-threaded multiply() is timed, otherwise
+// multiply_parallel() is timed on the given pool.
+void benchmark_multiply_square(ThreadPool *pool) {
+	uint32_t const dims[] = {2, 3, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024};
+	uint32_t const dim_num = sizeof(dims) / sizeof(dims[0]);
+	double times[dim_num];
+	// 2^32, built from a 64-bit constant because long may be only 32 bits wide
+	uint64_t const base_num_pairs = UINT64_C(1) << 32;
+	Rand * rand = new_rand();
+	Benchmark *benchmark = new_benchmark();
+
+	rand_seed(rand, 42);
+
+	// Loop through the matrix sizes
+	for (uint32_t pos = 0; pos < dim_num; ++pos) {
+		uint32_t const dim = dims[pos];
+		printf("\nBenchmarking scaling with matrices of dimension %u\n", dim);
+
+		// All square matrices
+		uint64_t const dim1 = dim;
+		uint64_t const dim2 = dim;
+		uint64_t const dim3 = dim;
+
+		// Multiply as 64-bit values so the divisor cannot wrap around
+		uint64_t num_pairs = base_num_pairs / (dim1 * dim2 * dim3);
+
+		// Use fewer pairs for small matrices
+		if (dim <= 2) {
+			num_pairs = num_pairs / 16;
+		}
+		else if (dim <= 8) {
+			num_pairs = num_pairs / 8;
+		}
+		// NOTE(review): this test was a second, unreachable "dim <= 2"; 32 is
+		// assumed from the pattern of thresholds - confirm against the Julia tests
+		else if (dim <= 32) {
+			num_pairs = num_pairs / 4;
+		}
+		printf("Using %" PRIu64 " pairs of matrices\n", num_pairs);
+
+		// Create random matrices for our calculations
+		Matrix **as = calloc(num_pairs, sizeof(Matrix*));
+		for (uint64_t index = 0; index < num_pairs; ++index) {
+			as[index] = new_matrix(dim1, dim2);
+			matrix_fill(as[index], rand);
+		}
+		Matrix **bs = calloc(num_pairs, sizeof(Matrix*));
+		for (uint64_t index = 0; index < num_pairs; ++index) {
+			bs[index] = new_matrix(dim2, dim3);
+			matrix_fill(bs[index], rand);
+		}
+		printf("Matrices generated\n");
+
+		// Create a temporary array to store the result in
+		Matrix * result = new_matrix(dim1, dim3);
+
+		// Time both paths the same way: start, multiply every pair, then end
+		benchmarks_start(benchmark, num_pairs);
+		if (pool) {
+			for (uint64_t index = 0; index < num_pairs; ++index) {
+				multiply_parallel(pool, result, as[index], bs[index]);
+			}
+		}
+		else {
+			for (uint64_t index = 0; index < num_pairs; ++index) {
+				multiply(result, as[index], bs[index]);
+			}
+		}
+		benchmarks_end(benchmark);
+
+		double per_matrix_time = benchmark->elapsed / (double)benchmark->operations;
+		printf("Raw timing: %g\n", benchmark->elapsed);
+		printf("Per matrix time: %g\n", per_matrix_time);
+		times[pos] = per_matrix_time;
+
+		// Clean up
+		result = delete_matrix(result);
+		for (uint64_t index = 0; index < num_pairs; ++index) {
+			as[index] = delete_matrix(as[index]);
+			bs[index] = delete_matrix(bs[index]);
+		}
+		free(as);
+		free(bs);
+	}
+	benchmark = delete_benchmark(benchmark);
+	rand = delete_rand(rand);
+
+	Store *method = new_store(ALLOC_CHUNK);
+	if (pool) {
+		store_printf_append(method, "C naive, %u threads", threadpool_threads(pool));
+	}
+	else {
+		store_printf_append(method, "C naive, single threaded");
+	}
+	export_data("../results.csv", method->data, times, dim_num);
+	method = delete_store(method);
+}
+
diff --git a/implementation/matmul-c/src/matrix.c b/implementation/matmul-c/src/matrix.c
index b73907c..7144ed1 100644
--- a/implementation/matmul-c/src/matrix.c
+++ b/implementation/matmul-c/src/matrix.c
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -58,17 +58,12 @@ void matrix_print(Matrix *A) {
 	}
 }
 
-void matrix_fill(Matrix *A, uint32_t seed) {
-	Rand * rand = new_rand();
-	rand_seed(rand, seed);
-
+void matrix_fill(Matrix *A, Rand * const rand) {
 	if (A) {
 		uint32_t size = A->height * A->width;
 		for (uint32_t index = 0; index < size; ++index) {
-			A->elements[index] = rand_digit(rand);
+			A->elements[index] = rand_value(rand);
 		}
 	}
-
-	rand = delete_rand(rand);
 }
 
diff --git a/implementation/matmul-c/src/store.c b/implementation/matmul-c/src/store.c
new file mode 100644
index 0000000..ee86f4f
--- /dev/null
+++ b/implementation/matmul-c/src/store.c
@@ -0,0 +1,127 @@
+/* vim: noet:ts=2:sts=2:sw=2 */
+
+/* SPDX-License-Identifier: MIT */
+/* Copyright © 2024 David Llewellyn-Jones */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "store.h"
+
+// Create an empty store growing in chunk_size steps; NULL if allocation fails
+Store * new_store(size_t chunk_size) {
+	Store *store = calloc(1, sizeof *store); // zeroed, so length starts at 0
+	if (store) {
+		// A one byte buffer keeps the data null terminated even when empty
+		store->data = calloc(1, sizeof(char));
+		store->size = 1;
+		store->chunk_size = chunk_size ? chunk_size : 1; // used as a divisor
+	}
+	// delete_store() releases a store left without a buffer and returns NULL
+	return (store && !store->data) ? delete_store(store) : store;
+}
+
+// Free a store and its buffer (NULL is accepted); always returns NULL
+Store * delete_store(Store *store) {
+	if (!store) {
+		return NULL;
+	}
+	free(store->data); // free() ignores NULL, so the buffer needs no test
+	free(store);
+	return NULL;
+}
+
+// Append length bytes from data to the store, keeping it null terminated.
+// Returns false if store is NULL or the buffer cannot be resized.
+bool store_append(Store * const store, char const * const data, size_t length) {
+	// Test for NULL here, as the new size is computed from the store's fields
+	if (!store || !store_setsize(store, store->length + length)) {
+		return false;
+	}
+
+	memcpy(store->data + store->length, data, length);
+	store->length += length;
+	store->data[store->length] = 0;
+	return true;
+}
+
+// Resize the buffer to hold size bytes of data plus a null terminator, with
+// the allocation rounded up to a multiple of the chunk size; data that no
+// longer fits is truncated. Returns false if store is NULL or realloc fails.
+bool store_setsize(Store * const store, size_t size) {
+	if (!store) {
+		return false;
+	}
+	// Quantize with room for a null terminator; a zero chunk size counts as one
+	size_t const chunk = store->chunk_size ? store->chunk_size : 1;
+	size_t const allocation = (((size + 1) / chunk) + 1) * chunk;
+	if (allocation != store->size) {
+		// Replace the pointer only on success so a failure keeps the old buffer
+		char * const data = realloc(store->data, allocation);
+		if (!data) {
+			return false;
+		}
+		store->data = data;
+		store->size = allocation; // the allocated size, as compared above
+		if (store->length > allocation - 1) {
+			store->length = allocation - 1;
+			store->data[store->length] = 0;
+			printf("Set size to: %zu\n", store->length);
+		}
+	}
+	return true;
+}
+
+// Replace the contents of the store with printf-style formatted text. Returns
+// the number of characters stored, or 0 if store is NULL, the format cannot
+// be converted or the buffer cannot be resized.
+size_t store_printf (Store * const store, char const * const format, ...) {
+	va_list args;
+	int length;
+
+	if (!store) {
+		return 0;
+	}
+	// Measure first; vsnprintf reports an encoding error as a negative value
+	va_start (args, format);
+	length = vsnprintf(NULL, 0, format, args);
+	va_end (args);
+	if (length < 0 || !store_setsize(store, (size_t)length)) {
+		return 0;
+	}
+
+	// This will be automatically null terminated
+	va_start (args, format);
+	length = vsnprintf(store->data, (size_t)length + 1, format, args);
+	va_end (args);
+	store->length = (length > 0) ? (size_t)length : 0;
+	return store->length;
+}
+
+// Append printf-style formatted text to the store. Returns the number of
+// characters added, or 0 if store is NULL or the text cannot be stored.
+size_t store_printf_append (Store * const store, char const * const format, ...) {
+	va_list args;
+	int length;
+
+	if (!store) {
+		return 0;
+	}
+	// Measure first; vsnprintf reports an encoding error as a negative value
+	va_start (args, format);
+	length = vsnprintf(NULL, 0, format, args);
+	va_end (args);
+	if (length < 0 || !store_setsize(store, store->length + (size_t)length)) {
+		return 0;
+	}
+	// This will be automatically null terminated
+	va_start (args, format);
+	length = vsnprintf(store->data + store->length, (size_t)length + 1, format, args);
+	va_end (args);
+	store->length += (length > 0) ? (size_t)length : 0;
+	return (length > 0) ? (size_t)length : 0;
+}
+
+
diff --git a/implementation/matmul-c/src/threadpool.c b/implementation/matmul-c/src/threadpool.c
index c1ecfa9..fbbce90 100644
--- a/implementation/matmul-c/src/threadpool.c
+++ b/implementation/matmul-c/src/threadpool.c
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -10,8 +10,6 @@
 
 #include "threadpool.h"
 
-#define MAX_THREADS (8)
-
 typedef struct _ThreadContext {
 	pthread_mutex_t *working_mutex;
 	pthread_cond_t *working_cond;
@@ -28,13 +26,14 @@ typedef struct _ThreadContext {
 } ThreadContext;
 
 struct _ThreadPool {
-	pthread_t thread_id[MAX_THREADS];
-	ThreadContext *context[MAX_THREADS];
+	pthread_t *thread_id;
+	ThreadContext **context;
 
 	pthread_mutex_t working_mutex;
 	pthread_cond_t working_cond;
 	pthread_mutex_t begin_mutex;
 	pthread_cond_t begin_cond;
+	uint32_t threads;
 	uint32_t working;
 };
 
@@ -65,19 +64,23 @@ inline void multiply_work(Matrix *result, Matrix *A, Matrix *B, uint32_t start,
 	}
 }
 
-ThreadPool * new_threadpool() {
+ThreadPool * new_threadpool(uint32_t threads) {
 	ThreadPool *pool = calloc(sizeof(ThreadPool), sizeof(char));
 
-	if (pool) {
+	if (pool && threads > 0) {
+		pool->thread_id = calloc(sizeof(pthread_t), threads);
+		pool->context = calloc(sizeof(ThreadContext *), threads);
+
 		// Initialise the pool context
 		pthread_mutex_init(&pool->working_mutex, NULL);
 		pthread_cond_init(&pool->working_cond, NULL);
 		pthread_mutex_init(&pool->begin_mutex, NULL);
 		pthread_cond_init(&pool->begin_cond, NULL);
-		pool->working = MAX_THREADS;
+		pool->threads = threads;
+		pool->working = threads;
 
 		// Initialise the threads
-		for (uint32_t thread = 0; thread < MAX_THREADS; ++thread) {
+		for (uint32_t thread = 0; thread < threads; ++thread) {
 			pool->context[thread] = calloc(sizeof(ThreadContext), sizeof(char));
 			pool->context[thread]->working_mutex = &pool->working_mutex;
 			pool->context[thread]->working_cond = &pool->working_cond;
@@ -103,14 +106,14 @@ ThreadPool * delete_threadpool(ThreadPool *pool) {
 	if (pool) {
 		// Remove all work
 		pthread_mutex_lock(&pool->begin_mutex);
-		for (uint32_t thread = 0; thread < MAX_THREADS; ++thread) {
+		for (uint32_t thread = 0; thread < pool->threads; ++thread) {
 			pool->context[thread]->live = false;
 		}
 		pthread_cond_broadcast(&pool->begin_cond);
 		pthread_mutex_unlock(&pool->begin_mutex);
 
 		// Wait for the threads to complete
-		for (uint32_t thread = 0; thread < MAX_THREADS; ++thread) {
+		for (uint32_t thread = 0; thread < pool->threads; ++thread) {
 			pthread_join(pool->thread_id[thread], NULL);
 			free(pool->context[thread]);
 		}
@@ -120,6 +123,8 @@ ThreadPool * delete_threadpool(ThreadPool *pool) {
 		pthread_mutex_destroy(&pool->begin_mutex);
 		pthread_cond_destroy(&pool->begin_cond);
 
+		free(pool->thread_id);
+		free(pool->context);
 		free(pool);
 	}
 	return NULL;
@@ -155,7 +160,7 @@ void *thread_runner(void *vargp) {
 bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B) {
 	uint32_t size = result->height * result->width;
 
-	uint32_t chunk = (size + (MAX_THREADS - 1)) / MAX_THREADS;
+	uint32_t chunk = (size + (pool->threads - 1)) / pool->threads;
 	uint32_t allocated = 0;
 	uint32_t thread = 0;
 
@@ -175,7 +180,7 @@ bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B) {
 
 	// Trigger the runners to work
 	pthread_mutex_lock(&pool->begin_mutex);
-	pool->working = MAX_THREADS;
+	pool->working = pool->threads;
 	pthread_cond_broadcast(&pool->begin_cond);
 	pthread_mutex_unlock(&pool->begin_mutex);
 
@@ -189,3 +194,7 @@ bool multiply_parallel(ThreadPool *pool, Matrix *result, Matrix *A, Matrix *B) {
 	return true;
 }
 
+uint32_t threadpool_threads(ThreadPool *pool) { // Number of threads in pool
+	return pool ? pool->threads : 0; // a NULL pool has no threads to count
+}
+
diff --git a/implementation/matmul-c/src/utils.c b/implementation/matmul-c/src/utils.c
index b157583..3df29a4 100644
--- a/implementation/matmul-c/src/utils.c
+++ b/implementation/matmul-c/src/utils.c
@@ -1,4 +1,4 @@
-/* vim: noet:ts=2:sts=2:sw=2 */ 
+/* vim: noet:ts=2:sts=2:sw=2 */
 
 /* SPDX-License-Identifier: MIT */
 /* Copyright © 2024 David Llewellyn-Jones */
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 
 #include "utils.h"
+#include "matrix.h"
 
 #define A (16807)
 #define C (0)
@@ -35,16 +36,15 @@ void rand_seed(Rand *rand, uint32_t seed) {
 
 // A decimal between 0.0 and 1.0
 // See https://www.math.arizona.edu/~tgk/mc/book_chap3.pdf
-double rand_next(Rand *rand) {
+inline double rand_next(Rand *rand) {
 	rand->state = ((A * rand->state) + C) % M;
 	return (double)rand->state / (double)M;
 }
 
-// A digit between 0.0 and 100.0
-double rand_digit(Rand *rand) {
+// A value between 0.0 and 100.0
+inline double rand_value(Rand *rand) {
 	double result;
 	result = rand_next(rand);
-	result = (double)rand->state / (double)M;
 	result = ((int)(result * 1000.0)) / 10.0;
 	return result;
 }