Skip to content

Commit

Permalink
Add large matrix benchmarks, refactor tests
Browse files Browse the repository at this point in the history
Refactors the codebase to split the tests and benchmarks into separate
files. Adds some large matrix multiplication benchmarks for the serial
and parallel implementations.
  • Loading branch information
llewelld committed Apr 26, 2024
1 parent 6b00297 commit 7c7dfbb
Show file tree
Hide file tree
Showing 10 changed files with 314 additions and 96 deletions.
2 changes: 2 additions & 0 deletions implementation/matmul-c/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ matmul-c: \
src/operations.c \
src/parse_header.c \
src/threadpool.c \
src/utils.c \
src/benchmarks.c \
src/tests.c
$(CC) $(CFLAGS) -o$@ $^ $(CLIBS)

Expand Down
24 changes: 24 additions & 0 deletions implementation/matmul-c/include/benchmarks.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/* vim: noet:ts=2:sts=2:sw=2 */

/* SPDX-License-Identifier: MIT */
/* Copyright © 2024 David Llewellyn-Jones */

#ifndef __MATRIX_BENCHMARKS_H
#define __MATRIX_BENCHMARKS_H (1)

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/*
 * NOTE: ThreadPool and Matrices are used below but are not declared by this
 * header. Include the headers that declare them before this one, as
 * benchmarks.c does with "load.h" and "threadpool.h".
 */

/* Opaque timing state; the structure is defined in benchmarks.c */
typedef struct _Benchmark Benchmark;

/* Allocate a zero-initialised Benchmark; returns NULL if allocation fails */
Benchmark * new_benchmark(void);
/* Free a Benchmark (NULL is accepted); always returns NULL */
Benchmark * delete_benchmark(Benchmark *benchmark);

/*
 * Record the number of operations about to be timed and read the start time.
 * Prints "Benchmarking..." unless the benchmark is quiet. Does nothing when
 * benchmark is NULL.
 */
void benchmarks_start(Benchmark *benchmark, uint32_t operations);
/*
 * Read the end time, then compute the elapsed seconds and the operations per
 * second. Prints both unless the benchmark is quiet. Does nothing when
 * benchmark is NULL.
 */
void benchmarks_end(Benchmark *benchmark);
/* Enable or disable printing from benchmarks_start()/benchmarks_end(); pass a non-NULL benchmark */
void benchmark_set_quiet(Benchmark *benchmark, bool quiet);
/*
 * Time multiply() and then multiply_parallel() on square matrices with sides
 * from 128 to 2048 in steps of 128, printing the results for each size.
 */
void benchmarks_multiply_big(ThreadPool *pool);
/*
 * Time repeated passes of multiply() over every matrix pair in a and b,
 * writing the products into d. The number of pairs is taken from d.
 */
void benchmarks_multiply_small(Matrices *a, Matrices *b, Matrices *d);

#endif /* __MATRIX_BENCHMARKS_H */

2 changes: 1 addition & 1 deletion implementation/matmul-c/include/load.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ void matrix_npz_load(char *filename, Matrices *matrices);
Matrices *new_matrices(uint32_t count);
Matrices *delete_matrices(Matrices *matrices);

#endif /* __MATRIX_OPERATIONS_H */
#endif /* __MATRIX_LOAD_H */
6 changes: 6 additions & 0 deletions implementation/matmul-c/include/tests.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
#include <stdbool.h>
#include <stdio.h>

#include "matrix.h"
#include "load.h"
#include "threadpool.h"

#include "load.h"

#ifndef __MATRIX_TESTS_H
Expand All @@ -14,5 +18,7 @@
uint32_t tests_load_matrices(Matrices *a, Matrices *b, Matrices *c);
bool tests_allocate_results(Matrices *c, Matrices *d);

void tests_compare(Matrices *a, Matrices *b, Matrices *c, Matrices *d, ThreadPool *pool);

#endif /* __MATRIX_TESTS_H */

19 changes: 19 additions & 0 deletions implementation/matmul-c/include/utils.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/* vim: noet:ts=2:sts=2:sw=2 */

/* SPDX-License-Identifier: MIT */
/* Copyright © 2024 David Llewellyn-Jones */

#ifndef __MATRIX_UTILS_H
#define __MATRIX_UTILS_H (1)

#include <stdint.h>

/* Opaque random number generator state; the structure is defined outside this header */
typedef struct _Rand Rand;

/* Create a generator. NOTE(review): the initial seed state is not visible from this header */
Rand * new_rand(void);
/* Destroy a generator; callers assign the result back to their pointer (presumably NULL - confirm in utils.c) */
Rand * delete_rand(Rand *rand);
/* Seed the generator with the given value */
void rand_seed(Rand *rand, uint32_t seed);
/* Draw the next value. NOTE(review): the output range is not visible from this header */
double rand_next(Rand *rand);
/* Draw a value used by matrix_fill() for matrix elements. NOTE(review): presumably a single digit - confirm in utils.c */
double rand_digit(Rand *rand);

#endif /* __MATRIX_UTILS_H */
97 changes: 6 additions & 91 deletions implementation/matmul-c/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,7 @@
#include "load.h"
#include "tests.h"
#include "threadpool.h"

#define BENCHMARK_REPEAT_SMALL (32768)
#define BENCHMARK_REPEAT_LARGE (1)

#define HEIGHT (2048)
#define WIDTH (2048)
#include "benchmarks.h"

int main(int argc, char *argv[]) {
Matrix *A;
Expand Down Expand Up @@ -58,93 +53,13 @@ int main(int argc, char *argv[]) {
result = tests_allocate_results(c, d);

// Perform 512 multiplications and compare against the results from NumPy
printf("Performing unit tests...\n");
uint32_t passed = 0;
for (uint32_t index = 0; index < total; ++index) {
result = multiply(d->matrices[index].matrix, a->matrices[index].matrix, b->matrices[index].matrix);
result = result && equals(c->matrices[index].matrix, d->matrices[index].matrix);
if (result) {
passed += 1;
}
else {
printf("Incorrect result\n");
matrix_print(c->matrices[index].matrix);
matrix_print(d->matrices[index].matrix);
}
}
for (uint32_t index = 0; index < total; ++index) {
result = multiply_parallel(pool, d->matrices[index].matrix, a->matrices[index].matrix, b->matrices[index].matrix);
result = result && equals(c->matrices[index].matrix, d->matrices[index].matrix);
if (result) {
passed += 1;
}
else {
printf("Incorrect result\n");
matrix_print(c->matrices[index].matrix);
matrix_print(d->matrices[index].matrix);
}
}
printf("Multiplication tests passed: %u out of %u\n", passed, total * 2);

// Measure time taken to perform 16777216 multiplications
struct timespec start_time, end_time;
uint32_t operations;
double elapsed;
double ops_per_sec;

// Create a pair of big random matrices
A = new_matrix(HEIGHT, WIDTH);
B = new_matrix(WIDTH, HEIGHT);
D = new_matrix(HEIGHT, HEIGHT);
matrix_fill(A, 8);
matrix_fill(B, 16);
operations = BENCHMARK_REPEAT_LARGE;

printf("Benchmarking...\n");

clock_gettime(CLOCK_MONOTONIC, &start_time);
for (uint32_t count = 0; count < operations; ++count) {
multiply(D, A, B);
}
clock_gettime(CLOCK_MONOTONIC, &end_time);
elapsed = (end_time.tv_sec - start_time.tv_sec);
elapsed += (end_time.tv_nsec - start_time.tv_nsec) / 1000000000.0;

printf("Time taken to perform %u standard large multiply operations: %.02f seconds\n", operations, elapsed);
ops_per_sec = operations / elapsed;
printf("Equivalent to %.02f operations per second\n", ops_per_sec);

clock_gettime(CLOCK_MONOTONIC, &start_time);
for (uint32_t count = 0; count < operations; ++count) {
multiply_parallel(pool, D, A, B);
}
clock_gettime(CLOCK_MONOTONIC, &end_time);
elapsed = (end_time.tv_sec - start_time.tv_sec);
elapsed += (end_time.tv_nsec - start_time.tv_nsec) / 1000000000.0;

printf("Time taken to perform %u standard large parallel multiply operations: %.02f seconds\n", operations, elapsed);
ops_per_sec = operations / elapsed;
printf("Equivalent to %.02f operations per second\n", ops_per_sec);
tests_compare(a, b, c, d, pool);

A = delete_matrix(A);
B = delete_matrix(B);
D = delete_matrix(D);
// Benchmark large matrix multiplications
benchmarks_multiply_big(pool);

clock_gettime(CLOCK_MONOTONIC, &start_time);
for (uint32_t count = 0; count < BENCHMARK_REPEAT_SMALL; ++count) {
for (uint32_t index = 0; index < total; ++index) {
multiply(d->matrices[index].matrix, a->matrices[index].matrix, b->matrices[index].matrix);
}
}
clock_gettime(CLOCK_MONOTONIC, &end_time);

elapsed = (end_time.tv_sec - start_time.tv_sec);
elapsed += (end_time.tv_nsec - start_time.tv_nsec) / 1000000000.0;

operations = total * BENCHMARK_REPEAT_SMALL;
printf("Time taken to perform %u multiply operations: %.02f seconds\n", operations, elapsed);
ops_per_sec = operations / elapsed;
printf("Equivalent to %.02f operations per second\n", ops_per_sec);
// Measure time taken to perform 16777216 multiplications
benchmarks_multiply_small(a, b, d);

a = delete_matrices(a);
b = delete_matrices(b);
Expand Down
162 changes: 162 additions & 0 deletions implementation/matmul-c/src/benchmarks.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
/* vim: noet:ts=2:sts=2:sw=2 */

/* SPDX-License-Identifier: MIT */
/* Copyright © 2024 David Llewellyn-Jones */

#include <stdlib.h>

#include "matrix.h"
#include "load.h"
#include "operations.h"
#include "threadpool.h"

#include "benchmarks.h"

/* Number of passes benchmarks_multiply_small() makes over the whole matrix set */
#define BENCHMARK_REPEAT_SMALL (32768)
/* Base repeat count for benchmarks_multiply_big(), which scales it up for the smaller sizes */
#define BENCHMARK_REPEAT_LARGE (1)

/* NOTE(review): HEIGHT and WIDTH are not referenced by the functions visible in this file - confirm before removing */
#define HEIGHT (2048)
#define WIDTH (2048)

/*
 * State for one timed run. Allocated zeroed by new_benchmark() and filled in
 * by benchmarks_start() and benchmarks_end().
 */
struct _Benchmark {
/* Monotonic clock reading taken by benchmarks_start() */
struct timespec start_time;
/* Monotonic clock reading taken by benchmarks_end() */
struct timespec end_time;
/* Number of operations in the timed region, as passed to benchmarks_start() */
uint32_t operations;
/* Seconds between start_time and end_time, computed by benchmarks_end() */
double elapsed;
/* operations divided by elapsed, computed by benchmarks_end() */
double ops_per_sec;
/* When true, benchmarks_start() and benchmarks_end() print nothing */
bool quiet;
};

/**
 * Allocate a zero-initialised Benchmark.
 *
 * Returns the new benchmark, or NULL if the allocation fails. The zeroed
 * state leaves the quiet flag false, so output is enabled by default.
 * Release the result with delete_benchmark().
 */
Benchmark * new_benchmark(void) {
	/* calloc takes (count, size): one element of the pointed-to type */
	Benchmark *benchmark = calloc(1, sizeof *benchmark);

	return benchmark;
}

/**
 * Release a benchmark obtained from new_benchmark().
 *
 * A NULL argument is accepted. Always returns NULL so that the caller can
 * clear its own pointer in the same statement.
 */
Benchmark * delete_benchmark(Benchmark *benchmark) {
	/* free() ignores NULL, so no guard is needed */
	free(benchmark);

	return NULL;
}

/**
 * Enable or disable the messages printed by benchmarks_start() and
 * benchmarks_end().
 *
 * benchmark: the benchmark to configure; NULL is ignored, matching the
 *            other benchmark functions in this file.
 * quiet:     true to suppress output, false to print it.
 */
void benchmark_set_quiet(Benchmark *benchmark, bool quiet) {
	if (benchmark) {
		benchmark->quiet = quiet;
	}
}

/**
 * Begin a timed region.
 *
 * Stores the number of operations the region will perform and records the
 * current monotonic clock value as the start time. Prints a notice first
 * unless the benchmark is quiet. Does nothing when benchmark is NULL.
 */
void benchmarks_start(Benchmark *benchmark, uint32_t operations) {
	if (!benchmark) {
		return;
	}

	if (!benchmark->quiet) {
		printf("Benchmarking...\n");
	}

	benchmark->operations = operations;
	/* Read the clock last so the work above is not part of the timed region */
	clock_gettime(CLOCK_MONOTONIC, &benchmark->start_time);
}

/**
 * Finish a timed region started with benchmarks_start().
 *
 * Records the current monotonic clock value as the end time, then stores the
 * elapsed time in seconds and the resulting operations per second. Prints
 * both figures unless the benchmark is quiet. Does nothing when benchmark is
 * NULL.
 */
void benchmarks_end(Benchmark *benchmark) {
	if (!benchmark) {
		return;
	}

	/* Stop the clock before doing any other work */
	clock_gettime(CLOCK_MONOTONIC, &benchmark->end_time);

	const struct timespec *begin = &benchmark->start_time;
	const struct timespec *finish = &benchmark->end_time;

	/* Whole seconds first, then the nanosecond remainder as a fraction */
	benchmark->elapsed = (finish->tv_sec - begin->tv_sec);
	benchmark->elapsed += (finish->tv_nsec - begin->tv_nsec) / 1000000000.0;
	benchmark->ops_per_sec = benchmark->operations / benchmark->elapsed;

	if (benchmark->quiet) {
		return;
	}

	printf("Time taken to perform %u operations: %.02f seconds\n", benchmark->operations, benchmark->elapsed);
	printf("Equivalent to %.02f operations per second\n", benchmark->ops_per_sec);
}

void benchmarks_multiply_big(ThreadPool *pool) {
Benchmark *benchmark;
Matrix *A;
Matrix *B;
Matrix *D;

printf("\n");
printf("## Large matrix multiplication\n");

benchmark = new_benchmark();
benchmark_set_quiet(benchmark, true);

for (uint32_t width = 128; width <= 2048; width += 128) {
uint32_t diag = width;
uint32_t height = width;
uint32_t const repeat = BENCHMARK_REPEAT_LARGE * (width < 512 ? 1024 : width < 1024 ? 16 : 1);

A = new_matrix(width, diag);
B = new_matrix(diag, height);
D = new_matrix(width, height);
matrix_fill(A, 8);
matrix_fill(B, 16);

benchmarks_start(benchmark, repeat);
for (uint32_t count = 0; count < repeat; ++count) {
multiply(D, A, B);
}
benchmarks_end(benchmark);
printf("Size: (%d, %d, %d), time per operation: %.02f seconds\n", width, diag, height, benchmark->elapsed / benchmark->operations);
uint64_t order = (uint64_t)width * (uint64_t)diag * (uint64_t)height;
double speed = (double)order / (benchmark->elapsed / benchmark->operations);
printf("Order: %llu, elements per second: %.02f\n", order, speed);

A = delete_matrix(A);
B = delete_matrix(B);
D = delete_matrix(D);
}

printf("\n");
printf("## Large parallel matrix multiplication\n");

for (uint32_t width = 128; width <= 2048; width += 128) {
uint32_t diag = width;
uint32_t height = width;
uint32_t const repeat = BENCHMARK_REPEAT_LARGE * (width < 512 ? 1024 : width < 1024 ? 16 : 1);

A = new_matrix(width, diag);
B = new_matrix(diag, height);
D = new_matrix(width, height);
matrix_fill(A, 8);
matrix_fill(B, 16);

benchmarks_start(benchmark, repeat);
for (uint32_t count = 0; count < repeat; ++count) {
multiply_parallel(pool, D, A, B);
}
benchmarks_end(benchmark);
printf("Size: (%d, %d, %d), time per operation: %.02f seconds\n", width, diag, height, benchmark->elapsed / benchmark->operations);
uint64_t order = (uint64_t)width * (uint64_t)diag * (uint64_t)height;
double speed = (double)order / (benchmark->elapsed / benchmark->operations);
printf("Order: %llu, elements per second: %.02f\n", order, speed);

A = delete_matrix(A);
B = delete_matrix(B);
D = delete_matrix(D);
}

benchmark = delete_benchmark(benchmark);
}

/**
 * Benchmark small matrix multiplication.
 *
 * Makes BENCHMARK_REPEAT_SMALL passes over the matrix sets, multiplying each
 * pair from a and b into the matching entry of d, and prints the total time
 * and the operations per second.
 *
 * a, b: the input matrix sets.
 * d:    the output matrix set; its count decides how many pairs are used.
 */
void benchmarks_multiply_small(Matrices *a, Matrices *b, Matrices *d) {
	Benchmark *timer = new_benchmark();
	uint32_t const pairs = d->count;

	printf("\n");
	printf("## Small matrix multiplication\n");
	benchmarks_start(timer, pairs * BENCHMARK_REPEAT_SMALL);

	for (uint32_t pass = 0; pass < BENCHMARK_REPEAT_SMALL; ++pass) {
		for (uint32_t i = 0; i < pairs; ++i) {
			multiply(d->matrices[i].matrix, a->matrices[i].matrix, b->matrices[i].matrix);
		}
	}

	benchmarks_end(timer);

	timer = delete_benchmark(timer);
}

10 changes: 8 additions & 2 deletions implementation/matmul-c/src/matrix.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
#include <stdio.h>
#include <stdlib.h>

#include "utils.h"

#include "matrix.h"

Matrix * new_matrix(uint16_t height, uint16_t width) {
Expand Down Expand Up @@ -57,12 +59,16 @@ void matrix_print(Matrix *A) {
}

/**
 * Fill every element of A with values drawn from rand_digit().
 *
 * A fresh generator is created and seeded with `seed` on each call and is
 * destroyed before returning. When A is NULL no elements are written.
 */
void matrix_fill(Matrix *A, uint32_t seed) {
	/* Named rng so that it does not shadow the C library's rand() */
	Rand *rng = new_rand();
	rand_seed(rng, seed);

	if (A) {
		/* Widen before multiplying so the product is computed in 32 bits */
		uint32_t const size = (uint32_t)A->height * (uint32_t)A->width;
		for (uint32_t index = 0; index < size; ++index) {
			A->elements[index] = rand_digit(rng);
		}
	}

	rng = delete_rand(rng);
}

Loading

0 comments on commit 7c7dfbb

Please sign in to comment.