Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Switch to uint8_t, some optimizations #979

Merged
merged 8 commits into from
Oct 20, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions BENCHMARK.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ Some solutions are not included in the automated benchmark runs, either because
- [Running a benchmark of all solutions for a particular language](#running-a-benchmark-of-all-solutions-for-a-particular-language)
- [Running in unconfined mode](#running-in-unconfined-mode)
- [Output formats](#output-formats)
- [Setting the solution timeout](#setting-the-solution-timeout)

## What operating system to use?

Expand Down Expand Up @@ -375,3 +376,13 @@ The output format can be controlled via the `FORMATTER` variable like this:
make FORMATTER=json
make DIRECTORY=PrimeCrystal/solution_1 FORMATTER=csv
```

## Setting the solution timeout

The run of each solution is limited to a certain duration, which is 10 minutes by default.
You can change this setting through the `TIMEOUT` variable like this:

```shell
make TIMEOUT=15
make DIRECTORY=PrimeCPP/solution_2 TIMEOUT=15
```
2 changes: 2 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ SHELL := /bin/bash

DIRECTORY := $(shell pwd)
FORMATTER := "table"
TIMEOUT := "10"

.PHONY: all
all: benchmark
Expand All @@ -14,6 +15,7 @@ benchmark: check-env
ARGS=("-d $${REALPATH}" "-f $(FORMATTER)"); \
[ ! -z $${OUTPUT_FILE} ] && ARGS+=( "-o $${OUTPUT_FILE}" ); \
[ ! -z $${UNCONFINED} ] && ARGS+=( "--unconfined" ); \
[ ! -z $${TIMEOUT} ] && ARGS+=( "-t $${TIMEOUT}" ); \
cd ./tools; npm ci --silent && npm start --silent -- benchmark $${ARGS[@]}

.PHONY: check-env
Expand Down
111 changes: 52 additions & 59 deletions PrimeCPP/solution_2/PrimeCPP_PAR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,80 +24,66 @@ using namespace std::chrono;
const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU;

class BitArray {
uint32_t *array;
uint8_t *array;
size_t arrSize;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it necessary to declare arrSize as a member variable? It's private so only visible to the BitArray member functions, and I don't think it's used anywhere outside the constructor.

size_t logicalSize;

inline static size_t arraySize(size_t size)
static constexpr size_t arraySize(size_t size)
{
return (size >> 5) + ((size & 31) > 0);
return (size >> 3) + ((size & 7) > 0);
}

inline static size_t index(size_t n)
static constexpr size_t index(size_t n)
{
return (n >> 5);
}

inline static uint32_t getSubindex(size_t n, uint32_t d)
{
return d & uint32_t(uint32_t(0x01) << (n % 32));
}

inline void setFalseSubindex(size_t n, uint32_t &d)
{
d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t))));
return (n >> 3);
}

public:
explicit BitArray(size_t size) : arrSize(size)
explicit BitArray(size_t size) : logicalSize(size)
{
array = new uint32_t[arraySize(size)];
std::memset(array, 0xFF, (size >> 3) + ((size & 7) > 0));
arrSize = (size + 1) / 2; // Only store bits for odd numbers
array = new uint8_t[arraySize(arrSize)];
// Bits are left at zero default, so no need to initialize them
// std::memset(array, 0x00, arraySize(arrSize));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you absolutely sure? When I was learning C++, it was hammered into me by my teacher that with C and C++, one has to assume garbage when allocating memory via new.

I actually looked up the documentation in the C++ reference, and this is what it says about default initialization, which is what's done when new T is used ("if T is an array type, every element of the array is default-initialized", as per Default-initialization):

When storage for an object with automatic or dynamic storage duration is obtained, the object has an indeterminate value.

If no initialization is performed for an object, that object retains an indeterminate value until that value is replaced.

It may work by accident in your office, but it looks to me like we can't rely on that happening elsewhere. :)

}

~BitArray() {delete [] array;}
~BitArray() { delete[] array; }

bool get(size_t n) const
constexpr bool get(size_t n) const
{
return getSubindex(n, array[index(n)]);
if (n % 2 == 0)
return false; // Even numbers > 2 are not prime
n = n / 2; // Map the actual number to the index in the array
return !(array[index(n)] & (uint8_t(1) << (n % 8)));
}

static constexpr uint32_t rol(uint32_t x, uint32_t n)
void set(size_t n)
{
return (x<<n) | (x>>(32-n));
n = n / 2; // Map the actual number to the index in the array
array[index(n)] |= (uint8_t(1) << (n % 8));
}

void setFlagsFalse(size_t n, size_t skip)
constexpr size_t size() const
{
auto rolling_mask = ~uint32_t(1 << n % 32);
auto roll_bits = skip % 32;
while (n < arrSize) {
array[index(n)] &= rolling_mask;
n += skip;
rolling_mask = rol(rolling_mask, roll_bits);
}
}

inline size_t size() const
{
return arrSize;
return logicalSize;
}
};


// prime_sieve
//
// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested)
// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve.
// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3)
// and includes the code needed to eliminate non-primes from its array by calling runSieve.

class prime_sieve
{
private:

BitArray Bits; // Sieve data, where 1==prime, 0==not
BitArray Bits; // Sieve data, where 0==prime, 1==not

public:

prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes)
prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default
{
}

Expand All @@ -117,15 +103,21 @@ class prime_sieve

while (factor <= q)
{
for (uint64_t num = factor; num < Bits.size(); num += 2)
// Find the next prime number
for (; factor <= q; factor += 2)
{
if (Bits.get(num))
if (Bits.get(factor))
{
factor = num;
break;
}
}
Bits.setFlagsFalse(factor * factor, factor + factor);

// Mark multiples of the prime number as not prime
uint64_t start = factor * factor;
for (uint64_t num = start; num <= Bits.size(); num += factor * 2)
{
Bits.set(num);
}

factor += 2;
}
Expand All @@ -137,9 +129,9 @@ class prime_sieve

size_t countPrimes() const
{
size_t count = (Bits.size() >= 2); // Count 2 as prime if within range
for (int i = 3; i < Bits.size(); i+=2)
if (Bits.get(i))
size_t count = (Bits.size() >= 2); // Count 2 as prime if within range
for (uint64_t num = 3; num <= Bits.size(); num += 2)
if (Bits.get(num))
count++;
return count;
}
Expand All @@ -150,23 +142,24 @@ class prime_sieve

bool isPrime(uint64_t n) const
{
if (n & 1)
return Bits.get(n);
else
if (n == 2)
return true;
if (n < 2 || n % 2 == 0)
return false;
return Bits.get(n);
}

// validateResults
//
// Checks to see if the number of primes found matches what we should expect. This data isn't used in the
// Checks to see if the number of primes found matches what we should expect. This data isn't used in the
// sieve processing at all, only to sanity check that the results are right when done.

bool validateResults() const
{
const std::map<const uint64_t, const int> resultsDictionary =
{
{ 10LLU, 4 }, // Historical data for validating our results - the number of primes
{ 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000
{ 10LLU, 4 }, // Historical data for validating our results - the number of primes
{ 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000
{ 1'000LLU, 168 },
{ 10'000LLU, 1229 },
{ 100'000LLU, 9592 },
Expand All @@ -190,8 +183,8 @@ class prime_sieve
if (showResults)
cout << "2, ";

size_t count = (Bits.size() >= 2); // Count 2 as prime if in range
for (uint64_t num = 3; num <= Bits.size(); num+=2)
size_t count = (Bits.size() >= 2); // Count 2 as prime if in range
for (uint64_t num = 3; num <= Bits.size(); num += 2)
{
if (Bits.get(num))
{
Expand All @@ -210,7 +203,7 @@ class prime_sieve
<< "Average: " << duration/passes << ", "
<< "Limit: " << Bits.size() << ", "
<< "Counts: " << count << "/" << countPrimes() << ", "
<< "Valid : " << (validateResults() ? "Pass" : "FAIL!")
<< "Valid: " << (validateResults() ? "Pass" : "FAIL!")
<< "\n";

// Following 2 lines added by rbergen to conform to drag race output format
Expand Down Expand Up @@ -317,7 +310,7 @@ int main(int argc, char **argv)
}

if (bOneshot)
cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl;
cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl;

if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1))
{
Expand Down Expand Up @@ -352,8 +345,8 @@ int main(int argc, char **argv)
else
{
auto tStart = steady_clock::now();
std::thread threads[cThreads];
uint64_t l_passes[cThreads];
std::vector<std::thread> threads(cThreads);
std::vector<uint64_t> l_passes(cThreads);
for (unsigned int i = 0; i < cThreads; i++)
threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit)
{
Expand Down
Binary file added PrimeCPP/solution_2/primes_par.exe
Binary file not shown.
2 changes: 1 addition & 1 deletion PrimeJulia/solution_1/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM julia:1.6-alpine3.13
FROM julia:1-alpine

WORKDIR /opt/app

Expand Down
2 changes: 1 addition & 1 deletion PrimeJulia/solution_2/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM julia:1.6.1-alpine3.13
FROM julia:1-alpine

WORKDIR /opt/app

Expand Down
2 changes: 1 addition & 1 deletion PrimeJulia/solution_3/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM julia:1.6-buster
FROM julia:1

WORKDIR /opt/app

Expand Down
4 changes: 2 additions & 2 deletions PrimeJulia/solution_3/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ optimizations. This is a sort-of "low-level" style implementation in
Julia to get as much speed as possible out of the language. It is
*not* designed to be idiomatic Julia code.

This solution requires at least **Julia 1.5** to run. Julia 1.6 is
This solution requires at least **Julia 1.5** to run. The latest stable 1.X Julia version is
recommended and is used in the Docker image.

## Description
Expand Down Expand Up @@ -40,7 +40,7 @@ and bits are unset when the number is *prime*. This simplifies the
set_bit operation slightly (`arr[i] |= mask vs. arr[i] &= ~mask`).

If you see any room for improvement in the code or have any
suggestions, don't hesitate to open an issue, pull request (PR),
suggestions, don't hesitate to open an issue, pull request (PR),
Discussion, or the like. Don't forget to tag me at `@louie-github` so I
can be notified if my personal input is either wanted or needed.
I'm open to fixing stylistic issues or discussing cosmetic changes to
Expand Down
2 changes: 1 addition & 1 deletion PrimeJulia/solution_4/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM julia:1.6-buster
FROM julia:1

WORKDIR /opt/app

Expand Down
42 changes: 20 additions & 22 deletions PrimeV/solution_1/primes.v
Original file line number Diff line number Diff line change
@@ -1,31 +1,29 @@
import time
import math

const (
sieve_size = 1_000_000
q = math.sqrt(sieve_size)
all_bits_true_array = []bool{len: sieve_size, init: true}
dictionary = {
'10': 4
'100': 25
'1000': 168
'10000': 1229
'100000': 9592
'1000000': 78498
'10000000': 664579
'100000000': 5761455
'1000000000': 50847534
'10000000000': 455052511
}
)
const sieve_size = 1_000_000
const q = math.sqrt(sieve_size)
const all_bits_true_array = []bool{len: sieve_size, init: true}
const dictionary = {
'10': 4
'100': 25
'1000': 168
'10000': 1229
'100000': 9592
'1000000': 78498
'10000000': 664579
'100000000': 5761455
'1000000000': 50847534
'10000000000': 455052511
}

struct Sieve {
sieve_size u64
mut:
bits []bool
}

[direct_array_access]
@[direct_array_access]
fn (mut sieve Sieve) run_sieve() {
mut factor := u64(3)

Expand Down Expand Up @@ -54,7 +52,7 @@ fn (sieve Sieve) print_results(show_results bool, duration time.Duration, passes
for num := u64(3); num <= sieve.sieve_size; num += u64(2) {
if sieve.bits[num] {
if show_results {
print('$num, ')
print('${num}, ')
}

count++
Expand All @@ -68,9 +66,9 @@ fn (sieve Sieve) print_results(show_results bool, duration time.Duration, passes
avg := f64(duration / passes)
count_primes := sieve.count_primes()
valid := (count_primes == u64(dictionary[sieve.sieve_size.str()]))
eprintln('Passes: $passes, Time: $duration, Avg: $avg, Limit: $sieve.sieve_size, Count1: $count, Count2: $count_primes, Valid: $valid')
eprintln('Passes: ${passes}, Time: ${duration}, Avg: ${avg}, Limit: ${sieve.sieve_size}, Count1: ${count}, Count2: ${count_primes}, Valid: ${valid}')

println('marghidanu;$passes;$duration;1;algorithm=base,faithful=yes')
println('marghidanu;${passes};${duration};1;algorithm=base,faithful=yes')
}

fn (sieve Sieve) count_primes() u64 {
Expand All @@ -92,7 +90,7 @@ fn main() {
for {
mut sieve := Sieve{
sieve_size: 1_000_000
bits: all_bits_true_array
bits: all_bits_true_array
}
sieve.run_sieve()

Expand Down
Loading