diff --git a/BENCHMARK.md b/BENCHMARK.md index dd79da38e..be5bfbbf2 100644 --- a/BENCHMARK.md +++ b/BENCHMARK.md @@ -22,6 +22,7 @@ Some solutions are not included in the automated benchmark runs, either because - [Running a benchmark of all solutions for a particular language](#running-a-benchmark-of-all-solutions-for-a-particular-language) - [Running in unconfined mode](#running-in-unconfined-mode) - [Output formats](#output-formats) +- [Setting the solution timeout](#setting-the-solution-timeout) ## What operating system to use? @@ -375,3 +376,13 @@ The output format can be controlled via the `FORMATTER` variable like this: make FORMATTER=json make DIRECTORY=PrimeCrystal/solution_1 FORMATTER=csv ``` + +## Setting the solution timeout + +The run of each solution is limited to a certain duration, which is 10 minutes by default. +You can change this setting through the `TIMEOUT` variable like this: + +```shell +make TIMEOUT=15 +make DIRECTORY=PrimeCPP/solution_2 TIMEOUT=15 +``` diff --git a/Makefile b/Makefile index 55fdfb371..548d27050 100644 --- a/Makefile +++ b/Makefile @@ -4,6 +4,7 @@ SHELL := /bin/bash DIRECTORY := $(shell pwd) FORMATTER := "table" +TIMEOUT := "10" .PHONY: all all: benchmark @@ -14,6 +15,7 @@ benchmark: check-env ARGS=("-d $${REALPATH}" "-f $(FORMATTER)"); \ [ ! -z $${OUTPUT_FILE} ] && ARGS+=( "-o $${OUTPUT_FILE}" ); \ [ ! -z $${UNCONFINED} ] && ARGS+=( "--unconfined" ); \ + [ ! 
-z $${TIMEOUT} ] && ARGS+=( "-t $${TIMEOUT}" ); \ cd ./tools; npm ci --silent && npm start --silent -- benchmark $${ARGS[@]} .PHONY: check-env diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index 7e20dea07..3345d6505 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -24,80 +24,64 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { - uint32_t *array; - size_t arrSize; + uint8_t *array; + size_t logicalSize; - inline static size_t arraySize(size_t size) + static constexpr size_t arraySize(size_t size) { - return (size >> 5) + ((size & 31) > 0); + return (size >> 3) + ((size & 7) > 0); } - inline static size_t index(size_t n) + static constexpr size_t index(size_t n) { - return (n >> 5); - } - - inline static uint32_t getSubindex(size_t n, uint32_t d) - { - return d & uint32_t(uint32_t(0x01) << (n % 32)); - } - - inline void setFalseSubindex(size_t n, uint32_t &d) - { - d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t)))); + return (n >> 3); } public: - explicit BitArray(size_t size) : arrSize(size) + explicit BitArray(size_t size) : logicalSize(size) { - array = new uint32_t[arraySize(size)]; - std::memset(array, 0xFF, (size >> 3) + ((size & 7) > 0)); + auto arrSize = (size + 1) / 2; // Only store bits for odd numbers + array = new uint8_t[arraySize(arrSize)]; + std::memset(array, 0x00, arraySize(arrSize)); } - ~BitArray() {delete [] array;} + ~BitArray() { delete[] array; } - bool get(size_t n) const + constexpr bool get(size_t n) const { - return getSubindex(n, array[index(n)]); + if (n % 2 == 0) + return false; // Even numbers > 2 are not prime + n = n / 2; // Map the actual number to the index in the array + return !(array[index(n)] & (uint8_t(1) << (n % 8))); } - static constexpr uint32_t rol(uint32_t x, uint32_t n) + void set(size_t n) { - return (x<>(32-n)); + n = n / 2; // Map the actual number to the index in the 
array + array[index(n)] |= (uint8_t(1) << (n % 8)); } - void setFlagsFalse(size_t n, size_t skip) + constexpr size_t size() const { - auto rolling_mask = ~uint32_t(1 << n % 32); - auto roll_bits = skip % 32; - while (n < arrSize) { - array[index(n)] &= rolling_mask; - n += skip; - rolling_mask = rol(rolling_mask, roll_bits); - } - } - - inline size_t size() const - { - return arrSize; + return logicalSize; } }; // prime_sieve // -// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested) -// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. +// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) +// and includes the code needed to eliminate non-primes from its array by calling runSieve. class prime_sieve { private: - BitArray Bits; // Sieve data, where 1==prime, 0==not + BitArray Bits; // Sieve data, where 0==prime, 1==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) + prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default { } @@ -117,15 +101,21 @@ class prime_sieve while (factor <= q) { - for (uint64_t num = factor; num < Bits.size(); num += 2) + // Find the next prime number + for (; factor <= q; factor += 2) { - if (Bits.get(num)) + if (Bits.get(factor)) { - factor = num; break; } } - Bits.setFlagsFalse(factor * factor, factor + factor); + + // Mark multiples of the prime number as not prime + uint64_t start = factor * factor; + for (uint64_t num = start; num <= Bits.size(); num += factor * 2) + { + Bits.set(num); + } factor += 2; } @@ -137,9 +127,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (int i = 3; i < Bits.size(); i+=2) - if (Bits.get(i)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (uint64_t num = 3; num <= Bits.size(); num 
+= 2) + if (Bits.get(num)) count++; return count; } @@ -150,23 +140,24 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n & 1) - return Bits.get(n); - else + if (n == 2) + return true; + if (n < 2 || n % 2 == 0) return false; + return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -190,8 +181,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num+=2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num += 2) { if (Bits.get(num)) { @@ -210,7 +201,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid : " << (validateResults() ? "Pass" : "FAIL!") + << "Valid: " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -317,7 +308,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -352,8 +343,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::thread threads[cThreads]; - uint64_t l_passes[cThreads]; + std::vector threads(cThreads); + std::vector l_passes(cThreads); for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { diff --git a/PrimeCPP/solution_2/primes_par.exe b/PrimeCPP/solution_2/primes_par.exe new file mode 100755 index 000000000..c815cbfcf Binary files /dev/null and b/PrimeCPP/solution_2/primes_par.exe differ diff --git a/PrimeJulia/solution_1/Dockerfile b/PrimeJulia/solution_1/Dockerfile index decf14cdf..982f83436 100644 --- a/PrimeJulia/solution_1/Dockerfile +++ b/PrimeJulia/solution_1/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.6-alpine3.13 +FROM julia:1-alpine WORKDIR /opt/app diff --git a/PrimeJulia/solution_2/Dockerfile b/PrimeJulia/solution_2/Dockerfile index fbbfbaf6c..e73d5c67d 100644 --- a/PrimeJulia/solution_2/Dockerfile +++ b/PrimeJulia/solution_2/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.6.1-alpine3.13 +FROM julia:1-alpine WORKDIR /opt/app diff --git a/PrimeJulia/solution_3/Dockerfile b/PrimeJulia/solution_3/Dockerfile index 1578aafbc..a1cd012cd 100644 --- a/PrimeJulia/solution_3/Dockerfile +++ b/PrimeJulia/solution_3/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.6-buster +FROM julia:1 WORKDIR /opt/app diff --git a/PrimeJulia/solution_3/README.md b/PrimeJulia/solution_3/README.md index aa8f9e71d..8c4e3b935 100644 --- a/PrimeJulia/solution_3/README.md +++ b/PrimeJulia/solution_3/README.md @@ -10,7 +10,7 @@ optimizations. This is a sort-of "low-level" style implementation in Julia to get as much as speed as possible out of the language. It is *not* designed to be idiomatic Julia code. -This solution requires at least **Julia 1.5** to run. Julia 1.6 is +This solution requires at least **Julia 1.5** to run. 
The latest stable 1.X Julia version is recommended and is used in the Docker image. ## Description @@ -40,7 +40,7 @@ and bits are unset when the number is *prime*. This simplifies the set_bit operation slightly (`arr[i] |= mask vs. arr[i] &= ~mask`). If you see any room for improvement in the code or have any -suggestions, don't hesitate to open an issue, pull request (PR), +suggestions, don't hesitate to open an issue, pull request (PR), Discussion, or the like. Don't forget to tag me at `@louie-github` so I can be notified if my personal input is either wanted or needed. I'm open to fixing stylistic issues or discussing cosmetic changes to diff --git a/PrimeJulia/solution_4/Dockerfile b/PrimeJulia/solution_4/Dockerfile index 8c1f2fc9b..fde5f0052 100644 --- a/PrimeJulia/solution_4/Dockerfile +++ b/PrimeJulia/solution_4/Dockerfile @@ -1,4 +1,4 @@ -FROM julia:1.6-buster +FROM julia:1 WORKDIR /opt/app diff --git a/PrimeV/solution_1/primes.v b/PrimeV/solution_1/primes.v index 5921dd421..ebbe53523 100644 --- a/PrimeV/solution_1/primes.v +++ b/PrimeV/solution_1/primes.v @@ -1,23 +1,21 @@ import time import math -const ( - sieve_size = 1_000_000 - q = math.sqrt(sieve_size) - all_bits_true_array = []bool{len: sieve_size, init: true} - dictionary = { - '10': 4 - '100': 25 - '1000': 168 - '10000': 1229 - '100000': 9592 - '1000000': 78498 - '10000000': 664579 - '100000000': 5761455 - '1000000000': 50847534 - '10000000000': 455052511 - } -) +const sieve_size = 1_000_000 +const q = math.sqrt(sieve_size) +const all_bits_true_array = []bool{len: sieve_size, init: true} +const dictionary = { + '10': 4 + '100': 25 + '1000': 168 + '10000': 1229 + '100000': 9592 + '1000000': 78498 + '10000000': 664579 + '100000000': 5761455 + '1000000000': 50847534 + '10000000000': 455052511 +} struct Sieve { sieve_size u64 @@ -25,7 +23,7 @@ mut: bits []bool } -[direct_array_access] +@[direct_array_access] fn (mut sieve Sieve) run_sieve() { mut factor := u64(3) @@ -54,7 +52,7 @@ fn (sieve Sieve) 
print_results(show_results bool, duration time.Duration, passes for num := u64(3); num <= sieve.sieve_size; num += u64(2) { if sieve.bits[num] { if show_results { - print('$num, ') + print('${num}, ') } count++ @@ -68,9 +66,9 @@ fn (sieve Sieve) print_results(show_results bool, duration time.Duration, passes avg := f64(duration / passes) count_primes := sieve.count_primes() valid := (count_primes == u64(dictionary[sieve.sieve_size.str()])) - eprintln('Passes: $passes, Time: $duration, Avg: $avg, Limit: $sieve.sieve_size, Count1: $count, Count2: $count_primes, Valid: $valid') + eprintln('Passes: ${passes}, Time: ${duration}, Avg: ${avg}, Limit: ${sieve.sieve_size}, Count1: ${count}, Count2: ${count_primes}, Valid: ${valid}') - println('marghidanu;$passes;$duration;1;algorithm=base,faithful=yes') + println('marghidanu;${passes};${duration};1;algorithm=base,faithful=yes') } fn (sieve Sieve) count_primes() u64 { @@ -92,7 +90,7 @@ fn main() { for { mut sieve := Sieve{ sieve_size: 1_000_000 - bits: all_bits_true_array + bits: all_bits_true_array } sieve.run_sieve() diff --git a/PrimeV/solution_2/primes.v b/PrimeV/solution_2/primes.v index 441722a68..9cbc94905 100644 --- a/PrimeV/solution_2/primes.v +++ b/PrimeV/solution_2/primes.v @@ -2,915 +2,913 @@ import time type Prime = u64 -const ( - limit = Prime(1_000_000) - cpul1cache = 16384 - results = { - Prime(10): 4 - Prime(100): 25 - Prime(1000): 168 - Prime(10000): 1229 - Prime(100000): 9592 - Prime(1000000): 78498 - Prime(10000000): 664579 - Prime(100000000): 5761455 - Prime(1000000000): 50847534 - Prime(10000000000): 455052511 - } - result = results[limit] - bitmask = [ u8(1), u8(2), u8(4), u8(8), u8(16), u8(32), u8(64), u8(128) ] - dense_threshold = 19 - extreme_bitset = [ // only four case used -> base prime value modulo 8 - // for modulo 1 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 
* stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; bytendx += stepi { - bytearrp[bytendx] |= u8(128) - bytearrp[bytendx + r1] |= u8(1) - bytearrp[bytendx + r2] |= u8(2) - bytearrp[bytendx + r3] |= u8(4) - bytearrp[bytendx + r4] |= u8(8) - bytearrp[bytendx + r5] |= u8(16) - bytearrp[bytendx + r6] |= u8(32) - bytearrp[bytendx + r7] |= u8(64) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) - } +const limit = Prime(1_000_000) +const cpul1cache = 16384 +const results = { + Prime(10): 4 + Prime(100): 25 + Prime(1000): 168 + Prime(10000): 1229 + Prime(100000): 9592 + Prime(1000000): 78498 + Prime(10000000): 664579 + Prime(100000000): 5761455 + Prime(1000000000): 50847534 + Prime(10000000000): 455052511 +} +const result = results[limit] +const bitmask = [u8(1), u8(2), u8(4), u8(8), u8(16), u8(32), u8(64), u8(128)] +const dense_threshold = 19 +const extreme_bitset = [// only four case used -> base prime value modulo 8 + // for modulo 1 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(128) + bytearrp[bytendx + r1] |= u8(1) + bytearrp[bytendx + r2] |= u8(2) + bytearrp[bytendx + r3] |= u8(4) + bytearrp[bytendx + r4] |= u8(8) + bytearrp[bytendx + r5] |= u8(16) + bytearrp[bytendx + 
r6] |= u8(32) + bytearrp[bytendx + r7] |= u8(64) } - }, - // for modulo 3 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 * stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; bytendx += stepi { - bytearrp[bytendx] |= u8(8) - bytearrp[bytendx + r1] |= u8(64) - bytearrp[bytendx + r2] |= u8(2) - bytearrp[bytendx + r3] |= u8(16) - bytearrp[bytendx + r4] |= u8(128) - bytearrp[bytendx + r5] |= u8(4) - bytearrp[bytendx + r6] |= u8(32) - bytearrp[bytendx + r7] |= u8(1) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } - }, - // for modulo 5 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 * stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; bytendx += stepi { - bytearrp[bytendx] |= u8(8) - bytearrp[bytendx + r1] |= u8(1) - bytearrp[bytendx + r2] |= u8(32) - bytearrp[bytendx + r3] |= u8(4) - bytearrp[bytendx + r4] |= u8(128) - bytearrp[bytendx + r5] |= u8(16) - bytearrp[bytendx + r6] |= u8(2) - bytearrp[bytendx + r7] |= u8(64) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << 
(ndx & 7) - } + } + }, + // for modulo 3 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(8) + bytearrp[bytendx + r1] |= u8(64) + bytearrp[bytendx + r2] |= u8(2) + bytearrp[bytendx + r3] |= u8(16) + bytearrp[bytendx + r4] |= u8(128) + bytearrp[bytendx + r5] |= u8(4) + bytearrp[bytendx + r6] |= u8(32) + bytearrp[bytendx + r7] |= u8(1) } - }, - // for modulo 7 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - mut bytendx := strti >> 3 - r1 := ((strti + stepi) >> 3) - bytendx - r2 := ((strti + 2 * stepi) >> 3) - bytendx - r3 := ((strti + 3 * stepi) >> 3) - bytendx - r4 := ((strti + 4 * stepi) >> 3) - bytendx - r5 := ((strti + 5 * stepi) >> 3) - bytendx - r6 := ((strti + 6 * stepi) >> 3) - bytendx - r7 := ((strti + 7 * stepi) >> 3) - bytendx - bytelmt := (lmti >> 3) - r7 - for ; bytendx <= bytelmt; bytendx += stepi { - bytearrp[bytendx] |= u8(128) - bytearrp[bytendx + r1] |= u8(64) - bytearrp[bytendx + r2] |= u8(32) - bytearrp[bytendx + r3] |= u8(16) - bytearrp[bytendx + r4] |= u8(8) - bytearrp[bytendx + r5] |= u8(4) - bytearrp[bytendx + r6] |= u8(2) - bytearrp[bytendx + r7] |= u8(1) - } - for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { - bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } } - ] - dense_bitset = [ - // for step of 3 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - 
ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000000020 - v |= 0x0000000000000100 - v |= 0x0000000000000800 - v |= 0x0000000000004000 - v |= 0x0000000000020000 - v |= 0x0000000000100000 - v |= 0x0000000000800000 - v |= 0x0000000004000000 - v |= 0x0000000020000000 - v |= 0x0000000100000000 - v |= 0x0000000800000000 - v |= 0x0000004000000000 - v |= 0x0000020000000000 - v |= 0x0000100000000000 - v |= 0x0000800000000000 - v |= 0x0004000000000000 - v |= 0x0020000000000000 - v |= 0x0100000000000000 - v |= 0x0800000000000000 - wordp[0] = v | 0x4000000000000000 - v = wordp[1] | 0x0000000000000002 - v |= 0x0000000000000010 - v |= 0x0000000000000080 - v |= 0x0000000000000400 - v |= 0x0000000000002000 - v |= 0x0000000000010000 - v |= 0x0000000000080000 - v |= 0x0000000000400000 - v |= 0x0000000002000000 - v |= 0x0000000010000000 - v |= 0x0000000080000000 - v |= 0x0000000400000000 - v |= 0x0000002000000000 - v |= 0x0000010000000000 - v |= 0x0000080000000000 - v |= 0x0000400000000000 - v |= 0x0002000000000000 - v |= 0x0010000000000000 - v |= 0x0080000000000000 - v |= 0x0400000000000000 - wordp[1] = v | 0x2000000000000000 - v = wordp[2] | 0x0000000000000001 - v |= 0x0000000000000008 - v |= 0x0000000000000040 - v |= 0x0000000000000200 - v |= 0x0000000000001000 - v |= 0x0000000000008000 - v |= 0x0000000000040000 - v |= 0x0000000000200000 - v |= 0x0000000001000000 - v |= 0x0000000008000000 - v |= 0x0000000040000000 - v |= 0x0000000200000000 - v |= 0x0000001000000000 - v |= 0x0000008000000000 - v |= 0x0000040000000000 - v |= 0x0000200000000000 - v |= 0x0001000000000000 - v |= 0x0008000000000000 - v |= 0x0040000000000000 - v |= 0x0200000000000000 - v |= 
0x1000000000000000 - wordp[2] |= v | 0x8000000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + }, + // for modulo 5 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(8) + bytearrp[bytendx + r1] |= u8(1) + bytearrp[bytendx + r2] |= u8(32) + bytearrp[bytendx + r3] |= u8(4) + bytearrp[bytendx + r4] |= u8(128) + bytearrp[bytendx + r5] |= u8(16) + bytearrp[bytendx + r6] |= u8(2) + bytearrp[bytendx + r7] |= u8(64) } - }, - // for step of 5 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000000080 - v |= 0x0000000000001000 - v |= 0x0000000000020000 - v |= 0x0000000000400000 - v |= 0x0000000008000000 - v |= 0x0000000100000000 - v |= 0x0000002000000000 - v |= 0x0000040000000000 - v |= 0x0000800000000000 - v |= 0x0010000000000000 - v |= 0x0200000000000000 - wordp[0] = v | 0x4000000000000000 - v = wordp[1] | 0x0000000000000008 - v |= 0x0000000000000100 - v |= 0x0000000000002000 - v |= 0x0000000000040000 - v |= 0x0000000000800000 - v |= 0x0000000010000000 - v |= 0x0000000200000000 - v |= 0x0000004000000000 - v |= 
0x0000080000000000 - v |= 0x0001000000000000 - v |= 0x0020000000000000 - v |= 0x0400000000000000 - wordp[1] = v | 0x8000000000000000 - v = wordp[2] | 0x0000000000000010 - v |= 0x0000000000000200 - v |= 0x0000000000004000 - v |= 0x0000000000080000 - v |= 0x0000000001000000 - v |= 0x0000000020000000 - v |= 0x0000000400000000 - v |= 0x0000008000000000 - v |= 0x0000100000000000 - v |= 0x0002000000000000 - v |= 0x0040000000000000 - wordp[2] = v | 0x0800000000000000 - v = wordp[3] | 0x0000000000000001 - v |= 0x0000000000000020 - v |= 0x0000000000000400 - v |= 0x0000000000008000 - v |= 0x0000000000100000 - v |= 0x0000000002000000 - v |= 0x0000000040000000 - v |= 0x0000000800000000 - v |= 0x0000010000000000 - v |= 0x0000200000000000 - v |= 0x0004000000000000 - v |= 0x0080000000000000 - wordp[3] = v | 0x1000000000000000 - v = wordp[4] | 0x0000000000000002 - v |= 0x0000000000000040 - v |= 0x0000000000000800 - v |= 0x0000000000010000 - v |= 0x0000000000200000 - v |= 0x0000000004000000 - v |= 0x0000000080000000 - v |= 0x0000001000000000 - v |= 0x0000020000000000 - v |= 0x0000400000000000 - v |= 0x0008000000000000 - v |= 0x0100000000000000 - wordp[4] = v | 0x2000000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } - }, - // for step of 7 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000002 - v |= 0x0000000000000100 - v |= 0x0000000000008000 - v |= 0x0000000000400000 - v |= 0x0000000020000000 - v |= 
0x0000001000000000 - v |= 0x0000080000000000 - v |= 0x0004000000000000 - wordp[0] = v | 0x0200000000000000 - v = wordp[1] | 0x0000000000000001 - v |= 0x0000000000000080 - v |= 0x0000000000004000 - v |= 0x0000000000200000 - v |= 0x0000000010000000 - v |= 0x0000000800000000 - v |= 0x0000040000000000 - v |= 0x0002000000000000 - v |= 0x0100000000000000 - wordp[1] = v | 0x8000000000000000 - v = wordp[2] | 0x0000000000000040 - v |= 0x0000000000002000 - v |= 0x0000000000100000 - v |= 0x0000000008000000 - v |= 0x0000000400000000 - v |= 0x0000020000000000 - v |= 0x0001000000000000 - v |= 0x0080000000000000 - wordp[2] = v | 0x4000000000000000 - v = wordp[3] | 0x0000000000000020 - v |= 0x0000000000001000 - v |= 0x0000000000080000 - v |= 0x0000000004000000 - v |= 0x0000000200000000 - v |= 0x0000010000000000 - v |= 0x0000800000000000 - v |= 0x0040000000000000 - wordp[3] = v | 0x2000000000000000 - v = wordp[4] | 0x0000000000000010 - v |= 0x0000000000000800 - v |= 0x0000000000040000 - v |= 0x0000000002000000 - v |= 0x0000000100000000 - v |= 0x0000008000000000 - v |= 0x0000400000000000 - v |= 0x0020000000000000 - wordp[4] = v | 0x1000000000000000 - v = wordp[5] | 0x0000000000000008 - v |= 0x0000000000000400 - v |= 0x0000000000020000 - v |= 0x0000000001000000 - v |= 0x0000000080000000 - v |= 0x0000004000000000 - v |= 0x0000200000000000 - v |= 0x0010000000000000 - wordp[5] = v | 0x0800000000000000 - v = wordp[6] | 0x0000000000000004 - v |= 0x0000000000000200 - v |= 0x0000000000010000 - v |= 0x0000000000800000 - v |= 0x0000000040000000 - v |= 0x0000002000000000 - v |= 0x0000100000000000 - v |= 0x0008000000000000 - wordp[6] = v | 0x0400000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + } + }, + // for modulo 7 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + mut bytendx := strti >> 3 + r1 := ((strti + stepi) >> 3) - bytendx + r2 := ((strti + 2 * stepi) >> 3) - bytendx + 
r3 := ((strti + 3 * stepi) >> 3) - bytendx + r4 := ((strti + 4 * stepi) >> 3) - bytendx + r5 := ((strti + 5 * stepi) >> 3) - bytendx + r6 := ((strti + 6 * stepi) >> 3) - bytendx + r7 := ((strti + 7 * stepi) >> 3) - bytendx + bytelmt := (lmti >> 3) - r7 + for ; bytendx <= bytelmt; bytendx += stepi { + bytearrp[bytendx] |= u8(128) + bytearrp[bytendx + r1] |= u8(64) + bytearrp[bytendx + r2] |= u8(32) + bytearrp[bytendx + r3] |= u8(16) + bytearrp[bytendx + r4] |= u8(8) + bytearrp[bytendx + r5] |= u8(4) + bytearrp[bytendx + r6] |= u8(2) + bytearrp[bytendx + r7] |= u8(1) } - }, - // for step of 9; never used - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000000800 - v |= 0x0000000000100000 - v |= 0x0000000020000000 - v |= 0x0000004000000000 - v |= 0x0000800000000000 - wordp[0] = v | 0x0100000000000000 - v = wordp[1] | 0x0000000000000002 - v |= 0x0000000000000400 - v |= 0x0000000000080000 - v |= 0x0000000010000000 - v |= 0x0000002000000000 - v |= 0x0000400000000000 - wordp[1] = v | 0x0080000000000000 - v = wordp[2] | 0x0000000000000001 - v |= 0x0000000000000200 - v |= 0x0000000000040000 - v |= 0x0000000008000000 - v |= 0x0000001000000000 - v |= 0x0000200000000000 - v |= 0x0040000000000000 - wordp[2] = v | 0x8000000000000000 - v = wordp[3] | 0x0000000000000100 - v |= 0x0000000000020000 - v |= 0x0000000004000000 - v |= 0x0000000800000000 - v |= 0x0000100000000000 - v |= 0x0020000000000000 - wordp[3] = v | 0x4000000000000000 - v = wordp[4] | 0x0000000000000080 - v |= 0x0000000000010000 - v |= 0x0000000002000000 - v |= 0x0000000400000000 - v |= 0x0000080000000000 - v 
|= 0x0010000000000000 - wordp[4] = v | 0x2000000000000000 - v = wordp[5] | 0x0000000000000040 - v |= 0x0000000000008000 - v |= 0x0000000001000000 - v |= 0x0000000200000000 - v |= 0x0000040000000000 - v |= 0x0008000000000000 - wordp[5] = v | 0x1000000000000000 - v = wordp[6] | 0x0000000000000020 - v |= 0x0000000000004000 - v |= 0x0000000000800000 - v |= 0x0000000100000000 - v |= 0x0000020000000000 - v |= 0x0004000000000000 - wordp[6] = v | 0x0800000000000000 - v = wordp[7] | 0x0000000000000010 - v |= 0x0000000000002000 - v |= 0x0000000000400000 - v |= 0x0000000080000000 - v |= 0x0000010000000000 - v |= 0x0002000000000000 - wordp[7] = v | 0x0400000000000000 - v = wordp[8] | 0x0000000000000008 - v |= 0x0000000000001000 - v |= 0x0000000000200000 - v |= 0x0000000040000000 - v |= 0x0000008000000000 - v |= 0x0001000000000000 - wordp[8] = v | 0x0200000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + for ndx := (bytendx << 3) + (strti & 7); ndx < lmti; ndx += stepi { + bytearrp[ndx >> 3] |= u8(1) << (ndx & 7) } - }, - // for step of 11 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000040 - v |= 0x0000000000020000 - v |= 0x0000000010000000 - v |= 0x0000008000000000 - v |= 0x0004000000000000 - wordp[0] = v | 0x2000000000000000 - v = wordp[1] | 0x0000000000000100 - v |= 0x0000000000080000 - v |= 0x0000000040000000 - v |= 0x0000020000000000 - v |= 0x0010000000000000 - wordp[1] = v | 0x8000000000000000 - v = wordp[2] | 0x0000000000000400 - v |= 0x0000000000200000 - v |= 0x0000000100000000 - v |= 0x0000080000000000 - 
wordp[2] = v | 0x0040000000000000 - v = wordp[3] | 0x0000000000000002 - v |= 0x0000000000001000 - v |= 0x0000000000800000 - v |= 0x0000000400000000 - v |= 0x0000200000000000 - wordp[3] = v | 0x0100000000000000 - v = wordp[4] | 0x0000000000000008 - v |= 0x0000000000004000 - v |= 0x0000000002000000 - v |= 0x0000001000000000 - v |= 0x0000800000000000 - wordp[4] = v | 0x0400000000000000 - v = wordp[5] | 0x0000000000000020 - v |= 0x0000000000010000 - v |= 0x0000000008000000 - v |= 0x0000004000000000 - v |= 0x0002000000000000 - wordp[5] = v | 0x1000000000000000 - v = wordp[6] | 0x0000000000000080 - v |= 0x0000000000040000 - v |= 0x0000000020000000 - v |= 0x0000010000000000 - v |= 0x0008000000000000 - wordp[6] = v | 0x4000000000000000 - v = wordp[7] | 0x0000000000000200 - v |= 0x0000000000100000 - v |= 0x0000000080000000 - v |= 0x0000040000000000 - wordp[7] = v | 0x0020000000000000 - v = wordp[8] | 0x0000000000000001 - v |= 0x0000000000000800 - v |= 0x0000000000400000 - v |= 0x0000000200000000 - v |= 0x0000100000000000 - wordp[8] = v | 0x0080000000000000 - v = wordp[9] | 0x0000000000000004 - v |= 0x0000000000002000 - v |= 0x0000000001000000 - v |= 0x0000000800000000 - v |= 0x0000400000000000 - wordp[9] = v | 0x0200000000000000 - v = wordp[10] | 0x0000000000000010 - v |= 0x0000000000008000 - v |= 0x0000000004000000 - v |= 0x0000002000000000 - v |= 0x0001000000000000 - wordp[10] = v | 0x0800000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + } + }, +] +const dense_bitset = [ + // for step of 3 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] } - }, - // for step of 13 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 
3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000080 - v |= 0x0000000000100000 - v |= 0x0000000200000000 - v |= 0x0000400000000000 - wordp[0] = v | 0x0800000000000000 - v = wordp[1] | 0x0000000000000100 - v |= 0x0000000000200000 - v |= 0x0000000400000000 - v |= 0x0000800000000000 - wordp[1] = v | 0x1000000000000000 - v = wordp[2] | 0x0000000000000200 - v |= 0x0000000000400000 - v |= 0x0000000800000000 - v |= 0x0001000000000000 - wordp[2] = v | 0x2000000000000000 - v = wordp[3] | 0x0000000000000400 - v |= 0x0000000000800000 - v |= 0x0000001000000000 - v |= 0x0002000000000000 - wordp[3] = v | 0x4000000000000000 - v = wordp[4] | 0x0000000000000800 - v |= 0x0000000001000000 - v |= 0x0000002000000000 - v |= 0x0004000000000000 - wordp[4] = v | 0x8000000000000000 - v = wordp[5] | 0x0000000000001000 - v |= 0x0000000002000000 - v |= 0x0000004000000000 - wordp[5] = v | 0x0008000000000000 - v = wordp[6] | 0x0000000000000001 - v |= 0x0000000000002000 - v |= 0x0000000004000000 - v |= 0x0000008000000000 - wordp[6] = v | 0x0010000000000000 - v = wordp[7] | 0x0000000000000002 - v |= 0x0000000000004000 - v |= 0x0000000008000000 - v |= 0x0000010000000000 - wordp[7] = v | 0x0020000000000000 - v = wordp[8] | 0x0000000000000004 - v |= 0x0000000000008000 - v |= 0x0000000010000000 - v |= 0x0000020000000000 - wordp[8] = v | 0x0040000000000000 - v = wordp[9] | 0x0000000000000008 - v |= 0x0000000000010000 - v |= 0x0000000020000000 - v |= 0x0000040000000000 - wordp[9] = v | 0x0080000000000000 - v = wordp[10] | 0x0000000000000010 - v |= 0x0000000000020000 - v |= 0x0000000040000000 - v |= 0x0000080000000000 - wordp[10] = v | 0x0100000000000000 - v = wordp[11] | 0x0000000000000020 - v |= 0x0000000000040000 - v |= 0x0000000080000000 - v |= 0x0000100000000000 - wordp[11] = v | 
0x0200000000000000 - v = wordp[12] | 0x0000000000000040 - v |= 0x0000000000080000 - v |= 0x0000000100000000 - v |= 0x0000200000000000 - wordp[12] = v | 0x0400000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000000020 + v |= 0x0000000000000100 + v |= 0x0000000000000800 + v |= 0x0000000000004000 + v |= 0x0000000000020000 + v |= 0x0000000000100000 + v |= 0x0000000000800000 + v |= 0x0000000004000000 + v |= 0x0000000020000000 + v |= 0x0000000100000000 + v |= 0x0000000800000000 + v |= 0x0000004000000000 + v |= 0x0000020000000000 + v |= 0x0000100000000000 + v |= 0x0000800000000000 + v |= 0x0004000000000000 + v |= 0x0020000000000000 + v |= 0x0100000000000000 + v |= 0x0800000000000000 + wordp[0] = v | 0x4000000000000000 + v = wordp[1] | 0x0000000000000002 + v |= 0x0000000000000010 + v |= 0x0000000000000080 + v |= 0x0000000000000400 + v |= 0x0000000000002000 + v |= 0x0000000000010000 + v |= 0x0000000000080000 + v |= 0x0000000000400000 + v |= 0x0000000002000000 + v |= 0x0000000010000000 + v |= 0x0000000080000000 + v |= 0x0000000400000000 + v |= 0x0000002000000000 + v |= 0x0000010000000000 + v |= 0x0000080000000000 + v |= 0x0000400000000000 + v |= 0x0002000000000000 + v |= 0x0010000000000000 + v |= 0x0080000000000000 + v |= 0x0400000000000000 + wordp[1] = v | 0x2000000000000000 + v = wordp[2] | 0x0000000000000001 + v |= 0x0000000000000008 + v |= 0x0000000000000040 + v |= 0x0000000000000200 + v |= 0x0000000000001000 + v |= 0x0000000000008000 + v |= 0x0000000000040000 + v |= 0x0000000000200000 + v |= 0x0000000001000000 + v |= 0x0000000008000000 + v |= 0x0000000040000000 + v |= 0x0000000200000000 + v |= 0x0000001000000000 + v |= 0x0000008000000000 + v |= 
0x0000040000000000 + v |= 0x0000200000000000 + v |= 0x0001000000000000 + v |= 0x0008000000000000 + v |= 0x0040000000000000 + v |= 0x0200000000000000 + v |= 0x1000000000000000 + wordp[2] |= v | 0x8000000000000000 } - }, - // for step of 15 - never used - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000002000 - v |= 0x0000000010000000 - v |= 0x0000080000000000 - wordp[0] = v | 0x0400000000000000 - v = wordp[1] | 0x0000000000000200 - v |= 0x0000000001000000 - v |= 0x0000008000000000 - wordp[1] = v | 0x0040000000000000 - v = wordp[2] | 0x0000000000000020 - v |= 0x0000000000100000 - v |= 0x0000000800000000 - wordp[2] = v | 0x0004000000000000 - v = wordp[3] | 0x0000000000000002 - v |= 0x0000000000010000 - v |= 0x0000000080000000 - v |= 0x0000400000000000 - wordp[3] = v | 0x2000000000000000 - v = wordp[4] | 0x0000000000001000 - v |= 0x0000000008000000 - v |= 0x0000040000000000 - wordp[4] = v | 0x0200000000000000 - v = wordp[5] | 0x0000000000000100 - v |= 0x0000000000800000 - v |= 0x0000004000000000 - wordp[5] = v | 0x0020000000000000 - v = wordp[6] | 0x0000000000000010 - v |= 0x0000000000080000 - v |= 0x0000000400000000 - wordp[6] = v | 0x0002000000000000 - v = wordp[7] | 0x0000000000000001 - v |= 0x0000000000008000 - v |= 0x0000000040000000 - v |= 0x0000200000000000 - wordp[7] = v | 0x1000000000000000 - v = wordp[8] | 0x0000000000000800 - v |= 0x0000000004000000 - v |= 0x0000020000000000 - wordp[8] = v | 0x0100000000000000 - v = wordp[9] | 0x0000000000000080 - v |= 0x0000000000400000 - v |= 0x0000002000000000 - wordp[9] = v | 0x0010000000000000 - v = wordp[10] | 0x0000000000000008 - v |= 
0x0000000000040000 - v |= 0x0000000200000000 - v |= 0x0001000000000000 - wordp[10] = v | 0x8000000000000000 - v = wordp[11] | 0x0000000000004000 - v |= 0x0000000020000000 - v |= 0x0000100000000000 - wordp[11] = v | 0x0800000000000000 - v = wordp[12] | 0x0000000000000400 - v |= 0x0000000002000000 - v |= 0x0000010000000000 - wordp[12] = v | 0x0080000000000000 - v = wordp[13] | 0x0000000000000040 - v |= 0x0000000000200000 - v |= 0x0000001000000000 - wordp[13] = v | 0x0008000000000000 - v = wordp[14] | 0x0000000000000004 - v |= 0x0000000000020000 - v |= 0x0000000100000000 - v |= 0x0000800000000000 - wordp[14] = v | 0x4000000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] } - }, - // for step of 17 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & (-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000004 - v |= 0x0000000000080000 - v |= 0x0000001000000000 - wordp[0] = v | 0x0020000000000000 - v = wordp[1] | 0x0000000000000040 - v |= 0x0000000000800000 - v |= 0x0000010000000000 - wordp[1] = v | 0x0200000000000000 - v = wordp[2] | 0x0000000000000400 - v |= 0x0000000008000000 - v |= 0x0000100000000000 - wordp[2] = v | 0x2000000000000000 - v = wordp[3] | 0x0000000000004000 - v |= 0x0000000080000000 - wordp[3] = v | 0x0001000000000000 - v = wordp[4] | 0x0000000000000002 - v |= 0x0000000000040000 - v |= 0x0000000800000000 - wordp[4] = v | 0x0010000000000000 - v = wordp[5] | 0x0000000000000020 - v |= 0x0000000000400000 - v |= 0x0000008000000000 - wordp[5] = v | 
0x0100000000000000 - v = wordp[6] | 0x0000000000000200 - v |= 0x0000000004000000 - v |= 0x0000080000000000 - wordp[6] = v | 0x1000000000000000 - v = wordp[7] | 0x0000000000002000 - v |= 0x0000000040000000 - wordp[7] = v | 0x0000800000000000 - v = wordp[8] | 0x0000000000000001 - v |= 0x0000000000020000 - v |= 0x0000000400000000 - wordp[8] = v | 0x0008000000000000 - v = wordp[9] | 0x0000000000000010 - v |= 0x0000000000200000 - v |= 0x0000004000000000 - wordp[9] = v | 0x0080000000000000 - v = wordp[10] | 0x0000000000000100 - v |= 0x0000000002000000 - v |= 0x0000040000000000 - wordp[10] = v | 0x0800000000000000 - v = wordp[11] | 0x0000000000001000 - v |= 0x0000000020000000 - v |= 0x0000400000000000 - wordp[11] = v | 0x8000000000000000 - v = wordp[12] | 0x0000000000010000 - v |= 0x0000000200000000 - wordp[12] = v | 0x0004000000000000 - v = wordp[13] | 0x0000000000000008 - v |= 0x0000000000100000 - v |= 0x0000002000000000 - wordp[13] = v | 0x0040000000000000 - v = wordp[14] | 0x0000000000000080 - v |= 0x0000000001000000 - v |= 0x0000020000000000 - wordp[14] = v | 0x0400000000000000 - v = wordp[15] | 0x0000000000000800 - v |= 0x0000000010000000 - v |= 0x0000200000000000 - wordp[15] = v | 0x4000000000000000 - v = wordp[16] | 0x0000000000008000 - v |= 0x0000000100000000 - wordp[16] = v | 0x0002000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + } + }, + // for step of 5 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] } - }, - // for step of 19 - fn (bytearr []u8, strti int, lmti int, stepi int) { - unsafe { - bytearrp := &bytearr[0] - ilmt := strti | 63 - mut ndx := strti - for ; ndx <= ilmt; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } - byteadv := stepi << 3 - bytelmt := (lmti >> 3) - (byteadv - 8) - mut bytendx := (ndx >> 3) & 
(-8) - ndx &= 63 - for ; bytendx < bytelmt; bytendx += byteadv { - wordp := &u64(&bytearr[bytendx]) - mut v := wordp[0] | 0x0000000000000040 - v |= 0x0000000002000000 - v |= 0x0000100000000000 - wordp[0] = v | 0x8000000000000000 - v = wordp[1] | 0x0000000000040000 - v |= 0x0000002000000000 - wordp[1] = v | 0x0100000000000000 - v = wordp[2] | 0x0000000000000800 - v |= 0x0000000040000000 - wordp[2] = v | 0x0002000000000000 - v = wordp[3] | 0x0000000000000010 - v |= 0x0000000000800000 - v |= 0x0000040000000000 - wordp[3] = v | 0x2000000000000000 - v = wordp[4] | 0x0000000000010000 - v |= 0x0000000800000000 - wordp[4] = v | 0x0040000000000000 - v = wordp[5] | 0x0000000000000200 - v |= 0x0000000010000000 - wordp[5] = v | 0x0000800000000000 - v = wordp[6] | 0x0000000000000004 - v |= 0x0000000000200000 - v |= 0x0000010000000000 - wordp[6] = v | 0x0800000000000000 - v = wordp[7] | 0x0000000000004000 - v |= 0x0000000200000000 - wordp[7] = v | 0x0010000000000000 - v = wordp[8] | 0x0000000000000080 - v |= 0x0000000004000000 - wordp[8] = v | 0x0000200000000000 - v = wordp[9] | 0x0000000000000001 - v |= 0x0000000000080000 - v |= 0x0000004000000000 - wordp[9] = v | 0x0200000000000000 - v = wordp[10] | 0x0000000000001000 - v |= 0x0000000080000000 - wordp[10] = v | 0x0004000000000000 - v = wordp[11] | 0x0000000000000020 - v |= 0x0000000001000000 - v |= 0x0000080000000000 - wordp[11] = v | 0x4000000000000000 - v = wordp[12] | 0x0000000000020000 - v |= 0x0000001000000000 - wordp[12] = v | 0x0080000000000000 - v = wordp[13] | 0x0000000000000400 - v |= 0x0000000020000000 - wordp[13] = v | 0x0001000000000000 - v = wordp[14] | 0x0000000000000008 - v |= 0x0000000000400000 - v |= 0x0000020000000000 - wordp[14] = v | 0x1000000000000000 - v = wordp[15] | 0x0000000000008000 - v |= 0x0000000400000000 - wordp[15] = v | 0x0020000000000000 - v = wordp[16] | 0x0000000000000100 - v |= 0x0000000008000000 - wordp[16] = v | 0x0000400000000000 - v = wordp[17] | 0x0000000000000002 - v |= 
0x0000000000100000 - v |= 0x0000008000000000 - wordp[17] = v | 0x0400000000000000 - v = wordp[18] | 0x0000000000002000 - v |= 0x0000000100000000 - wordp[18] = v | 0x0008000000000000 - } - ndx |= bytendx << 3 - for ; ndx <= lmti; ndx += stepi { - bytearrp[ndx >> 3] |= bitmask[ndx & 7] - } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000000080 + v |= 0x0000000000001000 + v |= 0x0000000000020000 + v |= 0x0000000000400000 + v |= 0x0000000008000000 + v |= 0x0000000100000000 + v |= 0x0000002000000000 + v |= 0x0000040000000000 + v |= 0x0000800000000000 + v |= 0x0010000000000000 + v |= 0x0200000000000000 + wordp[0] = v | 0x4000000000000000 + v = wordp[1] | 0x0000000000000008 + v |= 0x0000000000000100 + v |= 0x0000000000002000 + v |= 0x0000000000040000 + v |= 0x0000000000800000 + v |= 0x0000000010000000 + v |= 0x0000000200000000 + v |= 0x0000004000000000 + v |= 0x0000080000000000 + v |= 0x0001000000000000 + v |= 0x0020000000000000 + v |= 0x0400000000000000 + wordp[1] = v | 0x8000000000000000 + v = wordp[2] | 0x0000000000000010 + v |= 0x0000000000000200 + v |= 0x0000000000004000 + v |= 0x0000000000080000 + v |= 0x0000000001000000 + v |= 0x0000000020000000 + v |= 0x0000000400000000 + v |= 0x0000008000000000 + v |= 0x0000100000000000 + v |= 0x0002000000000000 + v |= 0x0040000000000000 + wordp[2] = v | 0x0800000000000000 + v = wordp[3] | 0x0000000000000001 + v |= 0x0000000000000020 + v |= 0x0000000000000400 + v |= 0x0000000000008000 + v |= 0x0000000000100000 + v |= 0x0000000002000000 + v |= 0x0000000040000000 + v |= 0x0000000800000000 + v |= 0x0000010000000000 + v |= 0x0000200000000000 + v |= 0x0004000000000000 + v |= 0x0080000000000000 + wordp[3] = v | 0x1000000000000000 + v = wordp[4] | 0x0000000000000002 + v |= 0x0000000000000040 + v |= 0x0000000000000800 + v |= 
0x0000000000010000 + v |= 0x0000000000200000 + v |= 0x0000000004000000 + v |= 0x0000000080000000 + v |= 0x0000001000000000 + v |= 0x0000020000000000 + v |= 0x0000400000000000 + v |= 0x0008000000000000 + v |= 0x0100000000000000 + wordp[4] = v | 0x2000000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 7 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000002 + v |= 0x0000000000000100 + v |= 0x0000000000008000 + v |= 0x0000000000400000 + v |= 0x0000000020000000 + v |= 0x0000001000000000 + v |= 0x0000080000000000 + v |= 0x0004000000000000 + wordp[0] = v | 0x0200000000000000 + v = wordp[1] | 0x0000000000000001 + v |= 0x0000000000000080 + v |= 0x0000000000004000 + v |= 0x0000000000200000 + v |= 0x0000000010000000 + v |= 0x0000000800000000 + v |= 0x0000040000000000 + v |= 0x0002000000000000 + v |= 0x0100000000000000 + wordp[1] = v | 0x8000000000000000 + v = wordp[2] | 0x0000000000000040 + v |= 0x0000000000002000 + v |= 0x0000000000100000 + v |= 0x0000000008000000 + v |= 0x0000000400000000 + v |= 0x0000020000000000 + v |= 0x0001000000000000 + v |= 0x0080000000000000 + wordp[2] = v | 0x4000000000000000 + v = wordp[3] | 0x0000000000000020 + v |= 0x0000000000001000 + v |= 0x0000000000080000 + v |= 0x0000000004000000 + v |= 0x0000000200000000 + v |= 0x0000010000000000 + v |= 0x0000800000000000 + v |= 0x0040000000000000 + wordp[3] = v | 0x2000000000000000 + v = wordp[4] | 0x0000000000000010 + v |= 0x0000000000000800 + v |= 0x0000000000040000 + v |= 0x0000000002000000 + v |= 0x0000000100000000 + v |= 
0x0000008000000000 + v |= 0x0000400000000000 + v |= 0x0020000000000000 + wordp[4] = v | 0x1000000000000000 + v = wordp[5] | 0x0000000000000008 + v |= 0x0000000000000400 + v |= 0x0000000000020000 + v |= 0x0000000001000000 + v |= 0x0000000080000000 + v |= 0x0000004000000000 + v |= 0x0000200000000000 + v |= 0x0010000000000000 + wordp[5] = v | 0x0800000000000000 + v = wordp[6] | 0x0000000000000004 + v |= 0x0000000000000200 + v |= 0x0000000000010000 + v |= 0x0000000000800000 + v |= 0x0000000040000000 + v |= 0x0000002000000000 + v |= 0x0000100000000000 + v |= 0x0008000000000000 + wordp[6] = v | 0x0400000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 9; never used + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000000800 + v |= 0x0000000000100000 + v |= 0x0000000020000000 + v |= 0x0000004000000000 + v |= 0x0000800000000000 + wordp[0] = v | 0x0100000000000000 + v = wordp[1] | 0x0000000000000002 + v |= 0x0000000000000400 + v |= 0x0000000000080000 + v |= 0x0000000010000000 + v |= 0x0000002000000000 + v |= 0x0000400000000000 + wordp[1] = v | 0x0080000000000000 + v = wordp[2] | 0x0000000000000001 + v |= 0x0000000000000200 + v |= 0x0000000000040000 + v |= 0x0000000008000000 + v |= 0x0000001000000000 + v |= 0x0000200000000000 + v |= 0x0040000000000000 + wordp[2] = v | 0x8000000000000000 + v = wordp[3] | 0x0000000000000100 + v |= 0x0000000000020000 + v |= 0x0000000004000000 + v |= 0x0000000800000000 + v |= 0x0000100000000000 + v |= 0x0020000000000000 + wordp[3] = v | 
0x4000000000000000 + v = wordp[4] | 0x0000000000000080 + v |= 0x0000000000010000 + v |= 0x0000000002000000 + v |= 0x0000000400000000 + v |= 0x0000080000000000 + v |= 0x0010000000000000 + wordp[4] = v | 0x2000000000000000 + v = wordp[5] | 0x0000000000000040 + v |= 0x0000000000008000 + v |= 0x0000000001000000 + v |= 0x0000000200000000 + v |= 0x0000040000000000 + v |= 0x0008000000000000 + wordp[5] = v | 0x1000000000000000 + v = wordp[6] | 0x0000000000000020 + v |= 0x0000000000004000 + v |= 0x0000000000800000 + v |= 0x0000000100000000 + v |= 0x0000020000000000 + v |= 0x0004000000000000 + wordp[6] = v | 0x0800000000000000 + v = wordp[7] | 0x0000000000000010 + v |= 0x0000000000002000 + v |= 0x0000000000400000 + v |= 0x0000000080000000 + v |= 0x0000010000000000 + v |= 0x0002000000000000 + wordp[7] = v | 0x0400000000000000 + v = wordp[8] | 0x0000000000000008 + v |= 0x0000000000001000 + v |= 0x0000000000200000 + v |= 0x0000000040000000 + v |= 0x0000008000000000 + v |= 0x0001000000000000 + wordp[8] = v | 0x0200000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 11 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000040 + v |= 0x0000000000020000 + v |= 0x0000000010000000 + v |= 0x0000008000000000 + v |= 0x0004000000000000 + wordp[0] = v | 0x2000000000000000 + v = wordp[1] | 0x0000000000000100 + v |= 0x0000000000080000 + v |= 0x0000000040000000 + v |= 0x0000020000000000 + v |= 0x0010000000000000 + wordp[1] = v | 0x8000000000000000 + v = wordp[2] | 0x0000000000000400 + v |= 0x0000000000200000 + v |= 
0x0000000100000000 + v |= 0x0000080000000000 + wordp[2] = v | 0x0040000000000000 + v = wordp[3] | 0x0000000000000002 + v |= 0x0000000000001000 + v |= 0x0000000000800000 + v |= 0x0000000400000000 + v |= 0x0000200000000000 + wordp[3] = v | 0x0100000000000000 + v = wordp[4] | 0x0000000000000008 + v |= 0x0000000000004000 + v |= 0x0000000002000000 + v |= 0x0000001000000000 + v |= 0x0000800000000000 + wordp[4] = v | 0x0400000000000000 + v = wordp[5] | 0x0000000000000020 + v |= 0x0000000000010000 + v |= 0x0000000008000000 + v |= 0x0000004000000000 + v |= 0x0002000000000000 + wordp[5] = v | 0x1000000000000000 + v = wordp[6] | 0x0000000000000080 + v |= 0x0000000000040000 + v |= 0x0000000020000000 + v |= 0x0000010000000000 + v |= 0x0008000000000000 + wordp[6] = v | 0x4000000000000000 + v = wordp[7] | 0x0000000000000200 + v |= 0x0000000000100000 + v |= 0x0000000080000000 + v |= 0x0000040000000000 + wordp[7] = v | 0x0020000000000000 + v = wordp[8] | 0x0000000000000001 + v |= 0x0000000000000800 + v |= 0x0000000000400000 + v |= 0x0000000200000000 + v |= 0x0000100000000000 + wordp[8] = v | 0x0080000000000000 + v = wordp[9] | 0x0000000000000004 + v |= 0x0000000000002000 + v |= 0x0000000001000000 + v |= 0x0000000800000000 + v |= 0x0000400000000000 + wordp[9] = v | 0x0200000000000000 + v = wordp[10] | 0x0000000000000010 + v |= 0x0000000000008000 + v |= 0x0000000004000000 + v |= 0x0000002000000000 + v |= 0x0001000000000000 + wordp[10] = v | 0x0800000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 13 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := 
&u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000080 + v |= 0x0000000000100000 + v |= 0x0000000200000000 + v |= 0x0000400000000000 + wordp[0] = v | 0x0800000000000000 + v = wordp[1] | 0x0000000000000100 + v |= 0x0000000000200000 + v |= 0x0000000400000000 + v |= 0x0000800000000000 + wordp[1] = v | 0x1000000000000000 + v = wordp[2] | 0x0000000000000200 + v |= 0x0000000000400000 + v |= 0x0000000800000000 + v |= 0x0001000000000000 + wordp[2] = v | 0x2000000000000000 + v = wordp[3] | 0x0000000000000400 + v |= 0x0000000000800000 + v |= 0x0000001000000000 + v |= 0x0002000000000000 + wordp[3] = v | 0x4000000000000000 + v = wordp[4] | 0x0000000000000800 + v |= 0x0000000001000000 + v |= 0x0000002000000000 + v |= 0x0004000000000000 + wordp[4] = v | 0x8000000000000000 + v = wordp[5] | 0x0000000000001000 + v |= 0x0000000002000000 + v |= 0x0000004000000000 + wordp[5] = v | 0x0008000000000000 + v = wordp[6] | 0x0000000000000001 + v |= 0x0000000000002000 + v |= 0x0000000004000000 + v |= 0x0000008000000000 + wordp[6] = v | 0x0010000000000000 + v = wordp[7] | 0x0000000000000002 + v |= 0x0000000000004000 + v |= 0x0000000008000000 + v |= 0x0000010000000000 + wordp[7] = v | 0x0020000000000000 + v = wordp[8] | 0x0000000000000004 + v |= 0x0000000000008000 + v |= 0x0000000010000000 + v |= 0x0000020000000000 + wordp[8] = v | 0x0040000000000000 + v = wordp[9] | 0x0000000000000008 + v |= 0x0000000000010000 + v |= 0x0000000020000000 + v |= 0x0000040000000000 + wordp[9] = v | 0x0080000000000000 + v = wordp[10] | 0x0000000000000010 + v |= 0x0000000000020000 + v |= 0x0000000040000000 + v |= 0x0000080000000000 + wordp[10] = v | 0x0100000000000000 + v = wordp[11] | 0x0000000000000020 + v |= 0x0000000000040000 + v |= 0x0000000080000000 + v |= 0x0000100000000000 + wordp[11] = v | 0x0200000000000000 + v = wordp[12] | 0x0000000000000040 + v |= 0x0000000000080000 + v |= 0x0000000100000000 + v |= 0x0000200000000000 + wordp[12] = v | 0x0400000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= 
lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 15 - never used + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000002000 + v |= 0x0000000010000000 + v |= 0x0000080000000000 + wordp[0] = v | 0x0400000000000000 + v = wordp[1] | 0x0000000000000200 + v |= 0x0000000001000000 + v |= 0x0000008000000000 + wordp[1] = v | 0x0040000000000000 + v = wordp[2] | 0x0000000000000020 + v |= 0x0000000000100000 + v |= 0x0000000800000000 + wordp[2] = v | 0x0004000000000000 + v = wordp[3] | 0x0000000000000002 + v |= 0x0000000000010000 + v |= 0x0000000080000000 + v |= 0x0000400000000000 + wordp[3] = v | 0x2000000000000000 + v = wordp[4] | 0x0000000000001000 + v |= 0x0000000008000000 + v |= 0x0000040000000000 + wordp[4] = v | 0x0200000000000000 + v = wordp[5] | 0x0000000000000100 + v |= 0x0000000000800000 + v |= 0x0000004000000000 + wordp[5] = v | 0x0020000000000000 + v = wordp[6] | 0x0000000000000010 + v |= 0x0000000000080000 + v |= 0x0000000400000000 + wordp[6] = v | 0x0002000000000000 + v = wordp[7] | 0x0000000000000001 + v |= 0x0000000000008000 + v |= 0x0000000040000000 + v |= 0x0000200000000000 + wordp[7] = v | 0x1000000000000000 + v = wordp[8] | 0x0000000000000800 + v |= 0x0000000004000000 + v |= 0x0000020000000000 + wordp[8] = v | 0x0100000000000000 + v = wordp[9] | 0x0000000000000080 + v |= 0x0000000000400000 + v |= 0x0000002000000000 + wordp[9] = v | 0x0010000000000000 + v = wordp[10] | 0x0000000000000008 + v |= 0x0000000000040000 + v |= 0x0000000200000000 + v |= 0x0001000000000000 + wordp[10] = v | 0x8000000000000000 + v = wordp[11] | 
0x0000000000004000 + v |= 0x0000000020000000 + v |= 0x0000100000000000 + wordp[11] = v | 0x0800000000000000 + v = wordp[12] | 0x0000000000000400 + v |= 0x0000000002000000 + v |= 0x0000010000000000 + wordp[12] = v | 0x0080000000000000 + v = wordp[13] | 0x0000000000000040 + v |= 0x0000000000200000 + v |= 0x0000001000000000 + wordp[13] = v | 0x0008000000000000 + v = wordp[14] | 0x0000000000000004 + v |= 0x0000000000020000 + v |= 0x0000000100000000 + v |= 0x0000800000000000 + wordp[14] = v | 0x4000000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 17 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000004 + v |= 0x0000000000080000 + v |= 0x0000001000000000 + wordp[0] = v | 0x0020000000000000 + v = wordp[1] | 0x0000000000000040 + v |= 0x0000000000800000 + v |= 0x0000010000000000 + wordp[1] = v | 0x0200000000000000 + v = wordp[2] | 0x0000000000000400 + v |= 0x0000000008000000 + v |= 0x0000100000000000 + wordp[2] = v | 0x2000000000000000 + v = wordp[3] | 0x0000000000004000 + v |= 0x0000000080000000 + wordp[3] = v | 0x0001000000000000 + v = wordp[4] | 0x0000000000000002 + v |= 0x0000000000040000 + v |= 0x0000000800000000 + wordp[4] = v | 0x0010000000000000 + v = wordp[5] | 0x0000000000000020 + v |= 0x0000000000400000 + v |= 0x0000008000000000 + wordp[5] = v | 0x0100000000000000 + v = wordp[6] | 0x0000000000000200 + v |= 0x0000000004000000 + v |= 0x0000080000000000 + wordp[6] = v | 0x1000000000000000 + v = wordp[7] | 0x0000000000002000 + v |= 0x0000000040000000 + wordp[7] = v | 0x0000800000000000 
+ v = wordp[8] | 0x0000000000000001 + v |= 0x0000000000020000 + v |= 0x0000000400000000 + wordp[8] = v | 0x0008000000000000 + v = wordp[9] | 0x0000000000000010 + v |= 0x0000000000200000 + v |= 0x0000004000000000 + wordp[9] = v | 0x0080000000000000 + v = wordp[10] | 0x0000000000000100 + v |= 0x0000000002000000 + v |= 0x0000040000000000 + wordp[10] = v | 0x0800000000000000 + v = wordp[11] | 0x0000000000001000 + v |= 0x0000000020000000 + v |= 0x0000400000000000 + wordp[11] = v | 0x8000000000000000 + v = wordp[12] | 0x0000000000010000 + v |= 0x0000000200000000 + wordp[12] = v | 0x0004000000000000 + v = wordp[13] | 0x0000000000000008 + v |= 0x0000000000100000 + v |= 0x0000002000000000 + wordp[13] = v | 0x0040000000000000 + v = wordp[14] | 0x0000000000000080 + v |= 0x0000000001000000 + v |= 0x0000020000000000 + wordp[14] = v | 0x0400000000000000 + v = wordp[15] | 0x0000000000000800 + v |= 0x0000000010000000 + v |= 0x0000200000000000 + wordp[15] = v | 0x4000000000000000 + v = wordp[16] | 0x0000000000008000 + v |= 0x0000000100000000 + wordp[16] = v | 0x0002000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + } + }, + // for step of 19 + fn (bytearr []u8, strti int, lmti int, stepi int) { + unsafe { + bytearrp := &bytearr[0] + ilmt := strti | 63 + mut ndx := strti + for ; ndx <= ilmt; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] + } + byteadv := stepi << 3 + bytelmt := (lmti >> 3) - (byteadv - 8) + mut bytendx := (ndx >> 3) & (-8) + ndx &= 63 + for ; bytendx < bytelmt; bytendx += byteadv { + wordp := &u64(&bytearr[bytendx]) + mut v := wordp[0] | 0x0000000000000040 + v |= 0x0000000002000000 + v |= 0x0000100000000000 + wordp[0] = v | 0x8000000000000000 + v = wordp[1] | 0x0000000000040000 + v |= 0x0000002000000000 + wordp[1] = v | 0x0100000000000000 + v = wordp[2] | 0x0000000000000800 + v |= 0x0000000040000000 + wordp[2] = v | 0x0002000000000000 + v = wordp[3] | 0x0000000000000010 + v |= 
0x0000000000800000 + v |= 0x0000040000000000 + wordp[3] = v | 0x2000000000000000 + v = wordp[4] | 0x0000000000010000 + v |= 0x0000000800000000 + wordp[4] = v | 0x0040000000000000 + v = wordp[5] | 0x0000000000000200 + v |= 0x0000000010000000 + wordp[5] = v | 0x0000800000000000 + v = wordp[6] | 0x0000000000000004 + v |= 0x0000000000200000 + v |= 0x0000010000000000 + wordp[6] = v | 0x0800000000000000 + v = wordp[7] | 0x0000000000004000 + v |= 0x0000000200000000 + wordp[7] = v | 0x0010000000000000 + v = wordp[8] | 0x0000000000000080 + v |= 0x0000000004000000 + wordp[8] = v | 0x0000200000000000 + v = wordp[9] | 0x0000000000000001 + v |= 0x0000000000080000 + v |= 0x0000004000000000 + wordp[9] = v | 0x0200000000000000 + v = wordp[10] | 0x0000000000001000 + v |= 0x0000000080000000 + wordp[10] = v | 0x0004000000000000 + v = wordp[11] | 0x0000000000000020 + v |= 0x0000000001000000 + v |= 0x0000080000000000 + wordp[11] = v | 0x4000000000000000 + v = wordp[12] | 0x0000000000020000 + v |= 0x0000001000000000 + wordp[12] = v | 0x0080000000000000 + v = wordp[13] | 0x0000000000000400 + v |= 0x0000000020000000 + wordp[13] = v | 0x0001000000000000 + v = wordp[14] | 0x0000000000000008 + v |= 0x0000000000400000 + v |= 0x0000020000000000 + wordp[14] = v | 0x1000000000000000 + v = wordp[15] | 0x0000000000008000 + v |= 0x0000000400000000 + wordp[15] = v | 0x0020000000000000 + v = wordp[16] | 0x0000000000000100 + v |= 0x0000000008000000 + wordp[16] = v | 0x0000400000000000 + v = wordp[17] | 0x0000000000000002 + v |= 0x0000000000100000 + v |= 0x0000008000000000 + wordp[17] = v | 0x0400000000000000 + v = wordp[18] | 0x0000000000002000 + v |= 0x0000000100000000 + wordp[18] = v | 0x0008000000000000 + } + ndx |= bytendx << 3 + for ; ndx <= lmti; ndx += stepi { + bytearrp[ndx >> 3] |= bitmask[ndx & 7] } } - ] -) + }, +] enum Technique { bit_twiddle @@ -921,8 +919,8 @@ enum Technique { } struct PrimeSieve { - sieve_size Prime - sieve_buffer []u8 + sieve_size Prime + sieve_buffer []u8 } fn 
new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { @@ -934,10 +932,14 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { .bit_twiddle { unsafe { sievep := &sieve[0] - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 for ; swi <= bitlmt; swi += bp { sievep[swi >> 3] |= bitmask[swi & 7] @@ -948,10 +950,14 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { .stride8 { unsafe { sievep := &sieve[0] - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 tstlmt := swi + (bp << 3) - 1 slmt := if tstlmt > bitlmt { bitlmt } else { tstlmt } @@ -969,10 +975,14 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { sievep := &sieve[0] strts := []int{len: 8, cap: 8} strtsp := &strts[0] - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sievep[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 bp2 := bp + bp bp3 := bp + bp2 @@ -983,8 +993,11 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { swi += bp } for pgndx := pgndx0; pgndx < size; pgndx += cpul1cache { - pglmt := if pgndx + cpul1cache > size { size - 1 } - else { pgndx + cpul1cache - 1 } + pglmt := if pgndx + cpul1cache > size { + size - 1 + } else { + pgndx + cpul1cache - 1 + } pgstp := pglmt - bp3 for si := 0; si < 8; si++ { mut bytendx := strtsp[si] @@ -1005,16 +1018,19 @@ 
fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { } } .extreme, .extreme_hybrid { - for i := 0; ; i++ { + for i := 0; true; i++ { mut swi := (i + i) * (i + 3) + 3 // computer mark start index - if swi > bitlmt { break } - if (sieve[i >> 3] & bitmask[i & 7]) != u8(0) { continue } + if swi > bitlmt { + break + } + if (sieve[i >> 3] & bitmask[i & 7]) != u8(0) { + continue + } bp := i + i + 3 if tec == Technique.extreme_hybrid && bp <= dense_threshold { // only from 3 to 19; cases 9 and 15 actually not used dense_bitset[(bp - 3) >> 1](sieve, swi, bitlmt, bp) - } - else { + } else { // only four cases are actually used! extreme_bitset[(bp >> 1) & 3](sieve, swi, bitlmt, bp) } @@ -1022,15 +1038,17 @@ fn new_prime_sieve(lmt Prime, tec Technique) PrimeSieve { } } - return PrimeSieve { - sieve_size: lmt + return PrimeSieve{ + sieve_size: lmt sieve_buffer: sieve } } fn (sieve PrimeSieve) count_primes() int { if sieve.sieve_size < 3 { - if sieve.sieve_size < 2 { return 0 } + if sieve.sieve_size < 2 { + return 0 + } return 1 } @@ -1053,21 +1071,24 @@ fn bench(tec Technique) { passes++ duration := (time.now() - start_time).seconds() if duration >= 5.0 { - mut rsltstr := "2 " + mut rsltstr := '2 ' mut count := 1 bitlmt := int((sieve.sieve_size - 3) >> 1) for ndx := 0; ndx <= bitlmt; ndx++ { if (sieve.sieve_buffer[ndx >> 3] & bitmask[ndx & 7]) == u8(0) { - if count < 25 { rsltstr += (ndx + ndx + 3).str() + " " } + if count < 25 { + rsltstr += (ndx + ndx + 3).str() + ' ' + } count++ } } avg := duration / f64(passes) count_primes := sieve.count_primes() - valid := rsltstr == "2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 " - && count_primes == result && count == result - eprintln('Passes: $passes, Time: $duration, Avg: $avg, Limit: $sieve.sieve_size, Count1: $count, Count2: $count_primes, Valid: $valid') + valid := + rsltstr == '2 3 5 7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89 97 ' + && count_primes == result && count == result + 
eprintln('Passes: ${passes}, Time: ${duration}, Avg: ${avg}, Limit: ${sieve.sieve_size}, Count1: ${count}, Count2: ${count_primes}, Valid: ${valid}') label := 'GordonBGood_' + match tec { .bit_twiddle { 'bittwiddle' } .stride8 { 'stride8' } @@ -1075,7 +1096,7 @@ fn bench(tec Technique) { .extreme { 'extreme' } .extreme_hybrid { 'extreme-hybrid' } } - println('$label;$passes;$duration;1;algorithm=base,faithful=yes,bits=1') + println('${label};${passes};${duration};1;algorithm=base,faithful=yes,bits=1') break } } @@ -1088,4 +1109,3 @@ fn main() { bench(Technique.extreme) bench(Technique.extreme_hybrid) } - diff --git a/tools/src/commands/benchmark.ts b/tools/src/commands/benchmark.ts index 0f34acb74..5dc2a8331 100644 --- a/tools/src/commands/benchmark.ts +++ b/tools/src/commands/benchmark.ts @@ -30,9 +30,11 @@ export const command = new Command('benchmark') .option('-f, --formatter ', 'Output formatter', 'table') .option('-o, --output-file ', 'Write output to given file') .option('-u, --unconfined', 'Run with seccomp:unconfined (native performance for interpreted languages)') + .option('-t, --timeout ', 'Timeout for each benchmark in minutes', '10') .action(async (args) => { const directory = path.resolve(args.directory as string); const unconfined = args.unconfined === true; + const timeout = parseInt(args.timeout as string); logger.info(`Unconfined mode: ${unconfined}`); @@ -106,11 +108,18 @@ export const command = new Command('benchmark') let output = ''; try { logger.info(`[${implementation}][${solution}] Running...`); - output = dockerService.runContainer(imageName, options); + output = dockerService.runContainer(imageName, timeout, options); } catch (err) { - logger.warn( - `[${implementation}][${solution}] Exited with abnormal code: ${err.status}. Results might be partial...` - ); + if (err.signal) { + logger.warn( + `[${implementation}][${solution}] Killed after ${timeout} minutes with signal: ${err.signal}. 
Results are likely partial...` + ); + } + else { + logger.warn( + `[${implementation}][${solution}] Exited with abnormal code: ${err.status}. Results might be partial...` + ); + } output = err.output .filter((block: Buffer | null) => block !== null) .map((block: Buffer) => block.toString('utf8')) diff --git a/tools/src/services/docker.ts b/tools/src/services/docker.ts index cf891c158..04aafc5fb 100644 --- a/tools/src/services/docker.ts +++ b/tools/src/services/docker.ts @@ -7,9 +7,11 @@ export default class DockerService { }); } - public runContainer(imageName: string, options: Array): string { + public runContainer(imageName: string, duration: number, options: Array): string { const output = child_process.execSync(`docker run --rm ${options.join(' ')} ${imageName}`, { - stdio: 'pipe' + stdio: 'pipe', + timeout: duration ? duration * 60000 : undefined, + killSignal: 'SIGKILL' }); return output.toString('utf8'); }