From 6ac504530b5d054a35f25ed2a1f0dc9689921273 Mon Sep 17 00:00:00 2001 From: Dave Plummer Date: Fri, 18 Oct 2024 13:38:43 -0700 Subject: [PATCH 01/10] Switch to uint8_t, some optimizations --- PrimeCPP/solution_2/PrimeCPP_PAR.cpp | 37 ++++++++++++++++------------ 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index 7e20dea07..b145ecf16 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -24,56 +24,61 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { - uint32_t *array; + uint8_t *array; size_t arrSize; inline static size_t arraySize(size_t size) { - return (size >> 5) + ((size & 31) > 0); + return (size >> 3) + ((size & 7) > 0); } inline static size_t index(size_t n) { - return (n >> 5); + return (n >> 3); } - inline static uint32_t getSubindex(size_t n, uint32_t d) + inline static uint8_t getSubindex(size_t n, uint8_t d) { - return d & uint32_t(uint32_t(0x01) << (n % 32)); + return d & uint8_t(uint8_t(0x01) << (n % 8)); } - inline void setFalseSubindex(size_t n, uint32_t &d) + inline void setFalseSubindex(size_t n, uint8_t &d) { - d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t)))); + d &= ~uint8_t(uint8_t(0x01) << (n % 8)); } public: explicit BitArray(size_t size) : arrSize(size) { - array = new uint32_t[arraySize(size)]; - std::memset(array, 0xFF, (size >> 3) + ((size & 7) > 0)); + array = new uint8_t[arraySize(size)]; + std::memset(array, 0xFF, arraySize(size)); } - ~BitArray() {delete [] array;} + ~BitArray() { delete[] array; } bool get(size_t n) const { - return getSubindex(n, array[index(n)]); + return (array[index(n)] & (uint8_t(1) << (n % 8))) != 0; } - static constexpr uint32_t rol(uint32_t x, uint32_t n) + static constexpr uint8_t rol(uint8_t x, uint8_t n) { - return (x<>(32-n)); + n %= 8; + if (n == 0) + return x; + else + return (x << n) | (x >> (8 - n)); } 
void setFlagsFalse(size_t n, size_t skip) { - auto rolling_mask = ~uint32_t(1 << n % 32); - auto roll_bits = skip % 32; + auto rolling_mask = ~uint8_t(1 << n % 8); + auto roll_bits = skip % 8; while (n < arrSize) { array[index(n)] &= rolling_mask; n += skip; - rolling_mask = rol(rolling_mask, roll_bits); + if (roll_bits != 0) + rolling_mask = rol(rolling_mask, roll_bits); } } From ed9e8449aa07f904a4ea6a24df9da024d2044382 Mon Sep 17 00:00:00 2001 From: Dave Plummer Date: Sat, 19 Oct 2024 12:13:23 -0700 Subject: [PATCH 02/10] Half sieve size, flip meaning of zero and one --- PrimeCPP/solution_2/PrimeCPP_PAR.cpp | 110 ++++++++++++--------------- PrimeCPP/solution_2/primes_par.exe | Bin 0 -> 62304 bytes 2 files changed, 49 insertions(+), 61 deletions(-) create mode 100755 PrimeCPP/solution_2/primes_par.exe diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index b145ecf16..456d5281c 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -1,7 +1,7 @@ // --------------------------------------------------------------------------- -// PrimeCPP.cpp : Pol Marcet's Modified version of Dave's Garage Prime Sieve -// Some great ideas taken from Rust's implementation from Michael Barber -// @mike-barber https://www.github.com/mike-barber (bit-storage-rotate) +// PrimeCPP.cpp : Optimized version of Dave's Garage Prime Sieve +// Optimization: Use a BitArray that represents only odd numbers to reduce memory usage +// and improve cache performance. 
// --------------------------------------------------------------------------- #include @@ -26,83 +26,64 @@ const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { uint8_t *array; size_t arrSize; + size_t logicalSize; - inline static size_t arraySize(size_t size) + static constexpr size_t arraySize(size_t size) { return (size >> 3) + ((size & 7) > 0); } - inline static size_t index(size_t n) + static constexpr size_t index(size_t n) { return (n >> 3); } - inline static uint8_t getSubindex(size_t n, uint8_t d) - { - return d & uint8_t(uint8_t(0x01) << (n % 8)); - } - - inline void setFalseSubindex(size_t n, uint8_t &d) - { - d &= ~uint8_t(uint8_t(0x01) << (n % 8)); - } - public: - explicit BitArray(size_t size) : arrSize(size) + explicit BitArray(size_t size) : logicalSize(size) { - array = new uint8_t[arraySize(size)]; - std::memset(array, 0xFF, arraySize(size)); + arrSize = (size + 1) / 2; // Only store bits for odd numbers + array = new uint8_t[arraySize(arrSize)]; + // Bits are left at zero default, so no need to initialize them + // std::memset(array, 0x00, arraySize(arrSize)); } ~BitArray() { delete[] array; } bool get(size_t n) const { - return (array[index(n)] & (uint8_t(1) << (n % 8))) != 0; + if (n % 2 == 0) + return false; // Even numbers > 2 are not prime + n = n / 2; // Map the actual number to the index in the array + return !(array[index(n)] & (uint8_t(1) << (n % 8))); } - static constexpr uint8_t rol(uint8_t x, uint8_t n) + void set(size_t n) { - n %= 8; - if (n == 0) - return x; - else - return (x << n) | (x >> (8 - n)); + n = n / 2; // Map the actual number to the index in the array + array[index(n)] |= (uint8_t(1) << (n % 8)); } - void setFlagsFalse(size_t n, size_t skip) - { - auto rolling_mask = ~uint8_t(1 << n % 8); - auto roll_bits = skip % 8; - while (n < arrSize) { - array[index(n)] &= rolling_mask; - n += skip; - if (roll_bits != 0) - rolling_mask = rol(rolling_mask, roll_bits); - } - } - inline size_t size() const { - return 
arrSize; + return logicalSize; } }; // prime_sieve // -// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested) -// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. +// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) +// and includes the code needed to eliminate non-primes from its array by calling runSieve. class prime_sieve { private: - BitArray Bits; // Sieve data, where 1==prime, 0==not + BitArray Bits; // Sieve data, where 0==prime, 1==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) + prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default { } @@ -122,15 +103,21 @@ class prime_sieve while (factor <= q) { - for (uint64_t num = factor; num < Bits.size(); num += 2) + // Find the next prime number + for (; factor <= q; factor += 2) { - if (Bits.get(num)) + if (Bits.get(factor)) { - factor = num; break; } } - Bits.setFlagsFalse(factor * factor, factor + factor); + + // Mark multiples of the prime number as not prime + uint64_t start = factor * factor; + for (uint64_t num = start; num <= Bits.size(); num += factor * 2) + { + Bits.set(num); + } factor += 2; } @@ -142,9 +129,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (int i = 3; i < Bits.size(); i+=2) - if (Bits.get(i)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (uint64_t num = 3; num <= Bits.size(); num += 2) + if (Bits.get(num)) count++; return count; } @@ -155,23 +142,24 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n & 1) - return Bits.get(n); - else + if (n == 2) + return true; + if (n < 2 || n % 2 == 0) return false; + return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. 
This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -195,8 +183,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num+=2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num += 2) { if (Bits.get(num)) { @@ -215,7 +203,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid : " << (validateResults() ? "Pass" : "FAIL!") + << "Valid: " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -322,7 +310,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -357,8 +345,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::thread threads[cThreads]; - uint64_t l_passes[cThreads]; + std::vector threads(cThreads); + std::vector l_passes(cThreads); for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { diff --git a/PrimeCPP/solution_2/primes_par.exe b/PrimeCPP/solution_2/primes_par.exe new file mode 100755 index 0000000000000000000000000000000000000000..c815cbfcf1741a58a60a4acb1c6a2bc2822d5d62 GIT binary patch literal 62304 zcmeHw3tZIIw*UUk@R%9UPzO{@8GIDr3lMo!<^YnQrXHk|z5Fr2C?mr|7*w=kd{ijv zamvbrUA0c#fV5*BI48Sx#qyl2tmEr?yxr~*&3i(zQnQ@#X7m59{re647_d5z`+t3Y zzuNA-_G9g}*Is+=wIA&H6dMYc4JV3o)2C$b0wzm& z2&Q&*eE~kqQj928IjRe)x|03zT|r$$-={JXf|dAmGCuH-f1EBNRdqMmA7B0o315d?f?#b|>D~e0t8!G@$`;8<{`j(2 zO86$oNC;Mb{b=fjoS&?WfTUd^a{`i(ZD&eb>g@j;#97_BoCtEq5Y3X_CV&}AdlD8>vzON@;zF7Eeesn34 zD5#{1U{Nfn>XtSiU6dk!Z4$nC9}Hja3LYb~u`9KEe9p?5Ju_=sj>Ijb3(}xA%URnb zKYx=s>+FWTfxMl8ha67`i8*~9Zj$F*Sx7Y(Z}n6P%y z0(uI^j-8M&0YupfIGw4={A@bZRnPqMYN{O8(g_)I5#(m<7C53O2K+R`kCkEKFMY$| zD82n62=&P*C8(du0hI$P2UHHI98fu+azN#P$^n%FDhE^!s2or^pmIRvfXV@t11bkp z4yYVZIiPYt<$%fol>;gVR1T;dP&uG-K;?kS0hI$P2UHHI98fu+azN#P$^n%FDhE^! zs2or^pmIRvfXV@t11bkp4yYVZIiPYt<-q?}9PnlaA2~N_NU2*K+Vt(ckxf_T3~Rc+ z^5&-N+F;k$TGouei`}g2m+sLCW>4dqVQ&0+>}H+7T)O@HQnN?0YPf zXxQ|UVcRU=+8Ai|?5Zhqj|*Tezd&2R(y|sRSF2~23N$Q_%3e%2r$uREOYld|d0!#V zA2qDyBFaT+^qnTq8>R_eAEoKf!#64NJM~ijIcTRcf0V}1SqM4?7(Qr-f5)(m%C>#) z&1+vHxK4vV8o`-cD6lOY{uOgw3T0b#%(!I>h~H9fCWham@p6{EhPFSja{!crjJbbw-0v z?b<=j*H=CWIeGx{bbpguyszopsQIOkGgq5dJkdVN>=^_+eU&$%?eR#Xexv^BtIsB& z&pt+=F{No8M$BfElx`H$pp#B zCrp-+HOyuG0U5Cv&_9FB*H3E-w(E435r40&Jde5t3Cp*Wyp$pAlb3XaNnSR>@9?&z z6@TeXZij3>4Y&gBhXTFG4yU?aGq5dL;3u9a*t|tU?Kd{Rj6C`O=}pohkM8+c7Yj7+ zZV%S(W`XJO19qE{7k}9AIt{s84cT9awgca0Hg}k773B4d*Sk4`G1ue3L-?M7|EKR! 
zr^cE4D8g63Tf7NyG5X>0t`NX{EPN!5g*pxMpno?yQuc3>e!X z1EG4h1#bZu3EyHFA8(IoysKjr=t7=c#7_oorvX#>E6{)b^@Jn6uewT6*2GN5N%!)$ zqAeogXk48D9*Xa%ZA)vU@fvLQn32}XSPJ-Mtj047q^Bqk)jJz$cq%}G!LJ?Ql@F$V ziO~QD`eh>W;e`Yv-a0V8E!RQi??gP_OE7lf1ddI>L2=+A$4mNv>kf?T;mA*P?gEZ) z?|mCOK)_t0X}Z3K@wtSZW%MM-Zr#O~b9E-RHv{_cidJa8=fcCssN8APNizO#I<{pC z^i*pGyOfSGD(RFA;7f;2dAE`D2=!w==9-<1wS+?_evdk7oV0qqt(3o%!^Y-y@UP7I zSI_}kGG#h+QaK~aU6k{+pjr7@0G*}*9a_W( z0B<1h3a+bKP4l;C3(<8uWM}2OX3q+(z}vlIQ?NVS%v@=RPx}|MC+1~y6Z(tmaeZ-& zJ1PKS&_a5B2=scpb_jS8-uyez5H+3BoC9@FvAxBG%tbQVaHpgbHX)tMm8{E&zi#l4 z-SpTUI_=Hw4$bgH7VMm&8^!iYIwYd`3Aw%3S7w3!X=q0#+LM8Hr8j+7{s-R9Z_$Rj z?NYxot=aQ3FB9He4WHzP(xYYBzKf>~YJQUY5zXV_Q#$p*Mma8q;`)#WrT!s)_~t6& ziLzb9)4KkGyKaF*Q+}M;vzp^JHqV96^$zMAg>XLfPB~AbzUwJ7-pl~lI)>8k%0tlJ zP_(-r+TS01xC#9j2Ad}U{UtOR1;Z z4>8`3G9L7CD*CfyI_wAFHjQCE>e1Gcu`P9m zKBV3(M4ixU%{9$|8*`@$h7vX!-%lWH3>6Tw2wd@8P{px0zG6ibkS@cB&Uyo-PeuJ6B}(# z1HPm6{dZrU`Rbv|V>ev547{Bt;lUL<2iyRs+-^3o8>irhQxuFGM?2>EzUh*9We$?l zhxedsZ)}434Qglo_}#pnal1F>#%+AHlG?g6peIfp+RAwgUK4Lk#DC%i+8PbIC|wUb zLWj0a6~a5$K~|WMv!cCUH=bynluy}5tAbfx8TFCYyw@B=H($X#Otw!H({-MCfaSR{-mYjPo1-3Nmo%_- z-CiN>+OGtcJCwC9hWvu&z4{RJA9!FuUq?X>>v&&huhz zt>()!75zbR`QRPr2g*f(7j@u;NyuK|3?#d9h%|3I<6Sx}bp3w?r*y^bjCW}f*M@np zD{fc3>w69K!913njPW)ayz=p3Cuk%-w99-jf!3|y0rc>a$OAhfBNchgedOIJ=hY!^ z$5ghrKk}14@L-)1f^ZDNu&ejd7{3kSAjEl*kJ7aWlm4JEL--r*ux8!Yl068XcG-i& zxIH+mc{b|0hW;qlroZkU72MRG!aV;?^PHc~X-9vZMqi#mol8(gEJ;mmN1xSw=Y1CK zx-|H}j)o5J{s^I%%6eA-_OF-hbgK7Xd0j?6XV=T&4e)!^mz~J#d-DeM zS&?5^-vYols_zB44zdx-W?FJ#ILi}5*pg=Wji7_#jKWiqky@6dJN#s>ZqUw3uZCS3Bs|;jpq3>M zfh?7xEb*{7h`IFFryYyZ*IqUSu4~wY_B_H!CfIr& zhCFQZWLG2b&HwM}Vb3#lFdRqXgHr zHG<0sdbYfvb1}&H-N-{anbsaG(3zV9|0-atLp;NDZ12s*Leroysz-1Mz(qFnXV3-9 zk*|Fmo7}i!rMqq%YwZ|?eSsCXyS<~NyhD+`GMMIuVV(QG^XAdGC*Hctr=v|~&vz5e zdv|IE*HgV6w_=X6Eqh~&Mi+7w>#S3n!F(7_pVMdd)kpFAGUKl+7XE|Cp3dU zY=w>AemnLeATv=cyt57SKQpl-*HDh&=V9M>Ko@<(`K04*?C@+qTDm+OX{ zy?pS|>}~q%)1QjDo?Xv`g6%^DmaMz~oukEZ9hU{{`Iv^}k1#?`bejuCkbTwo+CLp_ 
zjA*&s@#mA5btgXOd1$>W$yxfbb8~{5l=5~Nv{2vq%iKGdpJ{wIVh?6#5KHQq3R?|( z6W=_$Bht{1U0O4k<(&iVBJ7}dFt0BL?{=Yn-6Q6w^w8fWfaBn3FRVn*T(#IEsk`pY z8!NCBJK!DIGuXIlT|=jKZ1XEZNPUT?_Q`tq2IRj0yrmcu8*QbjW&MPCyT^PSiTCl6 zeRyKQH~S+Y+jat3Q z|NGdF_!M&wtwHNi7i9mUtiumS*Cn||LSJkS=vyD(4qgeQ>t%hM1lV6+ken0WmJ?s) zbzPPmz6(Cp`^z8UQ)By&^9keHXh%Cki|whUX#0jSA4P7s0gtj3Jo*L7gcX;k60bfM z!s7ZDPfYdkMi0K8l6b?%g4Y|+`D9ad{g99_`Ab5MuuUm;^Dwc=@{Q%3E?3h3pa(d3jITBh0snXPO8_eFmLu z(XxRgYb-r~#Dla~3fVV92VF(JuQSb_N!X*kG99`Ja=1e?i0{$S_}Wsyju1@xsP6Kq z)H<&>*|~aw`vS^&(68+%`!&|y5)I*4OU}hQaSqmtv$1Zx8|%keSTE1S-sTMKZ3ghY zP0TkJbpz5pjO=-=<;cb!0^1V0o9v^*(TCz)0gxjz@E<}wJgy?m)rc_dsZcu{;Gs$0 z{~;J}gMtSd*F&&nsSWoaZzl3)z+vDTv92_tucbX6@N65>vB74%4UYD6@D`=CPXpWU zU=a3uz@OhB58=#~ahd^x76}LHTra1?_T_b^0=~tUj`B|Ik?)Z6q{w+DDsT*t^Y-(V zflZo^bQ-T+^xVeyeh=+0NqL0kH<3m*D4kh!;LJid-29}7HBUNhd@+E%M(af4XK@g8I^t<9O7U&* zt3a<$7M}s0D2%W6!8;k}jf&_MciXKjx#p4veQLX6%|D z6L}p*K0g+tZbuN!*}CRwh$pvDc>Mdrn*yyUuM!xI5Os#v(iD z`8e08z|7qy{j}Y}D(!B~80Qod-&@t}HU(zvZXf2Ha{3oqglXSZ*nNfedEic?{AW?j zl|lQ>7-y$L%%0~lw$?$v(3#H{I9K@+ygsk(-`uI~*L-Mz+4B+hxOYHzyqd77p)_HA z!^apGSSz*e!@Ni1j?xw*eg^Vl4cU2y^B;PW>IA@=8HYf7_f$w+CLlP@$M$H zosYfqa{ZmC|AN-g{1I?|fcp2Lj@hVxeL{U#{i0kyrPU+8gx4R^i7}N&cyeN$dug6O zk9j^6^ZY2Rze~^uJ21}|AdemWOzR=^_t61x@*Hq<19bmJ=>Aus``>}?r+l+6KJuH9 z@IM1x0i&5K5PJV{IIGQ?y3n>THJJ21+~?5sUqkm_f$sktx}WBPd8T8Ld8Xr$(@pP1 zPBR^dtkMJ;w!r-t3pA`{K?bt1XN0eB*a1Hw!qu=3G8q=JxnT!<%mob$`&fmt?vK+L z>J4z`1w;KloL`Xc9}l`VLHC#0%2Iy;-T&+8KSq8j^f#P>?kBuh!|?ti-A_FG8uI>2 z=&tv$zMBMhZq#1vr3ui58t6nVbYlSe1pA>YgCK{r_o>A`f~Hw#&L43VvOzZU^8qZ$ zH-~Wjw>jBW06HA#Z(7$D-<0kq-hT_;!|uO?J-xi;NXOH{7t@xRJ&};%`=3a5kxexu zgt=(%tOI8;P3RwkciOR7+u~HiO~Sm`F&{+gy|hO#6zejq!$I$9tdVJ+x({`oz?!2N zy7?=#$G={(?WoTQC)sw?=QNLxL!G|9#(aMi^Zn5q&G(lu*W*!Nmw>rG+Ga`B$n(7x zb3H?U7ovahhH;`5{auLuW`IYJJ`jn%s~kQ0_mL+eo}qrMj2``NB=zM~gw3PdA~Cl& zL?Mj1y@BTTgn`(f<8ymNJ?8fM-wOTfX>K>8e`lg^$J$C#XQ6LP#vF>o`DrEL_08#I z>s*Ds)T2LkCc8*p!@#2+@r}uRZErw4&HbN2uVJ1!eD(2UoK+6yeAOb2_vsgZ~}7h2gm`*#jPXXi%cGQg4=zgM!p;Q 
zISVkH&;%F`z%Rtu_hNiU8&@@)!nh~dfSi!c*H8=JCl?uAHlM)d;(1vvXx#sh%}02B z@8`~Bg~3%>V0i+N{{Zf^?BHG5)TN^wC}mqZV!Yp4S+ zG~sm(v=**GnQ)}HzCf}Q-c0;=AT9#(bYdyZvk`nRh2qP6^&L*(=c5IXD;wfS1~Wh# zl<=M{npEI1ay$h{BYB$4Wk8C1i02>Foa}3hF4eUJn_yqsgMG%9V!-%d{7r8{d-z;U>q86Y-=OBdaGTE9yaRTx4)ax%Aaw2o|JH$qxG>ZOy8wL( z{^HC4PDHv1_F4}5fadB>%=I&H{{8{%3bNg3p3-2?`#g9tbEDZk7WYFgXhX0c656~0 zbRI%JC|hGE(nH|$e7HwqN7@e5bsl-0w6=y#upaeM9QcU6YSuCne9nhWLFbPl$lu9W zYc}G@E~B`yi0c4NGhx%610J#~=)58m_-cV?HSkb+2pJWKquA*ph+s*7o9UXod!-PwU_9meR+va zy-cT}i_RGwpVUT)P6MaQC$F?e^)mWz3Y>s39Nl8}Xy9jzHhcCWZYpD)9k3l@@>!Y_ z=N7al0zF}jv#_+&;9Zo!I?o|p4|uV_((=)Nu{fiRv6$0zL2Pmi+D`4LMc)$qyw-sY)@M$`m?{RJ}ooz=$2BFKImBz2J`CEjgu`ZpH z-yFb<)MoFy!jT){^#W50Q=VF3xYTKbe*t$db5D!CClH-0S*NgxT}&2J_yN zPn!2e1+xSJW51)2^j1XkR+QoLMtf0+a{(sZg>{`rfPSa50opgEw0}d|$tT%fU4%0? zMyoTd(S+AWtzt_G*p`OCC^m@B!DtRS3%qgANlaj^K05M2ht6c)OLwgVK_7P? zOY-?ztoJ;E_H#XfAAd0N{V>qbg8Ud45{&ZelrZZ46tJ>R%Kr)6=jabwFa8N(sxJ?I zHRjqM;xXlC2G-)%27=cCxSODb{jI@W7PH63gb#HXON00CXt)pIsln`02y`s3JAvjk zx@$DFnfMpQhIT%WG3&*hybo~>N9TlI+>siIvE{}6DAH+HYgw8H?Vr0kZdsACAt@7 z2oK31_H87&Zj|+?QR$ph7y!klRl&__SVm~Yc`-=Wmu4kg*cjo{1sLF`DXFc@1y%t$sc z?bXm3KrzzJ8BR2${%6ECn+E!+X1O~S`Z)=D=?ZL;DBK@7gFFXO9`>CvN*K}^jdR{D z$cwhL)WHsj5(aY{sxE`heN*_oJlaEw5+eBBIlcZwL(KCL{O(+=pj$r`I7kl=j^m(t z9dJwqj%dt@>wsgbj6>jkXvAJgCfc2W_NT)RIl%eBpdYS+U&K$Gb2sgTeS^3u#dKd1 z^2&r!+btu_Z!ZP@cQm86kJODEVK$jYV9w8@cE+Hc#X8nPbHq9g-^Vf@yjFXR^z#R3 z>nV-lJn8RK{n@2h>~X{hIPV=2Jwnf7w?|>@g=62p5i&_KcMr-CU-xNr=XV08yE>(y zjcmN7XxlzbL}wF^mu$6rARjj)e*x?`qSr?&;m0q>Y^OT07Ru|ZFJlm|3t&fd1KK|6 z(CR)UdSjvwHo%U&#Nz83NcLx=?tL28*}B@EQhk$byd#9ap`r z9gs;wzjXI{@WP0BCZ;CUT?alDqmOl%r$~NIY4qo*-=KTe4@5mAKkuVmBuf&mW$wCf zysckA-*rHyhR8BSygqov+e&(nWTFsdd@{ky_0!c;oxZ*+#u$!9yJ&p$)%PUxl(q!z zq_Mk%eCHr9-D6W(*M zf9XCd(T;s1PJ04quLa%8y+pc;LHv!=A8*Kc$+&GX^3Mc)WRt{cSnD^yF$>{P;3wKk zQ2z#7iJNTeS#n%GVAye<^RVqYVP|xZjgIsWutu1PcGb#t*ilFQZsRtx>)No^Bzzld zO5FXfdt2*j#)59C9NE${upT*qx(ly+pY>mtNqS7OPsI7s-QlMfur&N$l9A>sjH}Mo zkh?~CZfa7@O%Z%d<)?F<_p|ekaYR`y>+=S50mEDmwqQ2 
zXgzldcjuz8X3J(mXB+r%?rZjXZ1eidq=z)nA+$&L4P<@D^~|Sq!9(g9`g9NKr!!{A zcau&FdtIyBMr%ml+T3@>JPf)B_Byv2`WkE0KG+3(51iV&40%b`DQrQ1uc9%8eXKLc z7bNrHW(6P6#?34*%{7Va9jyj7r4~9o0J=O7Ivx9rD}ynYNqdc--K1&iSc5(r#hN;P zg}Zej%;PO&X;Yb|vmN8j*G>;)g6_L6g^c_b^%ITK-I<8y7f^TY;^fp;(9|&)chxPp zGm1Odd|eOT{SGvft@twXk!>^*{39OIU8yxVV>~F!*!v0@!(9i+(U9f`P(}yaXDMV1 zeD!b{flO2V*H=zQd+tKJve3S1*o(+)LVLLlN$Fog?oGaPA+2P0Q2Y__R?=q-=C7pB z4#D>yBa}y%Z^qwhuq3Izo_i0r7tBpYjM2$()IVGJ`B-@8PK-%|5PiAeGmkxv;#r}j1_Pq}EMnTVXpbXdJ zfblmDAJRQA*imFZblnk2!X1$|<^2#nThd5#p#oPdWY}P{xo3d)23xHAXdig%(Keqx z%!eHO4tXxX5r59ZXVB|?-IGPzdA|(CIlmiprD3k2HQgsv#}Cl(8l{tcp=g@{b1RK~ z>c?W>p?z%~_T9-QqPqf^uT!vQbCJyxw`yxchc>1;Tflk*cLgwy*6Wd9j|NeLu1T@OM4fLqY!&e?{a4Q^8+j^OM}~d5+bA zvLq+4`}zEYcJQ@iZ&{XfeFXC2&I{~Nf$w+IJa7>EZe)kIVc)F=b)0}4pTaqBI{2^? zv^)h`l)AhEbzq*@6NWUVi8{f9lkln8ENJThK5uTt3Co00pitufU z@STcqh9Z2IB0NJ8o~;PyD#AZkgzr~`7bwC7its{3c(Ec}rU+Lm!jCG#s}MYur`-mVD0pa}o7B79U4KA{MIq6q(45k8{`|3wi#uLyss2wzr&zg2{< zDZ<|?!Wx|W`((qQ2uCQwk&5sLMR=qle2XF+rwC6_gcB9vWJUNkMfgrB?DYy|`MS-a zY=aQ+7X3X(*o&d;MQvzfDBGjOh6CFWtlNP2;L(h|5gdj;RXG?OdNzbz(S^Q&EP5k; z{PTc5p0O+X&@G{CvtbBI?lMrB76S&$0fX^)2>aBKx(Ao;Lk5_;7YM z#E7snH2!!L+Y@TqIf8u}It}5S{Y=k|V2Aq6LU?O`)5YQJKz}2`jW-1xLO4u!a5y^@ zHjXiOm~Pi_<{Xd&9W+39b_9ECfbPNwc3^<+$_Umzz_>Pwy)iHVx74>918|SA#i(mT zID+as7-2d*oLz|^yaytuA--;y=~Ltx7LV}RVY|@a&BH_MK+SO7mEo*$cpGL<;=#57 zLQtKc+ad^@CvtlY4-Ubq*n!(Ul*cO=9*jRJfHyb_q%!G3a!lwhjqqUTa~h%8v_T{6 z5{w9bDg@MNgm%hyKof9S!wzaf89S~CXx9iQDX6St0E00L7h0#qPldiHC84fRhDGDe zx;xlt@C5KcwhR5gOS@Ls5XANbT;v)1$QM300M=l!5tz0LIs|tLkO!jID@3428Jupd zhPegZRt;(dcAhq1wDGK#9n$EqPjg)JhC~N@`|Y=Pu6es@CS}40ozK6!C6AdiFq7h& zF2n4DhTAEa=YzvuIu08FTU*k{8HpfgAbgw?^UsI3QHH~Al>#5h1$N@#ioWGzq7vmtrZ^QSgM!6arZr);UkZYwgC zRhL>TZH1-^OQqvBQ&+UDtfJasT3B9LYH_gf#ccfeVynG^&8sPMSe7H7>4EW%apT83 ziYu*_qAHW@KZNKiL|0kyBhZMJeTudt+HNbgIZXUtib_CKd6~7UxZFYD5`aspZB`Bx zKV)L#QM0cGHogjgjE{}C1K@br_yh!HENpxQfC~ItG)gW(kfUSwl$Df~FDo-yDi@)# zWoYJ@DmJ&$R%)#>WmwA!i%Tt)C8n6dSW{wrVuER!WvTVnD${gJrDc)TG~QI~a8%qj 
zapEGIqqw?YLScF7#3JOVuunuqK!IxhK_8nVF~el5GL@H2Fr}NS@VnP`EBdCY%CyX8 zx0?#Ars^tdk;zexw9;z3#bGsBOp|3sn<}fzCUjvbuW;DP%S?rqva)gql`OR_N6}?y zhcAC&d3Bk?R9-3fpNYd0SY~-CdLMP0q$W~zW9;^7P-Pl}%6X@csUqw&JCXgI#1bJY&hARjnH9|> zev1&%+Zq0|*cX*oq7|i63M^IDaSJU7FRZptsj*g#E3i4Lrr_@ekYX#uPo?u;4zDgN zuPm}wT8qR|OT}%sSx{q%wZ=qjS%`{@*ivhuqr8$y4FebO1Ll?06lDT(yOqW+jXJ0& zhJ7%GS%AZLz?ZZs3qX1R1{(Q!&=8~!G{~E$Ta76L2TUgskq<||$4(!?%8&BUZ>wkD z#n`mvs0#{aE`0jEXFL1@^N|LFvE@F-9)!Qaf_V7Z3mJPD{;oxgwZSj40XO`U@H^ng zm!p;ym^^AZ93R58 z_MuETFqCQUxS56A9L+QvqJb-#1#K9~0`C}&dPlRsA-6DL{w*xvty@_@ek@~-Sf)Jy zzay3f>BcjmWdhTfCo=8kiHseb$bz;)P49|l0VflfCN+@-OiN5xo?zxoOyE?d&=A&Ec-)bf2iz7%Kk9fCmEyq z>G!nB$A&5Q!3XY}WFPFu7v>x(`=ewZU4bvmDNP$;&Rb;oR@sk{eRK=HFlU_XkClBs zIU;_%3{Q}KbQ!)dr!=#MIkB$daH8x_lKmvvpDg=Wli>?DiQ&Y33@7gA$o>r3m*)F0=PVh% zTlQzm{v7Tzr{t;ssvJ-`pmIRvfXV@t11bkp4yYVZIiPYt<$%fol>;gVR1T;dP&uG- zK;?kS0hI$P2UHHI98fu+azN#P$^n%FDhE^!s2or^pmIRvfXV@t11bkp4yYVZIiPYt z<$%fol>;gVR1T;dP&uG-K;?kS0hI$P2UHHI98fu+azNz(aliyO7j82i0U#e<*C+km zzXVEuYp)2K@fv`$D=ROj!2<#U{)k?vfZGMv4)>&h#}44|;}1f-h6$B$EpT|~O~9)J zLL1z;046lRg$6QVJsci&7wmBO;SRwZ%!FpR^KfxGCTxWZ(lg;GT!aBX5eK&xt_6;T z02dryAP`=M`!n3!P$tyDwZWPCG2vuCcKsUME&bVb{F0aOINUpM^KW9pU*H}NV?ra` z@i2B>H-KFq0v7|9Jb($)1~AQX_*>xKg!>duH;@Sv;O4`vhI?)x(;S4m05@uBio`c&3cL?rVym}yt_mnNOl@*D()+#)&0{H&fGv_%biDE*sy}Zz3 zx2ECA5^8%Bw_r$|HG^)qx^eOL^)%M?!+Vd=Z}UuvS)< zS7s(=&7E0zgOuEvv!yB%5|uB1S8mP*l4iHAa}atgDuC^F4rC+hIV2gPJb z1fj}AyR~eQqgbkQT0%E1iRIM}v3#LeX(?M|RVF0cq^ER5-}^F(#KialOA!((Ej419 zb(tt3&+JCqcLoIp;$uRhh|%0aPRhJO-t-*$4@U* zS(TMr^jB9!rIzp6c$o5xLiJJNnl^?CCMp@HjQIwv=p~rOaCXPaOc|ZB5 z`}1US*LZ|@7E~|1frMgo`$oNl_NNu|7wqoZ@Ose=Yr`1v8~&5fxGYj4qQ3%}mnhCl z61(&RCoZjvCtSgaMHYvp2lPKf+ z!6aLm&5<=%+3#IxlMC%uOJ&w#g(#-Unywr={HJKS3VYxAdiOpi1pj`|6Z7K5yaaUo z+?n%|#oU?m(zu$K=SXu{D@$!9E+AO7b~olRpn)P2+dI=E{&(yDu<=8 zL@X>WfgxqH_l)8Aq$rHJ$_ffP%Bw4Yg^93mFmn{FuvV5caUl$dGRHzDmRd^-OOX+N zY57tRfdIxZBMXCPsF0L_SO}}afpi~?OjQ-uLfb+cW^2eF3>w*<7DVhcsl`^t zmdoT^tUVdJLCAhfvsUPc+A7R<2ej)e!Y&GXv@tDO^I6S{TI1Z&JA-yXzyoN6kKUW3^cop% 
zk>Tw!Oz+T9`g?dNjqg+@E|K650QW}%y@N;TKf?oU1SdHp_=pTYEW@)P z8I-<7hTkF>kIvDXeN_Hq8K$@U2yT~Qdc%+4%Q8%F`4Jp|2jHkYz3E4AgbdT$egwzJ zFun0d@a-~8Z~YNGTZZY)KY}eXOmF`Y?2uu41CZb+WSHIpB)CC_=}ka_cgrxn4M=dC z4AUEd1Ro=q9?xr&U@V*2O-P`33n@L1!w?dD1Q)|mI=x>=FsTXnRE&S5D*#gx|6ans zO88wr_&@#NWGQo3`8j@Yu^;@HAN{NOkJ;IIAQ zMD)E9Uzs2LD?j+8AKWjXXZZ|2xWEry?FZv`&wA8<(GUK4V9)gBe(-ib_}6~$dw%d2 zey}#EC;st%@LhiJgMRSSe(-C4@JD{|bw3!t>D;6Ji~V4yAB^8r?va0wAN;W&JQQO^ z*}wUI@GE}sen0q6e()tf7(YJH1HYi}3FCeJ9_csv!Lbqysg%AHzjXY@W{>g@_`ys3 z;I)46Fa6+G{osG`gO5mX7d@Z(!QcA90_J1zw<~|BA3V?x9_j~=^n>I4;7NY)?S62k zA3V#NPTA)em) zq<224{ddCsBYBVueDmP`8eWLVpATn&D}XD6D}uAaEreSHR}A;J@N6l{5#KQFvl_U+ zhJVP;Ot9wv(RQ>b_BN#PG%~Dbbu(Ap%Spf$g`2tP7~cuOq#nPUkyE%Fd$LKzma1au3|&mME~&QI zvDGZYiZeCJSz)nN=FBWyPN(ZxI5w2F?rjbm_O-DcE!uHroii_0%pzgAGbS{bn?p`vvGu*gJZnDD8^|jZgk*$#|}~U9qlPNF$upJlQXks zrj2CUZmp`y;Z5(!4NBwkjx?MYlf=Uc>h7i+2!D!*Lub&Ys4f|2H#l7)#?!SAaIe}b z@?&A5f$pw|2{?ZSQF0o7kO)7FW6hBcs)-PZNTOm^K>`RtoU-HSG&_rMFQi*Ld<^H* zWM$>fCDd~DVp(~SmEXz9nY)x@NKB9j<7c}L(FRuW%QK*!7xX1l!5l=+%R)7FA6mL4 zz|YK3qm*MhlILaPU^P!ZS4~Vs9y%Vdk)|3pxiahT#iL;wG1=I(nMJ@13{(EZ(+_@xjG zD4H8Zho!(S4POW+k5Pyi@HX?Pjq># zG_Vsz65Xt<0*rkO?r9ZsaFaqI$)tJyA7NAim|RqjODsfhj>OQVJ%&oJ+lD`7IwJnl z)fcGJ3S8#F6-1nBV@Tt`pRPD6E9jO~ zIdkV0&a%Zrs-UYgC28{AG3_=~{x;2l@z`Y?bhG;>Xpu?iMhqWxq-4=TWBRU?N3KXd8|->4;5`qm78uAZ`)U1ki~m!u&{zj`!%p zrHhO0CJlbZ@h5$gfrNhoU#C^k?`lrKwJN5|iu+mY$YiDu6HK<~pX53hLaleg=$*NuDm$I=Dk zf;USKAHI0!`=4~a{y^E_zx?-er_$P{C$&X9lljhy?9c_r$F2S3_LpvZ`NF;5wSV~K zeKWRg5)*&-z^Va1^Zw$GSD!gFmv7dmoO=$@eegiO JejOKz{{;yy2mAm4 literal 0 HcmV?d00001 From 2468906145cbb0524be4f437aab28af03b011c05 Mon Sep 17 00:00:00 2001 From: Dave Plummer Date: Sat, 19 Oct 2024 12:30:48 -0700 Subject: [PATCH 03/10] Revert whoops --- PrimeCPP/solution_2/PrimeCPP_PAR.cpp | 110 +++++++++++++++------------ PrimeCPP/solution_2/primes_par.exe | Bin 62304 -> 62304 bytes 2 files changed, 61 insertions(+), 49 deletions(-) diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index 
456d5281c..b145ecf16 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -1,7 +1,7 @@ // --------------------------------------------------------------------------- -// PrimeCPP.cpp : Optimized version of Dave's Garage Prime Sieve -// Optimization: Use a BitArray that represents only odd numbers to reduce memory usage -// and improve cache performance. +// PrimeCPP.cpp : Pol Marcet's Modified version of Dave's Garage Prime Sieve +// Some great ideas taken from Rust's implementation from Michael Barber +// @mike-barber https://www.github.com/mike-barber (bit-storage-rotate) // --------------------------------------------------------------------------- #include @@ -26,64 +26,83 @@ const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { uint8_t *array; size_t arrSize; - size_t logicalSize; - static constexpr size_t arraySize(size_t size) + inline static size_t arraySize(size_t size) { return (size >> 3) + ((size & 7) > 0); } - static constexpr size_t index(size_t n) + inline static size_t index(size_t n) { return (n >> 3); } + inline static uint8_t getSubindex(size_t n, uint8_t d) + { + return d & uint8_t(uint8_t(0x01) << (n % 8)); + } + + inline void setFalseSubindex(size_t n, uint8_t &d) + { + d &= ~uint8_t(uint8_t(0x01) << (n % 8)); + } + public: - explicit BitArray(size_t size) : logicalSize(size) + explicit BitArray(size_t size) : arrSize(size) { - arrSize = (size + 1) / 2; // Only store bits for odd numbers - array = new uint8_t[arraySize(arrSize)]; - // Bits are left at zero default, so no need to initialize them - // std::memset(array, 0x00, arraySize(arrSize)); + array = new uint8_t[arraySize(size)]; + std::memset(array, 0xFF, arraySize(size)); } ~BitArray() { delete[] array; } bool get(size_t n) const { - if (n % 2 == 0) - return false; // Even numbers > 2 are not prime - n = n / 2; // Map the actual number to the index in the array - return !(array[index(n)] & (uint8_t(1) << (n % 8))); + return 
(array[index(n)] & (uint8_t(1) << (n % 8))) != 0; } - void set(size_t n) + static constexpr uint8_t rol(uint8_t x, uint8_t n) { - n = n / 2; // Map the actual number to the index in the array - array[index(n)] |= (uint8_t(1) << (n % 8)); + n %= 8; + if (n == 0) + return x; + else + return (x << n) | (x >> (8 - n)); } + void setFlagsFalse(size_t n, size_t skip) + { + auto rolling_mask = ~uint8_t(1 << n % 8); + auto roll_bits = skip % 8; + while (n < arrSize) { + array[index(n)] &= rolling_mask; + n += skip; + if (roll_bits != 0) + rolling_mask = rol(rolling_mask, roll_bits); + } + } + inline size_t size() const { - return logicalSize; + return arrSize; } }; // prime_sieve // -// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) -// and includes the code needed to eliminate non-primes from its array by calling runSieve. +// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested) +// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. 
class prime_sieve { private: - BitArray Bits; // Sieve data, where 0==prime, 1==not + BitArray Bits; // Sieve data, where 1==prime, 0==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default + prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) { } @@ -103,21 +122,15 @@ class prime_sieve while (factor <= q) { - // Find the next prime number - for (; factor <= q; factor += 2) + for (uint64_t num = factor; num < Bits.size(); num += 2) { - if (Bits.get(factor)) + if (Bits.get(num)) { + factor = num; break; } } - - // Mark multiples of the prime number as not prime - uint64_t start = factor * factor; - for (uint64_t num = start; num <= Bits.size(); num += factor * 2) - { - Bits.set(num); - } + Bits.setFlagsFalse(factor * factor, factor + factor); factor += 2; } @@ -129,9 +142,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (uint64_t num = 3; num <= Bits.size(); num += 2) - if (Bits.get(num)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (int i = 3; i < Bits.size(); i+=2) + if (Bits.get(i)) count++; return count; } @@ -142,24 +155,23 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n == 2) - return true; - if (n < 2 || n % 2 == 0) + if (n & 1) + return Bits.get(n); + else return false; - return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. 
bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -183,8 +195,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num += 2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num+=2) { if (Bits.get(num)) { @@ -203,7 +215,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid: " << (validateResults() ? "Pass" : "FAIL!") + << "Valid : " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -310,7 +322,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -345,8 +357,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::vector threads(cThreads); - std::vector l_passes(cThreads); + std::thread threads[cThreads]; + uint64_t l_passes[cThreads]; for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { diff --git a/PrimeCPP/solution_2/primes_par.exe b/PrimeCPP/solution_2/primes_par.exe index c815cbfcf1741a58a60a4acb1c6a2bc2822d5d62..c3549c8b447f6a034855e07d4d14b8ac2d1e5d8b 100755 GIT binary patch delta 3876 zcmbVPc~nzp7QbH-Ac3$+0us!|I+R5gfuQXWAS@~>LQvc)N^yun3+S=n(u9H5o`M~p zA6AFcsiMOXGdPC|^e~-P(PFhDcCd6>Pix1ZVzF2imr9OH=DwHbDfQ3coR{xz-@W(U z?|1LVvt1Y3t_yb!O*7~L2mo}=!XNxI=s&#jjnGH2*#F@dCCS!*%=43Oug(K{R-_Ze z&FvPr+?q}MU5xZj+AG9Ni}Vp>fA@C1nvhHMWM&ZUOV3pXctVR!D6@`Ap`|St7+V8j z`jGCwLKqQMX5=WJBG=KdVlon7SEOgUJ(B3{X+%7(dFn6*(fSMr`IOdYs>vvA!{2=L zyrHNW_xO?84_MF3rg)C#J=fwo=IBo?zvW1_1?cS-2IJ+9wAMF#>)feGpRLa<_`?r68gFu!=v zTZZt!jVKO6pSDB5JQPFAbfj7HA{vtw;QJD4lfY6i65@<@nHFV5kz!h(<)@s7I-AhX zdeQ@0?xEu`bAM_FOeWjh^!6zFVO9`HrRTFE0u^ZIgom>5_({W|>Y>Zl7=0}Ti3zd}|C$rAMl{Gqv_Rp5CUk7Vrj_G4-gj|(WtI}}TVnEa?)icL7(S?dFF%wqvU z2jEv(XB_hoWtIZ^RbDu0p!U2dQcJ@YY`~f^3lhmndU`q+HeOuBbHe!wG>?D_lQAn7uUZ$2_SA7J4BdwB+#No^k{3*5AqGP zZDEl73`*G=u%23YdJny@P)$zKiG=~a70>M|qa;5l<_Kpg!(DRgW}bc>R}K1yO?o?* zPi_JeEU^@{bAIxl^D~$AleBY+pXEHg0@oRcJDmuUIrKaI4QZ8C&q3T)<_E;op{ztu`T_K>QHh~erS^pCgDNmqqI^C+GK}jF z@^hOJJqzfE`HLhJrNxijQsSv(aS$=mu*D&fH7M&4_&1Hs0Xz|ZYYy^kdx@oiO|}TI zkIe-82+GT8_2Lj;9cm|0%XS#JSPEv*_Qes}Qj|h8Bz0`rI@u8gy<;JId&ibJla3J9 zCp&3=UL2uh_l!m0IhtPYYY~V&9%7?k76goI9S-AH3js(71$BZ}1PK|)yF4`v!(+p6n5j&sF_q{Jg?^?m1JoP}9Z>Z8 z@8RgS(36VW@;@okP!6G|6kp2URj83ZD&MK_!ScpWgNc)m9md;4?(rq@_UI*E9%dpn zkLw8-=VNM7rVYA0Omjonpu$X`hIKqo0?cZ>0VG)eZBuZ|cGU$pt{Owy#$+Y~-}M0w z35#H@3Ia=1XHdt^&)-xqgfM?`?zzMDD)R5S(V8U+`FALOR>H$P-9g)y1bG(W+1l_7 z^#DCL=od>;eE*AjJ6<5ROuhYH4%ok=zDpNre?e&uN{{1;M}$q4%CsUaM8LB{VI8es 
z8X#GO^NEdkfL_9vIzyuY+je(^dMu?XLoMn^$EQG1?0ogE2qcIEF$P( zc$-MxMoUwR)uezfD-JTvN1X+qNnrpE<>}V#tSv&n1mDOO1@=0Y$2f7|%tOli5TTrl zfw{TyGKVWRuty?`kz8jhJo)4KNu#PW-+g zaBp(Q=O^Ocbb@%z=J2}{@mp!C)V+)NJsS3<;soNCb=-{j-OuB9HIaV}EdXA53O~(RLE~5k&X}w0&lC@0rd0Wabp^xs5!em#9unvH6LB+3Cn9g>!Tr zjV$bqg5BIV#nAT5nJ$I7g3c}>zecFH_fE0-_L)s_3ikrT$g}rGC)#kyZo?S3_dLqW zaW6ZxNA`-HQ#;d*y06MG3veU~vB(C5R4GEJ3?a1+AytZyVj<>+BQXOZ6=C{=F4(kI zH;RxsfyEpR{am4y)wisu`i4yuGI&2?__>vO7c7Nr7 z3i>=WeM-3IAw$|Cjq6iGmsHcJf=f~Z(g!>>78MM7<{`b$OY^l72E1fQcX*4skoFOG zDq+wk8eq~#yh{ldU;G&8@fDv{!6&}rAr%bxiZ80b<|`Y+NV(HQ11i`j6AdY$M<)I& z(*A6~v;LYMCEWMt94ibczp2plD`8(y9MWflI`KsA&ye0!L8DT9Q3=gdUl;6pQ3RXD zgx%$Jc`gEjo__R3a3uRlGW0sY``P$8QVUR(1CWnv++2VPTpt$$0zoL60Ul>HAkb;iLJO|CU~n13JI*H*Tn55`h{8cs9swdLet2jiL0E-rOC-4V zL<1>_1;Nc&5WYAQVAD)+HQ<~qjssC+JP5AEgXm5o5Zx?r$)5!RH@wMk;2YHd-S^B# z=DXv;1BhV2LTLNB_0irU=-Q31D_9MFm1|6|0%ptpq5h(@ndE*VXe5K`eWdu3Xn@Wc zj5b%f1E`3Fdu7}Q*LePS`06Y_&%>!_OTqG=@_c8mlb7K40lS|q4Xdx@`Ico){wUAi zeZvtQdqx85x=d*ACY9N+L2 zWVy%s?eUpz-8(TByjB*J8@;~b>ffJUuMWGlHGBNp<6~Qv->}Wly_|mR<1?qb?poh3 z(x^Y~u3m6)ThgEX+AkhH9Cl8&K52WYVpZ_*%zypu)6wrs%D-*-CNB$medE0|A9@d& ze{4-{ua22BQC3%_yi~D!^S-3$)F$#CU2)f<$?KXqZHMYsX~Mqdgq?5w{`4D98n+Dk UHoyJmKUXX%yd1iRKDt}>Kd8G0TmS$7 delta 3948 zcma)93sjRw7QXW*US{GNe_^POMK~`N)4c02HR-s}gwrlqOe?r&ObM~C&oXOnR+_`h- zn|t}KH$~Q)qN><^%@eh8e*4XS;S=FOMT^+_tPOw^8t zvdo@9JR~|Cpz%rGv?a+`U5uIl2LeBP0dno5IBWp2F?N!FsMMJ z3lIq}QokwB5ldsylM@-)K`1 zqm57WT-^4$Gx8qf2VJ0AK)}dIZlSC!bAX;pSH;)i#q01Ab)~rKL&EJuUIIp&L~XRi zld?=8aos$2c->Xh@u}y%?RC-?8a!R=(v7|c=r`%!UhDthZw)P;?yvm2V6DKFN=t>4 zuB7iySGgSiePz)fR?g>FPWa%7l`lH!x1iTL)%RIU-Lq%StCs3D$ zx+ZHeaenD#0$J)iC~Wg-S9+=dfT(a8TVM3tg$2vJE+IvHmr#_%@A zkSfRhWs8>@igEwS;B#yx?axxt@R>%EK#OO_na7}^RV1^J5a5`{iJgJJC3fmf3k`v$ zLW6i+e5Y=0d}ldmM!W_vb`xmFmWyGm4*6jH2{uH3k`2`_79Kk*=rHULkZ>I*0o_m20L`+ zFi_YvJY%U{`x(xKEgJL!Y; z@+^Okc?bk6Ubqnv^N^C+v0isjZ^Qm#oouw-PXSvmoiclRycjRSh|+e4{|?>q30ytW zBD{aT9IV2yhWY@{uLOQahRegM#qT_mUY@Nb?@-(9z?qFg1Pqtz)EON<9bEy}zTwmx 
zoq7ZEA-Ipw7ckZ`F*MaVZDO4^qiIQwmK0Df$3Hz6b=z(a+!Rtn^KwT?cVI=~kLt$I?YWU;EIpR1B8l{J z?nZKj=FKT2R(foXiqXmFl{qVvffC4m0h>@AzNm!&*-P;b$Iu0HN163VeJAp?h=YK0 zL8xxRQDiDIJa2m5FpP&Pj#~)N`v}(ogzGJYD-YkSfKE0$po>ijIK?Igw6QB07il$) z@4-b{4z5yccO_GmZ`9V|JjREs<>4#$sj97Wz_%6xfGd>4ECH@hG13YtjvtA%;wbSH z;2ME}Z9}*&Gc7Z`iE#Z)cY(c2Jf+tVuDs()glnA>uKC36_I*_y`!-_Z6gHl5G-=*+ zvWo7Q7Z@UYIMByVXtK3bh= zfq`*8ToFE6z$_HvsTW}uiZBa>m<2#SD5#Ca6RijeI>C1P)bOWUp$qzo#Rpm&hqN)M zmHsWyWR~Ma4!tL=L>;>N7>>bSCPg{_zP`iT#Q&u_9&U=^dJr(e8$+F7+aAN|(bkTS zqz6HK32ldL9-&OI8HnMI;{kp_9^wl$XP&U>Ct!9M$zugaKcP_>-%RlF9(}^_){rwp zEO(uU1@a3-MqA4hHs21}>&1XJTe~OLKcMByl zQ2!Ns3&hfHMl5Z^xezgDM_lO?>uawe-uUd}P4MNh){HZj$6CHTCdgs3vpkXtERPlM z6VHnOr#$kG|63lZXF(*H-#DqD)SGWcZi`I@DFPIM)%exQgJSa~H5_s?U^{Y?4630^ zx~N41huj0Y)NsRHfpnQHs!I!>%K~<5;HoSU>D?XyyEM@5F%9XRo&kMoX!BGcT|Zph zj;aDYL%q(Kd3UeQ=6y(Bj@(BdVzqk%Rr$$$o|UW#%p97-J_wrXIfLVQOJ zO$tdX(mwo#r+orC)G*+~J8okWQNG1L;40eqM0MZl*jBzxGszw8x9u2NLv2Zp-f%wuGU<_kHoEQzHG8$YyiUwvFwma+z*;;_c z`{upRxvj&;2O{V*)4!fw67DI6s~nz|$%hfe3rx!ZgBJe!%$H=9B(;SpC%xL8Bxg`; zrO&j7n_;;WUppxij*JW8hLi8Y6!831cZZ%&1<&ILgkYb8DdBlH{HoyB^Qqx^{9qCI zdASaLpObHL@|{jTd>*fFyxDGH^h2MF*|>fjsp?P7uF)8BaDLX~d9c(I^Dh@26TKAyhvpw?4iFx)3r~K(F{YEoU z%s*V*(b4zpnJc#s%w49s`RlH06IxTETYYLKA77Iyo8J{)er4jvPw%-i`#0;^ySY=} z*_J={i@EE)GVE_&cv#c!`*GIvysXIls=_}P)NMI3D#;l0^_k?&re#0e)Ycx|11sk2 zpZsOdnynWSJRaUK|2pMg!${{x4-=PnSzmNhH%vX&i%P8;Pr*GC*AeBQ0~)GrBB)vF-gB+^P<-eZh!XM5v!UX MxE~PFUw Date: Sat, 19 Oct 2024 12:36:12 -0700 Subject: [PATCH 04/10] Half sieve size, invert bit meaning --- PrimeCPP/solution_2/PrimeCPP_PAR.cpp | 108 ++++++++++++--------------- PrimeCPP/solution_2/primes_par.exe | Bin 62304 -> 62304 bytes 2 files changed, 48 insertions(+), 60 deletions(-) diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index b145ecf16..fa168bea5 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -26,83 +26,64 @@ const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class 
BitArray { uint8_t *array; size_t arrSize; + size_t logicalSize; - inline static size_t arraySize(size_t size) + static constexpr size_t arraySize(size_t size) { return (size >> 3) + ((size & 7) > 0); } - inline static size_t index(size_t n) + static constexpr size_t index(size_t n) { return (n >> 3); } - inline static uint8_t getSubindex(size_t n, uint8_t d) - { - return d & uint8_t(uint8_t(0x01) << (n % 8)); - } - - inline void setFalseSubindex(size_t n, uint8_t &d) - { - d &= ~uint8_t(uint8_t(0x01) << (n % 8)); - } - public: - explicit BitArray(size_t size) : arrSize(size) + explicit BitArray(size_t size) : logicalSize(size) { - array = new uint8_t[arraySize(size)]; - std::memset(array, 0xFF, arraySize(size)); + arrSize = (size + 1) / 2; // Only store bits for odd numbers + array = new uint8_t[arraySize(arrSize)]; + // Bits are left at zero default, so no need to initialize them + // std::memset(array, 0x00, arraySize(arrSize)); } ~BitArray() { delete[] array; } - bool get(size_t n) const + constexpr bool get(size_t n) const { - return (array[index(n)] & (uint8_t(1) << (n % 8))) != 0; + if (n % 2 == 0) + return false; // Even numbers > 2 are not prime + n = n / 2; // Map the actual number to the index in the array + return !(array[index(n)] & (uint8_t(1) << (n % 8))); } - static constexpr uint8_t rol(uint8_t x, uint8_t n) + void set(size_t n) { - n %= 8; - if (n == 0) - return x; - else - return (x << n) | (x >> (8 - n)); + n = n / 2; // Map the actual number to the index in the array + array[index(n)] |= (uint8_t(1) << (n % 8)); } - void setFlagsFalse(size_t n, size_t skip) + constexpr size_t size() const { - auto rolling_mask = ~uint8_t(1 << n % 8); - auto roll_bits = skip % 8; - while (n < arrSize) { - array[index(n)] &= rolling_mask; - n += skip; - if (roll_bits != 0) - rolling_mask = rol(rolling_mask, roll_bits); - } - } - - inline size_t size() const - { - return arrSize; + return logicalSize; } }; // prime_sieve // -// Represents the data comprising the 
sieve (an array of N bits, where N is the upper limit prime being tested) -// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. +// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) +// and includes the code needed to eliminate non-primes from its array by calling runSieve. class prime_sieve { private: - BitArray Bits; // Sieve data, where 1==prime, 0==not + BitArray Bits; // Sieve data, where 0==prime, 1==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) + prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default { } @@ -122,15 +103,21 @@ class prime_sieve while (factor <= q) { - for (uint64_t num = factor; num < Bits.size(); num += 2) + // Find the next prime number + for (; factor <= q; factor += 2) { - if (Bits.get(num)) + if (Bits.get(factor)) { - factor = num; break; } } - Bits.setFlagsFalse(factor * factor, factor + factor); + + // Mark multiples of the prime number as not prime + uint64_t start = factor * factor; + for (uint64_t num = start; num <= Bits.size(); num += factor * 2) + { + Bits.set(num); + } factor += 2; } @@ -142,9 +129,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (int i = 3; i < Bits.size(); i+=2) - if (Bits.get(i)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (uint64_t num = 3; num <= Bits.size(); num += 2) + if (Bits.get(num)) count++; return count; } @@ -155,23 +142,24 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n & 1) - return Bits.get(n); - else + if (n == 2) + return true; + if (n < 2 || n % 2 == 0) return false; + return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. 
This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -195,8 +183,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num+=2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num += 2) { if (Bits.get(num)) { @@ -215,7 +203,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid : " << (validateResults() ? "Pass" : "FAIL!") + << "Valid: " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -322,7 +310,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -357,8 +345,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::thread threads[cThreads]; - uint64_t l_passes[cThreads]; + std::vector threads(cThreads); + std::vector l_passes(cThreads); for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { diff --git a/PrimeCPP/solution_2/primes_par.exe b/PrimeCPP/solution_2/primes_par.exe index c3549c8b447f6a034855e07d4d14b8ac2d1e5d8b..c815cbfcf1741a58a60a4acb1c6a2bc2822d5d62 100755 GIT binary patch delta 3948 zcma)93sjRw7QXW*US{GNe_^POMK~`N)4c02HR-s}gwrlqOe?r&ObM~C&oXOnR+_`h- zn|t}KH$~Q)qN><^%@eh8e*4XS;S=FOMT^+_tPOw^8t zvdo@9JR~|Cpz%rGv?a+`U5uIl2LeBP0dno5IBWp2F?N!FsMMJ z3lIq}QokwB5ldsylM@-)K`1 zqm57WT-^4$Gx8qf2VJ0AK)}dIZlSC!bAX;pSH;)i#q01Ab)~rKL&EJuUIIp&L~XRi zld?=8aos$2c->Xh@u}y%?RC-?8a!R=(v7|c=r`%!UhDthZw)P;?yvm2V6DKFN=t>4 zuB7iySGgSiePz)fR?g>FPWa%7l`lH!x1iTL)%RIU-Lq%StCs3D$ zx+ZHeaenD#0$J)iC~Wg-S9+=dfT(a8TVM3tg$2vJE+IvHmr#_%@A zkSfRhWs8>@igEwS;B#yx?axxt@R>%EK#OO_na7}^RV1^J5a5`{iJgJJC3fmf3k`v$ zLW6i+e5Y=0d}ldmM!W_vb`xmFmWyGm4*6jH2{uH3k`2`_79Kk*=rHULkZ>I*0o_m20L`+ zFi_YvJY%U{`x(xKEgJL!Y; z@+^Okc?bk6Ubqnv^N^C+v0isjZ^Qm#oouw-PXSvmoiclRycjRSh|+e4{|?>q30ytW zBD{aT9IV2yhWY@{uLOQahRegM#qT_mUY@Nb?@-(9z?qFg1Pqtz)EON<9bEy}zTwmx zoq7ZEA-Ipw7ckZ`F*MaVZDO4^qiIQwmK0Df$3Hz6b=z(a+!Rtn^KwT?cVI=~kLt$I?YWU;EIpR1B8l{J z?nZKj=FKT2R(foXiqXmFl{qVvffC4m0h>@AzNm!&*-P;b$Iu0HN163VeJAp?h=YK0 zL8xxRQDiDIJa2m5FpP&Pj#~)N`v}(ogzGJYD-YkSfKE0$po>ijIK?Igw6QB07il$) z@4-b{4z5yccO_GmZ`9V|JjREs<>4#$sj97Wz_%6xfGd>4ECH@hG13YtjvtA%;wbSH z;2ME}Z9}*&Gc7Z`iE#Z)cY(c2Jf+tVuDs()glnA>uKC36_I*_y`!-_Z6gHl5G-=*+ zvWo7Q7Z@UYIMByVXtK3bh= zfq`*8ToFE6z$_HvsTW}uiZBa>m<2#SD5#Ca6RijeI>C1P)bOWUp$qzo#Rpm&hqN)M zmHsWyWR~Ma4!tL=L>;>N7>>bSCPg{_zP`iT#Q&u_9&U=^dJr(e8$+F7+aAN|(bkTS zqz6HK32ldL9-&OI8HnMI;{kp_9^wl$XP&U>Ct!9M$zugaKcP_>-%RlF9(}^_){rwp zEO(uU1@a3-MqA4hHs21}>&1XJTe~OLKcMByl 
zQ2!Ns3&hfHMl5Z^xezgDM_lO?>uawe-uUd}P4MNh){HZj$6CHTCdgs3vpkXtERPlM z6VHnOr#$kG|63lZXF(*H-#DqD)SGWcZi`I@DFPIM)%exQgJSa~H5_s?U^{Y?4630^ zx~N41huj0Y)NsRHfpnQHs!I!>%K~<5;HoSU>D?XyyEM@5F%9XRo&kMoX!BGcT|Zph zj;aDYL%q(Kd3UeQ=6y(Bj@(BdVzqk%Rr$$$o|UW#%p97-J_wrXIfLVQOJ zO$tdX(mwo#r+orC)G*+~J8okWQNG1L;40eqM0MZl*jBzxGszw8x9u2NLv2Zp-f%wuGU<_kHoEQzHG8$YyiUwvFwma+z*;;_c z`{upRxvj&;2O{V*)4!fw67DI6s~nz|$%hfe3rx!ZgBJe!%$H=9B(;SpC%xL8Bxg`; zrO&j7n_;;WUppxij*JW8hLi8Y6!831cZZ%&1<&ILgkYb8DdBlH{HoyB^Qqx^{9qCI zdASaLpObHL@|{jTd>*fFyxDGH^h2MF*|>fjsp?P7uF)8BaDLX~d9c(I^Dh@26TKAyhvpw?4iFx)3r~K(F{YEoU z%s*V*(b4zpnJc#s%w49s`RlH06IxTETYYLKA77Iyo8J{)er4jvPw%-i`#0;^ySY=} z*_J={i@EE)GVE_&cv#c!`*GIvysXIls=_}P)NMI3D#;l0^_k?&re#0e)Ycx|11sk2 zpZsOdnynWSJRaUK|2pMg!${{x4-=PnSzmNhH%vX&i%P8;Pr*GC*AeBQ0~)GrBB)vF-gB+^P<-eZh!XM5v!UX MxE~PFUwLQvc)N^yun3+S=n(u9H5o`M~p zA6AFcsiMOXGdPC|^e~-P(PFhDcCd6>Pix1ZVzF2imr9OH=DwHbDfQ3coR{xz-@W(U z?|1LVvt1Y3t_yb!O*7~L2mo}=!XNxI=s&#jjnGH2*#F@dCCS!*%=43Oug(K{R-_Ze z&FvPr+?q}MU5xZj+AG9Ni}Vp>fA@C1nvhHMWM&ZUOV3pXctVR!D6@`Ap`|St7+V8j z`jGCwLKqQMX5=WJBG=KdVlon7SEOgUJ(B3{X+%7(dFn6*(fSMr`IOdYs>vvA!{2=L zyrHNW_xO?84_MF3rg)C#J=fwo=IBo?zvW1_1?cS-2IJ+9wAMF#>)feGpRLa<_`?r68gFu!=v zTZZt!jVKO6pSDB5JQPFAbfj7HA{vtw;QJD4lfY6i65@<@nHFV5kz!h(<)@s7I-AhX zdeQ@0?xEu`bAM_FOeWjh^!6zFVO9`HrRTFE0u^ZIgom>5_({W|>Y>Zl7=0}Ti3zd}|C$rAMl{Gqv_Rp5CUk7Vrj_G4-gj|(WtI}}TVnEa?)icL7(S?dFF%wqvU z2jEv(XB_hoWtIZ^RbDu0p!U2dQcJ@YY`~f^3lhmndU`q+HeOuBbHe!wG>?D_lQAn7uUZ$2_SA7J4BdwB+#No^k{3*5AqGP zZDEl73`*G=u%23YdJny@P)$zKiG=~a70>M|qa;5l<_Kpg!(DRgW}bc>R}K1yO?o?* zPi_JeEU^@{bAIxl^D~$AleBY+pXEHg0@oRcJDmuUIrKaI4QZ8C&q3T)<_E;op{ztu`T_K>QHh~erS^pCgDNmqqI^C+GK}jF z@^hOJJqzfE`HLhJrNxijQsSv(aS$=mu*D&fH7M&4_&1Hs0Xz|ZYYy^kdx@oiO|}TI zkIe-82+GT8_2Lj;9cm|0%XS#JSPEv*_Qes}Qj|h8Bz0`rI@u8gy<;JId&ibJla3J9 zCp&3=UL2uh_l!m0IhtPYYY~V&9%7?k76goI9S-AH3js(71$BZ}1PK|)yF4`v!(+p6n5j&sF_q{Jg?^?m1JoP}9Z>Z8 z@8RgS(36VW@;@okP!6G|6kp2URj83ZD&MK_!ScpWgNc)m9md;4?(rq@_UI*E9%dpn zkLw8-=VNM7rVYA0Omjonpu$X`hIKqo0?cZ>0VG)eZBuZ|cGU$pt{Owy#$+Y~-}M0w 
z35#H@3Ia=1XHdt^&)-xqgfM?`?zzMDD)R5S(V8U+`FALOR>H$P-9g)y1bG(W+1l_7 z^#DCL=od>;eE*AjJ6<5ROuhYH4%ok=zDpNre?e&uN{{1;M}$q4%CsUaM8LB{VI8es z8X#GO^NEdkfL_9vIzyuY+je(^dMu?XLoMn^$EQG1?0ogE2qcIEF$P( zc$-MxMoUwR)uezfD-JTvN1X+qNnrpE<>}V#tSv&n1mDOO1@=0Y$2f7|%tOli5TTrl zfw{TyGKVWRuty?`kz8jhJo)4KNu#PW-+g zaBp(Q=O^Ocbb@%z=J2}{@mp!C)V+)NJsS3<;soNCb=-{j-OuB9HIaV}EdXA53O~(RLE~5k&X}w0&lC@0rd0Wabp^xs5!em#9unvH6LB+3Cn9g>!Tr zjV$bqg5BIV#nAT5nJ$I7g3c}>zecFH_fE0-_L)s_3ikrT$g}rGC)#kyZo?S3_dLqW zaW6ZxNA`-HQ#;d*y06MG3veU~vB(C5R4GEJ3?a1+AytZyVj<>+BQXOZ6=C{=F4(kI zH;RxsfyEpR{am4y)wisu`i4yuGI&2?__>vO7c7Nr7 z3i>=WeM-3IAw$|Cjq6iGmsHcJf=f~Z(g!>>78MM7<{`b$OY^l72E1fQcX*4skoFOG zDq+wk8eq~#yh{ldU;G&8@fDv{!6&}rAr%bxiZ80b<|`Y+NV(HQ11i`j6AdY$M<)I& z(*A6~v;LYMCEWMt94ibczp2plD`8(y9MWflI`KsA&ye0!L8DT9Q3=gdUl;6pQ3RXD zgx%$Jc`gEjo__R3a3uRlGW0sY``P$8QVUR(1CWnv++2VPTpt$$0zoL60Ul>HAkb;iLJO|CU~n13JI*H*Tn55`h{8cs9swdLet2jiL0E-rOC-4V zL<1>_1;Nc&5WYAQVAD)+HQ<~qjssC+JP5AEgXm5o5Zx?r$)5!RH@wMk;2YHd-S^B# z=DXv;1BhV2LTLNB_0irU=-Q31D_9MFm1|6|0%ptpq5h(@ndE*VXe5K`eWdu3Xn@Wc zj5b%f1E`3Fdu7}Q*LePS`06Y_&%>!_OTqG=@_c8mlb7K40lS|q4Xdx@`Ico){wUAi zeZvtQdqx85x=d*ACY9N+L2 zWVy%s?eUpz-8(TByjB*J8@;~b>ffJUuMWGlHGBNp<6~Qv->}Wly_|mR<1?qb?poh3 z(x^Y~u3m6)ThgEX+AkhH9Cl8&K52WYVpZ_*%zypu)6wrs%D-*-CNB$medE0|A9@d& ze{4-{ua22BQC3%_yi~D!^S-3$)F$#CU2)f<$?KXqZHMYsX~Mqdgq?5w{`4D98n+Dk UHoyJmKUXX%yd1iRKDt}>Kd8G0TmS$7 From 76e7e42b65024371d22dcd07ce148f2e125c224e Mon Sep 17 00:00:00 2001 From: Dave Plummer Date: Wed, 6 Nov 2024 10:36:01 -0800 Subject: [PATCH 05/10] Refactor solution2 into 2 and 2a --- PrimeCPP/solution_2/PrimeCPP_PAR.cpp | 150 ++++++---- PrimeCPP/solution_2a/Dockerfile | 13 + PrimeCPP/solution_2a/PrimeCPP_PAR.cpp | 379 ++++++++++++++++++++++++++ PrimeCPP/solution_2a/run.cmd | 2 + PrimeCPP/solution_2a/run.sh | 6 + 5 files changed, 497 insertions(+), 53 deletions(-) create mode 100644 PrimeCPP/solution_2a/Dockerfile create mode 100644 PrimeCPP/solution_2a/PrimeCPP_PAR.cpp create mode 100644 PrimeCPP/solution_2a/run.cmd create mode 
100755 PrimeCPP/solution_2a/run.sh diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp index fa168bea5..a9cc811ce 100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_PAR.cpp @@ -24,66 +24,117 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { - uint8_t *array; + uint32_t *array; size_t arrSize; - size_t logicalSize; - static constexpr size_t arraySize(size_t size) + inline static size_t arraySize(size_t size) { - return (size >> 3) + ((size & 7) > 0); + return (size >> 5) + ((size & 31) > 0); } - static constexpr size_t index(size_t n) + inline static size_t index(size_t n) { - return (n >> 3); + return (n >> 5); + } + + inline static uint32_t getSubindex(size_t n, uint32_t d) + { + return d & uint32_t(uint32_t(0x01) << (n % 32)); + } + + inline void setFalseSubindex(size_t n, uint32_t &d) + { + d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t)))); } public: - explicit BitArray(size_t size) : logicalSize(size) + explicit BitArray(size_t size) : arrSize(size) { - arrSize = (size + 1) / 2; // Only store bits for odd numbers - array = new uint8_t[arraySize(arrSize)]; - // Bits are left at zero default, so no need to initialize them - // std::memset(array, 0x00, arraySize(arrSize)); + array = new uint32_t[arraySize(size)]; + std::memset(array, 0xFF, (size >> 3) + ((size & 7) > 0)); } - ~BitArray() { delete[] array; } + ~BitArray() {delete [] array;} - constexpr bool get(size_t n) const + bool get(size_t n) const { - if (n % 2 == 0) - return false; // Even numbers > 2 are not prime - n = n / 2; // Map the actual number to the index in the array - return !(array[index(n)] & (uint8_t(1) << (n % 8))); + return getSubindex(n, array[index(n)]); } - void set(size_t n) + static constexpr uint32_t rol(uint32_t x, uint32_t n) { - n = n / 2; // Map the actual number to the index in the array - array[index(n)] |= (uint8_t(1) << (n % 8)); + return 
(x<>(32-n)); } - constexpr size_t size() const + static constexpr uint32_t buildSkipMask(size_t skip, size_t offset) { - return logicalSize; + uint32_t mask = 0; + for (size_t i = offset; i < 32; i += skip) { + mask |= (1u << i); + } + return ~mask; + } + + void setFlagsFalse(size_t n, size_t skip) + { + if (skip <= 12) { + // For small skips, use pre-built mask approach + size_t word_idx = index(n); + size_t bit_pos = n % 32; + size_t curr_n = n; + + while (curr_n < arrSize) + { + // Build mask for current word starting at bit_pos + uint32_t mask = buildSkipMask(skip, bit_pos); + + // Apply mask to current word + array[word_idx] &= mask; + + // Move to next word + size_t bits_remaining = 32 - bit_pos; + curr_n += ((bits_remaining + skip - 1) / skip) * skip; + + if (curr_n >= arrSize) break; + + word_idx = index(curr_n); + bit_pos = curr_n % 32; + } + } + else + { + // Original implementation for larger skips + auto rolling_mask = ~uint32_t(1 << (n % 32)); + auto roll_bits = skip % 32; + while (n < arrSize) { + array[index(n)] &= rolling_mask; + n += skip; + rolling_mask = rol(rolling_mask, roll_bits); + } + } + } + + inline size_t size() const + { + return arrSize; } }; // prime_sieve // -// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) -// and includes the code needed to eliminate non-primes from its array by calling runSieve. +// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested) +// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. 
class prime_sieve { private: - BitArray Bits; // Sieve data, where 0==prime, 1==not + BitArray Bits; // Sieve data, where 1==prime, 0==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default + prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) { } @@ -103,21 +154,15 @@ class prime_sieve while (factor <= q) { - // Find the next prime number - for (; factor <= q; factor += 2) + for (uint64_t num = factor; num < Bits.size(); num += 2) { - if (Bits.get(factor)) + if (Bits.get(num)) { + factor = num; break; } } - - // Mark multiples of the prime number as not prime - uint64_t start = factor * factor; - for (uint64_t num = start; num <= Bits.size(); num += factor * 2) - { - Bits.set(num); - } + Bits.setFlagsFalse(factor * factor, factor + factor); factor += 2; } @@ -129,9 +174,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (uint64_t num = 3; num <= Bits.size(); num += 2) - if (Bits.get(num)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (int i = 3; i < Bits.size(); i+=2) + if (Bits.get(i)) count++; return count; } @@ -142,24 +187,23 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n == 2) - return true; - if (n < 2 || n % 2 == 0) + if (n & 1) + return Bits.get(n); + else return false; - return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. 
bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -183,8 +227,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num += 2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num+=2) { if (Bits.get(num)) { @@ -203,7 +247,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid: " << (validateResults() ? "Pass" : "FAIL!") + << "Valid : " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -310,7 +354,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -345,8 +389,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::vector threads(cThreads); - std::vector l_passes(cThreads); + std::thread threads[cThreads]; + uint64_t l_passes[cThreads]; for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { @@ -383,4 +427,4 @@ int main(int argc, char **argv) // On success return the count of primes found; on failure, return 0 return (int) result; -} +} \ No newline at end of file diff --git a/PrimeCPP/solution_2a/Dockerfile b/PrimeCPP/solution_2a/Dockerfile new file mode 100644 index 000000000..f6c0ceb2f --- /dev/null +++ b/PrimeCPP/solution_2a/Dockerfile @@ -0,0 +1,13 @@ +FROM ubuntu:22.04 AS build + +RUN apt-get update -qq \ + && apt-get install -y clang + +WORKDIR /opt/app +COPY *.cpp . +RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par + +FROM ubuntu:22.04 +COPY --from=build /opt/app/primes_par /usr/local/bin + +ENTRYPOINT [ "primes_par", "-l", "1000000" ] \ No newline at end of file diff --git a/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp new file mode 100644 index 000000000..017e8220a --- /dev/null +++ b/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp @@ -0,0 +1,379 @@ +// --------------------------------------------------------------------------- +// PrimeCPP.cpp : Pol Marcet's Modified version of Dave's Garage Prime Sieve +// Some great ideas taken from Rust's implementation from Michael Barber +// @mike-barber https://www.github.com/mike-barber (bit-storage-rotate) +// --------------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +using namespace std; +using namespace std::chrono; + +const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; 
+ +class BitArray { + uint8_t *array; + size_t arrSize; + size_t logicalSize; + + static constexpr size_t arraySize(size_t size) + { + return (size >> 3) + ((size & 7) > 0); + } + + static constexpr size_t index(size_t n) + { + return (n >> 3); + } + +public: + explicit BitArray(size_t size) : logicalSize(size) + { + arrSize = (size + 1) / 2; // Only store bits for odd numbers + array = new uint8_t[arraySize(arrSize)]; + fill_n(array, arrSize, 0x00); + } + + ~BitArray() { delete[] array; } + + constexpr bool get(size_t n) const + { + if (n % 2 == 0) + return false; // Even numbers > 2 are not prime + n = n / 2; // Map the actual number to the index in the array + return !(array[index(n)] & (uint8_t(1) << (n % 8))); + } + + void set(size_t n) + { + n = n / 2; // Map the actual number to the index in the array + array[index(n)] |= (uint8_t(1) << (n % 8)); + } + + constexpr size_t size() const + { + return logicalSize; + } +}; + + +// prime_sieve +// +// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) +// and includes the code needed to eliminate non-primes from its array by calling runSieve. + +class prime_sieve +{ + private: + + BitArray Bits; // Sieve data, where 0==prime, 1==not + + public: + + prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default + { + } + + ~prime_sieve() + { + } + + // runSieve + // + // Scan the array for the next factor (>2) that hasn't yet been eliminated from the array, and then + // walk through the array crossing off every multiple of that factor. 
+ + void runSieve() + { + uint64_t factor = 3; + uint64_t q = (int) sqrt(Bits.size()); + + while (factor <= q) + { + // Find the next prime number + for (NULL; factor <= q; factor += 2) + if (Bits.get(factor)) + break; + + // Mark multiples of the prime number as not prime + uint64_t start = factor * factor; + for (uint64_t num = start; num <= Bits.size(); num += factor * 2) + Bits.set(num); + + factor += 2; + } + } + + // countPrimes + // + // Can be called after runSieve to determine how many primes were found in total + + size_t countPrimes() const + { + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (uint64_t num = 3; num <= Bits.size(); num += 2) + if (Bits.get(num)) + count++; + return count; + } + + // isPrime + // + // Can be called after runSieve to determine whether a given number is prime. + + bool isPrime(uint64_t n) const + { + if (n == 2) + return true; + if (n < 2 || n % 2 == 0) + return false; + return Bits.get(n); + } + + // validateResults + // + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // sieve processing at all, only to sanity check that the results are right when done. 
+ + bool validateResults() const + { + const std::map resultsDictionary = + { + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 1'000LLU, 168 }, + { 10'000LLU, 1229 }, + { 100'000LLU, 9592 }, + { 1'000'000LLU, 78498 }, + { 10'000'000LLU, 664579 }, + { 100'000'000LLU, 5761455 }, + { 1'000'000'000LLU, 50847534 }, + { 10'000'000'000LLU, 455052511 }, + }; + if (resultsDictionary.end() == resultsDictionary.find(Bits.size())) + return false; + return resultsDictionary.find(Bits.size())->second == countPrimes(); + } + + // printResults + // + // Displays stats about what was found as well as (optionally) the primes themselves + + void printResults(bool showResults, double duration, size_t passes, size_t threads) const + { + if (showResults) + cout << "2, "; + + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num += 2) + { + if (Bits.get(num)) + { + if (showResults) + cout << num << ", "; + count++; + } + } + + if (showResults) + cout << "\n"; + + cout << "Passes: " << passes << ", " + << "Threads: " << threads << ", " + << "Time: " << duration << ", " + << "Average: " << duration/passes << ", " + << "Limit: " << Bits.size() << ", " + << "Counts: " << count << "/" << countPrimes() << ", " + << "Valid: " << (validateResults() ? "Pass" : "FAIL!") + << "\n"; + + // Following 2 lines added by rbergen to conform to drag race output format + cout << "\n"; + cout << "davepl_par;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n"; + } + +}; + +// custom_atoll +// +// Like atoll(), but accepts K, M, G, and T as magnitude suffixes. 
+
+long long custom_atoll(const std::string& value_str) {
+    // value_str: a number, optionally followed by one of K, M, G or T (either case),
+    // which multiplies it by 10^3, 10^6, 10^9 or 10^12 respectively.
+    // Throws std::invalid_argument when the text is empty or does not start with a number.
+    static const std::unordered_map<char, long long> suffixes = {
+        {'K', 1000LL},
+        {'M', 1000000LL},
+        {'G', 1000000000LL},
+        {'T', 1000000000000LL}
+    };
+
+    // back() on an empty string is undefined behaviour, so reject it up front.
+    if (value_str.empty()) {
+        throw std::invalid_argument("Invalid input format");
+    }
+
+    std::string input_str = value_str;
+    for (char& c : input_str) {
+        // toupper() needs a value representable as unsigned char.
+        c = static_cast<char>(std::toupper(static_cast<unsigned char>(c)));
+    }
+
+    const auto suffix = suffixes.find(input_str.back());
+    if (suffix != suffixes.end()) {
+        const long long multiplier = suffix->second;
+        std::string numeric_part = input_str.substr(0, input_str.size() - 1);
+        std::istringstream iss(numeric_part);
+        // Read as double so that fractional values such as "1.5M" are accepted.
+        double numeric_value;
+        if (!(iss >> numeric_value)) {
+            throw std::invalid_argument("Invalid numeric part: " + numeric_part);
+        }
+        return static_cast<long long>(numeric_value * multiplier);
+    }
+
+    std::istringstream iss(input_str);
+    long long result;
+    if (!(iss >> result)) {
+        throw std::invalid_argument("Invalid input format");
+    }
+    return result;
+}
+
+// main
+//
+// Parses the command line, runs the sieve repeatedly on the requested number of threads
+// for the requested number of seconds (or once, with --oneshot), then runs one more sieve
+// to validate and report the results.
+//
+//   -t, --threads N    worker threads (default: hardware concurrency)
+//   -s, --seconds N    how long to keep running passes (default: 5)
+//   -l, --limit N      upper limit of the sieve; K/M/G/T suffixes accepted
+//   -1, --oneshot      time a single pass and scale it to a 5 second run
+//   -p, --print        print the primes that were found
+//   -q, --quiet        print only the pass count and the average time per pass
+//   -h, --help         print the syntax line and exit
+
+int main(int argc, char **argv)
+{
+    vector<string> args(argv + 1, argv + argc);         // From first to last argument in the argv array
+    uint64_t ullLimitRequested = 0;
+    auto cThreadsRequested = 0;
+    auto cSecondsRequested = 0;
+    auto bPrintPrimes      = false;
+    auto bOneshot          = false;
+    auto bQuiet            = false;
+
+    // Process command-line args
+
+    for (auto i = args.begin(); i != args.end(); ++i)
+    {
+        if (*i == "-h" || *i == "--help") {
+            cout << "Syntax: " << argv[0] << " [-t,--threads threads] [-s,--seconds seconds] [-l,--limit limit] [-1,--oneshot] [-q,--quiet] [-h] " << endl;
+            return 0;
+        }
+        else if (*i == "-t" || *i == "--threads")
+        {
+            i++;
+            if (i == args.end())
+            {
+                // Option given without a value: use the default and stop here, so the
+                // loop's ++i is never applied to end().
+                cThreadsRequested = 0;
+                break;
+            }
+            cThreadsRequested = max(1, atoi(i->c_str()));
+        }
+        else if (*i == "-s" || *i == "--seconds")
+        {
+            i++;
+            if (i == args.end())
+            {
+                cSecondsRequested = 0;
+                break;
+            }
+            cSecondsRequested = max(1, atoi(i->c_str()));
+        }
+        else if (*i == "-l" || *i == "--limit")
+        {
+            i++;
+            if (i == args.end())
+            {
+                ullLimitRequested = 0;
+                break;
+            }
+            ullLimitRequested = max((long long)1, custom_atoll(i->c_str()));
+        }
+        else if (*i == "-1" || *i == "--oneshot")
+        {
+            bOneshot = true;
+            cThreadsRequested = 1;
+        }
+        else if (*i == "-p" || *i == "--print")
+        {
+            bPrintPrimes = true;
+        }
+        else if (*i == "-q" || *i == "--quiet")
+        {
+            bQuiet = true;
+        }
+        else
+        {
+            fprintf(stderr, "Unknown argument: %s", i->c_str());
+            return 0;
+        }
+    }
+
+    if (!bQuiet)
+    {
+        cout << "Primes Benchmark (c) 2021 Dave's Garage - http://github.com/davepl/primes" << endl;
+        cout << "-------------------------------------------------------------------------" << endl;
+    }
+
+    if (bOneshot)
+        cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl;
+
+    if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1))
+    {
+        cout << "Oneshot option cannot be mixed with second count or thread count." << endl;
+        return 0;
+    }
+
+    auto cPasses      = 0;
+    auto cSeconds     = (cSecondsRequested ? cSecondsRequested : 5);
+    // hardware_concurrency() may return 0 when the value cannot be determined; always
+    // run at least one thread so that at least one pass is made.
+    unsigned int cThreads = (cThreadsRequested ? cThreadsRequested : max(1u, thread::hardware_concurrency()));
+    auto llUpperLimit = (ullLimitRequested ? ullLimitRequested : DEFAULT_UPPER_LIMIT);
+
+    if (!bQuiet)
+    {
+        printf("Computing primes to %llu on %u thread%s for %d second%s.\n",
+            (unsigned long long)llUpperLimit,
+            cThreads,
+            cThreads == 1 ? "" : "s",
+            cSeconds,
+            cSeconds == 1 ? "" : "s"
+        );
+    }
+    double duration;
+
+    if (bOneshot)
+    {
+        auto tStart = steady_clock::now();
+        prime_sieve(llUpperLimit).runSieve();
+        auto tEnd = steady_clock::now() - tStart;
+        duration = duration_cast<microseconds>(tEnd).count()/1000000.0;
+    }
+    else
+    {
+        auto tStart = steady_clock::now();
+        std::vector<std::thread> threads(cThreads);
+        std::vector<uint64_t> l_passes(cThreads);
+        for (unsigned int i = 0; i < cThreads; i++)
+            threads[i] = std::thread([i, cSeconds, &l_passes, &tStart](size_t llUpperLimit)
+            {
+                l_passes[i] = 0;
+                // Keep making passes until the requested number of seconds has elapsed
+                // (this honours -s/--seconds; the default is 5).
+                while (duration_cast<seconds>(steady_clock::now() - tStart).count() < cSeconds) {
+                    prime_sieve(llUpperLimit).runSieve();
+                    ++l_passes[i];
+                }
+            }, llUpperLimit);
+        for (unsigned int i = 0; i < cThreads; i++) {
+            threads[i].join();
+            cPasses += l_passes[i];
+        }
+        auto tEnd = steady_clock::now() - tStart;
+        duration = duration_cast<microseconds>(tEnd).count()/1000000.0;
+    }
+
+
+    if (bOneshot)
+    {
+        // Scale the single timed pass to the number of passes that would fit in 5 seconds.
+        cPasses = 1.0 / duration * 5;
+        duration = 5.0;
+    }
+
+    // One extra, untimed sieve whose results are validated and reported.
+    prime_sieve checkSieve(llUpperLimit);
+    checkSieve.runSieve();
+    auto result = checkSieve.validateResults() ? checkSieve.countPrimes() : 0;
+
+    if (!bQuiet)
+        checkSieve.printResults(bPrintPrimes, duration , cPasses, cThreads);
+    else
+        cout << cPasses << ", " << duration / cPasses << endl;
+
+    // On success return the count of primes found; on failure, return 0
+
+    return (int) result;
+}
diff --git a/PrimeCPP/solution_2a/run.cmd b/PrimeCPP/solution_2a/run.cmd new file mode 100644 index 000000000..3390a0df2 --- /dev/null +++ b/PrimeCPP/solution_2a/run.cmd @@ -0,0 +1,2 @@ +g++ -Ofast PrimeCPP_PAR.cpp -std=c++17 -lstdc++ -oPrimes_par_gcc.exe +.\Primes_par_gcc.exe diff --git a/PrimeCPP/solution_2a/run.sh b/PrimeCPP/solution_2a/run.sh new file mode 100755 index 000000000..dc59d3612 --- /dev/null +++ b/PrimeCPP/solution_2a/run.sh @@ -0,0 +1,6 @@ +# g++ -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes.exe +# gcc -Ofast -std=c++17 PrimeCPP.cpp -lc++ -oPrimes_gcc.exe +# clang -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes_clang.exe + +clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par.exe +./primes_par.exe -t 1 From 67344d83b09376ff3f974563a0c93aaa4e427d62 Mon Sep 17 00:00:00 2001 From: Rutger van Bergen Date: Thu, 7 Nov 2024 12:50:32 +0100 Subject: [PATCH 06/10] Combine array and mask approaches in one solution_2 --- PrimeCPP/solution_2/.gitignore | 3 ++- PrimeCPP/solution_2/Dockerfile | 14 +++++++++---- .../PrimeCPP_array.cpp} | 15 +++++++++----- .../{PrimeCPP_PAR.cpp => PrimeCPP_mask.cpp} | 19 +++++++++++++----- PrimeCPP/solution_2/benchmark.sh | 9 +++++++++ PrimeCPP/solution_2/primes_par.exe | Bin 62304 -> 0 bytes PrimeCPP/solution_2/run.cmd | 11 ++++++++-- PrimeCPP/solution_2/run.sh | 15 +++++++++++++-- PrimeCPP/solution_2a/Dockerfile | 13 ------------ PrimeCPP/solution_2a/run.cmd | 2 -- PrimeCPP/solution_2a/run.sh | 6 ------ 11 files changed, 67 insertions(+), 40 deletions(-) rename PrimeCPP/{solution_2a/PrimeCPP_PAR.cpp => solution_2/PrimeCPP_array.cpp} (96%) rename PrimeCPP/solution_2/{PrimeCPP_PAR.cpp => PrimeCPP_mask.cpp} (96%)
create mode 100755 PrimeCPP/solution_2/benchmark.sh delete mode 100755 PrimeCPP/solution_2/primes_par.exe delete mode 100644 PrimeCPP/solution_2a/Dockerfile delete mode 100644 PrimeCPP/solution_2a/run.cmd delete mode 100755 PrimeCPP/solution_2a/run.sh diff --git a/PrimeCPP/solution_2/.gitignore b/PrimeCPP/solution_2/.gitignore index 5b1fd1a1c..2cafdba9f 100644 --- a/PrimeCPP/solution_2/.gitignore +++ b/PrimeCPP/solution_2/.gitignore @@ -1 +1,2 @@ -.vscore/** \ No newline at end of file +.vscore/** +*.exe \ No newline at end of file diff --git a/PrimeCPP/solution_2/Dockerfile b/PrimeCPP/solution_2/Dockerfile index f6c0ceb2f..d56ddc06a 100644 --- a/PrimeCPP/solution_2/Dockerfile +++ b/PrimeCPP/solution_2/Dockerfile @@ -1,13 +1,19 @@ FROM ubuntu:22.04 AS build RUN apt-get update -qq \ - && apt-get install -y clang + && apt-get install -y bash clang WORKDIR /opt/app COPY *.cpp . -RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par +RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_array.cpp -oprimes_array +RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_mask.cpp -oprimes_mask FROM ubuntu:22.04 -COPY --from=build /opt/app/primes_par /usr/local/bin -ENTRYPOINT [ "primes_par", "-l", "1000000" ] \ No newline at end of file +COPY --from=build /opt/app/primes_array /opt/app/primes_mask /opt/app/ + +WORKDIR /opt/app +COPY benchmark.sh . 
+ +ENTRYPOINT [ "./benchmark.sh"] +CMD ["both", "-l", "1000000" ] \ No newline at end of file diff --git a/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_array.cpp similarity index 96% rename from PrimeCPP/solution_2a/PrimeCPP_PAR.cpp rename to PrimeCPP/solution_2/PrimeCPP_array.cpp index 017e8220a..4edd24af0 100644 --- a/PrimeCPP/solution_2a/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_array.cpp @@ -25,7 +25,6 @@ const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { uint8_t *array; - size_t arrSize; size_t logicalSize; static constexpr size_t arraySize(size_t size) @@ -41,9 +40,9 @@ class BitArray { public: explicit BitArray(size_t size) : logicalSize(size) { - arrSize = (size + 1) / 2; // Only store bits for odd numbers + auto arrSize = (size + 1) / 2; // Only store bits for odd numbers array = new uint8_t[arraySize(arrSize)]; - fill_n(array, arrSize, 0x00); + std::memset(array, 0x00, arraySize(arrSize)); } ~BitArray() { delete[] array; } @@ -103,14 +102,20 @@ class prime_sieve while (factor <= q) { // Find the next prime number - for (NULL; factor <= q; factor += 2) + for (; factor <= q; factor += 2) + { if (Bits.get(factor)) + { break; + } + } // Mark multiples of the prime number as not prime uint64_t start = factor * factor; for (uint64_t num = start; num <= Bits.size(); num += factor * 2) + { Bits.set(num); + } factor += 2; } @@ -201,7 +206,7 @@ class prime_sieve // Following 2 lines added by rbergen to conform to drag race output format cout << "\n"; - cout << "davepl_par;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n"; + cout << "davepl_array;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n"; } }; diff --git a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp b/PrimeCPP/solution_2/PrimeCPP_mask.cpp similarity index 96% rename from PrimeCPP/solution_2/PrimeCPP_PAR.cpp rename to PrimeCPP/solution_2/PrimeCPP_mask.cpp index be5864816..58ea0dac6 
100644 --- a/PrimeCPP/solution_2/PrimeCPP_PAR.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_mask.cpp @@ -70,6 +70,15 @@ class BitArray { return ~mask; } + uint32_t rol(uint32_t value, size_t bits) + { + bits %= 32; + if (bits == 0) + return value; + // Ensure that the number of bits to rotate is within 0-31 + return (value << bits) | (value >> (32 - bits)); + } + void setFlagsFalse(size_t n, size_t skip) { if (skip <= 12) { @@ -78,7 +87,7 @@ class BitArray { size_t bit_pos = n % 32; size_t curr_n = n; - while (curr_n < arrSize) + while (curr_n < size()) { // Build mask for current word starting at bit_pos uint32_t mask = buildSkipMask(skip, bit_pos); @@ -90,7 +99,7 @@ class BitArray { size_t bits_remaining = 32 - bit_pos; curr_n += ((bits_remaining + skip - 1) / skip) * skip; - if (curr_n >= arrSize) break; + if (curr_n >= size()) break; word_idx = index(curr_n); bit_pos = curr_n % 32; @@ -101,7 +110,7 @@ class BitArray { // Original implementation for larger skips auto rolling_mask = ~uint32_t(1 << (n % 32)); auto roll_bits = skip % 32; - while (n < arrSize) { + while (n < size()) { array[index(n)] &= rolling_mask; n += skip; rolling_mask = rol(rolling_mask, roll_bits); @@ -254,7 +263,7 @@ class prime_sieve // Following 2 lines added by rbergen to conform to drag race output format cout << "\n"; - cout << "davepl_par;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n"; + cout << "davepl_mask;" << passes << ";" << duration << ";" << threads << ";algorithm=base,faithful=yes,bits=1\n"; } }; @@ -429,4 +438,4 @@ int main(int argc, char **argv) // On success return the count of primes found; on failure, return 0 return (int) result; -} \ No newline at end of file +} diff --git a/PrimeCPP/solution_2/benchmark.sh b/PrimeCPP/solution_2/benchmark.sh new file mode 100755 index 000000000..a1cd0ea64 --- /dev/null +++ b/PrimeCPP/solution_2/benchmark.sh @@ -0,0 +1,9 @@ +#!/bin/bash + +if [[ $1 == both || $1 == 1 || $1 == array ]]; then + 
./primes_array ${@:2} +fi + +if [[ $1 == both || $1 == 2 || $1 == mask ]]; then + ./primes_mask ${@:2} +fi diff --git a/PrimeCPP/solution_2/primes_par.exe b/PrimeCPP/solution_2/primes_par.exe deleted file mode 100755 index c815cbfcf1741a58a60a4acb1c6a2bc2822d5d62..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 62304 zcmeHw3tZIIw*UUk@R%9UPzO{@8GIDr3lMo!<^YnQrXHk|z5Fr2C?mr|7*w=kd{ijv zamvbrUA0c#fV5*BI48Sx#qyl2tmEr?yxr~*&3i(zQnQ@#X7m59{re647_d5z`+t3Y zzuNA-_G9g}*Is+=wIA&H6dMYc4JV3o)2C$b0wzm& z2&Q&*eE~kqQj928IjRe)x|03zT|r$$-={JXf|dAmGCuH-f1EBNRdqMmA7B0o315d?f?#b|>D~e0t8!G@$`;8<{`j(2 zO86$oNC;Mb{b=fjoS&?WfTUd^a{`i(ZD&eb>g@j;#97_BoCtEq5Y3X_CV&}AdlD8>vzON@;zF7Eeesn34 zD5#{1U{Nfn>XtSiU6dk!Z4$nC9}Hja3LYb~u`9KEe9p?5Ju_=sj>Ijb3(}xA%URnb zKYx=s>+FWTfxMl8ha67`i8*~9Zj$F*Sx7Y(Z}n6P%y z0(uI^j-8M&0YupfIGw4={A@bZRnPqMYN{O8(g_)I5#(m<7C53O2K+R`kCkEKFMY$| zD82n62=&P*C8(du0hI$P2UHHI98fu+azN#P$^n%FDhE^!s2or^pmIRvfXV@t11bkp z4yYVZIiPYt<$%fol>;gVR1T;dP&uG-K;?kS0hI$P2UHHI98fu+azN#P$^n%FDhE^! zs2or^pmIRvfXV@t11bkp4yYVZIiPYt<-q?}9PnlaA2~N_NU2*K+Vt(ckxf_T3~Rc+ z^5&-N+F;k$TGouei`}g2m+sLCW>4dqVQ&0+>}H+7T)O@HQnN?0YPf zXxQ|UVcRU=+8Ai|?5Zhqj|*Tezd&2R(y|sRSF2~23N$Q_%3e%2r$uREOYld|d0!#V zA2qDyBFaT+^qnTq8>R_eAEoKf!#64NJM~ijIcTRcf0V}1SqM4?7(Qr-f5)(m%C>#) z&1+vHxK4vV8o`-cD6lOY{uOgw3T0b#%(!I>h~H9fCWham@p6{EhPFSja{!crjJbbw-0v z?b<=j*H=CWIeGx{bbpguyszopsQIOkGgq5dJkdVN>=^_+eU&$%?eR#Xexv^BtIsB& z&pt+=F{No8M$BfElx`H$pp#B zCrp-+HOyuG0U5Cv&_9FB*H3E-w(E435r40&Jde5t3Cp*Wyp$pAlb3XaNnSR>@9?&z z6@TeXZij3>4Y&gBhXTFG4yU?aGq5dL;3u9a*t|tU?Kd{Rj6C`O=}pohkM8+c7Yj7+ zZV%S(W`XJO19qE{7k}9AIt{s84cT9awgca0Hg}k773B4d*Sk4`G1ue3L-?M7|EKR! 
zr^cE4D8g63Tf7NyG5X>0t`NX{EPN!5g*pxMpno?yQuc3>e!X z1EG4h1#bZu3EyHFA8(IoysKjr=t7=c#7_oorvX#>E6{)b^@Jn6uewT6*2GN5N%!)$ zqAeogXk48D9*Xa%ZA)vU@fvLQn32}XSPJ-Mtj047q^Bqk)jJz$cq%}G!LJ?Ql@F$V ziO~QD`eh>W;e`Yv-a0V8E!RQi??gP_OE7lf1ddI>L2=+A$4mNv>kf?T;mA*P?gEZ) z?|mCOK)_t0X}Z3K@wtSZW%MM-Zr#O~b9E-RHv{_cidJa8=fcCssN8APNizO#I<{pC z^i*pGyOfSGD(RFA;7f;2dAE`D2=!w==9-<1wS+?_evdk7oV0qqt(3o%!^Y-y@UP7I zSI_}kGG#h+QaK~aU6k{+pjr7@0G*}*9a_W( z0B<1h3a+bKP4l;C3(<8uWM}2OX3q+(z}vlIQ?NVS%v@=RPx}|MC+1~y6Z(tmaeZ-& zJ1PKS&_a5B2=scpb_jS8-uyez5H+3BoC9@FvAxBG%tbQVaHpgbHX)tMm8{E&zi#l4 z-SpTUI_=Hw4$bgH7VMm&8^!iYIwYd`3Aw%3S7w3!X=q0#+LM8Hr8j+7{s-R9Z_$Rj z?NYxot=aQ3FB9He4WHzP(xYYBzKf>~YJQUY5zXV_Q#$p*Mma8q;`)#WrT!s)_~t6& ziLzb9)4KkGyKaF*Q+}M;vzp^JHqV96^$zMAg>XLfPB~AbzUwJ7-pl~lI)>8k%0tlJ zP_(-r+TS01xC#9j2Ad}U{UtOR1;Z z4>8`3G9L7CD*CfyI_wAFHjQCE>e1Gcu`P9m zKBV3(M4ixU%{9$|8*`@$h7vX!-%lWH3>6Tw2wd@8P{px0zG6ibkS@cB&Uyo-PeuJ6B}(# z1HPm6{dZrU`Rbv|V>ev547{Bt;lUL<2iyRs+-^3o8>irhQxuFGM?2>EzUh*9We$?l zhxedsZ)}434Qglo_}#pnal1F>#%+AHlG?g6peIfp+RAwgUK4Lk#DC%i+8PbIC|wUb zLWj0a6~a5$K~|WMv!cCUH=bynluy}5tAbfx8TFCYyw@B=H($X#Otw!H({-MCfaSR{-mYjPo1-3Nmo%_- z-CiN>+OGtcJCwC9hWvu&z4{RJA9!FuUq?X>>v&&huhz zt>()!75zbR`QRPr2g*f(7j@u;NyuK|3?#d9h%|3I<6Sx}bp3w?r*y^bjCW}f*M@np zD{fc3>w69K!913njPW)ayz=p3Cuk%-w99-jf!3|y0rc>a$OAhfBNchgedOIJ=hY!^ z$5ghrKk}14@L-)1f^ZDNu&ejd7{3kSAjEl*kJ7aWlm4JEL--r*ux8!Yl068XcG-i& zxIH+mc{b|0hW;qlroZkU72MRG!aV;?^PHc~X-9vZMqi#mol8(gEJ;mmN1xSw=Y1CK zx-|H}j)o5J{s^I%%6eA-_OF-hbgK7Xd0j?6XV=T&4e)!^mz~J#d-DeM zS&?5^-vYols_zB44zdx-W?FJ#ILi}5*pg=Wji7_#jKWiqky@6dJN#s>ZqUw3uZCS3Bs|;jpq3>M zfh?7xEb*{7h`IFFryYyZ*IqUSu4~wY_B_H!CfIr& zhCFQZWLG2b&HwM}Vb3#lFdRqXgHr zHG<0sdbYfvb1}&H-N-{anbsaG(3zV9|0-atLp;NDZ12s*Leroysz-1Mz(qFnXV3-9 zk*|Fmo7}i!rMqq%YwZ|?eSsCXyS<~NyhD+`GMMIuVV(QG^XAdGC*Hctr=v|~&vz5e zdv|IE*HgV6w_=X6Eqh~&Mi+7w>#S3n!F(7_pVMdd)kpFAGUKl+7XE|Cp3dU zY=w>AemnLeATv=cyt57SKQpl-*HDh&=V9M>Ko@<(`K04*?C@+qTDm+OX{ zy?pS|>}~q%)1QjDo?Xv`g6%^DmaMz~oukEZ9hU{{`Iv^}k1#?`bejuCkbTwo+CLp_ 
zjA*&s@#mA5btgXOd1$>W$yxfbb8~{5l=5~Nv{2vq%iKGdpJ{wIVh?6#5KHQq3R?|( z6W=_$Bht{1U0O4k<(&iVBJ7}dFt0BL?{=Yn-6Q6w^w8fWfaBn3FRVn*T(#IEsk`pY z8!NCBJK!DIGuXIlT|=jKZ1XEZNPUT?_Q`tq2IRj0yrmcu8*QbjW&MPCyT^PSiTCl6 zeRyKQH~S+Y+jat3Q z|NGdF_!M&wtwHNi7i9mUtiumS*Cn||LSJkS=vyD(4qgeQ>t%hM1lV6+ken0WmJ?s) zbzPPmz6(Cp`^z8UQ)By&^9keHXh%Cki|whUX#0jSA4P7s0gtj3Jo*L7gcX;k60bfM z!s7ZDPfYdkMi0K8l6b?%g4Y|+`D9ad{g99_`Ab5MuuUm;^Dwc=@{Q%3E?3h3pa(d3jITBh0snXPO8_eFmLu z(XxRgYb-r~#Dla~3fVV92VF(JuQSb_N!X*kG99`Ja=1e?i0{$S_}Wsyju1@xsP6Kq z)H<&>*|~aw`vS^&(68+%`!&|y5)I*4OU}hQaSqmtv$1Zx8|%keSTE1S-sTMKZ3ghY zP0TkJbpz5pjO=-=<;cb!0^1V0o9v^*(TCz)0gxjz@E<}wJgy?m)rc_dsZcu{;Gs$0 z{~;J}gMtSd*F&&nsSWoaZzl3)z+vDTv92_tucbX6@N65>vB74%4UYD6@D`=CPXpWU zU=a3uz@OhB58=#~ahd^x76}LHTra1?_T_b^0=~tUj`B|Ik?)Z6q{w+DDsT*t^Y-(V zflZo^bQ-T+^xVeyeh=+0NqL0kH<3m*D4kh!;LJid-29}7HBUNhd@+E%M(af4XK@g8I^t<9O7U&* zt3a<$7M}s0D2%W6!8;k}jf&_MciXKjx#p4veQLX6%|D z6L}p*K0g+tZbuN!*}CRwh$pvDc>Mdrn*yyUuM!xI5Os#v(iD z`8e08z|7qy{j}Y}D(!B~80Qod-&@t}HU(zvZXf2Ha{3oqglXSZ*nNfedEic?{AW?j zl|lQ>7-y$L%%0~lw$?$v(3#H{I9K@+ygsk(-`uI~*L-Mz+4B+hxOYHzyqd77p)_HA z!^apGSSz*e!@Ni1j?xw*eg^Vl4cU2y^B;PW>IA@=8HYf7_f$w+CLlP@$M$H zosYfqa{ZmC|AN-g{1I?|fcp2Lj@hVxeL{U#{i0kyrPU+8gx4R^i7}N&cyeN$dug6O zk9j^6^ZY2Rze~^uJ21}|AdemWOzR=^_t61x@*Hq<19bmJ=>Aus``>}?r+l+6KJuH9 z@IM1x0i&5K5PJV{IIGQ?y3n>THJJ21+~?5sUqkm_f$sktx}WBPd8T8Ld8Xr$(@pP1 zPBR^dtkMJ;w!r-t3pA`{K?bt1XN0eB*a1Hw!qu=3G8q=JxnT!<%mob$`&fmt?vK+L z>J4z`1w;KloL`Xc9}l`VLHC#0%2Iy;-T&+8KSq8j^f#P>?kBuh!|?ti-A_FG8uI>2 z=&tv$zMBMhZq#1vr3ui58t6nVbYlSe1pA>YgCK{r_o>A`f~Hw#&L43VvOzZU^8qZ$ zH-~Wjw>jBW06HA#Z(7$D-<0kq-hT_;!|uO?J-xi;NXOH{7t@xRJ&};%`=3a5kxexu zgt=(%tOI8;P3RwkciOR7+u~HiO~Sm`F&{+gy|hO#6zejq!$I$9tdVJ+x({`oz?!2N zy7?=#$G={(?WoTQC)sw?=QNLxL!G|9#(aMi^Zn5q&G(lu*W*!Nmw>rG+Ga`B$n(7x zb3H?U7ovahhH;`5{auLuW`IYJJ`jn%s~kQ0_mL+eo}qrMj2``NB=zM~gw3PdA~Cl& zL?Mj1y@BTTgn`(f<8ymNJ?8fM-wOTfX>K>8e`lg^$J$C#XQ6LP#vF>o`DrEL_08#I z>s*Ds)T2LkCc8*p!@#2+@r}uRZErw4&HbN2uVJ1!eD(2UoK+6yeAOb2_vsgZ~}7h2gm`*#jPXXi%cGQg4=zgM!p;Q 
zISVkH&;%F`z%Rtu_hNiU8&@@)!nh~dfSi!c*H8=JCl?uAHlM)d;(1vvXx#sh%}02B z@8`~Bg~3%>V0i+N{{Zf^?BHG5)TN^wC}mqZV!Yp4S+ zG~sm(v=**GnQ)}HzCf}Q-c0;=AT9#(bYdyZvk`nRh2qP6^&L*(=c5IXD;wfS1~Wh# zl<=M{npEI1ay$h{BYB$4Wk8C1i02>Foa}3hF4eUJn_yqsgMG%9V!-%d{7r8{d-z;U>q86Y-=OBdaGTE9yaRTx4)ax%Aaw2o|JH$qxG>ZOy8wL( z{^HC4PDHv1_F4}5fadB>%=I&H{{8{%3bNg3p3-2?`#g9tbEDZk7WYFgXhX0c656~0 zbRI%JC|hGE(nH|$e7HwqN7@e5bsl-0w6=y#upaeM9QcU6YSuCne9nhWLFbPl$lu9W zYc}G@E~B`yi0c4NGhx%610J#~=)58m_-cV?HSkb+2pJWKquA*ph+s*7o9UXod!-PwU_9meR+va zy-cT}i_RGwpVUT)P6MaQC$F?e^)mWz3Y>s39Nl8}Xy9jzHhcCWZYpD)9k3l@@>!Y_ z=N7al0zF}jv#_+&;9Zo!I?o|p4|uV_((=)Nu{fiRv6$0zL2Pmi+D`4LMc)$qyw-sY)@M$`m?{RJ}ooz=$2BFKImBz2J`CEjgu`ZpH z-yFb<)MoFy!jT){^#W50Q=VF3xYTKbe*t$db5D!CClH-0S*NgxT}&2J_yN zPn!2e1+xSJW51)2^j1XkR+QoLMtf0+a{(sZg>{`rfPSa50opgEw0}d|$tT%fU4%0? zMyoTd(S+AWtzt_G*p`OCC^m@B!DtRS3%qgANlaj^K05M2ht6c)OLwgVK_7P? zOY-?ztoJ;E_H#XfAAd0N{V>qbg8Ud45{&ZelrZZ46tJ>R%Kr)6=jabwFa8N(sxJ?I zHRjqM;xXlC2G-)%27=cCxSODb{jI@W7PH63gb#HXON00CXt)pIsln`02y`s3JAvjk zx@$DFnfMpQhIT%WG3&*hybo~>N9TlI+>siIvE{}6DAH+HYgw8H?Vr0kZdsACAt@7 z2oK31_H87&Zj|+?QR$ph7y!klRl&__SVm~Yc`-=Wmu4kg*cjo{1sLF`DXFc@1y%t$sc z?bXm3KrzzJ8BR2${%6ECn+E!+X1O~S`Z)=D=?ZL;DBK@7gFFXO9`>CvN*K}^jdR{D z$cwhL)WHsj5(aY{sxE`heN*_oJlaEw5+eBBIlcZwL(KCL{O(+=pj$r`I7kl=j^m(t z9dJwqj%dt@>wsgbj6>jkXvAJgCfc2W_NT)RIl%eBpdYS+U&K$Gb2sgTeS^3u#dKd1 z^2&r!+btu_Z!ZP@cQm86kJODEVK$jYV9w8@cE+Hc#X8nPbHq9g-^Vf@yjFXR^z#R3 z>nV-lJn8RK{n@2h>~X{hIPV=2Jwnf7w?|>@g=62p5i&_KcMr-CU-xNr=XV08yE>(y zjcmN7XxlzbL}wF^mu$6rARjj)e*x?`qSr?&;m0q>Y^OT07Ru|ZFJlm|3t&fd1KK|6 z(CR)UdSjvwHo%U&#Nz83NcLx=?tL28*}B@EQhk$byd#9ap`r z9gs;wzjXI{@WP0BCZ;CUT?alDqmOl%r$~NIY4qo*-=KTe4@5mAKkuVmBuf&mW$wCf zysckA-*rHyhR8BSygqov+e&(nWTFsdd@{ky_0!c;oxZ*+#u$!9yJ&p$)%PUxl(q!z zq_Mk%eCHr9-D6W(*M zf9XCd(T;s1PJ04quLa%8y+pc;LHv!=A8*Kc$+&GX^3Mc)WRt{cSnD^yF$>{P;3wKk zQ2z#7iJNTeS#n%GVAye<^RVqYVP|xZjgIsWutu1PcGb#t*ilFQZsRtx>)No^Bzzld zO5FXfdt2*j#)59C9NE${upT*qx(ly+pY>mtNqS7OPsI7s-QlMfur&N$l9A>sjH}Mo zkh?~CZfa7@O%Z%d<)?F<_p|ekaYR`y>+=S50mEDmwqQ2 
zXgzldcjuz8X3J(mXB+r%?rZjXZ1eidq=z)nA+$&L4P<@D^~|Sq!9(g9`g9NKr!!{A zcau&FdtIyBMr%ml+T3@>JPf)B_Byv2`WkE0KG+3(51iV&40%b`DQrQ1uc9%8eXKLc z7bNrHW(6P6#?34*%{7Va9jyj7r4~9o0J=O7Ivx9rD}ynYNqdc--K1&iSc5(r#hN;P zg}Zej%;PO&X;Yb|vmN8j*G>;)g6_L6g^c_b^%ITK-I<8y7f^TY;^fp;(9|&)chxPp zGm1Odd|eOT{SGvft@twXk!>^*{39OIU8yxVV>~F!*!v0@!(9i+(U9f`P(}yaXDMV1 zeD!b{flO2V*H=zQd+tKJve3S1*o(+)LVLLlN$Fog?oGaPA+2P0Q2Y__R?=q-=C7pB z4#D>yBa}y%Z^qwhuq3Izo_i0r7tBpYjM2$()IVGJ`B-@8PK-%|5PiAeGmkxv;#r}j1_Pq}EMnTVXpbXdJ zfblmDAJRQA*imFZblnk2!X1$|<^2#nThd5#p#oPdWY}P{xo3d)23xHAXdig%(Keqx z%!eHO4tXxX5r59ZXVB|?-IGPzdA|(CIlmiprD3k2HQgsv#}Cl(8l{tcp=g@{b1RK~ z>c?W>p?z%~_T9-QqPqf^uT!vQbCJyxw`yxchc>1;Tflk*cLgwy*6Wd9j|NeLu1T@OM4fLqY!&e?{a4Q^8+j^OM}~d5+bA zvLq+4`}zEYcJQ@iZ&{XfeFXC2&I{~Nf$w+IJa7>EZe)kIVc)F=b)0}4pTaqBI{2^? zv^)h`l)AhEbzq*@6NWUVi8{f9lkln8ENJThK5uTt3Co00pitufU z@STcqh9Z2IB0NJ8o~;PyD#AZkgzr~`7bwC7its{3c(Ec}rU+Lm!jCG#s}MYur`-mVD0pa}o7B79U4KA{MIq6q(45k8{`|3wi#uLyss2wzr&zg2{< zDZ<|?!Wx|W`((qQ2uCQwk&5sLMR=qle2XF+rwC6_gcB9vWJUNkMfgrB?DYy|`MS-a zY=aQ+7X3X(*o&d;MQvzfDBGjOh6CFWtlNP2;L(h|5gdj;RXG?OdNzbz(S^Q&EP5k; z{PTc5p0O+X&@G{CvtbBI?lMrB76S&$0fX^)2>aBKx(Ao;Lk5_;7YM z#E7snH2!!L+Y@TqIf8u}It}5S{Y=k|V2Aq6LU?O`)5YQJKz}2`jW-1xLO4u!a5y^@ zHjXiOm~Pi_<{Xd&9W+39b_9ECfbPNwc3^<+$_Umzz_>Pwy)iHVx74>918|SA#i(mT zID+as7-2d*oLz|^yaytuA--;y=~Ltx7LV}RVY|@a&BH_MK+SO7mEo*$cpGL<;=#57 zLQtKc+ad^@CvtlY4-Ubq*n!(Ul*cO=9*jRJfHyb_q%!G3a!lwhjqqUTa~h%8v_T{6 z5{w9bDg@MNgm%hyKof9S!wzaf89S~CXx9iQDX6St0E00L7h0#qPldiHC84fRhDGDe zx;xlt@C5KcwhR5gOS@Ls5XANbT;v)1$QM300M=l!5tz0LIs|tLkO!jID@3428Jupd zhPegZRt;(dcAhq1wDGK#9n$EqPjg)JhC~N@`|Y=Pu6es@CS}40ozK6!C6AdiFq7h& zF2n4DhTAEa=YzvuIu08FTU*k{8HpfgAbgw?^UsI3QHH~Al>#5h1$N@#ioWGzq7vmtrZ^QSgM!6arZr);UkZYwgC zRhL>TZH1-^OQqvBQ&+UDtfJasT3B9LYH_gf#ccfeVynG^&8sPMSe7H7>4EW%apT83 ziYu*_qAHW@KZNKiL|0kyBhZMJeTudt+HNbgIZXUtib_CKd6~7UxZFYD5`aspZB`Bx zKV)L#QM0cGHogjgjE{}C1K@br_yh!HENpxQfC~ItG)gW(kfUSwl$Df~FDo-yDi@)# zWoYJ@DmJ&$R%)#>WmwA!i%Tt)C8n6dSW{wrVuER!WvTVnD${gJrDc)TG~QI~a8%qj 
zapEGIqqw?YLScF7#3JOVuunuqK!IxhK_8nVF~el5GL@H2Fr}NS@VnP`EBdCY%CyX8 zx0?#Ars^tdk;zexw9;z3#bGsBOp|3sn<}fzCUjvbuW;DP%S?rqva)gql`OR_N6}?y zhcAC&d3Bk?R9-3fpNYd0SY~-CdLMP0q$W~zW9;^7P-Pl}%6X@csUqw&JCXgI#1bJY&hARjnH9|> zev1&%+Zq0|*cX*oq7|i63M^IDaSJU7FRZptsj*g#E3i4Lrr_@ekYX#uPo?u;4zDgN zuPm}wT8qR|OT}%sSx{q%wZ=qjS%`{@*ivhuqr8$y4FebO1Ll?06lDT(yOqW+jXJ0& zhJ7%GS%AZLz?ZZs3qX1R1{(Q!&=8~!G{~E$Ta76L2TUgskq<||$4(!?%8&BUZ>wkD z#n`mvs0#{aE`0jEXFL1@^N|LFvE@F-9)!Qaf_V7Z3mJPD{;oxgwZSj40XO`U@H^ng zm!p;ym^^AZ93R58 z_MuETFqCQUxS56A9L+QvqJb-#1#K9~0`C}&dPlRsA-6DL{w*xvty@_@ek@~-Sf)Jy zzay3f>BcjmWdhTfCo=8kiHseb$bz;)P49|l0VflfCN+@-OiN5xo?zxoOyE?d&=A&Ec-)bf2iz7%Kk9fCmEyq z>G!nB$A&5Q!3XY}WFPFu7v>x(`=ewZU4bvmDNP$;&Rb;oR@sk{eRK=HFlU_XkClBs zIU;_%3{Q}KbQ!)dr!=#MIkB$daH8x_lKmvvpDg=Wli>?DiQ&Y33@7gA$o>r3m*)F0=PVh% zTlQzm{v7Tzr{t;ssvJ-`pmIRvfXV@t11bkp4yYVZIiPYt<$%fol>;gVR1T;dP&uG- zK;?kS0hI$P2UHHI98fu+azN#P$^n%FDhE^!s2or^pmIRvfXV@t11bkp4yYVZIiPYt z<$%fol>;gVR1T;dP&uG-K;?kS0hI$P2UHHI98fu+azNz(aliyO7j82i0U#e<*C+km zzXVEuYp)2K@fv`$D=ROj!2<#U{)k?vfZGMv4)>&h#}44|;}1f-h6$B$EpT|~O~9)J zLL1z;046lRg$6QVJsci&7wmBO;SRwZ%!FpR^KfxGCTxWZ(lg;GT!aBX5eK&xt_6;T z02dryAP`=M`!n3!P$tyDwZWPCG2vuCcKsUME&bVb{F0aOINUpM^KW9pU*H}NV?ra` z@i2B>H-KFq0v7|9Jb($)1~AQX_*>xKg!>duH;@Sv;O4`vhI?)x(;S4m05@uBio`c&3cL?rVym}yt_mnNOl@*D()+#)&0{H&fGv_%biDE*sy}Zz3 zx2ECA5^8%Bw_r$|HG^)qx^eOL^)%M?!+Vd=Z}UuvS)< zS7s(=&7E0zgOuEvv!yB%5|uB1S8mP*l4iHAa}atgDuC^F4rC+hIV2gPJb z1fj}AyR~eQqgbkQT0%E1iRIM}v3#LeX(?M|RVF0cq^ER5-}^F(#KialOA!((Ej419 zb(tt3&+JCqcLoIp;$uRhh|%0aPRhJO-t-*$4@U* zS(TMr^jB9!rIzp6c$o5xLiJJNnl^?CCMp@HjQIwv=p~rOaCXPaOc|ZB5 z`}1US*LZ|@7E~|1frMgo`$oNl_NNu|7wqoZ@Ose=Yr`1v8~&5fxGYj4qQ3%}mnhCl z61(&RCoZjvCtSgaMHYvp2lPKf+ z!6aLm&5<=%+3#IxlMC%uOJ&w#g(#-Unywr={HJKS3VYxAdiOpi1pj`|6Z7K5yaaUo z+?n%|#oU?m(zu$K=SXu{D@$!9E+AO7b~olRpn)P2+dI=E{&(yDu<=8 zL@X>WfgxqH_l)8Aq$rHJ$_ffP%Bw4Yg^93mFmn{FuvV5caUl$dGRHzDmRd^-OOX+N zY57tRfdIxZBMXCPsF0L_SO}}afpi~?OjQ-uLfb+cW^2eF3>w*<7DVhcsl`^t zmdoT^tUVdJLCAhfvsUPc+A7R<2ej)e!Y&GXv@tDO^I6S{TI1Z&JA-yXzyoN6kKUW3^cop% 
zk>Tw!Oz+T9`g?dNjqg+@E|K650QW}%y@N;TKf?oU1SdHp_=pTYEW@)P z8I-<7hTkF>kIvDXeN_Hq8K$@U2yT~Qdc%+4%Q8%F`4Jp|2jHkYz3E4AgbdT$egwzJ zFun0d@a-~8Z~YNGTZZY)KY}eXOmF`Y?2uu41CZb+WSHIpB)CC_=}ka_cgrxn4M=dC z4AUEd1Ro=q9?xr&U@V*2O-P`33n@L1!w?dD1Q)|mI=x>=FsTXnRE&S5D*#gx|6ans zO88wr_&@#NWGQo3`8j@Yu^;@HAN{NOkJ;IIAQ zMD)E9Uzs2LD?j+8AKWjXXZZ|2xWEry?FZv`&wA8<(GUK4V9)gBe(-ib_}6~$dw%d2 zey}#EC;st%@LhiJgMRSSe(-C4@JD{|bw3!t>D;6Ji~V4yAB^8r?va0wAN;W&JQQO^ z*}wUI@GE}sen0q6e()tf7(YJH1HYi}3FCeJ9_csv!Lbqysg%AHzjXY@W{>g@_`ys3 z;I)46Fa6+G{osG`gO5mX7d@Z(!QcA90_J1zw<~|BA3V?x9_j~=^n>I4;7NY)?S62k zA3V#NPTA)em) zq<224{ddCsBYBVueDmP`8eWLVpATn&D}XD6D}uAaEreSHR}A;J@N6l{5#KQFvl_U+ zhJVP;Ot9wv(RQ>b_BN#PG%~Dbbu(Ap%Spf$g`2tP7~cuOq#nPUkyE%Fd$LKzma1au3|&mME~&QI zvDGZYiZeCJSz)nN=FBWyPN(ZxI5w2F?rjbm_O-DcE!uHroii_0%pzgAGbS{bn?p`vvGu*gJZnDD8^|jZgk*$#|}~U9qlPNF$upJlQXks zrj2CUZmp`y;Z5(!4NBwkjx?MYlf=Uc>h7i+2!D!*Lub&Ys4f|2H#l7)#?!SAaIe}b z@?&A5f$pw|2{?ZSQF0o7kO)7FW6hBcs)-PZNTOm^K>`RtoU-HSG&_rMFQi*Ld<^H* zWM$>fCDd~DVp(~SmEXz9nY)x@NKB9j<7c}L(FRuW%QK*!7xX1l!5l=+%R)7FA6mL4 zz|YK3qm*MhlILaPU^P!ZS4~Vs9y%Vdk)|3pxiahT#iL;wG1=I(nMJ@13{(EZ(+_@xjG zD4H8Zho!(S4POW+k5Pyi@HX?Pjq># zG_Vsz65Xt<0*rkO?r9ZsaFaqI$)tJyA7NAim|RqjODsfhj>OQVJ%&oJ+lD`7IwJnl z)fcGJ3S8#F6-1nBV@Tt`pRPD6E9jO~ zIdkV0&a%Zrs-UYgC28{AG3_=~{x;2l@z`Y?bhG;>Xpu?iMhqWxq-4=TWBRU?N3KXd8|->4;5`qm78uAZ`)U1ki~m!u&{zj`!%p zrHhO0CJlbZ@h5$gfrNhoU#C^k?`lrKwJN5|iu+mY$YiDu6HK<~pX53hLaleg=$*NuDm$I=Dk zf;USKAHI0!`=4~a{y^E_zx?-er_$P{C$&X9lljhy?9c_r$F2S3_LpvZ`NF;5wSV~K zeKWRg5)*&-z^Va1^Zw$GSD!gFmv7dmoO=$@eegiO JejOKz{{;yy2mAm4 diff --git a/PrimeCPP/solution_2/run.cmd b/PrimeCPP/solution_2/run.cmd index 3390a0df2..aee24f625 100644 --- a/PrimeCPP/solution_2/run.cmd +++ b/PrimeCPP/solution_2/run.cmd @@ -1,2 +1,9 @@ -g++ -Ofast PrimeCPP_PAR.cpp -std=c++17 -lstdc++ -oPrimes_par_gcc.exe -.\Primes_par_gcc.exe +IF "%1" == "" || "%1" == "1" || "%1" == "array" ( + g++ -Ofast PrimeCPP_array.cpp -std=c++17 -lstdc++ -oPrimes_array.exe + .\Primes_array.exe +) + +IF "%1" == "" || "%1" == "2" || "%1" == 
"mask" ( + g++ -Ofast PrimeCPP_mask.cpp -std=c++17 -lstdc++ -oPrimes_mask.exe + .\Primes_mask.exe +) diff --git a/PrimeCPP/solution_2/run.sh b/PrimeCPP/solution_2/run.sh index ea150e106..70f698fa5 100755 --- a/PrimeCPP/solution_2/run.sh +++ b/PrimeCPP/solution_2/run.sh @@ -1,6 +1,17 @@ +#!/bin/bash + # g++ -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes.exe # gcc -Ofast -std=c++17 PrimeCPP.cpp -lc++ -oPrimes_gcc.exe # clang -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes_clang.exe -clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par.exe -./primes_par.exe +if [[ "$#" -eq "0" -o "$1" -eq "1" -o "$1" -eq "array" ]]; then + echo "Building and running the array approach" + clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_array.cpp -oprimes_array.exe + ./primes_array.exe +fi + +if [[ "$#" -eq "0" -o "$1" -eq "2" -o "$1" -eq "mask" ]]; then + echo "Building and running the mask approach" + clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_mask.cpp -oprimes_mask.exe + ./primes_mask.exe +fi diff --git a/PrimeCPP/solution_2a/Dockerfile b/PrimeCPP/solution_2a/Dockerfile deleted file mode 100644 index f6c0ceb2f..000000000 --- a/PrimeCPP/solution_2a/Dockerfile +++ /dev/null @@ -1,13 +0,0 @@ -FROM ubuntu:22.04 AS build - -RUN apt-get update -qq \ - && apt-get install -y clang - -WORKDIR /opt/app -COPY *.cpp . 
-RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par - -FROM ubuntu:22.04 -COPY --from=build /opt/app/primes_par /usr/local/bin - -ENTRYPOINT [ "primes_par", "-l", "1000000" ] \ No newline at end of file diff --git a/PrimeCPP/solution_2a/run.cmd b/PrimeCPP/solution_2a/run.cmd deleted file mode 100644 index 3390a0df2..000000000 --- a/PrimeCPP/solution_2a/run.cmd +++ /dev/null @@ -1,2 +0,0 @@ -g++ -Ofast PrimeCPP_PAR.cpp -std=c++17 -lstdc++ -oPrimes_par_gcc.exe -.\Primes_par_gcc.exe diff --git a/PrimeCPP/solution_2a/run.sh b/PrimeCPP/solution_2a/run.sh deleted file mode 100755 index dc59d3612..000000000 --- a/PrimeCPP/solution_2a/run.sh +++ /dev/null @@ -1,6 +0,0 @@ -# g++ -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes.exe -# gcc -Ofast -std=c++17 PrimeCPP.cpp -lc++ -oPrimes_gcc.exe -# clang -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes_clang.exe - -clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_PAR.cpp -oprimes_par.exe -./primes_par.exe -t 1 From f53f8eb1cdc8eb804f2a4989f9b8740131a9fd4f Mon Sep 17 00:00:00 2001 From: Rutger van Bergen Date: Thu, 7 Nov 2024 13:13:10 +0100 Subject: [PATCH 07/10] Fix run scripts --- PrimeCPP/solution_2/run.cmd | 8 ++++++++ PrimeCPP/solution_2/run.sh | 10 ++++++---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/PrimeCPP/solution_2/run.cmd b/PrimeCPP/solution_2/run.cmd index aee24f625..f5191ffd1 100644 --- a/PrimeCPP/solution_2/run.cmd +++ b/PrimeCPP/solution_2/run.cmd @@ -1,9 +1,17 @@ +@echo off + IF "%1" == "" || "%1" == "1" || "%1" == "array" ( + ECHO Building and running the array approach... + ECHO( g++ -Ofast PrimeCPP_array.cpp -std=c++17 -lstdc++ -oPrimes_array.exe .\Primes_array.exe + ECHO( ) IF "%1" == "" || "%1" == "2" || "%1" == "mask" ( + ECHO Building and running the mask approach... 
+ ECHO( g++ -Ofast PrimeCPP_mask.cpp -std=c++17 -lstdc++ -oPrimes_mask.exe .\Primes_mask.exe + ECHO( ) diff --git a/PrimeCPP/solution_2/run.sh b/PrimeCPP/solution_2/run.sh index 70f698fa5..3ecc9be9c 100755 --- a/PrimeCPP/solution_2/run.sh +++ b/PrimeCPP/solution_2/run.sh @@ -4,14 +4,16 @@ # gcc -Ofast -std=c++17 PrimeCPP.cpp -lc++ -oPrimes_gcc.exe # clang -Ofast -std=c++17 -lc++ PrimeCPP.cpp -oPrimes_clang.exe -if [[ "$#" -eq "0" -o "$1" -eq "1" -o "$1" -eq "array" ]]; then - echo "Building and running the array approach" +if [[ $# == 0 || $1 == 1 || $1 == array ]]; then + echo -e "Building and running the array approach...\n" clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_array.cpp -oprimes_array.exe ./primes_array.exe + echo fi -if [[ "$#" -eq "0" -o "$1" -eq "2" -o "$1" -eq "mask" ]]; then - echo "Building and running the mask approach" +if [[ $# == 0 || $1 == 2 || $1 == mask ]]; then + echo -e "Building and running the mask approach...\n" clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_mask.cpp -oprimes_mask.exe ./primes_mask.exe + echo fi From 5c8e79fd5de369f86da3871bf1ca938feb690b18 Mon Sep 17 00:00:00 2001 From: Dave Plummer Date: Thu, 7 Nov 2024 11:12:51 -0800 Subject: [PATCH 08/10] Fix code --- PrimeCPP/solution_2/PrimeCPP_mask.cpp | 105 ++++++++++++-------------- 1 file changed, 47 insertions(+), 58 deletions(-) diff --git a/PrimeCPP/solution_2/PrimeCPP_mask.cpp b/PrimeCPP/solution_2/PrimeCPP_mask.cpp index 58ea0dac6..90a871c3d 100644 --- a/PrimeCPP/solution_2/PrimeCPP_mask.cpp +++ b/PrimeCPP/solution_2/PrimeCPP_mask.cpp @@ -24,41 +24,46 @@ using namespace std::chrono; const uint64_t DEFAULT_UPPER_LIMIT = 10'000'000LLU; class BitArray { - uint8_t *array; + uint32_t *array; size_t logicalSize; - static constexpr size_t arraySize(size_t size) + inline static size_t arraySize(size_t size) { - return (size >> 3) + ((size & 7) > 0); + return (size >> 5) + ((size & 31) > 0); } - static constexpr size_t 
index(size_t n) + inline static size_t index(size_t n) { - return (n >> 3); + return (n >> 5); + } + + inline static uint32_t getSubindex(size_t n, uint32_t d) + { + return d & uint32_t(0x01) << (n % 32); + } + + inline void setFalseSubindex(size_t n, uint32_t &d) + { + d &= ~uint32_t(uint32_t(0x01) << (n % (8*sizeof(uint32_t)))); } public: - explicit BitArray(size_t size) : logicalSize(size) + explicit BitArray(size_t size) : logicalSize(size) { - auto arrSize = (size + 1) / 2; // Only store bits for odd numbers - array = new uint8_t[arraySize(arrSize)]; - std::memset(array, 0x00, arraySize(arrSize)); + array = new uint32_t[arraySize(size)]; + std::memset(array, 0xFF, arraySize(size) * sizeof(uint32_t)); } - ~BitArray() { delete[] array; } + ~BitArray() {delete [] array;} - constexpr bool get(size_t n) const + bool get(size_t n) const { - if (n % 2 == 0) - return false; // Even numbers > 2 are not prime - n = n / 2; // Map the actual number to the index in the array - return !(array[index(n)] & (uint8_t(1) << (n % 8))); + return getSubindex(n, array[index(n)]); } - void set(size_t n) + static constexpr uint32_t rol(uint32_t x, uint32_t n) { - n = n / 2; // Map the actual number to the index in the array - array[index(n)] |= (uint8_t(1) << (n % 8)); + return (x<>(32-n)); } static constexpr uint32_t buildSkipMask(size_t skip, size_t offset) @@ -70,15 +75,6 @@ class BitArray { return ~mask; } - uint32_t rol(uint32_t value, size_t bits) - { - bits %= 32; - if (bits == 0) - return value; - // Ensure that the number of bits to rotate is within 0-31 - return (value << bits) | (value >> (32 - bits)); - } - void setFlagsFalse(size_t n, size_t skip) { if (skip <= 12) { @@ -117,7 +113,7 @@ class BitArray { } } } - + inline size_t size() const { return logicalSize; @@ -127,18 +123,18 @@ class BitArray { // prime_sieve // -// Represents the data comprising the sieve (an array of bits representing odd numbers starting from 3) -// and includes the code needed to eliminate 
non-primes from its array by calling runSieve. +// Represents the data comprising the sieve (an array of N bits, where N is the upper limit prime being tested) +// as well as the code needed to eliminate non-primes from its array, which you perform by calling runSieve. class prime_sieve { private: - BitArray Bits; // Sieve data, where 0==prime, 1==not + BitArray Bits; // Sieve data, where 1==prime, 0==not public: - prime_sieve(uint64_t n) : Bits(n) // Initialize bits to zero default + prime_sieve(uint64_t n) : Bits(n) // Initialize all to true (potential primes) { } @@ -158,21 +154,15 @@ class prime_sieve while (factor <= q) { - // Find the next prime number - for (; factor <= q; factor += 2) + for (uint64_t num = factor; num < Bits.size(); num += 2) { - if (Bits.get(factor)) + if (Bits.get(num)) { + factor = num; break; } } - - // Mark multiples of the prime number as not prime - uint64_t start = factor * factor; - for (uint64_t num = start; num <= Bits.size(); num += factor * 2) - { - Bits.set(num); - } + Bits.setFlagsFalse(factor * factor, factor + factor); factor += 2; } @@ -184,9 +174,9 @@ class prime_sieve size_t countPrimes() const { - size_t count = (Bits.size() >= 2); // Count 2 as prime if within range - for (uint64_t num = 3; num <= Bits.size(); num += 2) - if (Bits.get(num)) + size_t count = (Bits.size() >= 2); // Count 2 as prime if within range + for (int i = 3; i < Bits.size(); i+=2) + if (Bits.get(i)) count++; return count; } @@ -197,24 +187,23 @@ class prime_sieve bool isPrime(uint64_t n) const { - if (n == 2) - return true; - if (n < 2 || n % 2 == 0) + if (n & 1) + return Bits.get(n); + else return false; - return Bits.get(n); } // validateResults // - // Checks to see if the number of primes found matches what we should expect. This data isn't used in the + // Checks to see if the number of primes found matches what we should expect. This data isn't used in the // sieve processing at all, only to sanity check that the results are right when done. 
bool validateResults() const { const std::map resultsDictionary = { - { 10LLU, 4 }, // Historical data for validating our results - the number of primes - { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 + { 10LLU, 4 }, // Historical data for validating our results - the number of primes + { 100LLU, 25 }, // to be found under some limit, such as 168 primes under 1000 { 1'000LLU, 168 }, { 10'000LLU, 1229 }, { 100'000LLU, 9592 }, @@ -238,8 +227,8 @@ class prime_sieve if (showResults) cout << "2, "; - size_t count = (Bits.size() >= 2); // Count 2 as prime if in range - for (uint64_t num = 3; num <= Bits.size(); num += 2) + size_t count = (Bits.size() >= 2); // Count 2 as prime if in range + for (uint64_t num = 3; num <= Bits.size(); num+=2) { if (Bits.get(num)) { @@ -258,7 +247,7 @@ class prime_sieve << "Average: " << duration/passes << ", " << "Limit: " << Bits.size() << ", " << "Counts: " << count << "/" << countPrimes() << ", " - << "Valid: " << (validateResults() ? "Pass" : "FAIL!") + << "Valid : " << (validateResults() ? "Pass" : "FAIL!") << "\n"; // Following 2 lines added by rbergen to conform to drag race output format @@ -365,7 +354,7 @@ int main(int argc, char **argv) } if (bOneshot) - cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." << endl; + cout << "Oneshot is on. A single pass will be used to simulate a 5 second run." 
<< endl; if (bOneshot && (cSecondsRequested > 0 || cThreadsRequested > 1)) { @@ -400,8 +389,8 @@ int main(int argc, char **argv) else { auto tStart = steady_clock::now(); - std::vector threads(cThreads); - std::vector l_passes(cThreads); + std::thread threads[cThreads]; + uint64_t l_passes[cThreads]; for (unsigned int i = 0; i < cThreads; i++) threads[i] = std::thread([i, &l_passes, &tStart](size_t llUpperLimit) { From 558b2b07ade2ad501f84908423a3efdd1f67b9b7 Mon Sep 17 00:00:00 2001 From: Rutger van Bergen Date: Thu, 7 Nov 2024 21:25:00 +0100 Subject: [PATCH 09/10] Clean up Dockerfile and README --- PrimeCPP/solution_2/Dockerfile | 4 ++-- PrimeCPP/solution_2/README.md | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/PrimeCPP/solution_2/Dockerfile b/PrimeCPP/solution_2/Dockerfile index d56ddc06a..e3a2dd0b0 100644 --- a/PrimeCPP/solution_2/Dockerfile +++ b/PrimeCPP/solution_2/Dockerfile @@ -5,8 +5,8 @@ RUN apt-get update -qq \ WORKDIR /opt/app COPY *.cpp . -RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_array.cpp -oprimes_array -RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_mask.cpp -oprimes_mask +RUN clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_array.cpp -oprimes_array \ + && clang++ -march=native -mtune=native -pthread -Ofast -std=c++17 PrimeCPP_mask.cpp -oprimes_mask FROM ubuntu:22.04 diff --git a/PrimeCPP/solution_2/README.md b/PrimeCPP/solution_2/README.md index d4dd3405a..7c67d9449 100644 --- a/PrimeCPP/solution_2/README.md +++ b/PrimeCPP/solution_2/README.md @@ -8,13 +8,16 @@ ## Run instructions -(Linux): clang++ -march=native -mtune=native -Ofast -pthread -std=c++17 PrimeCPP_PAR.cpp -o Primes_clang++ && ./Primes_clang++ +(Linux): `./run.sh` +(Windows): `.\run.cmd` ## Output +```text Primes Benchmark (c) 2021 Dave's Garage - http://github.com/davepl/primes ------------------------------------------------------------------------- Computing primes to 
1000000 on 24 threads for 5 seconds. Passes: 185267, Threads: 24, Time: 5.00074, Average: 2.69921e-05, Limit: 1000000, Counts: 78498/78498, Valid : Pass -davepl_par;185267;5.00074;24;algorithm=base,faithful=yes,bits=1 +davepl_array;185267;5.00074;24;algorithm=base,faithful=yes,bits=1 +``` From 99304e64df3c50b6788271aefd934d4365d99882 Mon Sep 17 00:00:00 2001 From: Rutger van Bergen Date: Fri, 8 Nov 2024 06:02:15 +0100 Subject: [PATCH 10/10] Fix CMD script --- PrimeCPP/solution_2/run.cmd | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/PrimeCPP/solution_2/run.cmd b/PrimeCPP/solution_2/run.cmd index f5191ffd1..ba425b7f4 100644 --- a/PrimeCPP/solution_2/run.cmd +++ b/PrimeCPP/solution_2/run.cmd @@ -1,17 +1,27 @@ -@echo off +@ECHO OFF -IF "%1" == "" || "%1" == "1" || "%1" == "array" ( +SET "_RUN_ARRAY=0" +IF [%1] == [] SET "_RUN_ARRAY=1" +IF [%1] == [1] SET "_RUN_ARRAY=1" +IF [%1] == [array] SET "_RUN_ARRAY=1" +IF %_RUN_ARRAY% == 1 ( ECHO Building and running the array approach... - ECHO( + ECHO: g++ -Ofast PrimeCPP_array.cpp -std=c++17 -lstdc++ -oPrimes_array.exe .\Primes_array.exe - ECHO( + ECHO: ) +SET _RUN_ARRAY= -IF "%1" == "" || "%1" == "2" || "%1" == "mask" ( +SET "_RUN_MASK=0" +IF [%1] == [] SET "_RUN_MASK=1" +IF [%1] == [2] SET "_RUN_MASK=1" +IF [%1] == [mask] SET "_RUN_MASK=1" +IF %_RUN_MASK% == 1 ( ECHO Building and running the mask approach... - ECHO( + ECHO: g++ -Ofast PrimeCPP_mask.cpp -std=c++17 -lstdc++ -oPrimes_mask.exe .\Primes_mask.exe - ECHO( + ECHO: ) +SET _RUN_MASK=