From e7ff90a1da741a673812b43fc3bf4444966f582b Mon Sep 17 00:00:00 2001 From: James Munro <283605+jamesmunro@users.noreply.github.com> Date: Thu, 12 Dec 2024 11:24:11 +0000 Subject: [PATCH 01/11] Create slack notification step for master builds (#2058) From here: https://github.com/integrations/slack/issues/1563#issuecomment-1588009077 Uses this integration: https://github.com/ravsamhq/notify-slack-action --- .github/workflows/failure_notification.yaml | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 .github/workflows/failure_notification.yaml diff --git a/.github/workflows/failure_notification.yaml b/.github/workflows/failure_notification.yaml new file mode 100644 index 0000000000..4bc6e9ad5f --- /dev/null +++ b/.github/workflows/failure_notification.yaml @@ -0,0 +1,20 @@ +name: Check for master failure +on: + workflow_run: + workflows: ["Build and Test", "Build with conda", "Build with analysis tools", "Coverity Static Analysis"] + types: [completed] + branches: [master] + +jobs: + on-failure: + runs-on: ubuntu-latest + if: github.event.workflow_run.conclusion == 'failure' || github.event.workflow_run.conclusion == 'timed_out' + steps: + - uses: ravsamhq/notify-slack-action@be814b201e233b2dc673608aa46e5447c8ab13f2 + with: + status: ${{ github.event.workflow_run.conclusion }} + notification_title: " ${{github.event.workflow_run.name}} - ${{github.event.workflow_run.conclusion}} on ${{github.event.workflow_run.head_branch}} - <${{github.server_url}}/${{github.repository}}/actions/runs/${{github.event.workflow_run.id}}|View Failure>" + message_format: ":fire: *${{github.event.workflow_run.name}}* ${{github.event.workflow_run.conclusion}} in <${{github.server_url}}/${{github.repository}}/${{github.event.workflow_run.head_branch}}|${{github.repository}}>" + footer: "Linked Repo <${{github.server_url}}/${{github.repository}}|${{github.repository}}> | <${{github.server_url}}/${{github.repository}}/actions/runs/${{github.event.workflow_run.id}}|View Failure>" + env: + SLACK_WEBHOOK_URL: ${{ secrets.SLACK_WEBHOOK_URL }} From a749cf38225381d45d4efb9614408164cda912aa Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 12 Dec 2024 11:30:23 +0000 Subject: [PATCH 02/11] Bump dawidd6/action-download-artifact from 2.28.0 to 6 in /.github/workflows (#2020) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bumps [dawidd6/action-download-artifact](https://github.com/dawidd6/action-download-artifact) from 2.28.0 to 6.
Release notes

Sourced from dawidd6/action-download-artifact's releases.

v6
Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v5...v6

v5
Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v4...v5

v4
What's Changed
New Contributors
Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v3...v4

v3.1.4
What's Changed
New Contributors
Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v3...v3.1.4

v3.1.3
What's Changed
Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v3.1.2...v3.1.3

v3.1.2
What's Changed
New Contributors
Full Changelog: https://github.com/dawidd6/action-download-artifact/compare/v3.1.1...v3.1.2

v3.1.1
What's Changed
... (truncated)

Commits

[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=dawidd6/action-download-artifact&package-manager=github_actions&previous-version=2.28.0&new-version=6)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)

Dependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.

[//]: # (dependabot-automerge-start)
[//]: # (dependabot-automerge-end)

---

Dependabot commands and options

You can trigger Dependabot actions by commenting on this PR:

- `@dependabot rebase` will rebase this PR
- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it
- `@dependabot merge` will merge this PR after your CI passes on it
- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it
- `@dependabot cancel merge` will cancel a previously requested merge and block automerging
- `@dependabot reopen` will reopen this PR if it is closed
- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually
- `@dependabot show ignore conditions` will show all of the ignore conditions of the specified dependency
- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)
- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)

You can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/man-group/ArcticDB/network/alerts).
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/docs_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/docs_build.yml b/.github/workflows/docs_build.yml index ad1c870206..82e20c2ab9 100644 --- a/.github/workflows/docs_build.yml +++ b/.github/workflows/docs_build.yml @@ -38,7 +38,7 @@ jobs: - id: download-wheel-artifact name: Download wheel artifact from last successful build if: ${{!inputs.version}} - uses: dawidd6/action-download-artifact@v2.28.0 + uses: dawidd6/action-download-artifact@v6 with: name: wheel-${{env.PY_IMPL}}-manylinux_x86_64 workflow: build.yml From 9b3eed2cf2ab2024dd4be1c351b1117046aee84b Mon Sep 17 00:00:00 2001 From: Nick Randell Date: Thu, 12 Dec 2024 13:20:01 +0000 Subject: [PATCH 03/11] Update documentation logo and theme styles (#2064) #### Reference Issues/PRs Documentation style updates from Something Else #### What does this implement or fix? Documentation fonts, icons and colours #### Any other comments? No code changes - just inside docs --- docs/mkdocs/docs/assets/fonts/AspektaVF.woff2 | Bin 0 -> 28516 bytes .../images/ArcticDB Logo Purple Stacked.png | Bin 0 -> 41521 bytes docs/mkdocs/docs/images/ArcticDB Logo.svg | 3 +++ docs/mkdocs/docs/index.md | 2 +- docs/mkdocs/docs/stylesheets/extra.css | 20 +++++++++++++++--- docs/mkdocs/mkdocs.yml | 8 +++++-- 6 files changed, 27 insertions(+), 6 deletions(-) create mode 100644 docs/mkdocs/docs/assets/fonts/AspektaVF.woff2 create mode 100644 docs/mkdocs/docs/images/ArcticDB Logo Purple Stacked.png create mode 100644 docs/mkdocs/docs/images/ArcticDB Logo.svg diff --git a/docs/mkdocs/docs/assets/fonts/AspektaVF.woff2 b/docs/mkdocs/docs/assets/fonts/AspektaVF.woff2 new file mode 100644 index 0000000000000000000000000000000000000000..82967520600e9bf28355d927b0f3811acf1ff728 GIT binary patch literal 28516 zcmV(*K;FN1Pew8T0RR910B>Xf5C8xG0SFiX0B-*P0ssI200000000000000000000 z0000Qgg_gm3>=Mo24Fu^R6$fa0E;XyMhbzZaDk2!FoER)0X7081CCS+hb#aDAO(vH z2b(t=`1S|bbl~5tpmsvx)!MlA>qe1?z10WGtxWeq*d5E29@j2Cqn}cgCkkuH%a&Z(_UjkqT*Cj1 zg$l#Tu)lH}qpxW8&o;g>RT?ENcKWBC(L4G#tw45lPzU-bCIk+$1iKX;!GLA)zD2xl zKYK3;A)Ockgcu(!KxnZc0zV?e()bN)QR|7i<}xw)s!z zw6|I4W)+cf90we59tS8AEn>#1e@430AMLMLyYlbv*U-CJKbeG(1PQXG)Q#)16@RSt zO#N@=xt_SM^LiX*ooy-2Q8AjPNgzsw=l_4~|2Nk<_nQJ$w2?(TnfU!cB6Wx+`41|g zd58M;58rA3Cz_0qku!lBRoH^N*L)B%#RK&9-nYyyoVL4Zs>G`4(sZac`}Ya^l+t`_ zx*EeEG*ZV1oC-%dt*SxPLYo744ET9mwMIwq`T@`aV3LbNCfNdPzfWh*NOIthWZzZk zT540}t+lpw$MVS+mm%DQwRH$v0+xM13c>KX+4o9mr=>bp{UW;Na@&91(@!&- z-yT?YXT-Tl+J2G;q);*Q{#5(ghSlTsGNqJu8wLgIhk&0DY2)^ZqUCBVLLrkfIIP{e zAt;Zqcs@oQqWUcV1Hk|PHCE+w`G`-X=|6`OIaAX(yb;7dCNhYIN~DX&fi;b4sqE&w zweJ6&3IMk(_xyr$;lx;W$9`UrZlIXex$2+r1nU^6YSk>;NklvN6|$8>I9lS62>JXC z6#uAofP96yb3F1K#v<$+9_dhy`A^mCiVwmTXS(DmOtz6*kuLAy1c_R2ukXHxBR&9@ zk75y&WD}%h6Lcf!DFEdn(vueeq`Yi#=ra3cIYrhs%bvXFkyAC3JV2a6+LH`9RY~7c zo5x#vy|s65x67XQaN3vKT=##cDNbZPckFX}sA~~s9WvB~Axrv->*#*g@-^2nCzFbF zK}wMUY-!;h(B~dzZ2`YiM5%zp!&nYu%Z9N?b}ZO`TM6eW1|UZLFT++%s#E84|NEZY=k~S;DIy|9 zM9ffbub&4daXxR5<3Qk`cJqI{jDg2N0)iZB1Pw0)AN3I$@J9#~feWr67$pZ`lkEsQ?LpXQKSF_Ggc8*Vhnz+@>oUSsPZ6H`2*PK40ih1x zK=?r%h=dd{C_$kpVWBIL5h%@~x@%u*a3BCOcU=euR-*-g2odN%6bE1qXwj|xGtmd*J`}paIASq zKXoZH<~OeTV`!v9mTIhkS=Kpcwkt*x}F-0D#Li zHuLHzRxVi07|5~8*PSbGNmh6X(fKnw-c@PaRb z5QXMwk4{KIH}u02til@PAQyX4jA}S>1GjJ|_DOPblT(zOk`GEHEkX~3K?uVUCK6y4 z!UAr{Lt4=CM$+6w?_@>Z@o;GZ8uD@`+a zj6WuwcwiSuGu;J{<|L}XNv`rhY6EG-NqYQ1egg^W0qJG@#~II%iLjDs-8=^qVUQYsgL;`HH( 
z1$B7&$o?a70Ne#oe9N~2FQo*fV{amkm0H>Uh}XT<^Is|#fpYbc?P!h^*Bultfbup< zdLrC#tjfE3!!@OKF@(G!?9sFdQi0S)G(lt0CJ4J0X9lzY(5R{2M2!Z7=Jq2-0;~WI zIYzk4t||$6-vepX4iL}3EM>Arwz-nS|3RTUsZe@zRBkB`^l@D(XFhI%5$bJu)Ke2v zG%8^{m4er_z{_8Z@ZQ$EeBj;Cc?`aL)3V$I+&dJ^nOWsfUrxdxsviyZ^t{Jj67wkD4&0 zCn4PPDiRuNU1rE1)ezpS8sTx;yu(+Z25b9G5r3Ui(fH_19VhT+5^t{PCJ@-ia&obE1a=sCRc zV|8gyck&f4fKjM$QJVjy#vp_k?vOzVHBa7r8Tbhl%p`(YV@)1=3>{0-Vbv!dV+NCU z#lT@B3Hq}aM+_WF=o@3q9Kv13r9j4`RK}-~Oh9v)kalGvx+W9TZHa^)Sgs+Pp2|4% zO2(!4G9DdW!ly590Ss$`B@AX6xQ_%7fgl2i+h4f>Gz~F8!f*#j6cT{kLJE*LWB|E` z93V+30Fs6hAX%t@834@gBTGI)lv(k4yynVxQ@WHQLX1c~53V5)8;K(w{3Uft;s2=>xzK! zqo0+Xlbe@cP*_x4Qd(ACQCZa*Zfl90I(_Esxf|~=-vSFclqyrMLZvFzYStgeHsYIG z+X>m~2^J2B_ZyL({PJblOy=BecU5YAp;}!M`lYLwYT|%XSC88mAAYlsV7~+5!6QT` z^B$1hY)VJSVB65v)@rP_{TEDk1WoW^H(M;c&kk80)Y2VpDLs_^aH8#)u zQYBbAwaLQLvfk?6r3hujbgvLY-Dr~!E?HJa9Xf0b4_3#U zaXU`%95djeB(DDt6$sY?N@O(h>oZDoy-l;t4twOYE9P*_1y|hi$Qxhq6E56cR8 zkDLf1jWYT`v8OZ9B-5X{e43TyQrFP3D(Z(#Ex*OZoP?8c0gHS(SAU@X3`2k+)X>n- z5(an>L@#Es6N|ANmH2?a?2U*EO!Ow$V z_gyu4Ftb>F<=SC)&YcgccEW$Ix#NjSNkThuxIrQ)&i6WYqVha``iKH`>Bbkq! zuQitGUi5<|7vMy?j?-EF{`&FPlfNOvU^Z9`R^SLAg&AzZA}m7%-XoD5@6GFZ9$>{o z4dPuaWC<%+<2KW(;lXzSM;!|-{AuNB*RM4@|L@;jX#c{qA4Do%ev70eE~jexFPLLJ8nvg28~gahIf&;0a|YbJ>y1Y+crRFV4ke z!$-)s{K&6@iI|v4n5?pheEKAf#p-XMp(X%esm*i*zn|F14t1nso#<3&x}|fvs7t!6 zE4rz<-P-Nl2|Zu;LLS=$EFTBd-oip^@k2k3ld`Y(7ORY$864`l#~-`f`#L&bWtru= z?6-=7ik420U=bRMW6_cU#D?JI(<3Tr*r;iEIhW+fw$5hjZL&kLJOv7!_P%)s{NN8i`PFg1{SywqV#G*IXbq{2 zJZz(}lc!zOw)3)wx4rc8`6&|Mfco|eQz=xra8;rlB6SweaS~^U9Hro-^%3zX$ z3G7>_pLIE-+p>Pn8)(G<7xj0+KmL~Lve9!f#Q)Ln)fhPk6Bc2{avA3KF~6^Qbwt)` z;sl3Zqyv~#Nbm{VF=S;r`d)zz4D3?;{_x()+M|!jU(7#HDP*%Rr7e8yW8`3V7*h?E zi;TWv@->U^c%0;S)*ydM^N+FfG5H#dUxG8>_iMPtK z#yT5qk|Wz%>t$MLn_afsZHGN}+KbTHUKIwm!1Id5{ZbeNv;w#}0MJ5TjT@f8&a)6i z(;hDY!0iQm1w1~n;C(ir7)AHPLZIPj=DL|?F>o%l6DZ_ZV#Ed}&+Wr(np)y4lAU2g zq%4t9co!CJEo{@<$|YcYBlI2*UZQF6U zYg9%h?Ci^GgC%ARbStqP+izT*IJT`Pr?JeHktn1>+$|7PvLq6lG9++84T624Ckb(8 zQlr8JnsP2#_i$#GY+o^R)1_yi4guI(JGdo@_dIOJqsO;6hMaZkVv)W$Tb9ADpN%R< z-Wb!d!FOpvPKXpsw#n`ngFNTLi|Em^;5hUZ^uQg{D zMr*Q4nRt>JQz>Dq6j`nJq}E?knBU_AWQB=fI!{S0t!zADh&4=4d@mbra}x1Y_XTe* z%##Q#E>R(&uRw1=J^`mX8?p%n{ATDE^1xL38*^|X-Be?I*3xLta^Qd;3VX!~T=C$F z+74>Qn;e_;kfmj=P`m;2GZrkgS@T}uinIB?D3Q31nj#rvKz^iHlard%7?Zuy-H}%4 zLXSs9!!IXcsb?_jC#hYA4QIwZPxo<4pwh6S);KoPU{M{BjHvVOV}A zH)Zd40g?JT-`-%(J=&RcFr6f0wkVhoc1>(q6bkizu#GEI^<66i)8SEyz-swTaEahW zDTE92q%!94nGU z3dj@Pfa`G_XP=NU*SC#eWKd~Z!G5loin(_AI!vLrjuBYcWTzyAGz3shcaX6~Co+Z$ zxyd08TJ{qz;^Uo#Ye*#*mr@+FTV|BCLX>?SEvkxeVS&#DLP#eWvR|XT#)tq*BTz53 zuJ&cwhS_%3&SoVFmX%-b5W1x84CxO0^m$EW@{+j|0TXgM*gJlqR6B?47mnGhM4sc> zp(;1e1m4Co6Q_4?P%uVI4;*B|H|hdabdXK&$GO^J+8miS10>8jWr zTwQiWp_Eg4D9U1kzt_6_Jb-Rv~~CS zyho$SAvf>O{OPIIliRfvu(e2sx{T<@p{l$%R2o{ot`T*O!d1{HL8+!R0>*AqoslXT zjA6`M3brzj3!1_;$@?XVN@&E#Wt;|WE)%7tT1kzN<1)?$%PjO(P&)&fO1;%%oviH= zt6b--v-h={CoLKMoE$51PO!RMIvEblmWA&UiHz21ClAV3Y&!9|^lgc4{XBFyE!d_N zjh`P-k<)5^`rs%xanwtcV$Zjp?+$)m9EB#BGp!3Zc21qXY83>nr`$RG1W4^jO7TmS zwK!mfkPw11;HYu$a$&B6m#z}#u9G=){Jb6nEEAhmwQB7AnRjJlnQT><$i3s9Fb}!Y z4XlqGDenu1?;fjz9gJSp<|^cMFE;K(jW;uqn;+cFW4ebahp|EJo=>qR&GGzjpM>5; zNsI082(LUr>q;S8@mM1$)n#rNu+_CeAvayB0a%X@sgyd zOP*R*F>yWfUMu@24kLd;2>hKPjX={>8fR^7T;9kbm!ANlKqMQ`1jj?Ywj zqi|{}s4t;#;?n}tY|)WA+2-Qf==8cZV|$mZ{~&8&wegevEo5^G)4*Z&VipZx`r~|U zi_AYq;l1N>GwA_Vo`1A6Ed1x}q1i&9ZNZNov8}9sscr{8?+WyvoMCjl`pG&B-$rds zQ{59=u-N3?EVAGfp=$sUZgD#E_Z9<)OX_8d@VKYIip*>_Vt{iZkGr}GqF4OXP3%}@{4j>j8bqLecAgy?=TNcPTY^mWbeA0yV`J;0YtYy*vZ+}+4Ry<39)iD* zPb?DfK9~Tl7B%HBQYs1&6yLc>fM&Z+D_p!{(H`fkqyroK$OPG{wi^tsS`u<}i 
zF1Bmi&e4w6jz`j$)0tN-5l;@AQs_INv!$~GT_iqH^fHd}ZBN;xg`tb7sUN+F3;y6c zOBUe!(^6Y;H?jC*GuFyX)9iF0%eZ9_wf1Mx^gb^QZXAR>7*jewF1#+2u$7PDgof64 z;GkP*bY`T&uu|EM8L4xgL}a2?M6J%s?eiPvLA6wuvFdj~1iH`2?%o{J`q&Flq>Z=H zu2m)Hhj(dSBXwpv9psBW(_OEon|}1If6cpUoccMRnB0nJWwP-vEc)?37vYm^71#!{ zabLD?$l*Z)T2)Ii-r_9nEKRb74z*Z?Wf+Dl_H50HdvmNlYrx==aM~V2Tb%1+75!2g zB2sDn6Fd~0nJ8~OuD+t}GpmL)zKmSn)h~dvZv)BY$lZ7zZHb=W>iOa=_JbfA3msc8xAG#r& z0S$I1&?i}p#tCI} zjvp&5)zMvck(+f0MHjKMmvFiSNgKpdc6&Re4G7Hwaa!F{X%<2pp|F_=J|X%HvT2)* z&V2^RKN2nKbl+=~2|0RB3!CPbL(ky|P4es9h1gWW0~P`Idi$c=%pEbQ9b5oMB5>iG zxzc=yFza_Y2~K}$|B!J=g{cT5VL{PDZk9}C>&=Tq2FnOg(_{U9O`BOmVel-R8h5pI z%7^XWVnySz475Pc21m?Y|9cs-2UhB%gt6caGn*a{Y2i<2@%{I?PLmUcgk!MsLW<21 zj8M}ijQ+YDd_4dMoRndR5rgsCo9du_@V+-N@aq4Ao_y4`)x=boyjr(Ba)K zi%1^}5|jt!kBedfTl2hwCQvyA?KlT_*~Bu9v_@OSH|tt3P&j8y)1&38rD}59`r6T5NHI>Wq3cp879Q{b2S(D-WXum*)^}~4 zv=qBKs0{P4vkTk{)HgtTevfAr+QmV-kON(Pd-8fdFBf59qt%Ki)riH$u(eAyYGL!I zbG2+zNAn#C^bLnJejdjy`3)>ifCrTNvG_j7&M-{%InA9%3fyh<>ZztBi8*v|6QK~&9#H~h*uxpWZ&3PJXd`>~Y(dQF*tCl-KO=-jM$ zgJPbG?G6(G5yK-yFuhW5QmLBWEigpL@B~C=ungp7a6_v@gD7>0MjO>s9Amp$!agnZ z+;YS1H~1PBT;L(;*!xdB(G%OYG0E#X?a&ST_cMXx_`eH${Iq+MYRN4Sn+?tP1g79?*eSbx0HxM{eBG~YNJ7a^8x5kqv=RZUiVW-j zc#C;g`iU*`Go6#EH6Hvyvz+H}Ujb&8xmiLur4*(O254@Bsdi0=9ZfThHm)6PosmI5 zfJxhZ;0t0s~CMym=93QPt&4r#TSFd9iG z>Otfp5ffrDz81l=(k;n0WF`V0 zpW@6iiLwCe{L+0(YkA^Io$&Q`u&Y`8{0r>*@8$I@SftijAcPnmAM7{97($1D;<+nu zPw#4mGyjp!L|=H@HMDZ|b@bcyw?F7he`Leki0F=44CY@*l$a6?cMge;1#Ag-q0|Fe zN>#(1ca8!i-O zsn<-+uq){sZ3poZSO*Hll1F`Yt;YAq#Os_R-`V=$g6 zwgO8MLL5$=>{ar=_0D(IPGJbU?j|%Gx}6~IjK`sQ|A$1e=H<@ku^KTc*DRJ$#;U%2 ztf6jz2HqkPe34hor`ZkSq91f)hRZfOT3IY;zQwuJcHKJ%?|i!12ry^%oX9`7e^3iQ zJU}wQS&V6&AU3`Xn9#2u$QkTRhE4`Z_aCP zxs@fpwuW7ExmkU;wEUij4t?H2ImP?B@>v;!?l!debrF3BP0}f4#59_w&y_DBFZ-KV zn3UJ*8{Z)HQuowc6>c)B+~PLV1``vKcZ$L3GbV>xU29Yb{_Kx2D{9(C>GaXIS|%*+ z*GPCsazv#WGMlq-Lk{vI!z>s2UN=w-`f_;Vz9W#}BIw!JbW%PlA^vJ3Nd?Gci0 zfZvjNlHa$ses+CD#q}F>m=4T8XMz(Vlbu{`L|A#$mVM;G%E|{fsJtq$^&ZB3aO$xO z!Y_!QWv`!qpA+8jwuO6?Yk5a(`s-`E_70=JzkO2jc6CLjq`0dx_x|Qa5tAdW7H5a5 z%H&1G)?bKgo7!$<@%B?B-EbPiYZDhIhTu;(2f`EX5>L#$dNvJ9q+|Ib51vTQ+iPh0 zgFVcgVWY9%FJAL}5>D9jgP}sJd#`C?%P#Y@yAX*+ECmTTDSTZTwOI14+cyUuV%ug` z4f?XJPE=o5XesSsvuz52*b!!)kGD?_7Y)v3b$eH7brwUxxqv1mrNBbD+?NrUvw!G& z=T#1uM%Hk%w(0wS=Bf=MMk6^e_cNfP>u#)=CLl*uaaofS>aHHVX7`wVNTE*@tG#=e zTBJjO2E66ACFpolS#i|~=%;l=IDt!0Cet4B6s*a84;a_Gz|0gQTCN_4JQc0}N)1~m z^hO1GQ&J@BwGjMPldhSrF{?zBYF(d5bR1gvra40zLsx8|?tK<|7Ou(d$CID$=_72v z_sTrGkEl%Rv>Qre>d?wSNZ`f9+dhQC?P<^H`=h zBWZmmmUt?W81!gPrPu*T-lcB4Z5uG%_H^KHLQY zdgt+Puz&mZ*Sxit&3q~4`Mm{YD|O47aDzuHo+4ijJL7+bNs zG=JNQS~APcfvVEfx1&0vlY#gu+xSZJ*^1Di${;L_C&DtTQPw~fLxYHv4F>R4?hTj5 z2D8B!A)CFR5hNyRxK;;XzG`au%5kt2p<~_{gr!;|ESqB}YamO*xMIv=25waNiWb?9 zMWfO#w`GnJhjX;wOTbn!P<5cHG_!zhWs(TQSK1$z<~J2zrlPEY%-BtC0h`Pwv&le} z!wjIA4pn7I zP`OhMXF3fGfcXl!p~DppOB0E(OdiS_$Q}q$*%lR76>@XcN0)!RQS|Mrr_q-{tAxv$$-%2O01W6w`dn>a!4`e zE)&FpTa%M7WUE*3bCfLft$Wdw|A0wOf5oZb>i|rEL zTAX9QiOuA_Y>G#7Fx_q&hm*9Fe1mPX&}^}_p5fRoQ;<(sy~Gl`TU{x0G@@s_3}Yt$ z>P+FuOu<)aduHFCnY`+;{x$yBfut2|8v@jtvgO_u zoMV60XY$@Di^sRLkqbtpzO`5=1cfGkJ;SptWP$vUDnMj!O#)I;PWhTi&R)F!#lUj`I4OE;-KjZ zt>KD>k>jx1te~Mc3L3G!fEE=r*dhQ*63pHf2qWuOF~^&QLz`=eo(CAF@t zi@)vz9V+j8#lf)yP26OAHt_fge^@Y65Fk7@e%ar17zAyC9fo_A)TKFnFSuFkZ`$bM z%fxHNok6vyCrN6kOH%9E89twY z1c1l+UF6AhJ6tgcMAS#y&CV>EmZ4nG@>kvT9P`?R=^c6A!;H31c{sDey?($Z{r>L- zNx8E6j0#h8CC{s@_}@;oKHw6~^j%#%KWmm{E~D~S0SVgV24Kzrp7dS6S4r8cq}C`& zJ$kfoC`3&1w;8>(C}7cdFO>JJ#QAfA`Q~Srj{4xxU0eL~N*d3X?B*vpr?<(|AT&lW zHAv#e`Ga|m^(tfz$U{EtC_o{KP`nKhD8WG-!eJc2Q5?f@oWRN7c?y(|Uh4h4uKLTO zY0VvD)k7}7OuO~n*PdK8VO}!gZB&*Gwr?%yTmuY$bQK>zAODa9)WE7% 
zrdLaKBqf8DrJj#*g6>_L;EmQaWv)yhHHZy22(_5QR!VN<$2Le-YMv37kx0H{rWQyB z<|rI8_y+I}iO8v(D~Qe1V+wA36Y4Oznk|}JMWK?Hk$X^E z!UkFGA&1>Rauw^v)oi5YD(}UJy3htlqfY~@Gg~BMVu^;+CMZOlhmB6<~(VwoK9HT$V|cm+}V1Yr5RO2zz-|9->JAT@lci z{3|hhf@SHFYyqpWkK-kP>(a$Xlezx9cCSQmtzX3}+La|;3@km_%n7VLXH#@_2F~k> zkuv|Bn+dNE9Ou07O^S)pTHosZj*_*XihX)`Dx}g^)VJ9dMAwY!%cE%Xa z;uZKGZOoKW6&0*ns{QJ>im95KC{oGF)1XW3um9E&eYO6lquSLyt?7&m1&XJBl%b7u z8-=MzBkIr{JDGbHI*6W;kLO47NAq_C|13bOhn-d6D|o(OlrS;EoAE!`|Jm2WZer`^ zL)80kMZU$}496FV<2yTK8`)2uRZJ8=;=XpV#iPY|DMi&%oz&(MXGx%BYxK|P!|40y zr=>@h=1a}8&)!~0Z&`z`39flI-m`qm@`LkVrJqRNNxmwz@uU1w*Ok`@$=O}MVJs)%fE9zem-@6 z`Mg?D#nUQADjuwOzOu2BSt+lCD~ZZj$elQ7bHQU^&yGLT%PAgP};OWI3%kMuF= z2hu6h-&I%@rAky~s7h9iS8cCC^<_#1Q;MhDQ~i|c7ge{?Kc~;63mdu3SP7@vYDpba zm)C#3ZO`UUo0FT1TWqPR6}3_AeeEx{zt_>N&(_>p^XZQ1?YU~TPA{y2)nBu{!6#x^_H0LszrL@CeWRzS* zZYA@`Ch|GT%P1$PJ=Axp7iohu11&(?Mq8%8jsE}opYbtM#Qd5iV(n%Fb|3o)CznHT z4shGKo49vyAK|{lJ;?ondzO2^LwIX=v;2nFfST{&$G}KnGyhKh>)@yL%hw5Fy0wBj z0a;K;3!XV#FfDjV^KcpKA$w+o&%OJGd!=w|`ESe5F2A|_>GGbp{q(j|Z<~AD`rA(4 zj`fPe?r?c{ba+8@yXfJ!kZKoXP(T6(fM!IXMKS^ci+qG})VLYMBYy@GxbUtJ!Un?- zHZY(9pRX;F{Hs#P>w81ze=ov*4fL=frQeoD-M(*1zcYBoLnm)6zYZ8p@4qjY;{Wm8 zrXUI2lonOJs4b>t2Q^#xNCg7l1hWA5NPuKyP(!A#?iOWe3?KWZI!w~oj6Ua&XG#Bd!a81<)$nI-oQQ zA9;9U_~8)912UpjDyi?Sfg!G|6ycmapna{HLR6~BB(6ld74okzYs%lL54vKZ{lbmS zWksjazGwM<+q(&}xAl}rO}7AJr#fa*uB+m zfr=vK0I5Dr0G2?Tw4YA6?Nll*XNG!xjT{x__EFZ_mOa%o%xy(?JXpjMqtH__V1#kZ z+-68=0^EG4-cNJ8#|b}W0lXB9)Q=L%rcU+TTLpx&2#W-#)f$`7L`MWYc3sm9%SL*yWyKu=taJm z(zCU(kxPO)X3BYga}okV6vpOBpu+g7X`DnvnCy?7f9C*Ay_f(D@KG&Q z(Yq%Fx>BJ*MlMkcAdxoCgy1-=XYj8uv@X1J!G@A;21$GSI$7Xs_dg{ffrwK-&fuic zUt_3&xy_Xv`}7CH5S~Gr%MgHBkXQ^N>?Fx!NzqA?IGdhx^?XOfFhOVs5iIH|({~0b zTCz^_>{I`5!iNXml-}Q)d`5y^8ifkjri+Iq z;Y%xubhXm;@0W3%{!^EXWm{$jzvT3sn>CR6jB6kQ7)-gt&ybT?m@>>fR=XYhVNnyVpHnu1zrql+3lB3mW8pr({*(y6*(V|#a@H%T>`z_NUHOB9 zpT*pAq--A=`mO0t8=)N6G&JF)qyrNG5GNa27?a9K(Dm8BFK4eB+qQ<}2o6?rwc6Kj|7zLOzn-``@irQJ{_;u(b;y8Vclru4)cmgh ziSE9Sy{Vh~R{2l*_LdO!{4KG)zRnaJY*9;qBW7p6wgkVv?S|`o*!jA(^jZ|88!Us$ zJoqh#6AlkrHn$*zFm{((4*tts1GUP^w3MFOK)Xtk0xQwVTHs$y8-7y3F}z#vEGoH> z_P2Y(Wdp@Zt4EE{n{{MAz%z4g_luFkXFs5B{ZAsya&rc*)1?7)dfV#?3sp97tlAHF zAdPq*_JHWWbAV=FOkm>F-e?7jB9(@1%eVwg10&rA0z^Iz+X^mvgt+bH&U*{zlA?;D z6ILYy!>ILkXx;=OREMJEiU42&jNrS(h%t_`UM4X1jrnvp9o5d38n0B&Oe{Qrl1V2p z?~RSA0Zwc;@Owc~jRAri!r%`8AVdMO&qKrRGo72_h;7!BkHWsNG24O&wxs8M!9(}Xqj#d)hFL zT$(dNU6ox&0j@!uxJAlv$G(l*5nP>Wz+h3z1&RXzv77>PnOiVT6C&0$3+ITS_MJ<)g2syx#iFRI69egCa(F7l;bt+3d?Pa_|N&;)_4}+U}Dx|5>ekG6)Zb z??Si@I9h}7T2SINedx@G&%J-&zrDJaah`0~g@Qe)y@!SYH?$G{Z13{O_jeAE^kRY! z7XiR4ZhT*2_{{U~B*Ifcc z01k#uTY@>eVNlLfU>IPtk~-nw!6x?sBn=~e9^ktiA(XJUi}7x!7*8R`!3n49T>ItL zmL5geB9T=9(Ob6Doz*@$3KaF>Z(Bd9tdHxui;}D7lzsf=7vifPoHVaR)>IT@s9JEt z?AKDk8eW$dmqu1Q(%RYMCg{V_w=X5bnHz-bt zE?0mgr}YEMeP3^yT4YIDG$h2Lb7`kD0c?v4uU)tc^PYk z6);rifR1)>#T2b9mEk_UTOyj@H}LDx`@v zAXR&G{q#=CQ}0LVgcu|Ph7rZLhGP`-?qB`>WGLYFK7RNV03@V$vvkk)_1B(kwa9Z0 z+EG`P3efsN2e{AzDY+tC)%(ywg`56)%Z0(>$ff2+7c{g-xT8MqHoa_yHHV$eo)J+s zcK5&-H=+N3(A!$}rnW{kDu_DOI3zdgoH5vwb!_K!@&N|^fpAau9wrvyl!6J68-#-? zwKwokfZi^Z^;nPJa7+j8V2#em9T5@CsCpRxht9V%rfccr_(a4}FKnp;jbGy4 z*3f++VJbIhOtvYDC}<#;8HZ8_r_&8JnPZ!k1Ds;yK$h%saL=>3tS_j!XJ$%eM?w+r z8*suEB!ys4jaZCRRs(MafVqZg&FPqq`1w}BM_qyBUXX--Y9h zoJ!A=<#jEi!gn>wJUHZH}d*%Y{qdrv1y zpORfNmGf#a!09#)%p6n-d+L&p*7RaCs`Dh2ggu(@Vgi2w0K#=tUSe#(~X^lV<{T>2Z@7Wva#sd6J276XP_LtQE|6pJV=g8okG&1Z! 
zXx#xf3}Wm|5YPcYkI|jeCttXDPFd%nByR!mwlk#Uwrd#p-wveBBiLw`1b*f2CDBN=-A zb4~S;1ODOmiFsG<^?$$uAQ4Ug0--yKCaho8{Pd6POw2OOZ-OG(m3@wY>>CJ^mI2VM1;Oqme~2t&XCfPfw-{;buUoKxksOFxl{ zaDfFW%&bevtEWzZjj*@(%83w`A}+eMnN!4PXGweZ2@!q)DK6{@ z5Cb@6-2|g-&JrzHnV|MshVD*Dyt)iorkU=lK#n!RNha$v2^qAk96UdyT-wwwXu2KL z(GdGP!zh)%c0XhrzlOEw%M~EpiG3$l;FN;W1b`q=$62Az2`@eA^sWjpU~C>0dJ|{y zM|vNvKpPlXdM08xBwk^|omC;6CfYuN6FnqokFJsvc*mP;k16=M1_;K>;tg1^?%YsW zxjR5vg8Xc#FHM?(Q(T~r>Y$J;CPIv7y~3dqyMIdDB(KoEBZ4$`SP?B3Yo(7$S)&kw zArYg9r)gn$Oqv=NZ7HrA1c8p}UVMwHv**kg8^wsIN;ue!YuYWvmGe&_rgF-#(8MY1CeCh2kU{MPLGdUn0qRH)yT9IO zAlk=*6mZBW8z>f2yChP85%g#9 zc;Js-%nF}{!1sb>Xe*N$O~o^+7fb<5=g2CR?V6uVIBB~$l2e)@;5*S&LatlqB`1Uh zEFpznD?K5^IIaCS=s2_>0$9{zxmP;7t(zSk`wNx0_S9`@p|mN3Cg0Upp)zVe155(y ze3GiRMtClW>-(&;);gc%v&1tNr_6xofHXbBZQHQ-yh6B$Q`ar3WUVebo?&^zX0edK z`RnOliE&vr8t$=1+$1XU60Lg$lXVj}>}al)SXTDCw*@8&GUqR8i2!*H1gI+|u)SQF}js3xDL$-0kR{byAdQ8o$vct?5Em7u^9+}SSZ_XJya8Ylb|E|SAW zWSr|O9Qg`PXV&(X7qsV-4L6Rn{4FzteF07k@+{u5s;rCI5f_u^nSqqAI9^RHM^h@k z*KwxH#jJlT)twF*Df@ybGcAs;XTF#nF=Gja%|25UP{JctCFW2@c?VJygDp9VQr!W8 zBn(wTGSfoJRL2$MXWdqf&&rvD=5~WwEn>szO?QXW7(zT)MrbnN>oV?mz5R0oyM>#I z*(3%$1q@bdgxR@XQAQoOT}Y_mO5tcXz2}F+<}k%i5nUN9Pw9GlTEDyKCW_+xkhRG19a5B3J~>aQTT@~QLL$2z z65NMkp;b2y^OPcN~VF* zgxWLlJt}rafRo`dZHFj5L$t9;E0t0%*SQv5r_*LI*u;MIPj%UWySg}u0nCu%lIWU3 zOzib*vgfL&{3yQ-y6LH8Sm$*>UQ`@qStUl5_&3!S-s(%9iz^;*vIX0bRP1CH<>_+h zH6>Zq@$s!I3aYJsVMs|8XX`1oNEz%$9l7LKg7f45xg`$8!B!?vl1)6MB>46 z{Z6!rEwdLiK$@#&*gh;XD2{V2)3MHwO2F^M5@G^dVFTLTN451K(NLr=spNnO#^%q0 zEo=#0_h%Xuxwi*V-GE|RqB4t9@|B8g;rX_cN5tA1$h3{lv4!|ml>H1+G&mK%6n&JF zxb+#^g3pP9z~Ale*fz7ms zbvupMwb*lEml&16Hd04+EnET9o3d9EXt+bPo*~K*@F$LTzLPHx#j1J;m!);myqMLB zo3y_9v3WOkDm6DU0-0fp!^9XSU_5&3y+40%Pt3puFzXhaMGWGURa40Q5{x&FnxD=m z42!qo|EbUdmT5tFgGvMwbct{WT_$h}CM^io((ZD|CDntuE%Zo&?WBUA!ZjyyZ+AeN zi32IXg4Nr|P;ScJ@FpqfS~oN`b1m6crV#-XRv% z)SON@vgmVQIIG1{z2T{@zU@A*!T*mWH})VF;XzjkH)SSpM}V${c}0o4+xCPRfz^3@ zl6HhnIw#BEsTnQKcS2&^rp!}^Oocmu!g#%%c2dqbRZ}_2X-2W9oGuL#iw)hhy)2C; zU@mwcD$6AKkOmd+`4>*pPnt0!m+IW0c{(JsRyupB*ic@ zndF1@F_Fuu#Eg|xEk!z*01yv+HV8)XnhFgKv;bhHVcHK!yflsYR$!3D9G4<#opqO{ zVTHMifiv_K6m@VShDqndmAU}{Rq0+wl?@BO8r)=_Ae}OsDUZ(889b{6qulZw6@)5< zwBg%M(g>8f%cRJJ;-!=|>AtLvQ8}iluAa_cFm!c_YWm`~np7ZrQ5$v=6{<|lpf=vD zQLvEL0f(#j@nodsNRz?NCo>^T$M+;{0F;(61xZsd)2xc7dIe`W+wb=phC7u?nivJn zw7v9XWuQ%-d18J8mev_`+icr13TiNdT(dql*rl`!Ag4uV^k0(4FJJQ{o;46$9!U$~ zm_${>2(fA>DKCm;`NY-ckdZbapf?Sc1Ty{X%9idBmyj#bbHd1+T%wu4ZOB&IV91qX zE(S7YRJWN&n0!oJOf@N&1?~>;XAN3XEo7shR!zd{&zw{`Y0~0~nQwfMCm86pxolYY zIG#RYwTx5JHLYx6sfoDF5{(G;w!tnXu@h8Q@R~$|UH@Z%JLLcjgvc@(!7IXI3uWU3 zb_D*{t4J#Cf$NNg!gKKv3b3 z?1qKY9cgiEesJa7NKCsM;VuA|iM~2Lr4Y_s#QHD!iK}RxpN{0P9z22Tr3%M}Kd6Yx zJ+VXAd2}H*;X!RganMPkI;cU^Q_RhpJmKqYNXFy+|96G!l*S@llfyB9fVL1TT-0NiV|cxHoe}3z&PfOYSpWz-M4;M4GwqS-UAjG*;|tJ5U0%M{x~wt@ z!M8|aSbaMr0+#X6YW6k3!7nsm1^iySTjj;U6LhoojA=!sHUfP}+t^Hj7gv$XT`6Gk zpmxF)bczTBEfA$aErO*KMJi7an16BzWqSSgWAC>=@x=%40S$K`tUmcI{90~3-t!$W zHr=dVh|)S?5QBKowU8|;=0I95u06I$Hk3VPyau=9+(*t^j#^8+7F1UTk5BB}iSq_X zU4y`|0rbj)GMG{-bw`Ej)L*^x&c2$j-F4ry9atZV&^-fZwx@3QPNk6m$`uC;YhdF# zgT43S|EN2|${RIs!o5kS2rKBvggxj{!a+HvJS#vd0XgZc$4b+zkkEZYF?`=d4e#9J z(u}!72h}+p>bl`;?3^js(VtkN6vr-2+HIx+IU<)*%m=uxJR#fxcg}JkEn_~|&OTXa z>>mjii&9oh6-VA!9mF7ye~Occ05WaL%!>>c3k$6H)-$1)umzC|!A0{{bP0rsP7s+7 zT!aQtXaK~pCLe{4>|4qNvGY0xLUK8x=Cj$?w747&cCq)&`Hq1&w~5q@*=1lE-bO5r z&y;r^F3#rmA+pqrp-Skr6+$c3ilSu>93Yo$V{ z8|zcA|NG2@)QW4nObD!i_6ek#b4kb2*V~PemTl>jd{#0nYY>x#H@*6~WLKqTp`)wu zYfBS@!KCvkRhlZTs8uIPkB<)4E}tbDBfT#KP&bMDpWs?CYr=m357dQNmpqz2=?cAS zUK$jHkH1rpvaeCDf6x2}t`H`k$o;FxiYUvnpf1EiP&Wx7sH;K<>S_>z3J(No`v98C 
zjQ0?J|Ndfxa<866YD*BTgdFD#WXQs2ezM!-m zlOcMz?U^9-Bv}Arcv%}vuDRoF(C}eg@GS#C?>Po2Z^!DUdJ%qxwnz?t6O1d$$%l0d z*o5@-Ep-y5aZ!lc+JJV&P@5x1C#w~JLwBZgeJ{p#pM8%(AR|s_E+-evcbU@_g@n@J zrf>sfys=y7^9zzT**LdI5T{>35Ih`NPA_`J3|Pds=}+Sf@p)~uz#gXZ5$}A%al%R( ztc2oV{bAf`!2O`RL#Zkp9~|-acubSG#RdKin}4F*2=Vac@2x z{rj`$)E2GrbrL9|8F&tC2~?&0LiI<5rSz4oa8gg%Y5 zAcbjmSSCbou>S@m)ZB#fqu^*O>YB+<%o1|NDtQvhrsl|A&u(=*Ph4F9r)~ z)vI07l;$_Jl$1*fht5)5Gn>Z+ax8YL4bH!iU{ZC9YMn_`$C;I_^68hWoP2&ce#iY?gP~%x9^o5dzPUjh`WaK`3 zqK0tb!vw>(uBv!Yc5^*Q#31lE)=v)uWv&d3F0y#e^XnU8N~PR7AmhcE5|ZX-TyjWK zTro)Ov5-a#%UGdCZ|hnn1nU>f~ z6P}OwGh{ZQEML2$7u)tYmkS4*7U(nML>fJ|1@u=bJTVAG!B~{Odg{s3=t6w!sti7e z06Z5AOAoHoR$pMUC!{>digf9Fq{+Dcq|CZSAHPrS`>5E>`6z`iXZRTSEoN&hq?|#& z?-@i|%o#kOI@JFItj#H*txk z*(N@VZ+@qC5B)}b3Y3nSCx<|`sGG$Zbkh=Gm6g@bW>!%+Omn~)*3aOIUGmAvpcYrx zf{T}lTNa@M)(`Uq{TE>QmUd_d6Cl3OET)eS$_$(g5xMn@QFCxSypMeTRaUwyuR$=4 zy|PF4%R_2w`~nl1}*eUSOzXJ7HJB!MJZKKOP>JV>X+QI=4AnCJeL z*09V=4CZ$?I6V(Lzal8S>wJF*H*zehC6h8~bAcm!V=Lb?o(!RId^0Sc+OOJFN!r`B zk}lg{#h8&NtPAW2>^}wbDC9E)038+A8DFZc7On$fDds=(WjuIG_CUlu^MnOaLR|Df znWcmBSY`GW4x^^V+lUD9w|KC#P$>QWgClwI;9zgo4?711|vyfrOIdkd6gF5OQO!6Af&kStmU;gr1#9yP@cP zIi@9beiT~46@OTK)M&Hi{qflPDYW;bC(a(p;Qbtz1Sj=H+>jP>>T%!LN%+%RZbml`U z54gkZ+EWG6An*qZYn6lS(I?V4zDF+=8W2&+OVTw3G4pWEB`*i`?&9$`K5}nwPO}ql z&_8L6;26oq+D3I~s+9|#ze5fN1dAZ$9d`$yy4-PKF%O>j#ed#-g*x0Px!RRfJ>Fl* z@`t@yU%1=^ ze{ZQ5PZj^_uUtp^dGUwB`TCq^?p`y1)!Y2HfOw89D-2;6K=t}>0Eu}kpsef^XB71D zyRceIj3~nINt*^oRU0N9WRai7@w37Eb>iQSbIuEBMM*F?sZr*0unE`dzXN1$83hT_ zY0uCy+$hDhWDyo01)?g(FHIT)!qEpBl{U?y&sgyYj*L6^8^GNT>s12Fb1&0~R_OYA zani^d`i9~BY^(;!j?8@&FeHt3{IR=0#(|=a&zVx+?0^V;9!`+v!Vjql=Wlo16mlLB z>;4T**k8}e-0U ztoipifQB|SsQA(B@7#a`6W}GVZGCpADNk<@y$(+hq8NLIHq}k`RmJr;G{5R5R3_+taP_i786UI!jemO9Z%ML) zpg_-dmx!%UbYX@-ArnT)wN5Ku*BTaOGo!uFG~sBS13k)9oI*$?O9|Wg%tsw(2Fxfy zvI#5`h%OuTX@Zs;4fD2)qnP8+9c>VYv@G0^NX%p+@DcZjx*$cdIS6q4^-8@v`wOHa0ENJs%M^B760 z9ZRQLT{ddv+^6sBpH$DPS(b8HzoF6X_*o|^M6$FP&fG-j{AF zy8UgI^SlgZ`&}l+1?6IQ<*BQ|_`L>xI_1mOCvT(QGMpFjnlRDlFrtpqsi4O)P;{Z+ zrKDT@A*x}#Cyc^TGBHNN(snRqmjzF6>#vX7pCOG=;JD+@Nh`Z_!0c{08jKfSa%fY; zic1ThvtGIz0zsWFN*PRK1%uSvE0Am&rgHC2Iie8HvFbboZS@0u6xe&v>#D&&W0O(@ z%yw=S0|%WGl0>)i;rhB~BeY0$rOJL@@os(tcBZNu-aE-Q!&#?VnGPI^ z`pMwoCQxzZnu>w_Nqe6aWEp>9BtHff0~QI_wO7GAeP5mdJNGIz_dgu~`?Qe}E!RC; zHM%F!W|N=!+;C@uT}>9zdF9;J1?xhL#G^69sZ!M4bB&vvYNkL*c|F%-zB|=T4Jgqs z$;KN&xP=T^=={0mm3cBPS8HAM)~0@kI!h}GJI1X|mw<|;msKRo&9XQ>aG{?7^9htrgsL=-3D{h*UAZ=oayNj>mQ=^;Vq5V0I+FA)g4Oekfm~J z_btYkT#$;zn|D~UGN3Yx1GZCYyOzfD7){6~(3k;NP>QD{1|;OVRU?PRt2fmnUY;^K=pUjkwzu1`J`pO?ufVe+Zcf!gmN`R4Hj%Ikrt2r>A)uBnS#o zO%o?kopQcZM*{vXG=kyGNso8*Tgj>**lR7#W}cue+i_ic$baPlTuVSb!`~cNK6|f6 z(M*$B?~xXEq#zZf@5jK@8Szf7VBRg8(xx37;PZQqrwX__|I6luj9kxYUPPLYBoViHyM(Z6n&nRfp_A2|qhwElW)0qMtkQvHy0|!yG z*)=VzU5`akc8R$Lw2}h*6G?Zghi^d%Z~Dica%EWtCrV;j3Q3pD_^CNAkWN3NE(uJBASSmLGc-nvoZPZdCKm4^0!ylF20fOAy~Pq&6d76beWW{K z8Xk+5nj4T{r9s)})Q94ikFCLz-tohP&l#OKt)groU@)_?vY286shc!3N+hhI2}#0W z1=Tprf&Oq2w;S%5Bo6k~R&rgp5BfQn0om2-XT~Goa3;&>a0B4vu9E%~ZoFjBna1do zKu0cI=iJP=kjV&f-O|~uh||o?C^_om7o5?O?#&ZRhCWH^)o&Ch(i6fVRZZbY4J!-` zTNR96Vs_w}*qEt$9}WcUmnwY-eMH8eK%z=3@>P-#c}57MOornUc3lW_PnO%ob(zWn z-9lu%Zx^%47&4DszsZ!+Luhy+Y#;Z?j55Y_)+mX}TH5#yk+&)!~5`)bVa_89x#vQI-{7AHC3J z3d>3W4}pHaM_Sg&;lXuh!Cs!ZYR`8uTB$VOJy}^jgTnxgM=9xnq$Dz|FPli-x&lz{V!!pGIc-G_FUFD!%Ni8on}T&;>V z73Wc7OZ6ZSfz3VSMRIi5b{#UFy9P_;3^@=u(i$ge>Qgwy=&rSy#?{c63F)au>dQ?! 
z*hYScMIFLcBmE9;=dJ#R#q7QVX=~GDl~w@r6(~Y}VuEgVw3WS%WpP|IpV5j7I8pid z3W`{=u2mqkuImuljPuM7_uc^1Y`f10P~T)I0!_X)p~>)Y^1MixanD=Ho+Cy%&=hI1 zGGE6_{aI10E*%C*1(2J<_A|}Vw4eA|-p4caLtutmV@dZ`n!A7wr%JVJa4k^lN9M|o zD(l>9fopK9UH0wA9ASAYl<$N}t{;9j*mlQ?VN|Ql=H%?zQRN5y>(qv!G|=94++yzi zE7=s@a4(%H3$%}LC94r(+mEt+PTCIKJfwKjIxh>#f_=#&Ma`|kd8Y8cb^tdFiWVBC z5ZgnwUspAR%%nwQy-R_`E%v}}9$e43aR98}6LMQwUFcy?IHt&V^;e2WW}hiN971PU z$e<})dU#BXgrTUa__H^kaGSvQxebGU&ry%VF9dz?BJYR@!ws8saM4i}u951F^B6}y zPb<60ajH2}lg4q24yCN7_c+cPqWyM12^a!#-aY2UIAMw^!` z$6KXEtn<>W?MOfOEDSsl0s%lj4=o5Eq2Qo#ytBB%W zI~6-gU#M|ZlZ*;nzO$P!GH3g0f6?+z_0v5@_Ct?ReI4BW>`kMg0k`RYTC!+;j+T^} z<13$i_IWiA?Cj?owSXWsFPZ@o_@|O(g#DkWGl_ z`6X>_ZVocC^BTskHwl0dk8$=+x9(mTqx~kU$2hJe;Ksy@w~eDMcFulX1B|GeAf&)( zqp%4hwu%#Xhk|F{!quM{oo-FPa+>JWUj6km4w`zXncwL`rA6gHk1ju0R@F2pR;#8_ zvNF%(_?5?Z+gSbOQL|c=>#Mw4E)S3MJ_}vbIHUwuT$#?YQ%tr&=UCBm+B>A-9^1tPcT+=G8`uOA zHF1&rC$1?;wT&=IEK6Y03A| z4&FJ073RQhmp*!0ek)xl@`)~FGt%TBe)B;eK)lIbSn?G%(q{a!>AQQAu%OrWF5`Ub9IN87^$`(UDbQp62;Ff9S39Mz{S3hbWH!zkB^8 zyI7F}M0S4Lz8S8;&4ik@Q?nj|e3St<^5902?Agao$J5b7mZqchVFu>IiMA3>I{vVF z8y57g@*1_`|A3t-F(Ut*@a$?G?gS9?X0)OSEAQ>nn0P5Iz6xgGq{Aic&noAVvj7rl z$avoZ*H>u|Ya8BHC~MlBc@JkA24iH#8^V+puQWidCW(4r$g#|nyvta=d&(%5MX?;8 zyJzx+iOGrIwWLC~Hq)aJF+uH_TaC(Tx=#lobeG{eKZ~p7HMN>l$ARJ9nXA|?E@;;7 zH>-ZMfaVx(c{V$`sOsw_Fq8Q7`1E#n%b#bysqpfhK#&Z9zerFMMe?V(=*AeF#I$WT zz;dLcWD!aBa$a&|UA&Ag)H)L-t9xtn3y<*OcxX&*w6Js=Ez7fIUSpuM?7x^*%T@Ix zXqYNn{tYcYDO|Y6sP2bDla0ToQP|;%``?_S8K~DB_$ppKb>Grf7=OmFMMcWphqHYy z1z=7LwESjG9NJhGgjP$4wZwzR$EW{q;GWNJeDJ{=cWeG%&yBs^cRcpk7A=I%NT2j|OM**>F^)ya8yX-6+i z+hswA7GyF3A&G!v44YUG?$7JQ11opS0-aX%q-L%sDasKnjr-Lv{_PhRiY_nmza zS)oan0KMV0#w{>fqEtJl{C0O-C-&MmbD!>>?=z9W@9TV%$9aTUMnSbRNV8M3J|`ai zeiN*`AA(dh`*8`rijf+!udaywn@x;%-+Nax$9d%vmTFt+0o1##&J;U9svl{J;lZ+m~eMf`|3To~%DrNvn9uXpI(8O*L&oli|% zV$J(Os{K2XRUOQ<5T|x+jJSZ5$noRjPAU$$3+h1bRY3E=pPWzp{61465h$MwaLmdhfCBO_)k1>)W zQ>!dUi+QdtF>c~JLuRx3&jlW@nr#N&`Tag|ruBGMkW>1zgj0Kj48pJu@_DP-Ri-_aw6@`>sZ9sBD62 zRyHzE5SoKJESn25i;?`)QLb($I-T$7Mikbhv|E8Gp-ndwRtkA^g~rW+D5$tQDHV2I zs9@GAlnodG#zK6Ub(rK2|48vQiIb_!MN3J8bF`aHQIhTQD;}Tv@Bdp=LKT!slSYvk z@h(nt*^GGwi^-jV=$V}9_K7(&6JYm=IdY5jX?6moP1F^y+C(Y%!ys+(%R$=X*Mr31 z_Ztbc9uo53i5h(SfB-Nc{gt3sZod3~i{qXGaQokpB@4ivI|j^jEg8*bX}tn)0|O8d z`ac2e^M3xgbJgY_u{7UIqf7n0)?eOm4g+&i?>1~<{p=SRH@w7}ItZ6X3dcJ6Xa7Gi z_P@s}3H&yzySQp<*Eys&Q*exbNP8N<)2$D&QfptF30ybUPf3q{?k_WL*u=^Z+D8`p z?1%85IZ5+*@sLgEHv*K~poL-j?9)WVIu*EHOn(j6wi>!XdbPT2%Y_S ztxK~cJyGGEPjtzg`m2|^2d*0!dk220h4525#t<))TGnc5UBd{2uGuC8MOtI|HV(17 zhVXp}SUi3SaHcc{4Og41vFpyi630*!SYx!CbN5+~8NObSy^i{@Fll2qrVY;l^Bju~ zB=i3wZtg)0KnlRgX*AN0k^d6Z1#lMN(ES(!=#sy=p)COj3u@7M zrOpDt0@b)vAPFJl@FLOv{((4O2NTu^G?CM5JKmH5Fye}AwxoF zLXJPuD-?*%t2@YWZfS!o8nQ#~qsW9?^dCDa=?z_+x}3 zHqZqfjG!TqM`t7MW~V*x90m7yqph4v*MvIZ=zr9EdD@MAi7E@oC_lWO0V^{;g&7Ob z5Jt3y7c+Ht13DOK;N{`zcJA+>Hfrue;zLjNy2sr^Adk-G=Gbkag;AX3bSr-zZ?TMi z(8x)MgN4$~AF=G6>SSTBQjc2c$dXsghWK=#j#7y75M8y3uHpKwJ|Wq&=e?hEY4CDG z>88OyXvz4gj`XA1Cd#GJiPOk|5vroRiq13n4P*@GGGGOlNI>B%~d-i5 zxm?pV(vL~H1R!t-bSjWIoG>6c$cqpIS^ek|M<56qB43IO%j) zoQWr;LQ}?+a$iZ4%8-PmD%Dvp+}V_-#yRJG!vz;9E~2=|;-bp9&x|8|u!d=WH>GOC znby!OhyRJE`~ThcYn-WxrcBZ!h|cxU-#6uJMkPIh7`5L~;*x~Vy`*WHSBMpp-d%M~ zEmO| z|19bYGB7YWWXSNa!I4qsG4Qy*-Z44FLXdn{^g(=KWM;oH<3JYwG!?dO7(om0fRxoO-{#C;w@)Tw83l&2~HNw99UL?6r?g zNJ0BopkT5{v9MGM3@Uo-o^l#&==TjT-hAlz(tr2CE6Wd+QmW5jAAxQBZFNKm5w#Xn z3KdCIM5d1br34n|FNx4TxaGgM1&$p00P(~c zZ!B}*`Dq}_YGTKCo+fwq_EYIhHkU6HOXW(nR&O+0?Xc5b7bQ8DS5!?m%z}mPA{H!j zlg@ru{pcIt`pS2HSR2vD>`tbu-rkK3~-$keItu+Z^z%e~|i zEI7G5N=p7|smHhkf;F3|Ow<3bu&B7wU5Axk9yq8l8U?4gH`TssrwvsVr=6C$cV^Uu 
z3=F$4UNAbdKIJu?GFg8J{Y(RVy9BasN>Y=J;x)3K;Y4Htjh7SCb59_ODj#xsOO0Z zU5wWJ>GV$iAaQ%?;q&t;%2=w_6qStBk7^tW*=K~+o^>QyJx-!p`RPm>?7iSkYB8Z( zOlJzJZi-%JvJ9_k^+Dta&IGMYOhBG@BvaB>2|J}5AlDh-iU1a9o~|T7r*%s`bfJWI z!0kgb9K)!eda^F%d=y*%qb4aZjG-$ru#Balt-w)B7i~!bF_aYe*1pYvscUNpTRjUT zFd-ROrkAajZ3)Yv8?DyJU7@&NG|>j=BRi zC?K7m3QYMmH^(+}9L{m%yf%bYMd@>2HdXAVj<7AkNmh`qllL%GMQ|MAO zg>#Xg8vbdS^BcAKJqW2{JOVv~ws{c*yFgn!*3CfcmxS|Qe_PnsGeJg8OBF+D?ArBx zQE);~UMx=@SKHIvqTM$R-TbWi)CIbEgawN)#-#d!)l4wy9cGsIS8ZdVR~zBoxdA&5DjEPg%WDT|Tbc+A9Qd$BW>@&z6)kMo=@t@oG1+b>1=Fde5_E zg2*jHYF1T}_^a&EZ)-C@q88f6s4N+nvZHt^cAV_GAytC^75X}mNYcRZ6paBDyA}4a zY8Vp=J#06$__vTkO+r`G6pc0AY=3e(PbHqJi_*>G<9N$m=ahW8U#=f4g0`le#;MBl zMk+jbrX0s$+_D8vGd9)gm>~PSe0jfmXpuB_(L1~p7Cgu2*cQ{7Nb1;yIqGHXJX00r jnPZ+rofD|4Ux2$r_T{6(!<1D8OhSpw@$69;c$5eLsssb< literal 0 HcmV?d00001 diff --git a/docs/mkdocs/docs/images/ArcticDB Logo Purple Stacked.png b/docs/mkdocs/docs/images/ArcticDB Logo Purple Stacked.png new file mode 100644 index 0000000000000000000000000000000000000000..44adcc4be88ec89db0b8c12cdfa1e2d82ce69f78 GIT binary patch literal 41521 zcmeEuc{G%5-0-wg$&w}6sYvz^!(g&xDN89@?7PZ7F|zg`Td0SkFqEX7qQzd8q-02? zY-Ln4D$7Ln<@;SXE$@5Y_dDM?-}ldV&ikC_IZxg9bzj$S->-?cFf&}cT3|H}hg)lG zq`w!3V`;(RmesJXgePL2dQ+;2F27l9%@F0!y0AUpahD9I}# zoUny1&cE0B9``?n9q!_+;Cjr}%hj6{0Be-P}>B!fUeW04^L zlB<>k`bbVmQEr?1KBes%O6nS_>W3CqnfkbRx&{BeYMY#r(mo|64OK-ARh3_>K!{u% zNsj-!buP{tZa)5Aj)1(Um!rF@g0Hu`gxGJAYUueK^YMq{032oPY*SMWWA6ZxqqnoG zvA&iB;4km#>7t>C-{#_?qOK&TqKH?Kb5c}MkyCeaa+TYr=Ip4Z?xO6huB`U!dwn10 z01^^p_7j?zb)o<}n$2`R_`Ot85Mslwu;fxsW>LRiD*z>=j$N$mEi{A%%xWcCY zHMbWI4)Ad!1v&b=?r;aP{olx#!heTSfMekQ9-;p;1}}j7H$wkeF#jJTv`9i{4@Ylz zSFpGW5(q^FWSSNjsPLb8i@p2HjQv^;wh{gDw^fF}{I=_^-mtYlSnW_*`wKW+=38U^ z9s9^x-)GHF3%6%~oTPB?em-a||9XHczTu=%`_DUp=MM4bzFVu#y}9wT!4u>2EEy{W zERMYKu`-jpY8;ZQzjU&-|* z!2PO<7w<^V2w$Vj9$NM3kd1@!IyoE;M84~%?MnyOuILk_>$71#Nc9nEtox@$-M3PD zZgxUQ|76bWm(P`5;KpX}rG;Qiazp9@zHmwT&AL|ydn$|z;fV7*r7Z9>7_ZclNp2 zHA7yXD9a-CX!w!1)JJnux7UAouEm0DF<+_yt8PEIdrx!XsgCG(KBWpjW|!vSQu-2x zCNnEIj*cUGH~4d)Q;IJ>9pSI>XBDsGw4Qr&AwqC|E@+G%U(wn923DsUZjN#s?|mL)ixM|MN~h%}e5!(Dkw zh>y&vq6;DH9mWI}+(msN0H3>`!bdI8Cq>qCQZE<^6eP)1hxh9M8aM|N?7eET0QUYw zJiWV-ryJFX-S11RscFa~elugANav=Cu; z%Wb9TY>8?7d{Z}+Wge-!6e`MvokBN{3zejy4bJdlQ0OL;MgBR*z@==YD6eM< z;XI_52m_P9`(t$ccoQ&{0Q$&DA`H*$AH;wYTDJ&A76!$3(|AOdmY_wv+2b%t$<4XQ zM-@%0`SjxrM^*K?5cWs|_|Ckm>c^AB!Y`DiI0rinw>9mbuwBFMBANM886_wlbFQnn z#R{P>{J<9&=WwZmld4ygt=YoCk(EmQLWy~RnC{Jm2z(&pK^>wy+3LU42MitiOj+i@x1Ku4UQRM|W9~Hyng1!6 zb1)b-yND6?_2r|?(B~$k+$4EQxA0S^3PHVzR6xp1`bs4=XY$z3j+Y~OUBx)uYhz6E z{B1GOYLRRsi|7gff|yimV+dZ`*N#b0$w_i^1rik>OjNeErDJq6m82*uAgniHSeuN> zQOx5RL~fAbT_E~Xm}onAhbPGwl0yCZ2!9o86 z0`s~AcQ=rL$9fpD$@+vGVA|{6f)CxcW(^JH;#Vw7-7n<>IU)@)wvx9pL$xI|Eb=`G z=HhGpsk*!3={Q`q0w663LQXh`(PCpf{U>MZ z$%Aq9CR3oF`o9a@S4kn;74j#%Or-a`2TRw&p59zlF2A5PAD`XAHk7FwsV`b5te&vT z{evxsL4}C!gxlaj4{6{|E)T{Xg*bX5sJHnKA|dud9p_HzBzsPvOtKmJUMyOSLYKI_ zEJcE%T*zOurDxYo>$$d^c{&o5l6VANB$;v{j-6mcaJOqC&lP{Ds7Mdt!$m&81m%Po z!Dl6xFzXJ2dpIIpC?Z{?2sbs;KtDEox)E%4w8Y%;LA(c&7l&}fI-+?z{pF_1dOEg* z@gA$V-k)6oEq3rRQFYfaFdLuMdh&oBk>z%#Qr zVc*rvEE9y;dra-*Bq<|eT*5y9hiEwD3?`y2dujL-iOYH-06CJLdx;AMm%z*_Fn08x z{L1nHIb%}y)8~U9?L)PzC{Bl7XsyBEQ27UmQOh!)Gg7JkgmdT`x6 zwe1+4{imPyRak)|<}Cmr{#J1fm+%dG=x$e}a8EI!A0pmz6p_%_*D#s!908oP2@y{v z>wq4wJ}Dz-L}|aVv`zVmZ}~aM!$!tpZf~vRTZ_$H!frv#?}NUc{=;kN&R^C)ES&qA zKQBJc1L^Ta3_J@O{y1mrrHVxNpxC?oLr*#9HjT@c*#7Yykqwx06)++cm`P{|b*oTf z&zu5-Fo59Oj)|85xKJ*A5?On#dboDDUvu^C}o4lV%H!O8u zg3Bc!aTnueW=XDV?cWG55wrtXOyMF%X#)!yCPA^}MFJ{_3HAzZs^>DUY;8{J^#sH` 
zVI+Bvm&Vge;^WNHXGhD+UpRWstbu3?nZRxdgtu%Q*-S8=L25OxE-%kYP6^U8DgYz- zBELlR)dWk#&!7mV-{2nbj;9aw%rx~Zg2zjJ0_l-n;)gm_F*)Fhqbq81Wp4%Y z8vyw~?!llb;G{Z+OK9w0l$pnU82QY_D0;{BNngYlXZ^A6<383*8&em!03MQUe+waUw z<W4*j9+bf>Mz-dij={0P*pQsNykIC_hY#EB8mCFIH+PC8*0 z5j?hDb-rS0=+CGjpU;%MArMh8^P$7Eo23Q7FC8piI%G~@xus7vCls7Oijjz!yxS0p zSxTJH2K$U+#%Gw@KLn9%m6^Uhc(|{K!JjrFn|6B>Wm(F`%L>V4v!4k3A4q8<-*Qn; z7I3wG1MsyF_%j&2_SXe`yrw@oHS@HCUrdzxRI*cFNz5I0B07ABCjK*5Ymrlpgt=f4ik(fYdbz>%b4oOO<)KfWB#;NYfS@0ult|9s0a z7Lc+Ohcm}Wp9CRUX&l3TQ5uLZux|os_%oSt@)XNui12(EJ8snlhr&ecj4lwWT#*%w;_Z*OQDU~dz|kA0y;v13@{ z5!MIP;cc0=K8cz29MO*yiBOb<^tQwxePbEf>@yMyjmyupK%HAeY53w@)1^|mIe$F$ z{$Jq7BJdyULdd^0reu8DT|1=eQ930>{Iow+Z=NK5IdMU}bDc5qjvVBsUf93db7;g} z`*3C8$jp{#PLPwJ^fuWAtAQUwnT0IwVMoI9Czhi2dV*!4gJhr{hNMW*YBI7Rtp0ZP%cvpUwb z)(6}``AZ9C|1BVnJSeQXaKI=!Kov1V8&hHvND6NpL#2eq>({wvUzOV1tr4G92Pud} z#>sx6##5$8aJPbD2B+=>IyiCs)e%Z!($jekhTQ8rpr-U(6&i8zkIErT@A1i(K+ zApn(-n!XWYlggI`RyCESh~d#tZIlDifRDseqNM%`ac)` zjfWu?j6&f-`R0wjU{-OP%AZlQDjPbRvt|9g>3~t9F$4;5tRNDDj0@!E@3g=-3uF`G zvLzI-|D8@m8WhK9!dVbty{{jT5v`aw(rtJ*TN5wmq&c2+uKS6aPIVCY2u$0;Fm1~R zGI=X4Y^brZY(()mk~)SaI&OudS01|T9!w4QpeDn-$G7C87xc33hz;A3{UYo?i%N>Q___eM z_U6CQSBkC#ld1Cy{n;TJ{yw7fM51J?eGpFB_8`FBaj;?J=ELwJo!P_b z&2s~8TO)iRVA`r|t;3!gFUomwI8>RPhVPy~keaaAE3Ae1jdNh#h3U>2iPzimEBfb>BL8WbC zItnpc#A$eCZYur9)Q9TPi)V~k9eY1)=Bm`W%AEOCkt3!Hzt94SMFm?R)47TA&iS12 z`JVyb#Oq4_4dMG?)K}t%qD9+Lzp$|~h+%M%^(b!6h0McEl45Q0?0cbl{5R|H*;f(I zj)&${4k8=ra2H{OvV7R|KIgV+DKpyCw}*D2kTGh9bZ-QtAJZ@poiGrOZgv^dV{_z@ zV$Q#bCTo8#ZWl)C^oa|LPEmarSW#o6G5s*}QU=r`qrUzJU-2Su;`@CY%(WNxOg#xl zxm~UgQY`Sx)C(8LRTfsuf-Kv>zG{b{mzqZi96PBSEA8PE>3+Ko^$8_Cxkrd%9RU;PctQ80s6_Bl4P(|Y0CU~dT zug$%1iN?(2QpoNxWKIGyn<)%=2k`0; zDtttq#)uG<8CS(G9dnd6X&ujabK&X8LPMw+*ky=`({_+ zXOX;k{n1GjceP-3d?-~YC-T=A5r)=tRZ4=oe}20wJs_pkMPlkZhV8=)#M!N4hDu6I zwW?bKKx&xh5Iz|sy=~D@QQz`)I9u-;M12{N1 zj!sB8$M~G@*YRlTTl$A4EyK=(@pI(A!v)+I4C9yRSjxyIIB-?wn{}_*uY zF$sGYz#}o$wki{9#nqp%cYVTOU@RcafMbNt1zpy$V{_be>}`W)E63z%kY}}n$XTcY z^J2)&vRJ?<)li$nNmCq`HT@V;q8?@!FOL*42$=y3CA3j^Qy=ZUJ)5)Nv4=xk-Qyn2 zDSil(fQ;KwdP$5ykW48RA_tWt+4qL?RrugRf2P*+yr9lf^BPDc0A@EN3i}YLE{pE} zI$$!IHfAYMXorHTuDsFx3RaiRYu=tatYF)W(q9WKo!tuDgaVSrs60dfNv-`5N5XMF z7}dXQq9~`PjnxNyZMiwnRX?(`|9jvdvQ482sQ01B0wyjlT$V_;zC10WYE@tbTY!{k zB9&-@%C+QW(KWB%#|?(OCTGVnun+%5zDF4-6VEVQnVra}GQ8dTb@9w#7Wgg=&g5=Q z*eGXc_lGu!PEC8uuX`VLlTN7H}9?s5V)G%S=!*mte6$3&MrrE>~cqR zBmnQw>swkft$kzXIP5}I$9eV@oRz`Y@^J}5k^qewSaT?M5~84?8#^IT0)?9pKrDL< zc~z4>pf|cc!J(5Ub_mW|V=hN)gaDGFDp@0cL_&vOg1P ztsm(*bWDFe*pf&oj1t<2S#*-o1EBVJkCQ|hLY0t>_yKJMsMZOW48yAc`%1tCfK1*A zW&_n*5d9nz6Bwwn0)0}oZ-kWtd(9WN&O=)>RI#l`iQQaUi{}1ja*k>F{td0pCYpuw zUq6rRpMh#uu4qCRB=baQcLMYJ<6o#*8&V0yB?5ZyZce!;(g}^{4o3vycPHEqjxd9y zt2(?-Sq34n7a7Cc^aW5X)9uJXnspP;D|DnzDXva^Bu)w78M*ZzAzk_NnLQ8n`fp+=%sri@V$spyT(4@T~CpAS^7W0p|`K(|#X*@n!6 zD}nhM251BBUmNtH$Q2}TANCk(I{*pn+xF`*Qk2{`zaH}j7a!}r$GW4q z1x9*o#&E_BC?3Tc>A1*)h=(P~V2SGjA_FtjP{Is~C5xi8Lk2)g8lI@R56evfkz1In zM%AI>c*uH*Bm1HAzL*W)e4eukb%(LuyAariW3*p}0Y^A!%beNqi&)2JLt!t$W4(}T z_dys0nUzPBLEQ)^^jvT_p(M~cV&3oPSPLF>SO7`XQwZ9iu-dXR5lhV=-P(kN3F<=1 zP(sOFyU?(Q)k4^6AUL+Vbdfd$6x|c^!1iEgUd1}*V3Sb!DDo*tE4JD+5v6^ww-|c7 z^Y>#c+Xc&x>bqyM0YMlKv^MG2+QSQLb3b6Z-eL~A zegXB|awj`rEFLdJlUt2Hq*Fg@@GyxGxPYYXrSY$vq%I7Vl7q=Fp zM4Uqc-i?ZB78`y))wzg0MNx(Y%F+-RViX-{`NCS%zd{w0#iyds4~M4+WX1KrpQdlT z432ObK@2VX1&HJ5#axJT-H37k7Fa$YQQgR79IRT1-cZE@s&XDHm=yv@0!mP&pD2x) zOmCk3aboabE_C|cxDie970X~#jQp>A7m{$%17RWyweCsDf5y0} zowb^Zcc?7~kld)#cjnkfmVI{U}~4jLU5RoS`KXi;AL9_SBvW zIuOt98c!c^-3%yU#qUwbZNPGU9UdiX^P+_RZu^~X8{s$FJkX8qfld#sLmU`Av#`#0 
zlpMY#rf7UpdDl$|4FZMSBaGI_QUJ>qWiV^d7+NrywP2aLkrX|4LQWH~e?skQw6wtl4l@iOT%jP&KI3HS64vmy+ zH2RLi@$^|vy}Q8+<6$VD$B^%<3wWL&ewtQUGPOA0P?rCkJSQg83Z2u6h+Fy)#hpNq z2}@Ee&y&q;za}876}E{ojtK}h5};v1D3E1ll`RZNP3aR1vCmMMR~Wj1=rcadwoefW z?l4e>i~NdhxpDCwHdj@)n?V1j4?1)0JVF*q2f%~ZpV4UASts(wcBTQT%!oku!U~hV9v4if>FF2jstcg(L?;}M4H)9zeOzqBvb{kq$6gRV`6Jo0fdruJV3aP6!WK2nx?2a5&s& zL=!on$sv>J`W@;D6wCWL;Xy%oVbP(hXI7TFOB?7HJb~7ZB(gbp49Ylq+QI-4mXd_< zl;z*YkbyMrD~z__9=Bq9f^=Ubo0%RGMN|-gc-R{W73Kj^vjoi1>nqCGhfC@L_zV#F z3Q$lf0IhZvX4XZLJTm((n<&K|=tBbPZnQWoeprk?lv>8jvO%Q9V&L3Mbpa;O%4n=F zF0Z}JAMp;04vL91d}%!3IP8JdXi@*)lZIElu(PVU0dzCYlfREVPJ7?&IJk$g9UAm; zm)=#72eV-q08CIDs!y~iE*|>uEY*Z`OC4bSb)(7MlezU zMkpSDYejQPMT>arf4UhL*SsvWkf8wg$xn`5r>R{;H8~<+3LEQKi*?5$XBi_c3?0I=he+VMI zs(WDRBju669FefvgAm9{;nUY2f^fbH!+R^>ox9SB7m=%O(&LH#D1_|2XAmUo5#GGq zH=#veAlYnqA&!wPNvXwfBblOFcBDMl%`Gx9CBO(4dfsU#fv4&{mE^#nj1Zn1n<=%m ze@8PGyyzeaCtg>mTtY2UnVEG+UDQ?ba!@jvJp~!dmd%KBD?oo?8uq9fH3g zz<^9$wtV*VXH{lJm=kLhja-r)vJA!+v^q-x_3cQEV2S}5wgRjJh&7{{cIx|xFonKi zq9%!&LF!AHS@*z#{F$Xt)afi$gBE?Fqh=7mf=08*FmZquZK4FF#8J}W9ZcHcT1e;+ zZqO$ZSMC8&ubX)(3LPFdLi}XSWucEC6uP z3^Yo;pM$W&tM+)t2sGtw(O3Q;>ydWci$~$1D^A@>fYCQ##gYqyfUy$A z6+V?b5|n1F%UYZ2bFAZ6X_|B4%Or%}SH0mLjZeplxIE&oMn zLvAL3-a+9xu1qDiybq*?x%py-`fpQ9zM5ATkfsDdX6$>hqz;Z2wF{IAO9-Sqd zvD+rYt6`Yn!$sZzyFx5juDiawH2R5NIS z!WObrFzses#)&@Lf_;Yk0hCUWg~sNL`RXV1z)N`sK$+s?Hnil+{!dT`au8Yg-a}vt zc9-P!oR)Hrn1mS`h#5}a``ccXpzNgtv=+I+byFT?`>qUZH`x3^+5vMVBkJdo0X(>f z#if)7apHGoZYs}jpPL<;>x1DeLi`YqV&Q|VA*gSoF>DXvMLLbPZZ}Oasj?*QzZhP% z43~QWQELAprCJORtEz6DG+VO&=+{XY5d8w(#>lszIl!=;x}sj~CD%S_u7JRZ4i*Z_ z3m_-KSh*^UWMFyu-yt50hgY#fwb7xVRTy(dTUAT*;APR4QYx5_P+8er*}H-+JGV?4=h`F!=EsLLV#-5|mb$({zAgNGPAfn9d6S zYZ_bQ=|oH9O$o@z3M}G^mwIyP#${-psbqt!Jx&8;T%`8xvC69ZDf7_o1WewNQUlh7 zzNy#)cOE));TW;yK#ga+uU)?d~nvnI#D>MX188CZ}%e6=27m?v07icGNL>u9bgJQ7TVFF8;{Ni;7Xd{8P zqf*cj&QJTycWr3S~+y z$H0&dBPEWb%SJ+#8;f#VY|v9qX4YOnf(S^IA?@^#1i&UBocR3|Lk-!piGKhd8h#C; zm@SImRCb}dUkls9#*fi>Edd`s?1*^BC;qokhM@)mI5ns_F2%;s`X|T>6RnVkqf0or zmtfF|RA8S3r5Lw3db9x%I&NTq!VA zTu@B@Q~yPi%O2Z63>^FrU`zf0wgsiy2iSmC8j!4ke5^pTI&LVKdksUsk?atq`~nxN zK>b7~WAcZ@F`|r@|E`2Nqf`gXvs2vI)$7 z0-4xsh zf|L$z23K?<&o3%nRyue%V<6!cgcTpx^({!lv8Qmu2Db$}nDft57&?Xc7}aqP5wBck zV`drn+ARfz1J-=>02+FzpH0{X!u=$86{svCK>=1WAW)19km~KgD7@QsFdksalquFFLJRkzs3_}j1chz+k$Lt8>&6xT0N3rZSV3Z-jUFpxQ zK+sJ{*KxT+K%r+)et?n(Tnb!*<7{0LsSS{#oY)_zrUxd$0VzMaMimKz_t-wDpAF-v z@D$v0Sp8=T8tyPl`{!vgwl9SHkrx5~Ma*!>o)JYs zN-ZEr{0#zf$5`j;H2`}$t>!*z2}ZyiLN}V0f!R`sNVkCu90B|C99s01KVO;qr|f`% zYScvmn930R32@Gzi;JWju!6)lz=qsJAj7pqi}53MeNQJVHi`+I9fVIA!Y2V33T8=c zVR95*%mjysc$m8jeH^)9Zv1s9_Sh^qRtU6(3gqLg1r_83o<|x$d><(fP70-{NCo2% z2=WNIz@-|fJB%3&GzInyNqfg`IC;b3$-rBjDbjZ@tOV?}V9Wl&OlfK#i|7}i1Wx3y z3x~MKJj8typgCBE9Sftpp%sGIbSw_{afcP!3~Jg~4vLWbZF6xVh-m0?NaV)F&k-Xr zx7C4I2J`gTrr1;DGf)Z|DfRbL#NLZ=u`9CS@25y?E+Vv`(TnYXCb&=KGZl2vg$!t> zjUXAk3dZZ>(FMw#`HxmNax174ksI`H80yZ*VqxR7HxVm5ffXpLL{=9YSwv+Q4|Jaj z+?Fx8Ep$(yWj&%d574_3A|!AJ{~&iTifRNZCnMR3g$oV{N;ptELdLEX&KiYC3i&;3 zgCN-840H&HQ1Qit0xcDSWHZv{JSs=xL;J?7MR}Nlf-sCVNqh2Sp1DxUWuNLj;OBGZ z)hoCw0phdj@=q8H`k#OPhXVgYf&ULFK(fbL-WJtQe7!hz`J*{&m>~@;(pR5ERg1K_ zsLy_D+>}hat3EYFRX?Edjzc`Ub*?$wHUMlKj%Wtv2Uqf1B|&t0=AHkZD?1W;Q!LqP zGiYMbigD6w^;pI06>qsl^6&Ipw^RD*EN9J@dX)s}?+rY2;`L5CSJlgcW_sp4BHZ`A zU_uve*~;4cy?Jw0EY?fJP>6^VIHqVLC{!I3%3njCOeAWrdU1)h_6)mqnwlM3-Wi%5 zt0RwY$F9DK&dXcIbV^5bhCU0!ZzFtB3oAO_@8HSb=E9#`uVW&;){-@ zN&^l2<87Pf-m0oC^fquC*iIdd%8$zbeBg?XX=>3r%R_^qvO1Uf+rmxi?PP0Ss>D}U zhD=A4UI>wd3yHl?zKt52ytv&t+kfqGjartPb(!Kxl< 
ze#!NX_vb$hdCw5Wh4z=v&rgF(iEN1XoavAX3LJy-_jh3Z6K3OG>sOm@9dzyCVY?0XZ#F-rDb$aiQlA{Cp7FbQWyc^sa?aTE-h6*{HVj(tdD__9 z;jghJJR>Ce;C*I9K*xYgp_zYJ)d6WXU8AG0MOg5xa9}cTUrzblRkQj{YxlC%b_K}V z37E}q)%H!^r4-F6DPGTco;|^r))ceH$WNiJWW95jdFsUBLWc}{9i0I4AD?4=r-FB< zN(hDXCHneu`cTwo(_{U@)`bf?-EBXg4RKy>Th(d0AG)Mi(z(7<=Tymtr=IxD;zhz$ z>)L{xlzJvViFUV8owBz7lew0-Qt4@?cywU)^bqe0@l&_EoSSZ(o6l_1X!8)UByWys z93CFp`eiczoXRR0mQ-3tJYD{Ky|drp-LY)!qTUBrN#DAXrpqcVE6mgT>B7jgG;hdV zR<+tk#{DI=F#{Q4;yihrD}s-0n;U3&807sWlL-ar z++qmPZulZT>SoO-o~}K?n!1DaNm%Jy4Vw)CKbj_QbJAWa;>WILu7CjtKXPHgJ67@V zA>aB*(wogmJhlE8uN<|1I@+kJskg&Uy55o1lRR-Tkx9xgYga6C@Z5ZbbFax!mP@Qs zIpsw;2}SDxfT4MzYF=H|ygfbL>i~dM0N@MroLon(i-F?M`C7r1j#@B~B^V3||8_6} z$sc6i?mb-?9n~+>VUzpNjO~Tz-ea8`RttRjR_It1uzbT6{!`*HjwYU7{fFzVb~}=O zaLC(e2Zs79x6>_%EEMHW|d{ZZ+9 z>+j3ogtcW~vWe2O1olc>ohSI3?S$vLrwg;x!{*!)`d^wyJS;Ab>*GsQXMHRI@n_m` z0QM($!Mwj(bfY(G+klw(?JLH2!+=34X5k?N=epWpwx-+R(RNUjnzyeWO>D{c?YS$?p23RQF~dj+IBF%20Zm z4dol(eQlWmQq49KAG)*pjQ1wgDtObftBO8Lir|R%k->Z3sJ#A;LQ-OiE^B7&5G|hy zYdC8KSy<`~rKmQK1_gvJO5ToEe9!x;adZB}^H&cOQp^ZJj(N>+x0d(~GB2U}sGEoW z$qY#IjX2u)lBqtpx*x)OO}{BanKt0X>Th>m2B-BWw5Phu+pPbZ>v>gtwq|X_PRXbp zalHWV+n#GjRm&pTOJK_HaT_aK=QV-`AT_GD%gjP(zlmdag0;sjRn47ecA3;aW$K(3 zean2$-E#6R+%_Pu>&+@-h0{L6XSHuzTlPuE`~(0fGh-bxP^7R2RHCA2wKsG^z}!N; z7iaE|=-<~zs};+#%wy3j%Uq6f_U7xcXV^{C_6l7!zU!r`v4QQ?m3P{}0IU5vAbN*D z^gjAsRb%X3-EO(Mwriw?!@g&e`uy25>^QNk;w`b_wzs0(?h0g|-hMNI0Vi#RlT!ST zJlal*O75y+?JuCV|Cp1*MP3zTYhQ^GwJgiPMEX_;i?Yg#bdHOKm1jfvPPmV}r_0;q zjKeKG;!%*5(C$$!I!hl-PmSS`z(czyNX8L!b{D=#e_C${cmf#aZD;I0^Hn_#-Me+W z83jLoR!6YdNa@tiV9_2BW-G|_C(7KZw{8bh@uqH_8n|2zoAfqfrd{FkmuL41B{%qu z*Y)2=KSCoa%_}%Uuzk zob+l3^XxbOi{4Ww^0^BXj+L^73>Q?X$T1r z_1#a>Oy0M%03Z5*57CNY1+?IshZ(ohYNp6SLFQR@ha2IR;^V&E*4Yi55m_O8*J?&J zx{i>Znd!UxIlYq|*$UB_2b6yzIYmO-OCv;EyfCWW$y|x%DUO?^A53CT62Dx$B`V*C z*D#zPgiHWxFdu#2lm=zD?aGv*X3al)sR^kgz^!m=XY9^*vgMWAn>_li-E2ueBC`CW z5uk=qfJ`XGeoAMBzmYUM2RqGBic&t@(XHuY{{o2KVm@x2J+GeMmHOD}3Qw$f{QxN3 zd7a7bX1G9f(k`Jl$$h@_{;c_Ydv?_HDD-`Ju;It9Le9K9{i;yM?`ab=?oL1-9bMnWvQ#$2#N^R2@X*JFFh3x z2+D@Z%z2tHHN~TcVmPgwj*Z^5-@Y>HzM5fPc57-lB;+oeDa%GXdrixn+x3h<(N9X2 zR-gOy{Ce-j%pjO|q-YgC2v?NvVI4SRu<~5743GK0d-=O7N7SIB+xN|kz5TpY-$a8_ z{pZ?gncIhX399@leS;1~V>4|u$(Kq31E*`YREX-5b)t)eby=eY6w?1_RKq} z6z2P#BY9TwbPad)i=LD0E^wa-069NUe+T9q0?8wHCZ3gl#6@aog-p%d&q(+tet+8o zq858lOj-ErpwGHyIp2rAhfS3W>QskJ54;HS`EUTj*@1`+>5|k2PBmR4wE(L1P7^=! zca8)#i26cziVl5`@SpbSnHg>F{D7|*YQIHwJe~TpU>2(M&vXeajECGGs*1R3T-5TP zYQ?O6$z2PCzg2n?>S14B`+AXSqmvxYaXQUfYgG1_IL=OpKIJf9+YY}Wa`K+``w#D@ zKaG8;2p%6xdJ`4Z7_@hF;Z>fo0P1lbrBlZ_{tasA?o{gmNS_`l){gR<^yDyF#Xuvg zxLH+j=w{kM;hMYu;b+}^W#6$Z@?Zrb{$@-lpFEmqsoX%mH(jn9M*h)x6kO>FH`OV! z`+S?GkB{1M)!~ixMg7$3?jQZx4<9!4H6Dp`lp2>6kA8A@-f*TTH5@va=!SEPJ@R+? 
zQI9g6NPSD(xcYv8{&nj^3ZvA$_WAh%UzaEq=?CUW!$UYT+DWRuYjmeReiXB#YAt^w z5LQtfF&^ZT!{G?GZO-%eJx>j{C9?3rXfUHNwf6q!+VSk^AY7Ak+0v(hfjHv9sNRl` z@7w5w@7Jz&dk{<;jS=igG+VC{aCo3FQ6?~gakczmkbErvozK}4w+a2$D@YFFFd;k# zfp*6Uf%QE>>~A(i^Y;`jU5Qh_(eG^m4+0L4*pbRUA*54M%qdh+Wl=fPgrg#d`mdG) zUGHKi9l-q|TC8+LPe*xFE7=*o!rsXbR~IaYos9d1aOHdrDI zZ9t%=l$rgzds>@b%j7%NZvPi7SeE*1-nl40=B4i3+(Tfa66#~A9d;MlCr5)cHT#$@ zs^4yWH0-?hO;0UJyXQTJ^oK!~25s{fDbuR3B^rdbm&(zs7S?^&q{T8?*YQfU&Q^^F z7s;X(;^w6VW)-&8_S<_b1g@oQL+l?M5KQWP6-JQna{hWPD!(za)aufD1EtXKg>t^^ zxrRW_Z~^2an{6JQbsFW-mfd^$Mc-X0*$HVNcf7kEez=Kfek_4Odf7XDT?&3kDFggy zj(X(-XrkUw>-exChVwyykYJCAl94V+k@p z9qw*7ryLwvt*r5Ra3K4IX#zuBf8P7s4VB5@^R-VKs%cOU-ijUqWr=2wY#hc-owRn2IF+ZDkX1S6QAFizJMrG_MP>Z(7oo*1{W&M!$1iDr@eEP_Kn6Ox;Al}X5N@CmNd zP0}@*mVR2PzABHu4|)t~G?)30WXR2FIM~-`Q(A zzIv1>Ki6X4*Zc9^r#H^y37;pTis< z0lpCN`bvJ9ox1kiK;@oy_I09SVBh!f$LwM4y(=F6%MGyA`*Bmw+y7Z$!?FXi9lef`^ij4qk~jcxAVq*5C_%BC&P(t0+TjWz7@8#3NeF}kEGl&l&pMR+`wt8k~( z(uQ40Ha6|lV5h6ZoC&|=108Ml7IUBk zkx+QJY{TBJHA(|QGnP|c*4__7?^PGC*L}A}e1;IMV0|N&r+xX)Lu+-pe6j@Y+zzj( zF3t(z$K?tjDa-CkJ(wFvcs=#==-_o^5*sU>@TZ0g#IN#>kH92hf%Dvp?1zQfkj=zi zn4i&B_dDT*lqGY6d+svYr^MLTBG90RM!M*>)#3xCOb$}_=uFN(WV^e25*!OM>N3Z* z1?MXi)U2wi_98_Qny#IwRE$3=9)0fnkkz*{X*6E~dpVTZNl`(Rc5f0nN2;4myytt3 z-ffQPpPV(zIEqg_TBm!+&-~_22@T-@-!pc$hZ00xg(Y}(*u=Y zWNM|iX9hx@JlyLSlhu3k&eR0PdD6k+4C%~14EiveJ2&DRala|Pjb5F;*M`IC#AfIv zfZkMyD8q=$AEc}cYGzN5Bc-mZkrfO=2Ky3L;YYp-CF|LEFS}UTb=LIW8|H-gB=czBs_LdTq z2S3C`uj^lPSb90LVq3o(YyX!gOsDeehwmvoZx?mb!rL5XXKAQPuW{)Pdz?`BDD*oA zP0*1C?#ixi`sU#I{on7uKRuJ3*IkoHq?HT=fc zQk+ot?3?h+3k{4&mb}R%JHrvFV22muYAgkBPc@_`&-(uR>QwAOI=AzIh_TW&z2MIh z86_Q1mn4CgJSyI|b(AO3`X8?@%}wmJ|E!sMqLcqYd$4^u3BuG7_%Q|9-2@vyW5aGi zbAgE3r%7cOl5{g{G$#s%uX@Z`9A>xIrk}5ok>>s8{Ylu?#1pP#6~Yft zLf5zbQ$R8IYatX2#d7wriB|;sKYBnYwVqLbx8aBzBs|rUkp7k^UV|;xFdQ=yA%rtPtIvD2i6EW(stV7``t`DLuL+6 zlPIF+ zdLUn7@UuwNi7Pzg0aV-w8ASv}cN*QDrLNuRw!biUgjM{}${bS@&uZ3w_;Eo8yS&-# z)_CRh5A;|CFYGKz3NZxTT+6}EH%)Kk&zYga&kG|*^t2i5yJZ7Y( z$9mYRkp%^&Vw-)$n8sQrm7L?Ls) z`~duzM+CH3N1sC7gg+&*JMB;f?~pDl-E{1JCb_ee-gxxh@$s?bbN6vA5-5~v|74iD zwf5iAn#CmYG=gARo@<2-Klh^)t!h$KR4vx9OUhL=r1z=PE=^xvAIwA;Y$Ni#Te3n zXEn?~ROv2Y_8dxo4|G>U(&i!ATk%qatv~o-`r(W{te&6SBDxw+Y+Q!hGK4J4;0r?Q zmQ#&SvGdwA?;O8qW;DKabD2*`wes^x`gi_U{&$>}b5QdAnd~s^+4(RQrg30;R}@;F zl~{G;Oz(9nAAh~#d(P%^$5CeU9Ap7j<)Ogc?(t--Y8IL3*u1vRo?(yph zpa0z4+5QwVh6ejWl^ z`4huGO{D9xyGHgM14IAjW*Q?iWjqLewjytLtY{Vr#@p^n_xnD~Ha|H#SV45_ldspalrFfAQ*eUfs!(16fpy6$?wlopQl|R79yGE!;K#sv z@W_&(!cc&UURkiOVsKC3!0br#61ITL24f)LOHF4}bnl_bx zA5P4uGHG0Pd-TxhkoWAuNJvIVQdMD&&Ym`*JP?akNlPAfZEoGAYPM7*gFFXg4vcI| z=tezJho7s;Te4GW_k*t7jnTFH6~&{ibHWB|rp2RQ@@H19I|izk93-=pco^jba+}&e zi3_cK#$O8>aZ7zDumAW&eA`i<{Cze)bH^u^?d=eZIB%uZVlu>AKkc}!IpF*DY~>TQ zq8+?~;-^GojeFX6UWK3gk_#TvC&r4bTaemMmEb?p#@L_sSL|OJ+FcOT(<72~s66=l zfx8==m4iIv9Pjl$;lOzmB7n&@6=(f9UM=_Z_nJBQNS27=_Lon52PX6V?rzxo?wJ;~ zt&ZoxP}q69k@qp2gE1k~yDn_6GLx2d$1B>ae>dCp;(s;uX`U+cK0ig-|NX*h)e&V+qMtwlUV2v5akg=QUHG-`5{K zhURr&_nv#s^Q`yWedml%h4*dZp7-u>tO_Ohq#8G@9_Lh|KVXxE71)|x6^?Xto2WXk zg#UTR=-AS}9p)GZM)JkO2L-*Ew<)i>L!e+vSy9nT^{yPWXRyzd!wf&-5argGs*q(F52;a3`AG=gZ%xj}|61NolDnvNG@tYU5tb~IVzEg1O>RAdmH`R!E za#}Hc)!D%TOlKD`owd){9-UXQ9x~GXeMYNzw?Tqs_qvcNi-iHy-#C5GuHyzZW8u*J z=6!b`^)KA1b!RHO8i6LIRj)DmJM_*;Aoc@6=^Sb@}uTMci)Ps8MOCwDJKrJ3^G!0OLsGuNu|kri+i6xy2Z7? 
z7SrcuFnW5o?^2s@5HuPU{(}W$ga2(BlO)Su-g2yS5fw(iK8#T@5Zo;y7995GQ#?s6 zy$gkO^o0Bbq~t9(sCET^c29w`W*M{`~D?-1CU^{TZiq*jQsVHqbuN zx|6fl8*5qNBjb1Lh)woVh$!;NI=INWNN`6R5oM$Op&d8WFX;MPWv$hjUn=}s+%L5Rg}F_AA3|;0vd(TJ;vrh zbjI==Uo-@l(B|G1%lTGRm>kJU@xtQVw3iqn3A&-UUVSj9KMXRm`J2{{G|7Tb06qEU zucSg&v-*^;wcI9o2oD6hQ9k<_b)6|~-i-9L)V}Jf{Jv#W)cbHy-PVfH*p;+$C979E z48KL_C4FFkj#-`zWCmg*pp%UmYNfv9XSF-n21|fB4K0e#lv%u9PH*#X)NUd_|^$$ z!}nO1ZesOLPM36ka=l}FxSC}a*enm{odh-^b%UtFrBm;;JE{P6BjrI?1P27n1gv*- zsYo9*(mj@csmp2S&kGM684J@_XshMq%RmAS771`$Kl)hE597`EX&A;m95Hs}&`?MN z)oZsAqJp&_pTPmI*j5x%3gV0%9-{L!x}Vv-ofWc%h~dwSg%RM}jWi$ee2 zyRK*&CGI2CH$hD_;eM1*bIu4zY(< zl2xXRber6I@AT?XX*Of5f}SeUCF}1SOCPUTyGf=9rb`OL|cQirdRq>^T%EAHB}$Z+-A{!@pksT>l!| zxN_b2sMPV?c<&9a{U-3wzj^ZjraO+IY-yI2<&sU*lw$p%J^={utYKr?PMY& z&R0!lX%=4}9MYY&I;I`EY-GQ4!}90_oi4$`B&u6Bxhu!NNg zhvrklmU9C%32!|zRCad^zb#G7WQx93noE{`DKX%8OsJ>VC%!_z`9ZM9o*hzVB5`=6 z*EQJRftg8FPa#Xc!C&WA^0@2M3x?KbvGo)3<<>HIfAaeWu(wOjtOibE%CDW+!vY?{yw5`FVmK^(i>={Rblr8 zj@KBSNHLnvq%`jxf>P?j)T`@E#7(kR&67s|A!ltj_)&gFlfTKf=04?UKc>o@9ysqC zF^p&SL8dDEav1OB^4K-5(xGYx8ze`!k+V)?#TR{pa&SCH&auhxlo{#S7$Tbfu=21f zq}0pw`qZz_VtJ~3H{YYEi}^PjM4f%<(7I1@!ireJoDB;~fk$=Ok3Q*$^u&k?s|%`2 zp{Lr8W<-rvh|!111yoOJH%uAHyv6VIsLxQz_m;c?z0v@4b*-+$7E zl(V3a5jRhzalgV)>gnl?4)BKz!5KN;2vubQ`r4C{72UnNC(6fMT&#W_s=|b`*62_7 zeVrE(KpBdhenyAm2fw9(V!u3oka;hQKgBoBOq7arWN~w32pYtze>wlefnaYiT;CAy zR%-5dk5^Z=E`}RZb8JZELHJ4jJtn%;u@5%%>8@VSwH~JX`dK?5vSO9%B(lI{B1mqj zT5wacyLYf z&pGK=3`RT4dZJ^en;R(6yjE(#%boCnDG)R778TOsUM@))?FFt{7_){Q=VDWXo9_8q znxgU_Hl^Jp9a=2jSX?~mMDlf84j4}p!fn!f`JIRUBVIgbYN0cBiZ!Iik7e1Xz8C!P z$6Fm27dt8sW}LQVE!>qS&@wl&j#p^=24J4CG45Bl{ib}W`;Za*2Uoeel5c!Hj2BR4-)IPixihmJc>B zU)3=3WrS&bSbA#YJGD4CA&*XbTMNm&iCe2Kq)_F zU8sol()wKW=YoyZsdfe<(;*XS%~p4DJSN&K0h1p2>?-#aGns(5dp+&}b#uy#b0?iN zKY1i?>cec*pkxily777k_@{r1y)DegrOtDoSstMb_KTqFmxzlydc= z+PjWY6bsOIM^W3jU-2p84C4yYeA9Kko2EinHFh>6k#& zx&y%iX3tV-K#A{HpawF{Kx?v*`xWbj_JyNC^%TOHfyZPAhFkVQExe%P-i&HE2!!tJ zzc(ARFT&1Bs2@^=3k@tj*(mNA6aw}2dOhoD=gm6R{mj5~z7}+bRL}6Rhe3adWGIe@ z#s*-n(T3>>0Ne(OU8E;e>FB1bUH44ehC~|p%`@sH!}Pm@NpIGV9OGq)vT25za39`4 z?T{*z>swrc@4j0S!f6$wB%}6FB(p*ulL^eV9>x3FUoUvr`59%q8gxyTtpvd2mM1+F z9A?Ubpvbf3f3A8;>W!<=xA{wO_;#N3{QC{ zkpw&kZn7)sIX9-ZV0=fXj;$2*I^$;(U+=NFc9w(bTGk?BfU$o%bhu3PosR*Pt3%4$ zV8Jh`P<<*okISPNVbAUHM4JA*SZxMfXw$Lxrrd~z&P2S34PQXh#;P)uXhPH1s2JdY zL~?DEi}m)Bi0B9v1(={SdxGa)Qfr(O!QBA+gxoh(1)$G&=K7WH z5mcQCY1!@?>h_;Vq5=M9g3mFuSq&HmQs$GZ&WW-4|E33+_`%7X;(~7h+e+wLY3^T- zA~b=`Iw4d!)%%B-7tKIBE=g7#RpmC3oQn5*BRujOE+PTbpx`QM8fcF160ze5*tr?z zKRX;^T-(j*ipgo$Hu%ErshX!p{?JzJHAc?7!b_%}(FmJ2eaF9ssaS#S)n($HoZ@ z@9A3+k7{&se1$S5uOkOjCFC7*(Y3kCM?NKz#ruzmoze(1WQ}$~V<#ilnL-P%^R@5; z)+l}e^tnM0K}7!nF&Av5?|sTp5sMe0ys|G0AksHmJOpi;)R;}k(jPt-!y~@dBYgez z@=>sjPPs+(v>gBXGKj%cd))=ViAB*X(7WYyw!ds7S=@M{cuwI2?n zXuwxfbFkB!k5eg}atpGO`$B$Q-`eWT?NM6wRFXniZQJ~ zrNF*$c+4oA0x&S`D7d}J%!$2{CeY0aPbF19rzTgoY==%Y*XX}O+7-)-wj*NL)vot# zH-Y5-pT2EZS@>ps83{HM&V}`gl8078j5zSU1E$A3RvqH ztp+1cVE}mTLOV)Ug44Ooq~Y@J&?~E(qMWA+jVexML=5K?GR-v{jm`9rhZ&;wDi3W`fJo*4&B3#+Ax={IAn) zJ45PZ!yPrTg|XH*-JXqnPVWTmFhrYHa9K^pk)ZJpYCi9530c}THsGLNaYPk(&Eg)6 zosr}x-%!-!zv_jlDWGo_efuM&fxJ(BVK)B~*K^zv36{<|a?pqYR@hew(Re(R^h=Ko zhO}d8z+P!_4vh|QzN_rkyzgD`egC{CPM9D1HYoj4&X3>}1F(8v>^Xk{nhh;e+xX3yq_xYj9BJb6YM)eq;-uJ90YS~YO zW<|+-$4GAj4o9b85wAo}R_@0f7y?S0(SBP72h$FR-N_MS&s>@ykbwU+Gh++(17agB zIi)SF`e}#(mQ>UWLuY4EW|;9w+w-eWgJkh#V=i5Huv2VB89`KA?jvS7BocT5QD2r)~zp%C|Gvc=z zPu1aUS}tS!w9d8a_&Ub*MZ=1~-oDB8-q1w@^<|YLw&>}*I$}iIW{ag!SIpw_=*$X* zuN{Y&sxrVzSA(9Bd;?T{#++9Bu?Y=AD|?cWZejf62H#kuDH(h#*xY=)+g)kvI4SiA 
zY0G8R7R!mg<#fH~rl$K+$!C__sqhh#BSulVU&p&EaNEB)tOSb>HS`DLxL@YXAJzUA zWW$MMG$^gAiL~ECOe+f0$|n^a;s>i!X&>I?ZQa^r=F8?iaidsnOgz5-u^T>41hms| zGj)DCyp#?H|BTcg)P+SkI?Sd9HBH|v5%pL zIfog;J65Usz9BXK_o7`?YLuItPJgG{^f_e(!FW~Bmz7Uy2Ve1sGGyVrU|urQhRICAKm z2IUJ&L$ZI*ML5!qKxcG#FYBj3Y^_Hg+=+)C{X~DNHu5!y6uX z)b2wslq9j8KBWA@flXfMDzi~6dASGWyNu|y9trHpE`-L-fV3pWi}SJs5cuT`r;;Aq zpJBbI;3Iq)E(ruc8c9pNV(JG#l%y9nrJ*m$i-y*kbU2kZv2{_IAxTyeMY-IZ9VOD} zha4>phbJro;CgjCqxj+Gj+*-y5TpT2ies$FS8-QhY+fi&G}z3Bhx(o^BiOr~Eb2pt4U|zR35F*mRjY;8g?zYv!EZg-X;Q#2$Zsc3 zC3hB1AkzD9Jujtic4u^qS_3d4$XMNC=GAVRn0^58LSHmNq^QA8PoENzCJPD9gKSxR19r@ zk{rj1W}^4DP}uX5x`2e?m17G7tD?HR z97vM{r_vPLQ*)USZjssXcQIS;rpki<{ z;1X+VbH8~gWDs^#e-NE#UI32`ZHRZDZK)%9mu~du!`#B`#K9qYdzc3`D<3jtCgNca zGGx|ze4@!i-F!~byW5xi?qa~t1a^`>LP?54Es7au)+V3Quy$whl?Gpae;Y`b`<|Oq z4 z2{s_abxiy7yUVdgsJd2`^gse)J*u@T@3Zq_t;7`Hx(k{;-$jPhEXwg-Kbmn@YOJwue6-iOMY{CIx3PsFEeH$VxKf&!~54wRpXdyaGc;-jDuSc~+Q2s-tvqk4K zYOBy`CW>dwv#Hn=_!6c?IYZnv0dGo%XleCiUx)}_%w!@s1khB19>qqb#eE8WKyF#7 zA3!lx$txCq_ZV5_A8h0fL54u1WBmxUaz)h*T|3&Ve7r{>>RuKB@OYDBKe0tz>f^9W z4w;4bCpi5e&x*=*d26(#3}36`Y$D3bRa#{~ZhVH&%_D!WCuG>zf@B@MgbDPFnRt|92Sl_dd==b~XiRzpT2ugD3Kuobi< zXL$&t*s@-`uob4orW=F`hPz;onr!5A#^KVIjO|#p>y0Rtxl2fyXmbyu&T9#2iq zjk)vBUO-GzV}4yoa|Xmykeo9b*6ctQ;?kT?78p_@n>-lF9!3u+(h&(&-);L z@X8UNO&sONSlb90qpqGvKE9a#iZmG*`jj(-#y*M-S@gKS+6LtuI4gG0y^4DXWrKnb z?Niw@dMJj9LXsljJCXOYWzJeCnakj31C%QdA<|0+7X2)BAdfZVNw#D)ywgPxNQdyV zRfj9b%WNKq;$N4{$`>Aj7H--8MvWEsK^}I%C%6+`i!RKuho(fjZW58Z4fT94i~yOki;e_AX}NA`EwwGK$Xx3^es@ zwI4_U3bTGAtsgPbWI>D#Piy0F4*Xqb_R}=@*|~2P8B9zBOF$HLpgE`JU0l!s7S|xd zm-Ih-lFF5p{`rW0)#Fr$AFdP&-*Z+%+C@H5jxvniQ9Ep{Nc#923fKg|f4B5!T~tXlMPH4W=+>^;gF zp+2?L)Qrb(UYB{2Dhm*OD=PgVuRB=yn1p><^mpF35gW@5BYJGo7VHn~4aGx3cvW6i zQk5H$hdMHlcZ50S9_I-Bz+Ol~UM;ane8RHw#Nrz?{_|U2CS6@^WK5bK{EJyhGFedZwT_*dEu!cPy4LQlBj@6Fqm%!x138E7K*!ED z3h!m^LbDlk+FLjRv2419p0hq-4Qjq#3O2OhF{IoAw#ER+zm~!fe$kEm{tKaUEAIy z!_HybaGg^~4r8fwgRHkiMi>1VUBH#EOL>Xc;2BxGCK3IlO9m6Gce-%eR#Qz%wf3K^ z9p2>)O-5aneU%i=;|S@rMkQ2n16<;EUIfM&44OKn9y6EM)6ImSo2v*R^6PxxLe|nQ zJV!>pTRQybfaN_>=jzI8`St#WPAS#$AosWCejtV2vIR>aY6r8+^85?d8aEe8Tlx;)Tv0@ z+c594`tj$_m45TBN2bIw(xERdy5k$o!RK9gZmtTQFx-0A@;P;fFw-T2QLJDx}nq}X9t{s zOn&dJ*3(cp3i0QFpGCcuK~D6FI-aUM+nUrhSQ4Be-tI`z`$XpJNo$GoX!Z#@NP%#3 z&QyM%(1f(#LFDIX8)Wf}Xj)WEp6-q8kcpI~d0jFGAHf^1669Hr#Q} zMSnJ;dwf-z2)M6!eytp3q}3m@O5DM$Bjn_k`$lA1j=@skr5wd#`vi$)S(+T-*t##H60W)eVrzs!OiWb! 
za(ZUBc(-I@TM3Ai0)gs?b!;YnJ_U7K_`i-@hI?_zVK zNRqQ0Z^una!2p~7_g)6ARm(DqJ{BDK;|2aER5KLa4OT#oU^Ud)Hg?l- znP7ov7CVQ2uW$I68t|xD<)KhWj_;Kpg$q!f^*#z{ugH@!gwmkMzHOe**X1Rz?sOLK z8;*L%D!fIbJVwtzFX>Wo(vAGA+ODN^o|EETtan$lZ|C`v`Vb| zp5f<81wOUDx0pdpE4-0@zH3;6GKc~OKC<@+sj6Y^{HKtQc5xoTOT)q@t**8X^JzMF zS~Ez`c6dYcQAQBdyy!L}Rc)qSIBAwGS)koMyHICcGIhqg*8wtx?_DO;?;os?DySX` zME9&;;1V+Oe83@ERC}&z_?mK<#X00h3}@*-rgVZO;rLzTJ9!RE7V_Z2!+qYxPnGblygT`9p+!1r zcE-)(D@dTs(Vw$jbEMS}>2YUFVw_h`&3dG#-*P!|T-zFsbzN@?nRf$R zpr4ug!JFT-PM@zqMD)~6Mo31>z|AyGgJjy3Vj zKdEe687rDxZuIwIqMKcGBBvrcs4TG$#n@}$4JKLV{~dgaKEOM$#{1ZNV9An~Jb8Vk zv?4UyrAw|3Kmoc7m1am1nnXJ>AGM~L_gPnBejEjAxvEW`c3EPm1~8pOx#!4Z13&!d ztnluhE>PxG&kC4u;TpznY4Ed*fuA8!!9L4cEPZ(sntt$KR1h|um|$N1D6irq{puP9ku&;3ImG< z#%i+{eL3{cO#RCeei)JZcF~-4yNIr0aujxhD8P}S{M>M=f2!Xv%UO97C8aRiyW>!~ zkuEXMm~$kr_Yboo($D@6`{(|%FYsO3gkoQ+G1Iixb@j+RO5b}>%42Zrgm+nFWlu`S zkqy+I+biv>Jt95sLCzS$lW?A<^W}e;)j}6k<^Aq%NmU*GvfV8`F;-CbL-6k^T3x780x3;U#Pod90`LV>8nK<17{elW7Homw1KR^2cW=Y*nT3$GAi7UE6$@ za%Oo;A>tZTzTuBRd>uk<(R$lold0G%w`d+q!>jYVrt{b@QVtH<1K64hw!4`YYUX)Q zst9k^4uK(%sd?D?knmxM{2KSXQ6;=ph`wP>fu)9ok5XCo6gS7u06)H_8q~!` zOv9N0s|zuq#7(-dm;Ct1)029h?-h#zotUD0jjiEjCVtP`66f9rC05K+wjsuSU_({2 zj5JHvT`fiSRR5`;*K&J=DTy)tdcfo!rzwB=aMQJ4lWuO<>cW8@$bT(=%cIv|x^7iq zUaj)&g1$L)fwQ$GYc#lpn7sF~4&y$1$ZfOZBvl5i z?~N%;EjwY)uC0hGOH8=|vZBl@)?!O)Q>Z1crdPKZde)BY1yfA&VIE$3B$i~Al?UuJ1pYU{E7hoLI6E9IA)RTA+pmaqei ziN(+wJ8%*~+j1*t7b(bR6T}v4Ko(8WwEtrRoBlRrQLrwsT3Wpx_P3}(TCR@S5!l{Pp(K?A$Qt>O{f)ishIJ}1_aekFPSinw{KdDLaFpPiK>M^en&<|qGyuE8hO0-3!P@2qvOVK0*(3e$r2 zG^RYP96YEk+d#zB?0v&e*Lt16+q5O^f6|4~Ib#)C#*dj3O9pFXi4Q6RhB8&o_ZbY-nUz(>RbX1+Uh}#nhuLA@mr)uQZVQQ$S zQqzV_K9RS;{Y2{6&pKj@+p@kt6e7ATCB5VAIJyNU{yErX{CVRtq0VuBP>nV`*rO7U z&#QE6>ked04Y+`@TgC7Y%ZQ{L-Sq*%^=s1q#d_73MUC=d1;OGPP|z2dw(36WWVKgnSiEWT(5Nu zvAGsLa%5O4H(g-ppw16PhG0tTVKFf;5EI5Ht1F0c`-<&-?JlIH8+*q0u|e|S z!Icw3mnwhwoiJ<9n~l{7TC^QAUfxGh2@w8*RwGOgZ@AFOLxWuY8jn_8Zm>pb!~~E6 zH%&Uv7uh=M&6pFseT=0RK~)QZsRyaFXAuFl=vD-aEp9yfS}?|2TxT$KK+4nwoAbT5 zGdj-hvH0kjmr>z$lKNK6Ow2)-C6i%=pbA~g7O^XjX6l!g$aj|M)$PNx>IdTT*TboC zqOm->Gay|BZy>>2Z=<;QeMvAb9<9w}%r&3|DQ4ha?|bT9O_p6AZ20C8#d|53;Xl(^ z8O8$*d`e#(R;}t{`z!@B5SPO{4|nk~3g}V=QKWv~qu`$XVBG#kO;@mSVmT`9YgPQq z8h6~JdC4F9P{RR=90Qlf>nGw0YdDKf!9;!!Tn&&*xj%B@LYkX9^pqc|i2o}~9xc)2 zbP!Rf)n3znS}k=YPJI`d0|(L}_O!YUa4~mi4ZZfuTDDP#C{2LU0bK_*!a_po{%*v) zT2FL2ZF}L{O3GWlTvpS7o2>rm=?s4GUqzpP3)qEPl^RSr-XKsTg)N2S=wye#b>Dus?W+t_Mmk>iX49phB0m1_^xH{P*~d{HVeV&|yWLgx!*%WsCP%cu9xv1& zC~nzk^UA{U4~=YYp-uo^x>qp48u& z%whSb)ZY7WS0?Z}s1HT2ltE&z0oAyv1IG!cJ}9?ICi~ZtYpv1ulv|wr^B$an5R{*P zeOY2R&4Z1)sSY&+@3oW=n@%+9axA6f@`j)Zy4;H{YzU{Rz8*`Y!JGNL3w7CgYU`BT zuPP%KPW>rD7ERm8MR23l0g1+Lz5HW6BfZsu4|0!2dLSCMQ6Ahn@7K6qas2y#Bq}ft zRGu$4_X`I?F^T4&Dtxi(u%dE{ssTjrIZcj;VhQl_pPvl=LaW82%>C5zR!d4P84rbk z&}HN}n8bq%VYS}LLe`oQrF4~oo2!48M+5UD;Md~X-aY!mVu+prctBQa zOGV0#Mp zy;Aq*9HuBHVS8S#hagI)zYpp51;1U2glRYiAX=Wd#?{LGuN7N-N`h#EjrJ_U)qbf9 zUQG?YwWQgQ1+&gq!IZ3n{4aXc5{v|63hSI0P${g%v8nir_><*KMAvA&VQ_ zf4rVtTq0RivpgYNT)u-4z(k7^7Bn}A$lj3OB}v4@Pi|%BlNE&m@BX^ z?TA78(I$`wbq^_!84?Pg)4b7@uI1aV4OUaX`D2jC+fV+I_sIAxHtD*^daBfgqp53< zvL2a9ooI;s^9CmJ>Q7>zi4t9x14-&CnfklYJc^c@zKX@Ni}vovS0B53cJw^3Fc5B5 zNnSl(v8|B<>7AmhE|ca+Y3f>^Lc-sgxC;46h-!ZdVb_ptKvvxI{=Qt8W`Cyd`-?fT z{)$H=Oxs5cQZ* zfTX8;DiXmt+_%39q`-dE&q62AO@9$$CPltRfg)a?==E^nRC?DfxG!yEZvb~+ zZwZZ%+G<%#%nwx(;}M?`mpI8$a5(6%cVW}GqPSaB>yrZ?AFy8Ed82@B7;UYv{>L-6 zU)nz8q}%uXXy!x;O0lO$y$Q9%sZdL*Y8uD z`2HDx7-Wsmnhi*k5j-dQNs!o5U4OxHj&}BfSIU}#kMiQ5Xhmpud>?`W|I8=4(f(wx zJ{mjo=;7f`MU!Cs?ZNbyZy!y)lIo_mwUJ{a;yhbB{p=3nYvs=T@YyB%qUN&X_=6H% 
zPpoWSvd4&z7lK>$XH>_!i1&AWu%Xv40?BdlQ}G^jYYRudY7qc7lcjDLG~Q25r_^7m~*j?7g;<>y^@9Ru-2G3%thlY}#r5=2JVreW#>ENzk@FK2`u*@F&gw;pM zZ@o+P0u@SW6{`742144~V|P$T+L(1=YB#uS(x5p!(sU6@CJIi3+v1-GpS?ISTo;mh zCCN=hTk{!2@@n=b6#X`VA}nl4RNO(vV7`PT;jG~a&v!9Q>Bo>nHL5K3lBC#nR4;Z? z^_xHK&yssxD1`Jv=X?Zr;uz#Mrt{k>Zw?Jx)8tkfQi5fMZY;bHK1i^i8McP+XjW+9JBO*>}?)luNGglaGE`iB9d*dlKCMr4mr9%p} zPFV#>MQ`0ie>TaZ3H{z>-0XODTPha&z|Rmn9citHIas+ccE)q?g}4|zWf-+MzQ&~X f)tag+E6god{)tY0st%n`5#+2ERx|IE&A literal 0 HcmV?d00001 diff --git a/docs/mkdocs/docs/images/ArcticDB Logo.svg b/docs/mkdocs/docs/images/ArcticDB Logo.svg new file mode 100644 index 0000000000..10d8320100 --- /dev/null +++ b/docs/mkdocs/docs/images/ArcticDB Logo.svg @@ -0,0 +1,3 @@ + + + diff --git a/docs/mkdocs/docs/index.md b/docs/mkdocs/docs/index.md index 3109eab8c4..e763886509 100644 --- a/docs/mkdocs/docs/index.md +++ b/docs/mkdocs/docs/index.md @@ -1,4 +1,4 @@ -logo +logo ## What is ArcticDB? diff --git a/docs/mkdocs/docs/stylesheets/extra.css b/docs/mkdocs/docs/stylesheets/extra.css index a93f40c4c9..ab28864f2c 100644 --- a/docs/mkdocs/docs/stylesheets/extra.css +++ b/docs/mkdocs/docs/stylesheets/extra.css @@ -1,7 +1,21 @@ +@import url("https://fonts.googleapis.com/css2?family=Geist+Mono:wght@100..900&display=swap"); + +@font-face { + font-family: "Aspekta"; + font-style: normal; + src: url("../assets/fonts/AspektaVF.woff2"); +} + [data-md-color-scheme="arcticdb"] { - --md-primary-fg-color: #003f69; - --md-primary-fg-color--light: #ECB7B7; - --md-primary-fg-color--dark: #90030C; + --md-primary-fg-color: #141c52; + --md-primary-fg-color--light: #ecb7b7; + --md-primary-fg-color--dark: #90030c; + --md-primary-bg-color: #f9f9f9; + --md-typeset-a-color: #783abb; + --md-default-fg-color--light: #783abb; + --md-accent-fg-color: #141c52; + --md-typeset-color: #141c52; + --md-default-bg-color: #f9f9f9; } .md-grid { diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index f04cec95e5..b6ba7841ae 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -20,8 +20,12 @@ markdown_extensions: theme: name: material custom_dir: overrides - logo: images/FullWithBorder.png - favicon: images/FullWithBorder.png + font: + text: Aspekta + code: Geist Mono + + logo: images/ArcticDB Logo.svg + favicon: images/ArcticDB Logo.svg palette: scheme: arcticdb features: From a02f90f7582ef69afc2336e5c04182c9230afd53 Mon Sep 17 00:00:00 2001 From: Georgi Petrov <32372905+G-D-Petrov@users.noreply.github.com> Date: Mon, 16 Dec 2024 13:04:00 +0200 Subject: [PATCH 04/11] Fix asv problems (#2065) #### Reference Issues/PRs Fixes problems with publishing of ASV benchmarks #### What does this implement or fix? This PR implements the following: - Upgrade the Python analysis flow and C++ tests to Python 3.11 - this is needed because some of the benchmarks need at least 3.10 - Fixes the problems with publishing of the benchmarks - because they can't evaluate np.inf correctly - Fixes LFS for BI benchmarks - Incorporates changes from https://github.com/man-group/ArcticDB/pull/2060 See successful run [here](https://github.com/man-group/ArcticDB/actions/runs/12317667688/job/34380720877) (had to be started manually due to the changes to the workflow files) #### Any other comments? #### Checklist
Checklist for code changes... - [ ] Have you updated the relevant docstrings, documentation and copyright notice? - [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)? - [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)? - [ ] Are API changes highlighted in the PR description? - [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
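A note on the `np.inf` point above: the one-line `from numpy import inf` added to `build_tooling/transform_asv_results.py` suggests the benchmark payloads are evaluated as Python expressions in which failed or timed-out samples appear as bare `inf`/`nan` tokens. The sketch below is illustrative only, not the repository's transformation code; the payload string and variable names are made up to show why the import matters.

```python
# Minimal, self-contained sketch (assumed behaviour, not the repository's code).
# Failed/timed-out ASV samples can be recorded as `inf`/`nan`, so eval-ing a
# result payload only works if those names resolve to real float values.
from numpy import inf, nan  # mirrors the `from numpy import inf` added in
                            # transform_asv_results.py (nan shown here as well)

results_repr = "[0.0123, inf, nan, 0.0456]"  # hypothetical ASV-style payload
values = eval(results_repr)  # `inf`/`nan` resolve via the module-level import
print(values)                # [0.0123, inf, nan, 0.0456]
```

This is only meant to clarify why the import was needed; the rest of the results-publishing logic is unchanged by that hunk.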
--- .github/actions/setup_deps/action.yml | 6 +- .github/workflows/analysis_workflow.yml | 2 +- .github/workflows/benchmark_commits.yml | 21 ++-- build_tooling/transform_asv_results.py | 2 + python/.asv/results/benchmarks.json | 136 +++++++++++----------- python/benchmarks/basic_functions.py | 101 +++++++++++----- python/benchmarks/bi_benchmarks.py | 6 +- python/benchmarks/finalize_staged_data.py | 84 +++++++------ 8 files changed, 210 insertions(+), 148 deletions(-) diff --git a/.github/actions/setup_deps/action.yml b/.github/actions/setup_deps/action.yml index d79e91c7d6..f0a3ff938d 100644 --- a/.github/actions/setup_deps/action.yml +++ b/.github/actions/setup_deps/action.yml @@ -9,8 +9,8 @@ runs: dnf update -y dnf remove -y 'gcc-toolset-13-*' dnf install -y zip flex bison gcc-toolset-10 gcc-toolset-10-gdb gcc-toolset-10-libatomic-devel krb5-devel cyrus-sasl-devel openssl-devel \ - unzip tar epel-release jq wget libcurl-devel python3 \ - python3-devel python3-pip perl-IPC-Cmd + unzip tar epel-release jq wget libcurl-devel \ + python3.11-devel python3.11-pip perl-IPC-Cmd dnf groupinstall -y 'Development Tools' @@ -19,7 +19,7 @@ runs: echo "CXX=/opt/rh/gcc-toolset-10/root/bin/g++" | tee -a $GITHUB_ENV echo "CMAKE_CXX_COMPILER=/opt/rh/gcc-toolset-10/root/bin/g++" | tee -a $GITHUB_ENV echo "LD_LIBRARY_PATH=/opt/rh/gcc-toolset-10/root/usr/lib64:/opt/rh/gcc-toolset-10/root/usr/lib:/opt/rh/gcc-toolset-10/root/usr/lib64/dyninst" | tee -a $GITHUB_ENV - echo "/opt/rh/devtoolset-10/root/usr/bin" | tee -a $GITHUB_PATH + echo "/opt/rh/devtoolset-10/root/usr/bin:/opt/python/cp311-cp311/bin" | tee -a $GITHUB_PATH echo $GITHUB_ENV diff --git a/.github/workflows/analysis_workflow.yml b/.github/workflows/analysis_workflow.yml index ac2e93cef1..8532149b54 100644 --- a/.github/workflows/analysis_workflow.yml +++ b/.github/workflows/analysis_workflow.yml @@ -6,7 +6,7 @@ on: type: boolean default: false - schedule: # Schdeule the job to run at 12 a.m. daily + schedule: # Schedule the job to run at 12 a.m. daily - cron: '0 0 * * *' pull_request_target: diff --git a/.github/workflows/benchmark_commits.yml b/.github/workflows/benchmark_commits.yml index a27177db93..9a4327d1e7 100644 --- a/.github/workflows/benchmark_commits.yml +++ b/.github/workflows/benchmark_commits.yml @@ -31,8 +31,14 @@ jobs: defaults: run: {shell: bash} steps: + - name: Initialize LFS + shell: bash -l {0} + run: | + dnf install -y git-lfs + - uses: actions/checkout@v3.3.0 with: + lfs: 'true' fetch-depth: 0 submodules: recursive token: ${{ secrets.ARCTICDB_TEST_PAT }} @@ -46,14 +52,15 @@ jobs: - name: Install deps uses: ./.github/actions/setup_deps - # We are changing the python here because we want to use the default python to build (it is devel version) - # and this python for the rest of the testing - - name: Select Python (Linux) - shell: bash -el {0} + - name: Extra envs + shell: bash -l {0} run: | - ls /opt/python - echo /opt/python/cp36-cp36m/bin >> $GITHUB_PATH - + . 
build_tooling/vcpkg_caching.sh # Linux follower needs another call in CIBW + echo -e "VCPKG_BINARY_SOURCES=$VCPKG_BINARY_SOURCES + VCPKG_ROOT=$PLATFORM_VCPKG_ROOT" | tee -a $GITHUB_ENV + cmake -P cpp/CMake/CpuCount.cmake | sed 's/^-- //' | tee -a $GITHUB_ENV + env: + CMAKE_BUILD_PARALLEL_LEVEL: ${{vars.CMAKE_BUILD_PARALLEL_LEVEL}} - name: Set persistent storage variables uses: ./.github/actions/set_persistent_storage_env_vars diff --git a/build_tooling/transform_asv_results.py b/build_tooling/transform_asv_results.py index bab414460a..cd5c4418f2 100644 --- a/build_tooling/transform_asv_results.py +++ b/build_tooling/transform_asv_results.py @@ -5,7 +5,9 @@ As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. """ + import pandas as pd +from numpy import inf from arcticdb.storage_fixtures.s3 import real_s3_from_environment_variables import json from pathlib import Path diff --git a/python/.asv/results/benchmarks.json b/python/.asv/results/benchmarks.json index b3fc895823..c659476028 100644 --- a/python/.asv/results/benchmarks.json +++ b/python/.asv/results/benchmarks.json @@ -11,7 +11,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -29,7 +29,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -47,7 +47,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -65,7 +65,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -83,7 +83,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -101,7 +101,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -119,7 +119,7 @@ "1500000" ] ], - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -142,7 +142,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -166,7 +166,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -190,7 +190,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -214,7 +214,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -238,7 +238,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -262,7 +262,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": 
"basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -286,7 +286,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:36", + "setup_cache_key": "basic_functions:38", "timeout": 6000, "type": "time", "unit": "seconds", @@ -310,7 +310,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -333,7 +333,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -356,7 +356,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -379,7 +379,7 @@ "1000" ] ], - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "peakmemory", "unit": "bytes", @@ -407,7 +407,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -436,7 +436,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -465,7 +465,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -494,7 +494,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -523,7 +523,7 @@ "repeat": 0, "rounds": 2, "sample_time": 0.01, - "setup_cache_key": "basic_functions:137", + "setup_cache_key": "basic_functions:139", "timeout": 6000, "type": "time", "unit": "seconds", @@ -531,7 +531,7 @@ "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_append_large": { - "code": "class ModificationFunctions:\n def time_append_large(self, rows):\n self.lib.append(f\"sym\", self.df_append_large)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n 
self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_append_large(self, lad: LargeAppendDataModify, rows):\n large: pd.DataFrame = lad.df_append_large[rows].pop()\n self.lib.append(f\"sym\", large)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", 
"min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_append_large", "number": 1, @@ -544,18 +544,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "be3be12028b2f1a949589e618252e94a88e5f35b5aa90f5815fd8aaa324c8550", + "version": "b817d86d1bf76649691197bfaf1261a96a1a34c9a25f053d66f6dfcf14c6f279", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_append_short_wide": { - "code": "class ModificationFunctions:\n def time_append_short_wide(self, rows):\n self.lib_short_wide.append(\"short_wide_sym\", self.df_append_short_wide)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_append_short_wide(self, lad: LargeAppendDataModify, rows):\n large: pd.DataFrame = lad.df_append_short_wide[rows].pop()\n self.lib_short_wide.append(\"short_wide_sym\", large)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = 
generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_append_short_wide", "number": 1, @@ -568,18 +568,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "3a2e1e7a4dc518468ba388f560231ac1a1366b212dbd3309e3e877606c5630e8", + "version": "3678115ad2d40bf19062212095071431ff63cedc159661ee3056be7cbf109f98", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_append_single": { - "code": "class ModificationFunctions:\n def time_append_single(self, rows):\n self.lib.append(f\"sym\", self.df_append_single)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n 
self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_append_single(self, lad: LargeAppendDataModify, rows):\n self.lib.append(f\"sym\", self.df_append_single)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial 
arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_append_single", "number": 1, @@ -592,18 +592,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "c7f13a15b9074ab9bdb6f3e47ab97d75708938f005021b7a8fde82fe6902041d", + "version": "8f398155deb342c70fe4c65e8da636b1f18c9296632b4649aab8dae306aa8453", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_delete": { - "code": "class ModificationFunctions:\n def time_delete(self, rows):\n self.lib.delete(f\"sym\")\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def 
time_delete(self, lad: LargeAppendDataModify, rows):\n self.lib.delete(f\"sym\")\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_delete", "number": 1, @@ -616,18 +616,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "da4c95139bc0ae404ed6585b9e3398af8ed7e421cefcbeb9ff9ea6a77b85915a", + "version": "6d8afae2414e0f842495a7962f5950472814bde20e99eebc474db6953d8e1ae3", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_delete_short_wide": { - "code": "class ModificationFunctions:\n def time_delete_short_wide(self, rows):\n self.lib_short_wide.delete(\"short_wide_sym\")\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = 
generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_delete_short_wide(self, lad: LargeAppendDataModify, rows):\n self.lib_short_wide.delete(\"short_wide_sym\")\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n 
self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_delete_short_wide", "number": 1, @@ -640,18 +640,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "12254786f4a42e8bd488f48075cb70eddf4d87c8581271e2e2b526b7940123b9", + "version": "f867fc9cac4d0706b01166662af37434100460706d4f6118de0bc2e0e3087bae", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_half": { - "code": "class ModificationFunctions:\n def time_update_half(self, rows):\n self.lib.update(f\"sym\", self.df_update_half)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, 
ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_half(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_half)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_half", "number": 1, @@ -664,18 +664,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "f56b8677f5b90b49568e6865c0656b734b9b2a8054baa71b78eaed8f53cb3176", + "version": "6a011f58b79c483849a70576915c2d56deed1227d38489a21140341ca860ce33", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_short_wide": { - "code": "class ModificationFunctions:\n def time_update_short_wide(self, rows):\n self.lib_short_wide.update(\"short_wide_sym\", self.df_update_short_wide)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, 
rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_short_wide(self, lad: LargeAppendDataModify, rows):\n self.lib_short_wide.update(\"short_wide_sym\", self.df_update_short_wide)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for 
rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_short_wide", "number": 1, @@ -688,18 +688,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "5db16777228d8de1ab4af9943d1ed0541c0b02c4dbcd888cfa3e26f37eb0215b", + "version": "111496c5bd4a4c498df28819d3cbcd9d699c4d3363ad3969f102a1d2076b3086", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_single": { - "code": "class ModificationFunctions:\n def time_update_single(self, rows):\n self.lib.update(f\"sym\", self.df_update_single)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n 
self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_single(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_single)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_single", "number": 1, @@ -712,18 +712,18 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "cf62fa8a658e2f2ab16d286992423dd8d69334415ab61600906c6e9dc0185597", + 
"version": "c45c168d5713f3028a9a5b97959d52116c8d228870ad580be06d86336d2476c6", "warmup_time": -1 }, "basic_functions.ModificationFunctions.time_update_upsert": { - "code": "class ModificationFunctions:\n def time_update_upsert(self, rows):\n self.lib.update(f\"sym\", self.df_update_upsert, upsert=True)\n\n def setup(self, rows):\n def get_time_at_fraction_of_df(fraction, rows=rows):\n end_time = pd.Timestamp(\"1/1/2023\")\n time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1)))\n return end_time + time_delta\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1))\n self.df_append_large = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(2))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n self.df_append_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, \"s\", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS)\n )\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\"sym\", self.init_dfs[rows])\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)", + "code": "class ModificationFunctions:\n def time_update_upsert(self, lad: LargeAppendDataModify, rows):\n self.lib.update(f\"sym\", self.df_update_upsert, upsert=True)\n\n def setup(self, lad: LargeAppendDataModify, rows):\n \n self.df_update_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(0.5, rows))\n self.df_update_half = generate_pseudo_random_dataframe(rows//2, \"s\", get_time_at_fraction_of_df(0.75, rows))\n self.df_update_upsert = generate_pseudo_random_dataframe(rows, \"s\", get_time_at_fraction_of_df(1.5, rows))\n self.df_append_single = generate_pseudo_random_dataframe(1, \"s\", get_time_at_fraction_of_df(1.1, rows))\n \n self.df_update_short_wide = generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n )\n \n self.ac = 
Arctic(ModificationFunctions.CONNECTION_STRING)\n self.lib = self.ac[get_prewritten_lib_name(rows)]\n self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)]\n\n def setup_cache(self):\n \n self.ac = Arctic(ModificationFunctions.CONNECTION_STRING)\n rows_values = ModificationFunctions.params\n \n self.init_dfs = {rows: generate_pseudo_random_dataframe(rows) for rows in rows_values}\n for rows in rows_values:\n lib_name = get_prewritten_lib_name(rows)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n df = self.init_dfs[rows]\n lib.write(\"sym\", df)\n print(f\"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}\")\n \n lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)\n self.ac.delete_library(lib_name)\n lib = self.ac.create_library(lib_name)\n lib.write(\n \"short_wide_sym\",\n generate_random_floats_dataframe_with_index(\n ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS\n ),\n )\n \n # We use the fact that we're running on LMDB to store a copy of the initial arctic directory.\n # Then on each teardown we restore the initial state by overwriting the modified with the original.\n copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL)\n \n number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds\n \n lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration)\n \n return lad", "min_run_count": 2, "name": "basic_functions.ModificationFunctions.time_update_upsert", "number": 1, @@ -736,14 +736,14 @@ "1500000" ] ], - "repeat": 0, - "rounds": 2, + "repeat": 3, + "rounds": 1, "sample_time": 0.01, - "setup_cache_key": "basic_functions:235", + "setup_cache_key": "basic_functions:278", "timeout": 6000, "type": "time", "unit": "seconds", - "version": "80de9b1982a498c300177d02874a8626152eccb57cd0ba4228a5bb168e7608c8", + "version": "7f139bf03457104abe937914aa3572503ed52330b3a271d82112696060331d8f", "warmup_time": -1 }, "bi_benchmarks.BIBenchmarks.peakmem_query_groupby_city_count_all": { diff --git a/python/benchmarks/basic_functions.py b/python/benchmarks/basic_functions.py index 5ff8e4e9c2..fdcf53d319 100644 --- a/python/benchmarks/basic_functions.py +++ b/python/benchmarks/basic_functions.py @@ -5,6 +5,8 @@ As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
""" +import time +from typing import List from arcticdb import Arctic from arcticdb.version_store.library import WritePayload, ReadRequest import pandas as pd @@ -185,6 +187,7 @@ def time_read_batch_pure(self, rows, num_symbols): def peakmem_read_batch(self, rows, num_symbols): read_reqs = [ReadRequest(f"{sym}_sym") for sym in range(num_symbols)] self.lib.read_batch(read_reqs) + def time_read_batch_with_columns(self, rows, num_symbols): COLS = ["value"] read_reqs = [ @@ -214,6 +217,10 @@ def peakmem_read_batch_with_date_ranges(self, rows, num_symbols): ] self.lib.read_batch(read_reqs) +def get_time_at_fraction_of_df(fraction, rows): + end_time = pd.Timestamp("1/1/2023") + time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1))) + return end_time + time_delta from shutil import copytree, rmtree class ModificationFunctions: @@ -221,7 +228,11 @@ class ModificationFunctions: Modification functions (update, append, delete) need a different setup/teardown process, thus we place them in a separate group. """ + rounds = 1 number = 1 # We do a single run between setup and teardown because we e.g. can't delete a symbol twice + repeat = 3 + warmup_time=0 + timeout = 6000 ARCTIC_DIR = "modification_functions" ARCTIC_DIR_ORIGINAL = "modification_functions_original" @@ -232,7 +243,42 @@ class ModificationFunctions: params = PARAMS param_names = PARAM_NAMES + class LargeAppendDataModify: + """ + This class will hold a cache of append large dataframes + The purpose of this cache is to create dataframes + which timestamps are sequenced over time so that + overlap does not occur + """ + + def __init__(self, num_rows_list:List[int], number_elements:int): + self.df_append_large = {} + self.df_append_short_wide = {} + start_time = time.time() + for rows in num_rows_list: + print("Generating dataframe with rows: ", rows) + lst = list() + lst_saw = list() + for n in range(number_elements+1): + print("Generating dataframe no: ", n) + + df = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(2*(n+1), rows)) + df_saw = generate_random_floats_dataframe_with_index( + ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, "s", + get_time_at_fraction_of_df(2*(n+1), rows=ModificationFunctions.WIDE_DF_ROWS) + ) + + lst.append(df) + lst_saw.append(df_saw) + print(f"STANDARD Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}") + print(f"SHORT_n_WIDE Index {df_saw.iloc[0].name} - {df_saw.iloc[df_saw.shape[0] - 1].name}") + print("Add dataframes: ", len(lst)) + self.df_append_large[rows] = lst + self.df_append_short_wide[rows] = lst_saw + print("APPEND LARGE cache generation took (s) :", time.time() - start_time) + def setup_cache(self): + self.ac = Arctic(ModificationFunctions.CONNECTION_STRING) rows_values = ModificationFunctions.params @@ -241,7 +287,9 @@ def setup_cache(self): lib_name = get_prewritten_lib_name(rows) self.ac.delete_library(lib_name) lib = self.ac.create_library(lib_name) - lib.write("sym", self.init_dfs[rows]) + df = self.init_dfs[rows] + lib.write("sym", df) + print(f"INITIAL DATAFRAME {rows} rows has Index {df.iloc[0].name} - {df.iloc[df.shape[0] - 1].name}") lib_name = get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS) self.ac.delete_library(lib_name) @@ -257,62 +305,61 @@ def setup_cache(self): # Then on each teardown we restore the initial state by overwriting the modified with the original. 
copytree(ModificationFunctions.ARCTIC_DIR, ModificationFunctions.ARCTIC_DIR_ORIGINAL) + number_iteration = ModificationFunctions.repeat * ModificationFunctions.number * ModificationFunctions.rounds - def setup(self, rows): - def get_time_at_fraction_of_df(fraction, rows=rows): - end_time = pd.Timestamp("1/1/2023") - time_delta = pd.tseries.offsets.DateOffset(seconds=round(rows * (fraction-1))) - return end_time + time_delta + lad = ModificationFunctions.LargeAppendDataModify(ModificationFunctions.params, number_iteration) + + return lad + + def setup(self, lad: LargeAppendDataModify, rows): - self.df_update_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(0.5)) - self.df_update_half = generate_pseudo_random_dataframe(rows//2, "s", get_time_at_fraction_of_df(0.75)) - self.df_update_upsert = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(1.5)) - self.df_append_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(1.1)) - self.df_append_large = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(2)) + self.df_update_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(0.5, rows)) + self.df_update_half = generate_pseudo_random_dataframe(rows//2, "s", get_time_at_fraction_of_df(0.75, rows)) + self.df_update_upsert = generate_pseudo_random_dataframe(rows, "s", get_time_at_fraction_of_df(1.5, rows)) + self.df_append_single = generate_pseudo_random_dataframe(1, "s", get_time_at_fraction_of_df(1.1, rows)) self.df_update_short_wide = generate_random_floats_dataframe_with_index( ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS ) - self.df_append_short_wide = generate_random_floats_dataframe_with_index( - ModificationFunctions.WIDE_DF_ROWS, ModificationFunctions.WIDE_DF_COLS, "s", get_time_at_fraction_of_df(2, rows=ModificationFunctions.WIDE_DF_ROWS) - ) self.ac = Arctic(ModificationFunctions.CONNECTION_STRING) self.lib = self.ac[get_prewritten_lib_name(rows)] self.lib_short_wide = self.ac[get_prewritten_lib_name(ModificationFunctions.WIDE_DF_ROWS)] - def teardown(self, rows): + def teardown(self, lad: LargeAppendDataModify, rows): # After the modification functions clean up the changes by replacing the modified ARCTIC_DIR with the original ARCTIC_DIR_ORIGINAL # TODO: We can use dirs_exist_ok=True on copytree instead of removing first if we run with python version >=3.8 rmtree(ModificationFunctions.ARCTIC_DIR) copytree(ModificationFunctions.ARCTIC_DIR_ORIGINAL, ModificationFunctions.ARCTIC_DIR) - del self.ac + del self.ac - def time_update_single(self, rows): + def time_update_single(self, lad: LargeAppendDataModify, rows): self.lib.update(f"sym", self.df_update_single) - def time_update_half(self, rows): + def time_update_half(self, lad: LargeAppendDataModify, rows): self.lib.update(f"sym", self.df_update_half) - def time_update_upsert(self, rows): + def time_update_upsert(self, lad: LargeAppendDataModify, rows): self.lib.update(f"sym", self.df_update_upsert, upsert=True) - def time_update_short_wide(self, rows): + def time_update_short_wide(self, lad: LargeAppendDataModify, rows): self.lib_short_wide.update("short_wide_sym", self.df_update_short_wide) - def time_append_single(self, rows): + def time_append_single(self, lad: LargeAppendDataModify, rows): self.lib.append(f"sym", self.df_append_single) - def time_append_large(self, rows): - self.lib.append(f"sym", self.df_append_large) + def time_append_large(self, lad: LargeAppendDataModify, rows): + large: 
pd.DataFrame = lad.df_append_large[rows].pop(0) + self.lib.append(f"sym", large) - def time_append_short_wide(self, rows): - self.lib_short_wide.append("short_wide_sym", self.df_append_short_wide) + def time_append_short_wide(self, lad: LargeAppendDataModify, rows): + large: pd.DataFrame = lad.df_append_short_wide[rows].pop(0) + self.lib_short_wide.append("short_wide_sym", large) - def time_delete(self, rows): + def time_delete(self, lad: LargeAppendDataModify, rows): self.lib.delete(f"sym") - def time_delete_short_wide(self, rows): + def time_delete_short_wide(self, lad: LargeAppendDataModify, rows): self.lib_short_wide.delete("short_wide_sym") diff --git a/python/benchmarks/bi_benchmarks.py b/python/benchmarks/bi_benchmarks.py index b67595b061..33481e5f83 100644 --- a/python/benchmarks/bi_benchmarks.py +++ b/python/benchmarks/bi_benchmarks.py @@ -184,8 +184,4 @@ def time_query_groupby_city_count_filter_two_aggregations(self, times_bigger) -> def peakmem_query_groupby_city_count_filter_two_aggregations(self, times_bigger): - return self.query_groupby_city_count_filter_two_aggregations(times_bigger) - - - - + return self.query_groupby_city_count_filter_two_aggregations(times_bigger) \ No newline at end of file diff --git a/python/benchmarks/finalize_staged_data.py b/python/benchmarks/finalize_staged_data.py index e0a2c86bab..0237cbddba 100644 --- a/python/benchmarks/finalize_staged_data.py +++ b/python/benchmarks/finalize_staged_data.py @@ -7,18 +7,20 @@ from arcticdb.util.utils import TimestampNumber """ +import sys from arcticdb.arctic import Arctic from arcticdb.util.utils import CachedDFGenerator, TimestampNumber, stage_chunks from arcticdb.version_store.library import Library, StagedDataFinalizeMethod from .common import * -from asv_runner.benchmarks.mark import SkipNotImplemented + class FinalizeStagedData: - ''' - Check and benchmark performance of finalize_staged_data(). - Due to specifics of this procedure we tune asv to make single measurement - which would be over a relatively big staged data. - ''' + """ + Check and benchmark performance of finalize_staged_data(). + Due to specifics of this procedure we tune asv to make single measurement + which would be over a relatively big staged data. + """ + number = 1 rounds = 1 repeat = 1 @@ -27,7 +29,7 @@ class FinalizeStagedData: timeout = 600 LIB_NAME = "Finalize_Staged_Data_LIB" - #Define the number of chunks + # Define the number of chunks params = [1000, 2000] def __init__(self): @@ -39,11 +41,11 @@ def setup_cache(self): # Generating dataframe with all kind of supported data types cachedDF = CachedDFGenerator(350000, [5]) return cachedDF - - def setup(self, cache:CachedDFGenerator, param:int): + + def setup(self, cache: CachedDFGenerator, param: int): cachedDF = cache - - # Unfortunately there is no way to tell asv to run single time + + # Unfortunately there is no way to tell asv to run single time # each of finalize_stage_data() tests if we do the large setup in the # setup_cache() method. 
We can only force it to work with single execution # if the symbol setup with stage data is in the setup() method @@ -52,7 +54,9 @@ def setup(self, cache:CachedDFGenerator, param:int): self.ac.delete_library(self.lib_name) self.lib = self.ac.create_library(self.lib_name) - INITIAL_TIMESTAMP: TimestampNumber = TimestampNumber(0, cachedDF.TIME_UNIT) # Synchronize index frequency + INITIAL_TIMESTAMP: TimestampNumber = TimestampNumber( + 0, cachedDF.TIME_UNIT + ) # Synchronize index frequency df = cachedDF.generate_dataframe_timestamp_indexed(200, 0, cachedDF.TIME_UNIT) list_of_chunks = [10000] * param @@ -61,45 +65,51 @@ def setup(self, cache:CachedDFGenerator, param:int): self.lib.write(self.symbol, data=df, prune_previous_versions=True) stage_chunks(self.lib, self.symbol, cachedDF, INITIAL_TIMESTAMP, list_of_chunks) - def time_finalize_staged_data(self, cache:CachedDFGenerator, param:int): + def time_finalize_staged_data(self, cache: CachedDFGenerator, param: int): print(">>> Library:", self.lib) print(">>> Symbol:", self.symbol) self.lib.finalize_staged_data(self.symbol, mode=StagedDataFinalizeMethod.WRITE) - def peakmem_finalize_staged_data(self, cache:CachedDFGenerator, param:int): + def peakmem_finalize_staged_data(self, cache: CachedDFGenerator, param: int): print(">>> Library:", self.lib) print(">>> Symbol:", self.symbol) self.lib.finalize_staged_data(self.symbol, mode=StagedDataFinalizeMethod.WRITE) - def teardown(self, cache:CachedDFGenerator, param:int): + def teardown(self, cache: CachedDFGenerator, param: int): self.ac.delete_library(self.lib_name) + +from asv_runner.benchmarks.mark import SkipNotImplemented + + class FinalizeStagedDataWiderDataframeX3(FinalizeStagedData): - ''' - The test is meant to be executed with 3 times wider dataframe than the base test - ''' + """ + The test is meant to be executed with 3 times wider dataframe than the base test + """ def setup_cache(self): # Generating dataframe with all kind of supported data type - cachedDF = CachedDFGenerator(350000, [5, 25, 50]) # 3 times wider DF with bigger string columns + cachedDF = CachedDFGenerator( + 350000, [5, 25, 50] + ) # 3 times wider DF with bigger string columns return cachedDF - - def setup(self, cache:CachedDFGenerator, param:int): - if (not SLOW_TESTS): - raise SkipNotImplemented ("Slow tests are skipped") - super().setup(cache,param) - - def time_finalize_staged_data(self, cache:CachedDFGenerator, param:int): - if (not SLOW_TESTS): - raise SkipNotImplemented ("Slow tests are skipped") - super().time_finalize_staged_data(cache,param) - - def peakmem_finalize_staged_data(self, cache:CachedDFGenerator, param:int): - if (not SLOW_TESTS): - raise SkipNotImplemented ("Slow tests are skipped") - super().peakmem_finalize_staged_data(cache,param) - - def teardown(self, cache:CachedDFGenerator, param:int): - if (SLOW_TESTS): + + def setup(self, cache: CachedDFGenerator, param: int): + if not SLOW_TESTS: + raise SkipNotImplemented("Slow tests are skipped") + super().setup(cache, param) + + def time_finalize_staged_data(self, cache: CachedDFGenerator, param: int): + if not SLOW_TESTS: + raise SkipNotImplemented("Slow tests are skipped") + super().time_finalize_staged_data(cache, param) + + def peakmem_finalize_staged_data(self, cache: CachedDFGenerator, param: int): + if not SLOW_TESTS: + raise SkipNotImplemented("Slow tests are skipped") + super().peakmem_finalize_staged_data(cache, param) + + def teardown(self, cache: CachedDFGenerator, param: int): + if SLOW_TESTS: # Run only on slow tests 
self.ac.delete_library(self.lib_name) From b81fdc1be84257b474e4dfaf5624b3ca5bfa7cdc Mon Sep 17 00:00:00 2001 From: James Munro <283605+jamesmunro@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:53:28 +0000 Subject: [PATCH 05/11] Update alias-type for netlify hosting (#2069) Symbolic links don't work on netlify. `copy` will create a copy of the versioned docs under `latest`. --- docs/mkdocs/mkdocs.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml index b6ba7841ae..c447ea14fd 100644 --- a/docs/mkdocs/mkdocs.yml +++ b/docs/mkdocs/mkdocs.yml @@ -47,6 +47,8 @@ extra_css: plugins: - search - autorefs + - mike: + alias_type: copy - mkdocs-jupyter: ignore_h1_titles: True include_source: True From 860e1527e730c6b9d5266e7665f3306e6f40bfe9 Mon Sep 17 00:00:00 2001 From: Ogynan Stoimenov Date: Tue, 17 Dec 2024 10:58:36 +0200 Subject: [PATCH 06/11] Normalization exceptions test coverage (#2071) #### Reference Issues/PRs Adds tests as continuation of #2036 #### What does this implement or fix? Tests whether the exceptions are handled correctly when the normalizers throw in _try_normalize(). #### Any other comments? #### Checklist
Checklist for code changes...
- [ ] Have you updated the relevant docstrings, documentation and copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
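For readers skimming the diff that follows, this is the mocking pattern the new test relies on, reduced to a single case: replace the store's `_normalizer` with a `MagicMock` whose `normalize` raises, then assert the public API surfaces the expected exception type. This is a sketch only; the `lmdb_version_store` fixture and the `arcticdb.exceptions` import path are assumptions based on the test file below, not new API.

```python
import pytest
from unittest.mock import MagicMock

from arcticdb.exceptions import ArcticDbNotYetImplemented  # assumed import path


def test_write_surfaces_normalizer_error(lmdb_version_store):
    # A normalizer that always fails, standing in for a throwing _try_normalize().
    mock_normalizer = MagicMock(name="mock_normalizer")
    mock_normalizer.normalize.side_effect = ArcticDbNotYetImplemented()
    lmdb_version_store._normalizer = mock_normalizer

    # The user-facing call should re-raise the documented exception type.
    with pytest.raises(ArcticDbNotYetImplemented):
        lmdb_version_store.write("sym", MagicMock())
```

The full test in the diff parametrises this idea over `write`, `update`, `stage` and `append`, and over several normalizer failure modes.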
--- .../version_store/test_normalization.py | 38 ++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/python/tests/unit/arcticdb/version_store/test_normalization.py b/python/tests/unit/arcticdb/version_store/test_normalization.py index f82b608751..4a65580aed 100644 --- a/python/tests/unit/arcticdb/version_store/test_normalization.py +++ b/python/tests/unit/arcticdb/version_store/test_normalization.py @@ -6,10 +6,11 @@ As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. """ import datetime +import inspect import itertools import sys from collections import namedtuple -from unittest.mock import patch +from unittest.mock import patch, MagicMock import numpy as np import pandas as pd import dateutil as du @@ -714,3 +715,38 @@ def test_series_zero_name(lmdb_version_store, sym): lib.write(sym, series) vit = lib.read(sym) assert vit.data.equals(series) + +@pytest.mark.parametrize( + "returns_expected", + [ + {"returns": ArcticDbNotYetImplemented(), "expected": ArcticDbNotYetImplemented}, + {"returns": Exception(), "expected": ArcticNativeException}, + {"returns": (MagicMock(), None), "expected": ArcticNativeException} + ] +) +@pytest.mark.parametrize( + "method_to_test", + [ + "write", "update", "stage", "append" + ] +) +def test_throws_correct_exceptions(returns_expected, method_to_test, lmdb_version_store): + mock_normalizer = MagicMock(name="mock_normalizer") + returns = returns_expected["returns"] + expected = returns_expected["expected"] + normalize_method_mock = mock_normalizer.normalize + if isinstance(returns, Exception): + normalize_method_mock.side_effect = returns + else: + normalize_method_mock.return_value = returns + lib = lmdb_version_store + lib._normalizer = mock_normalizer + + method_to_test = getattr(lib, method_to_test) + non_default_arg_count = sum( + 1 for param in inspect.signature(method_to_test).parameters.values() + if param.default is param.empty + ) - 1 + args = [MagicMock()] * non_default_arg_count + with pytest.raises(expected): + method_to_test(*args) From d2e568e12cc5b7ee90b622581171510fc8b90fa0 Mon Sep 17 00:00:00 2001 From: Georgi Petrov <32372905+G-D-Petrov@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:54:51 +0200 Subject: [PATCH 07/11] Fix error during publishing (#2074) #### Reference Issues/PRs #### What does this implement or fix? ASV has started writing `np.nan`s to the `JSON`s. When we try to evaluate them during publishing, it fails because they are not defined. This PRs defines them as was done for `np.inf ` in a previous PR #### Any other comments? #### Checklist
Checklist for code changes...
- [ ] Have you updated the relevant docstrings, documentation and copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
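The failure mode is easiest to see with a small, self-contained illustration (the string below is illustrative, not copied from a real results file): the stored values are evaluated as Python expressions, so bare `nan` and `inf` identifiers must resolve to bound names in the evaluating module, which is exactly what the one-line import change below provides.

```python
# Hypothetical fragment of an ASV results value, assuming it is eval'd the way
# build_tooling/transform_asv_results.py evaluates result strings.
raw_result = "[0.0123, nan, inf]"

try:
    eval(raw_result)
except NameError as err:
    print(err)  # "name 'nan' is not defined": the publishing failure

from numpy import inf, nan  # noqa: F401  (the names are used implicitly by eval)

print(eval(raw_result))  # [0.0123, nan, inf]: the same eval now succeeds
```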
--- build_tooling/transform_asv_results.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/build_tooling/transform_asv_results.py b/build_tooling/transform_asv_results.py index cd5c4418f2..a464f846e4 100644 --- a/build_tooling/transform_asv_results.py +++ b/build_tooling/transform_asv_results.py @@ -7,7 +7,9 @@ """ import pandas as pd -from numpy import inf + +# Importing inf and nan so they can be evaluated correctly during extraction +from numpy import inf, nan from arcticdb.storage_fixtures.s3 import real_s3_from_environment_variables import json from pathlib import Path From 1a012f2c69b73cba5227046f5cea009267d7fdd5 Mon Sep 17 00:00:00 2001 From: Georgi Petrov <32372905+G-D-Petrov@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:28:28 +0200 Subject: [PATCH 08/11] Try to remove any toolset (#2075) #### Reference Issues/PRs #### What does this implement or fix? GCC toolset 14 has been added to the manylinux image that we are using and that is breaking our flows. This PR changes the flow to remove any toolset, not just 13. N.B.: The proper fix would be to create our own dev image on top of the manylinux one #### Any other comments? #### Checklist
Checklist for code changes...
- [ ] Have you updated the relevant docstrings, documentation and copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
--- .github/actions/setup_deps/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/setup_deps/action.yml b/.github/actions/setup_deps/action.yml index f0a3ff938d..eca34a5738 100644 --- a/.github/actions/setup_deps/action.yml +++ b/.github/actions/setup_deps/action.yml @@ -7,7 +7,7 @@ runs: shell: bash -l {0} run: | dnf update -y - dnf remove -y 'gcc-toolset-13-*' + dnf remove -y 'gcc-toolset-*' dnf install -y zip flex bison gcc-toolset-10 gcc-toolset-10-gdb gcc-toolset-10-libatomic-devel krb5-devel cyrus-sasl-devel openssl-devel \ unzip tar epel-release jq wget libcurl-devel \ python3.11-devel python3.11-pip perl-IPC-Cmd From ad0b3b4f4d370635d607d4200407822a57cb6592 Mon Sep 17 00:00:00 2001 From: Vasil Danielov Pashov Date: Wed, 18 Dec 2024 11:38:42 +0200 Subject: [PATCH 09/11] Storage mover port (#2039) #### Reference Issues/PRs Add the StorageMover class from arcticc. Some changes in the implementation of the StorageMover were needed as `batch_read_compressed` used to return not only the keys but the segments as well. In the current ArcticDB version the segment is passed as continuation and that required keeping additional vector where the segments themselves are stored. Make sure all unit tests are passing. No changes to the API are done. Note in arcticc it was in the tools module while here it's in the toolbox module. #### What does this implement or fix? #### Any other comments? #### Checklist
Checklist for code changes...
- [ ] Have you updated the relevant docstrings, documentation and copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
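Because the pybind11 bindings in this patch expose `StorageMover` to Python, a rough usage sketch may help reviewers. The import location (`arcticdb_ext.tools`) and the way the two native `Library` handles are obtained are assumptions; the integration test added in this patch (`python/tests/integration/toolbox/test_storage_mover.py`) is the authoritative reference.

```python
# Hedged sketch only: the binding registers StorageMover(source_library, target_library)
# with go(), incremental_copy(), get_keys_in_source_only(), etc. The import path
# below is an assumption about where the toolbox bindings land.
from arcticdb_ext.tools import StorageMover


def mirror_library(source_lib, target_lib, batch_size=1000):
    """source_lib / target_lib are the native Library objects the C++ constructor expects."""
    mover = StorageMover(source_lib, target_lib)
    mover.go(batch_size)                    # bulk copy across all key types
    return mover.get_keys_in_source_only()  # anything still missing in the target
```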
--- cpp/arcticdb/CMakeLists.txt | 3 + cpp/arcticdb/async/async_store.hpp | 4 +- cpp/arcticdb/async/task_scheduler.hpp | 4 +- cpp/arcticdb/entity/atom_key.hpp | 4 +- cpp/arcticdb/entity/key.cpp | 4 + cpp/arcticdb/entity/key.hpp | 34 +- cpp/arcticdb/entity/metrics.hpp | 4 +- cpp/arcticdb/storage/storage_utils.cpp | 147 ++++ cpp/arcticdb/storage/storage_utils.hpp | 41 +- cpp/arcticdb/storage/storages.hpp | 2 +- cpp/arcticdb/stream/index_aggregator.hpp | 8 + cpp/arcticdb/stream/segment_aggregator.hpp | 1 - cpp/arcticdb/stream/stream_source.hpp | 4 +- cpp/arcticdb/toolbox/library_tool.hpp | 3 +- cpp/arcticdb/toolbox/python_bindings.cpp | 29 + cpp/arcticdb/toolbox/python_bindings.hpp | 7 - cpp/arcticdb/toolbox/storage_mover.hpp | 659 ++++++++++++++++++ cpp/arcticdb/util/error_code.hpp | 3 +- .../version/local_versioned_engine.cpp | 4 +- cpp/arcticdb/version/python_bindings.cpp | 4 +- cpp/arcticdb/version/version_core.cpp | 2 +- cpp/arcticdb/version/version_store_api.cpp | 2 +- python/arcticdb/toolbox/storage.py | 3 + .../integration/toolbox/test_storage_mover.py | 330 +++++++++ 24 files changed, 1227 insertions(+), 79 deletions(-) create mode 100644 cpp/arcticdb/storage/storage_utils.cpp create mode 100644 cpp/arcticdb/toolbox/storage_mover.hpp create mode 100644 python/arcticdb/toolbox/storage.py create mode 100644 python/tests/integration/toolbox/test_storage_mover.py diff --git a/cpp/arcticdb/CMakeLists.txt b/cpp/arcticdb/CMakeLists.txt index 429b8acad4..e41495c5d4 100644 --- a/cpp/arcticdb/CMakeLists.txt +++ b/cpp/arcticdb/CMakeLists.txt @@ -308,6 +308,7 @@ set(arcticdb_srcs storage/storage.hpp storage/storage_override.hpp storage/store.hpp + storage/storage_utils.hpp stream/aggregator.hpp stream/aggregator-inl.hpp stream/append_map.hpp @@ -328,6 +329,7 @@ set(arcticdb_srcs stream/stream_utils.hpp stream/stream_writer.hpp toolbox/library_tool.hpp + toolbox/storage_mover.hpp util/allocator.hpp util/bitset.hpp util/buffer.hpp @@ -486,6 +488,7 @@ set(arcticdb_srcs storage/s3/s3_storage.cpp storage/s3/s3_storage_tool.cpp storage/storage_factory.cpp + storage/storage_utils.cpp stream/aggregator.cpp stream/append_map.cpp stream/index.cpp diff --git a/cpp/arcticdb/async/async_store.hpp b/cpp/arcticdb/async/async_store.hpp index fed575a817..3bfe961703 100644 --- a/cpp/arcticdb/async/async_store.hpp +++ b/cpp/arcticdb/async/async_store.hpp @@ -51,11 +51,11 @@ class AsyncStore : public Store { public: AsyncStore( std::shared_ptr library, - const arcticdb::proto::encoding::VariantCodec &codec, + const proto::encoding::VariantCodec &codec, EncodingVersion encoding_version ) : library_(std::move(library)), - codec_(std::make_shared(codec)), + codec_(std::make_shared(codec)), encoding_version_(encoding_version) { } diff --git a/cpp/arcticdb/async/task_scheduler.hpp b/cpp/arcticdb/async/task_scheduler.hpp index 08e4ba419c..4efd408cdd 100644 --- a/cpp/arcticdb/async/task_scheduler.hpp +++ b/cpp/arcticdb/async/task_scheduler.hpp @@ -300,13 +300,13 @@ inline auto& io_executor() { } template -inline auto submit_cpu_task(Task&& task) { +auto submit_cpu_task(Task&& task) { return TaskScheduler::instance()->submit_cpu_task(std::forward(task)); } template -inline auto submit_io_task(Task&& task) { +auto submit_io_task(Task&& task) { return TaskScheduler::instance()->submit_io_task(std::forward(task)); } diff --git a/cpp/arcticdb/entity/atom_key.hpp b/cpp/arcticdb/entity/atom_key.hpp index 5197615c3f..7240979cd0 100644 --- a/cpp/arcticdb/entity/atom_key.hpp +++ b/cpp/arcticdb/entity/atom_key.hpp @@ -91,8 +91,8 @@ 
class AtomKeyImpl { } friend bool operator<(const AtomKeyImpl &l, const AtomKeyImpl &r) { - auto lt = std::tie(l.id_, l.version_id_, l.index_start_, l.index_end_, l.creation_ts_); - auto rt = std::tie(r.id_, r.version_id_, r.index_start_, r.index_end_, r.creation_ts_); + const auto lt = std::tie(l.id_, l.version_id_, l.index_start_, l.index_end_, l.creation_ts_); + const auto rt = std::tie(r.id_, r.version_id_, r.index_start_, r.index_end_, r.creation_ts_); return lt < rt; } diff --git a/cpp/arcticdb/entity/key.cpp b/cpp/arcticdb/entity/key.cpp index 8c31aee12c..1632e3c5a2 100644 --- a/cpp/arcticdb/entity/key.cpp +++ b/cpp/arcticdb/entity/key.cpp @@ -90,6 +90,10 @@ KeyClass key_class_from_key_type(KeyType key_type) { return get_key_data(key_type).key_class_; } +const char* get_key_description(KeyType key_type) { + return get_key_data(key_type).description_; +} + bool is_string_key_type(KeyType key_type){ return variant_type_from_key_type(key_type) == VariantType::STRING_TYPE; } diff --git a/cpp/arcticdb/entity/key.hpp b/cpp/arcticdb/entity/key.hpp index 54b57c8fb9..3ecc51e27f 100644 --- a/cpp/arcticdb/entity/key.hpp +++ b/cpp/arcticdb/entity/key.hpp @@ -16,6 +16,10 @@ #include #include #include +#include +#include + +namespace rng = std::ranges; namespace arcticdb::entity { @@ -193,10 +197,10 @@ enum class KeyType : int { UNDEFINED }; -inline std::vector key_types_write_precedence() { +consteval auto key_types_write_precedence() { // TOMBSTONE[_ALL] keys are not included because they're not written to the storage, // they just exist inside version keys - return { + return std::array { KeyType::LIBRARY_CONFIG, KeyType::TABLE_DATA, KeyType::TABLE_INDEX, @@ -215,9 +219,9 @@ inline std::vector key_types_write_precedence() { }; } -inline std::vector key_types_read_precedence() { +consteval auto key_types_read_precedence() { auto output = key_types_write_precedence(); - std::reverse(std::begin(output), std::end(output)); + rng::reverse(output); return output; } @@ -247,7 +251,7 @@ enum class VariantType : char { VariantType variant_type_from_key_type(KeyType key_type); -inline bool is_index_key_type(KeyType key_type) { +constexpr bool is_index_key_type(KeyType key_type) { // TODO: Change name probably. 
return (key_type == KeyType::TABLE_INDEX) || (key_type == KeyType::MULTI_KEY); } @@ -258,30 +262,26 @@ bool is_ref_key_class(KeyType k); bool is_block_ref_key_class(KeyType k); -inline KeyType get_key_type_for_data_stream(const StreamId &) { +constexpr KeyType get_key_type_for_data_stream(const StreamId &) { return KeyType::TABLE_DATA; } -inline KeyType get_key_type_for_index_stream(const StreamId &) { +constexpr KeyType get_key_type_for_index_stream(const StreamId &) { return KeyType::TABLE_INDEX; } +const char* get_key_description(KeyType type); template -auto foreach_key_type_read_precedence(Function&& func) { - auto types = key_types_read_precedence(); - for(auto type : types) { - func(KeyType(type)); - } +constexpr auto foreach_key_type_read_precedence(Function&& func) { + rng::for_each(key_types_read_precedence(), func); } template -auto foreach_key_type_write_precedence(Function&& func) { - auto types = key_types_write_precedence(); - for(auto type : types) { - func(KeyType(type)); - } +constexpr auto foreach_key_type_write_precedence(Function&& func) { + rng::for_each(key_types_write_precedence(), func); } + inline KeyType key_type_from_int(int type_num) { util::check(type_num > 0 && type_num < int(KeyType::UNDEFINED), "Unrecognized key type number {}", type_num); return KeyType(type_num); diff --git a/cpp/arcticdb/entity/metrics.hpp b/cpp/arcticdb/entity/metrics.hpp index 4819098cbe..db56a0551c 100644 --- a/cpp/arcticdb/entity/metrics.hpp +++ b/cpp/arcticdb/entity/metrics.hpp @@ -32,8 +32,8 @@ namespace arcticdb { const std::string MONGO_INSTANCE_LABEL = "mongo_instance"; const std::string PROMETHEUS_ENV_LABEL = "env"; -const int SUMMARY_MAX_AGE = 30; -const int SUMMARY_AGE_BUCKETS = 5; +constexpr int SUMMARY_MAX_AGE = 30; +constexpr int SUMMARY_AGE_BUCKETS = 5; class MetricsConfig { public: diff --git a/cpp/arcticdb/storage/storage_utils.cpp b/cpp/arcticdb/storage/storage_utils.cpp new file mode 100644 index 0000000000..7ae6816b38 --- /dev/null +++ b/cpp/arcticdb/storage/storage_utils.cpp @@ -0,0 +1,147 @@ +#include +#include +#include +#include + +namespace arcticdb { + +std::vector filter_keys_on_existence( + const std::vector& keys, + const std::shared_ptr& store, + bool pred + ){ + auto key_existence = folly::collect(store->batch_key_exists(keys)).get(); + std::vector res; + for (size_t i = 0; i != keys.size(); i++) { + if (key_existence[i] == pred) { + res.push_back(keys[i]); + } + } + return res; +} + +void filter_keys_on_existence(std::vector& keys, const std::shared_ptr& store, bool pred) { + std::vector var_vector; + var_vector.reserve(keys.size()); + rng::copy(keys, std::back_inserter(var_vector)); + + auto key_existence = store->batch_key_exists(var_vector); + + auto keys_itr = keys.begin(); + for (size_t i = 0; i != var_vector.size(); i++) { + bool resolved = key_existence[i].wait().value(); + if (resolved == pred) { + *keys_itr = std::move(std::get(var_vector[i])); + ++keys_itr; + } + } + keys.erase(keys_itr, keys.end()); +} + +AtomKey write_table_index_tree_from_source_to_target( + const std::shared_ptr& source_store, + const std::shared_ptr& target_store, + const AtomKey& index_key, + std::optional new_version_id +) { + ARCTICDB_SAMPLE(WriteIndexSourceToTarget, 0) + // In + auto [_, index_seg] = source_store->read_sync(index_key); + index::IndexSegmentReader index_segment_reader{std::move(index_seg)}; + // Out + index::IndexWriter writer(target_store, + {index_key.id(), new_version_id.value_or(index_key.version_id())}, + 
std::move(index_segment_reader.mutable_tsd())); + std::vector> futures; + // Process + for (auto iter = index_segment_reader.begin(); iter != index_segment_reader.end(); ++iter) { + auto& sk = *iter; + auto& key = sk.key(); + std::optional key_to_write = atom_key_builder() + .version_id(new_version_id.value_or(key.version_id())) + .creation_ts(util::SysClock::nanos_since_epoch()) + .start_index(key.start_index()) + .end_index(key.end_index()) + .content_hash(key.content_hash()) + .build(key.id(), key.type()); + + writer.add(*key_to_write, sk.slice()); // Both const ref + futures.emplace_back(submit_io_task(async::CopyCompressedInterStoreTask{ + sk.key(), + std::move(key_to_write), + false, + false, + source_store, + {target_store}})); + } + const std::vector store_results = collect(futures).get(); + for (const async::CopyCompressedInterStoreTask::ProcessingResult& res: store_results) { + util::variant_match( + res, + [&](const async::CopyCompressedInterStoreTask::FailedTargets& failed) { + log::storage().error("Failed to move targets: {} from {} to {}", failed, source_store->name(), target_store->name()); + }, + [](const auto&){}); + } + // FUTURE: clean up already written keys if exception + return to_atom(writer.commit().get()); +} + +AtomKey copy_multi_key_from_source_to_target( + const std::shared_ptr& source_store, + const std::shared_ptr& target_store, + const AtomKey& index_key, + std::optional new_version_id) { + using namespace arcticdb::stream; + auto fut_index = source_store->read(index_key); + auto [_, index_seg] = std::move(fut_index).get(); + std::vector keys; + for (size_t idx = 0; idx < index_seg.row_count(); idx++) { + keys.push_back(stream::read_key_row(index_seg, static_cast(idx))); + } + // Recurse on the index keys inside MULTI_KEY + std::vector new_data_keys; + for (const auto &k: keys) { + auto new_key = copy_index_key_recursively(source_store, target_store, k, new_version_id); + new_data_keys.emplace_back(std::move(new_key)); + } + // Write new MULTI_KEY + + folly::Future multi_key_fut = folly::Future::makeEmpty(); + IndexAggregator multi_index_agg(index_key.id(), [&new_version_id, &index_key, &multi_key_fut, &target_store](auto &&segment) { + multi_key_fut = target_store->write(KeyType::MULTI_KEY, + new_version_id.value_or(index_key.version_id()), // version_id + index_key.id(), + 0, // start_index + 0, // end_index + std::forward(segment)).wait(); + }); + for (auto &key: new_data_keys) { + multi_index_agg.add_key(to_atom(key)); + } + if (index_seg.has_metadata()) { + google::protobuf::Any metadata = *index_seg.metadata(); + multi_index_agg.set_metadata(std::move(metadata)); + } + if (index_seg.has_index_descriptor()) { + multi_index_agg.set_timeseries_descriptor(index_seg.index_descriptor()); + } + multi_index_agg.commit(); + return to_atom(multi_key_fut.value()); +} + +AtomKey copy_index_key_recursively( + const std::shared_ptr& source_store, + const std::shared_ptr& target_store, + const AtomKey& index_key, + std::optional new_version_id) { + ARCTICDB_SAMPLE(RecurseIndexKey, 0) + if (index_key.type() == KeyType::TABLE_INDEX) { + return write_table_index_tree_from_source_to_target(source_store, target_store, index_key, new_version_id); + } else if (index_key.type() == KeyType::MULTI_KEY) { + return copy_multi_key_from_source_to_target(source_store, target_store, index_key, new_version_id); + } + internal::raise("Cannot copy index recursively. 
Unsupported index key type {}", index_key.type()); +} + +} \ No newline at end of file diff --git a/cpp/arcticdb/storage/storage_utils.hpp b/cpp/arcticdb/storage/storage_utils.hpp index a99e8733d8..d825d370d7 100644 --- a/cpp/arcticdb/storage/storage_utils.hpp +++ b/cpp/arcticdb/storage/storage_utils.hpp @@ -18,38 +18,13 @@ inline auto stream_id_prefix_matcher(const std::string &prefix) { std::get(id).compare(0u, prefix.size(), prefix) == 0); }; } -inline std::vector filter_keys_on_existence( - const std::vector& keys, - const std::shared_ptr& store, - bool pred - ){ - auto key_existence = folly::collect(store->batch_key_exists(keys)).get(); - std::vector res; - for (size_t i = 0; i != keys.size(); i++) { - if (key_existence[i] == pred) { - res.push_back(keys[i]); - } - } - return res; -} - -inline void filter_keys_on_existence(std::vector& keys, const std::shared_ptr& store, bool pred) { - std::vector var_vector; - var_vector.reserve(keys.size()); - std::transform(keys.begin(), keys.end(), std::back_inserter(var_vector), - [](auto&& k) { return VariantKey(std::move(k)); }); - - auto key_existence = store->batch_key_exists(var_vector); - - auto keys_itr = keys.begin(); - for (size_t i = 0; i != var_vector.size(); i++) { - bool resolved = key_existence[i].wait().value(); - if (resolved == pred) { - *keys_itr = std::move(std::get(var_vector[i])); - ++keys_itr; - } - } - keys.erase(keys_itr, keys.end()); -} +std::vector filter_keys_on_existence(const std::vector& keys, const std::shared_ptr& store, bool pred); +void filter_keys_on_existence(std::vector& keys, const std::shared_ptr& store, bool pred); + +AtomKey copy_index_key_recursively( + const std::shared_ptr& source_store, + const std::shared_ptr& target_store, + const AtomKey& index_key, + std::optional new_version_id); } //namespace arcticdb \ No newline at end of file diff --git a/cpp/arcticdb/storage/storages.hpp b/cpp/arcticdb/storage/storages.hpp index bc2de2c526..9b393cda4f 100644 --- a/cpp/arcticdb/storage/storages.hpp +++ b/cpp/arcticdb/storage/storages.hpp @@ -182,7 +182,7 @@ class Storages { OpenMode mode_; }; -inline std::shared_ptr create_storages(const LibraryPath& library_path, OpenMode mode, const decltype(std::declval().storage_by_id())& storage_configs, const NativeVariantStorage& native_storage_config) { +inline std::shared_ptr create_storages(const LibraryPath& library_path, OpenMode mode, decltype(std::declval().storage_by_id())& storage_configs, const NativeVariantStorage& native_storage_config) { Storages::StorageVector storages; for (const auto& [storage_id, storage_config]: storage_configs) { util::variant_match(native_storage_config.variant(), diff --git a/cpp/arcticdb/stream/index_aggregator.hpp b/cpp/arcticdb/stream/index_aggregator.hpp index 1af2294fcd..22cf2c9123 100644 --- a/cpp/arcticdb/stream/index_aggregator.hpp +++ b/cpp/arcticdb/stream/index_aggregator.hpp @@ -60,6 +60,10 @@ class FlatIndexingPolicy { segment_.set_timeseries_descriptor(timeseries_descriptor); } + void set_metadata(google::protobuf::Any&& metadata) { + segment_.set_metadata(std::move(metadata)); + } + private: Callback callback_; FixedSchema schema_; @@ -89,6 +93,10 @@ class IndexAggregator { indexing_policy_.set_timeseries_descriptor(timeseries_descriptor); } + void set_metadata(google::protobuf::Any&& metadata) { + indexing_policy_.set_metadata(std::move(metadata)); + } + private: IndexingPolicy indexing_policy_; }; diff --git a/cpp/arcticdb/stream/segment_aggregator.hpp b/cpp/arcticdb/stream/segment_aggregator.hpp index 
95660a81a5..f25b72c56f 100644 --- a/cpp/arcticdb/stream/segment_aggregator.hpp +++ b/cpp/arcticdb/stream/segment_aggregator.hpp @@ -11,7 +11,6 @@ #include #include #include -#include #include namespace arcticdb::stream { diff --git a/cpp/arcticdb/stream/stream_source.hpp b/cpp/arcticdb/stream/stream_source.hpp index 401fa8a4ac..3d7487fb9e 100644 --- a/cpp/arcticdb/stream/stream_source.hpp +++ b/cpp/arcticdb/stream/stream_source.hpp @@ -11,8 +11,6 @@ #include #include #include -#include -#include #include #include @@ -55,7 +53,7 @@ struct StreamSource { virtual bool supports_prefix_matching() const = 0; virtual bool fast_delete() = 0; - using ReadContinuation = folly::Function; + using ReadContinuation = folly::Function; virtual folly::Future> batch_read_compressed( std::vector> &&ks, diff --git a/cpp/arcticdb/toolbox/library_tool.hpp b/cpp/arcticdb/toolbox/library_tool.hpp index da5b2bec12..bfd48821f2 100644 --- a/cpp/arcticdb/toolbox/library_tool.hpp +++ b/cpp/arcticdb/toolbox/library_tool.hpp @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -53,7 +52,7 @@ class LibraryTool { void remove(VariantKey key); - std::vector find_keys(arcticdb::entity::KeyType); + std::vector find_keys(entity::KeyType); bool key_exists(const VariantKey& key); diff --git a/cpp/arcticdb/toolbox/python_bindings.cpp b/cpp/arcticdb/toolbox/python_bindings.cpp index f53b89e07a..6d70f6e425 100644 --- a/cpp/arcticdb/toolbox/python_bindings.cpp +++ b/cpp/arcticdb/toolbox/python_bindings.cpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace arcticdb::toolbox::apy { @@ -86,6 +87,34 @@ void register_bindings(py::module &m, py::exception& .def("take_lock_guard", &ReliableStorageLockManager::take_lock_guard) .def("free_lock_guard", &ReliableStorageLockManager::free_lock_guard); + + py::class_(tools, "StorageMover") + .def(py::init, std::shared_ptr>()) + .def("go", + &StorageMover::go, + "start the storage mover copy", + py::arg("batch_size") = 100) + .def("get_keys_in_source_only", + &StorageMover::get_keys_in_source_only) + .def("get_all_source_keys", + &StorageMover::get_all_source_keys, + "get_all_source_keys") + .def("incremental_copy", + &StorageMover::incremental_copy, + "incrementally copy keys") + .def("write_keys_from_source_to_target", + &StorageMover::write_keys_from_source_to_target, + "write_keys_from_source_to_target") + .def("write_symbol_trees_from_source_to_target", + &StorageMover::write_symbol_trees_from_source_to_target, + "write_symbol_trees_from_source_to_target") + .def("clone_all_keys_for_symbol", + &StorageMover::clone_all_keys_for_symbol, + "Clone all the keys that have this symbol as id to the dest library.") + .def("clone_all_keys_for_symbol_for_type", + &StorageMover::clone_all_keys_for_symbol_for_type, + "Clone all the keys that have this symbol and type to the dest library."); + // S3 Storage tool using namespace arcticdb::storage::s3; py::class_>(tools, "S3Tool") diff --git a/cpp/arcticdb/toolbox/python_bindings.hpp b/cpp/arcticdb/toolbox/python_bindings.hpp index 2975663ce5..592c368714 100644 --- a/cpp/arcticdb/toolbox/python_bindings.hpp +++ b/cpp/arcticdb/toolbox/python_bindings.hpp @@ -8,13 +8,6 @@ #pragma once #include -#include - -#include -#include - -#include -#include namespace arcticdb::toolbox::apy { diff --git a/cpp/arcticdb/toolbox/storage_mover.hpp b/cpp/arcticdb/toolbox/storage_mover.hpp new file mode 100644 index 0000000000..4a33a7d96c --- /dev/null +++ b/cpp/arcticdb/toolbox/storage_mover.hpp @@ -0,0 +1,659 @@ +#pragma once + 
+#include + +#include "codec/default_codecs.hpp" +#include "column_store/column_utils.hpp" + +#include "stream/test/stream_test_common.hpp" +#include "util/variant.hpp" +#include "fmt/format.h" +#include +#include "async/async_store.hpp" +#include "version/version_map.hpp" +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "version/version_functions.hpp" + +namespace rng = std::ranges; + +namespace arcticdb { + +constexpr std::size_t NumThreads = 50; + +struct BatchCopier { + std::atomic count_ = 0; + std::atomic objects_moved_ = 0; + std::atomic bytes_moved_ = 0; + std::atomic skipped_ = 0; + interval_timer timers_; + std::vector keys_; + std::shared_ptr source_store_; + std::shared_ptr target_store_; + size_t batch_size_; + size_t thread_count_; + + BatchCopier(std::shared_ptr source_store, + std::shared_ptr target_store, + size_t batch_size, + size_t thread_count=32) : + source_store_(std::move(source_store)), + target_store_(std::move(target_store)), + batch_size_(batch_size), + thread_count_{thread_count}{ + timers_.start_timer(); + } + + void add_key(const VariantKey& key, bool check_target=true, bool check_source=true) { + if(check_target && !is_ref_key_class(variant_key_type(key)) && target_store_->key_exists(key).get()) { + ++skipped_; + return; + } + + if(check_source && !source_store_->key_exists(key).get()) { + log::storage().warn("Found an unreadable key {}", key); + return; + } + + keys_.push_back(key); + if(keys_.size() == batch_size_) { + copy_keys(); + keys_ = std::vector(); + + if(++count_ %10 == 0) { + timers_.stop_timer(); + auto bps = bytes_moved_ / timers_.get_timer().get_results().total; + log::storage().info("Moved {}, {} objects ({} skipped), {} per second", format_bytes(bytes_moved_), objects_moved_, skipped_, format_bytes(bps)); + timers_.start_timer(); + } + } + } + + void go(std::unordered_map>&& keys, bool perform_checks) { + size_t batch_size_per_thread = std::max(batch_size_ / thread_count_, size_t{1}); + // Log approximately every 10000 objects + uint64_t logging_frequency = 10000 / batch_size_per_thread; + folly::FutureExecutor exec{thread_count_}; + std::vector> futures; + + foreach_key_type_write_precedence([&](auto key_type) { + bool check_target = perform_checks && !is_ref_key_class(key_type); + bool check_source = perform_checks; + if (auto it = keys.find(key_type); it != keys.end()) { + while(it->second.size() > 0) { + const auto start = it->second.size() >= batch_size_per_thread ? 
it->second.end() - batch_size_per_thread : it->second.begin(); + const auto end = it->second.end(); + const size_t size = std::distance(start, end); + std::vector> keys_to_copy; + keys_to_copy.reserve(size); + auto segments_ptr = std::make_unique>(size); + std::transform( + std::make_move_iterator(start), + std::make_move_iterator(end), + std::back_inserter(keys_to_copy), + [segments = segments_ptr.get(), pos = 0](VariantKey&& key) mutable { + return std::pair{std::move(key), [segments, pos=pos++](storage::KeySegmentPair&& segment) { + segments->at(pos) = std::move(segment); + return segments->at(pos).variant_key(); + }}; + } + ); + it->second.erase(start, end); + futures.emplace_back(exec.addFuture( + [this, keys_to_copy=std::move(keys_to_copy), &logging_frequency, check_target, check_source, segments_ptr=std::move(segments_ptr)]() mutable { + for (const auto& key: keys_to_copy) { + if(check_source && !source_store_->key_exists(key.first).get()) { + log::storage().warn("Found an unreadable key {}", key.first); + } + if(check_target && target_store_->key_exists(key.first).get()) { + ++skipped_; + } + } + auto collected_kvs = source_store_->batch_read_compressed(std::move(keys_to_copy), BatchReadArgs{}).get(); + if (!collected_kvs.empty()) { + const size_t bytes_being_copied = std::accumulate(segments_ptr->begin(), segments_ptr->end(), size_t{0}, [] (size_t a, const storage::KeySegmentPair& ks) { + return a + ks.segment().size(); + }); + target_store_->batch_write_compressed(*segments_ptr.release()).get(); + bytes_moved_.fetch_add(bytes_being_copied, std::memory_order_relaxed); + objects_moved_.fetch_add(collected_kvs.size(), std::memory_order_relaxed); + } + ++count_; + if (count_.compare_exchange_strong(logging_frequency, 0)) { + timers_.stop_timer(); + auto bps = bytes_moved_.load() / timers_.get_timer().get_results().total; + log::storage().info("Moved {}, {} objects ({} skipped), {} per second", + format_bytes(bytes_moved_.load()), + objects_moved_.load(), + skipped_.load(), + format_bytes(bps)); + timers_.start_timer(); + } + // count_ could be incremented to a value greater than logging_frequency, just reset it in this case + if (count_.load() > logging_frequency) { + count_.store(0); + } + return makeFuture(folly::Unit{}); + })); + } + } + }); + collect(futures).get(); + timers_.stop_timer(); + auto bps = bytes_moved_.load() / timers_.get_timer().get_results().total; + log::storage().info("Moved {}, {} objects ({} skipped), {} per second", + format_bytes(bytes_moved_.load()), + objects_moved_.load(), + skipped_.load(), + format_bytes(bps)); + } + + void copy_keys() { + std::vector segments(keys_.size()); + std::vector> keys_to_copy; + keys_to_copy.reserve(keys_.size()); + std::transform( + std::make_move_iterator(keys_.begin()), + std::make_move_iterator(keys_.end()), + std::back_inserter(keys_to_copy), + [&segments, i=0](VariantKey&& key) mutable { + return std::pair{std::move(key), [&segments, i=i++](storage::KeySegmentPair&& ks) { + segments.at(i) = std::move(ks); + return segments.at(i).variant_key(); + }}; + } + ); + keys_.clear(); + auto collected_kvs = source_store_->batch_read_compressed(std::move(keys_to_copy), BatchReadArgs{}).get(); + if (!collected_kvs.empty()) { + bytes_moved_ += std::accumulate(segments.begin(), segments.end(), size_t{0}, [] (size_t a, const storage::KeySegmentPair& ks) { + return a + ks.segment().size(); + }); + target_store_->batch_write_compressed(std::move(segments)).get(); + } + objects_moved_ += keys_.size(); + } + + void finalize() { + 
if(!keys_.empty()) { + copy_keys(); + } + timers_.stop_timer(); + auto total = timers_.get_timer().get_results().total; + auto bps = bytes_moved_ / total; + log::storage().info("Moved {} {} objects in {} - {} bps ", format_bytes(bytes_moved_), objects_moved_, total, format_bytes(bps)); + } +}; + +struct BatchDeleter { + uint64_t count = 0; + uint64_t objects_moved = 0; + uint64_t skipped = 0; + interval_timer timers; + std::vector keys; + std::shared_ptr source_store_; + std::shared_ptr target_store_; + size_t batch_size_; + + BatchDeleter(std::shared_ptr source_store, std::shared_ptr target_store, size_t batch_size) : + source_store_(std::move(source_store)), + target_store_(std::move(target_store)), + batch_size_(batch_size){ + timers.start_timer(); + } + + void delete_keys() { + target_store_->remove_keys(keys).get(); + objects_moved += keys.size(); + } + + void add_key(const VariantKey& key, bool check_target=true) { + if(check_target && !target_store_->key_exists(key).get()) { + skipped++; + log::storage().warn("Found an unreadable key {}", key); + return; + } + keys.push_back(key); + if(keys.size() == batch_size_) { + delete_keys(); + keys = std::vector(); + + if(++count %10 == 0) { + timers.stop_timer(); + auto bps = objects_moved / timers.get_timer().get_results().total; + log::storage().info("Moved {} objects ({} skipped), {} per second", objects_moved, skipped, bps); + timers.start_timer(); + } + } + } + + void finalize() { + if(!keys.empty()) { + delete_keys(); + } + timers.stop_timer(); + auto total = timers.get_timer().get_results().total; + auto bps = objects_moved / timers.get_timer().get_results().total; + log::storage().info("Moved {} objects in {} - {} per second ", objects_moved, total, bps); + } +}; + +inline MetricsConfig::Model get_model_from_proto_config(const proto::utils::PrometheusConfig& cfg) { + switch (cfg.prometheus_model()) { + case proto::utils::PrometheusConfig_PrometheusModel_NO_INIT: return MetricsConfig::Model::NO_INIT; + case proto::utils::PrometheusConfig_PrometheusModel_PUSH: return MetricsConfig::Model::PUSH; + case proto::utils::PrometheusConfig_PrometheusModel_WEB: return MetricsConfig::Model::PULL; + default: internal::raise("Unknown Prometheus proto model {}", int{cfg.prometheus_model()}); + } +} + +class ARCTICDB_VISIBILITY_HIDDEN StorageMover { +public: + StorageMover(std::shared_ptr source_library, std::shared_ptr target_library) : + source_store_(std::make_shared>(source_library, + codec::default_lz4_codec(), + encoding_version(source_library->config()))), + target_store_(std::make_shared>(target_library, + codec::default_lz4_codec(), + encoding_version(target_library->config()))), + cfg_() { + codec::check( + encoding_version(source_library->config()) == encoding_version(target_library->config()), + "The encoding version of the source library {} is {} which is different than the encoding version {} of the target library {}", + source_library->name(), encoding_version(source_library->config()),encoding_version(target_library->config()), target_library->name()); + auto const& src_cfg = source_library->config(); + util::variant_match(src_cfg, + [](std::monostate){util::raise_rte("Invalid source library cfg");}, + [&](const proto::storage::VersionStoreConfig& conf){ + if (conf.has_prometheus_config()) { + MetricsConfig prometheus_config( + conf.prometheus_config().host(), + conf.prometheus_config().port(), + conf.prometheus_config().job_name(), + conf.prometheus_config().instance(), + conf.prometheus_config().prometheus_env(), + 
get_model_from_proto_config(conf.prometheus_config()) + ); + PrometheusInstance::instance()->configure(prometheus_config); + } + source_symbol_list_ = conf.symbol_list(); + }); + + auto const& target_cfg = target_library->config(); + util::variant_match(target_cfg, + [](std::monostate){util::raise_rte("Invalid source library cfg");}, + [&](const proto::storage::VersionStoreConfig& conf){ + target_symbol_list_ = conf.symbol_list(); + }); + } + + void go(size_t batch_size = 1000) { + BatchCopier copier{source_store_, target_store_, batch_size}; + foreach_key_type([&](KeyType key_type) { + source_store_->iterate_type(key_type, [&](const VariantKey &&key) { + copier.add_key(key); + }); + }); + copier.finalize(); + } + + py::list get_all_source_keys() { + py::list res; + size_t count = 0; + foreach_key_type([&](KeyType key_type) { + source_store_->iterate_type(key_type, [&](const VariantKey& key) { + res.append(key); + if(++count % 10000 == 0) + log::storage().info("Got {} keys", count); + }); + }); + return res; + } + + struct MissingKeysData { + std::atomic scanned_keys_; + std::atomic missing_keys_; + std::mutex mutex_; + interval_timer timer_; + + MissingKeysData() : + scanned_keys_(0), + missing_keys_(0) + { + timer_.start_timer(); + } + + void report() { + std::lock_guard lock{mutex_}; + timer_.stop_timer(); + auto keys_per_sec = scanned_keys_ / timer_.get_timer().get_results().total; + log::version().info("Scanned {} keys of all types and found {} missing : {} keys/sec", scanned_keys_.load(), missing_keys_.load(), keys_per_sec); + timer_.start_timer(); + } + }; + + struct FindMissingKeysTask : async::BaseTask { + KeyType key_type_; + std::shared_ptr source_store_; + std::shared_ptr target_store_; + std::shared_ptr global_data_; + uint64_t keys_of_type_; + uint64_t missing_keys_of_type_; + size_t batch_size_; + bool skip_target_check_ref_; + bool skip_source_check_; + + FindMissingKeysTask( + KeyType key_type, + std::shared_ptr source_store, + std::shared_ptr target_store, + std::shared_ptr global_data, + size_t batch_size=100, + bool skip_target_check_ref=false, + bool skip_source_check=false): + key_type_(key_type), + source_store_(std::move(source_store)), + target_store_(std::move(target_store)), + global_data_(std::move(global_data)), + keys_of_type_(0), + missing_keys_of_type_(0), + batch_size_(batch_size), + skip_target_check_ref_(skip_target_check_ref), + skip_source_check_(skip_source_check){ + } + + std::vector operator()() { + interval_timer timers; + timers.start_timer(); + std::vector res; + std::vector all_keys; + source_store_->iterate_type(key_type_, [&](const VariantKey &&key) { + ++keys_of_type_; + ++global_data_->scanned_keys_; + all_keys.emplace_back(key); + if (all_keys.size() == batch_size_) { + auto key_exists = folly::collect(target_store_->batch_key_exists(all_keys)).get(); + for (size_t idx = 0; idx != all_keys.size(); idx++) { + if ((skip_target_check_ref_ && is_ref_key_class(key_type_)) || !key_exists[idx]) { + if (skip_source_check_ || source_store_->key_exists(all_keys[idx]).get()) { + res.push_back(all_keys[idx]); + ++missing_keys_of_type_; + ++global_data_->missing_keys_; + } else { + log::storage().warn("Storage contains an unreadable key {}", all_keys[idx]); + } + } + } + all_keys.clear(); + } + if(keys_of_type_ % 10000 == 0) { + timers.stop_timer(); + auto keys_per_sec = keys_of_type_ / timers.get_timer().get_results().total; + log::version().info("Scanned {} {} keys and found {} missing : {} keys/sec", keys_of_type_, get_key_description(key_type_), 
missing_keys_of_type_, keys_per_sec); + global_data_->report(); + timers.start_timer(); + } + }); + + if (!all_keys.empty()) { + auto key_exists = folly::collect(target_store_->batch_key_exists(all_keys)).get(); + for (size_t idx = 0; idx != all_keys.size(); idx++) { + if ((skip_target_check_ref_ && is_ref_key_class(key_type_)) || !key_exists[idx]) { + if (skip_source_check_ || source_store_->key_exists(all_keys[idx]).get()) { + res.push_back(all_keys[idx]); + ++missing_keys_of_type_; + ++global_data_->missing_keys_; + } else { + log::storage().warn("Storage contains an unreadable key {}", all_keys[idx]); + } + } + } + } + + log::storage().info("{} missing keys of type {}, scanned {}", res.size(), get_key_description(key_type_), keys_of_type_); + return res; + } + }; + + std::unordered_map> get_missing_keys(size_t batch_size, bool reverse, bool skip_target_check_ref) { + auto shared_data = std::make_shared(); + std::unordered_map> results; + auto prim = reverse ? target_store_ : source_store_; + auto second = reverse ? source_store_ : target_store_; + foreach_key_type_read_precedence([&](KeyType key_type) { + auto task = FindMissingKeysTask{key_type, prim, second, shared_data, batch_size, skip_target_check_ref, true}; + results.emplace(key_type, task()); + }); + + log::storage().info("Finished scan, collating results"); + shared_data->report(); + return results; + } + + void incremental_copy(size_t batch_size = 1000, size_t thread_count = 32, bool delete_keys=false, bool perform_checks=true) { + auto missing_keys = get_missing_keys(batch_size * 100, false, true); + log::storage().info("Copying {} missing key types", missing_keys.size()); + BatchCopier copier{source_store_, target_store_, batch_size, thread_count}; + copier.go(std::move(missing_keys), perform_checks); + + if (delete_keys) { + auto deleting_keys = get_missing_keys(batch_size * 100, true, false); + log::storage().info("Deleting {} key types", deleting_keys.size()); + BatchDeleter deleter{source_store_, target_store_, batch_size}; + foreach_key_type_read_precedence([&](auto key_type) { + if (auto it = deleting_keys.find(key_type); it != deleting_keys.end()) { + for (auto &key : it->second) + deleter.add_key(key, perform_checks); + } + }); + deleter.finalize(); + } + } + + py::list get_keys_in_source_only() { + auto all_missing = get_missing_keys(100, false, false); + + py::list res; + for(const auto& missing_of_type : all_missing) { + for (const auto &key : missing_of_type.second) + res.append(key); + } + return res; + } + + size_t clone_all_keys_for_symbol(const StreamId &stream_id, size_t batch_size) { + std::vector vkeys; + foreach_key_type([&](KeyType key_type) { + source_store_->iterate_type(key_type, [&](const VariantKey& key) { + vkeys.push_back(key); + }, std::get(stream_id)); + }); + return write_variant_keys_from_source_to_target(std::move(vkeys), batch_size); + } + + size_t clone_all_keys_for_symbol_for_type( + const StreamId &stream_id, + size_t batch_size, + KeyType key_type) { + std::vector vkeys; + source_store_->iterate_type(key_type, [&](const VariantKey& key) { + vkeys.push_back(key); + }, std::get(stream_id)); + return write_variant_keys_from_source_to_target(std::move(vkeys), batch_size); + } + + size_t write_variant_keys_from_source_to_target(std::vector&& vkeys, size_t batch_size) { + std::vector> write_futs; + + size_t total_copied = 0; + for (size_t start = 0; start < vkeys.size(); start += batch_size) { + const size_t end = std::min(start + batch_size, vkeys.size()); + const size_t copy_max_size = end 
- start; + std::vector> keys_to_copy(copy_max_size); + std::vector segments(copy_max_size); + size_t copied = 0; + for (size_t offset = start; offset < end; ++offset) { + if (VariantKey& key = vkeys[offset]; source_store_->key_exists(key).get() && !target_store_->key_exists(key).get()) { + keys_to_copy[copied++] = std::pair{std::move(key), [copied, &segments](storage::KeySegmentPair&& ks) { + segments[copied] = std::move(ks); + return segments[copied].variant_key(); + }}; + } + } + total_copied += copied; + [[maybe_unused]] auto keys = source_store_->batch_read_compressed(std::move(keys_to_copy), BatchReadArgs{}).get(); + write_futs.push_back(target_store_->batch_write_compressed(std::move(segments))); + } + folly::collect(write_futs).get(); + return total_copied; + } + + + size_t write_keys_from_source_to_target(const std::vector& py_keys, size_t batch_size) { + std::vector vkeys; + rng::transform(py_keys, std::back_inserter(vkeys), [](const auto& py_key) -> VariantKey { + if (py::isinstance(py_key)) { + return py_key.template cast(); + } else if (py::isinstance(py_key)) { + return py_key.template cast(); + } + internal::raise("Invalid key type"); + }); + return write_variant_keys_from_source_to_target(std::move(vkeys), batch_size); + } + + py::dict write_symbol_trees_from_source_to_target(const std::vector& py_partial_keys, bool append_versions) { + std::shared_ptr source_map(std::make_shared()); + std::shared_ptr target_map(std::make_shared()); + std::optional symbol_list; + if(target_symbol_list_) + symbol_list .emplace(target_map); + // res is a dict with key sym and value a dict showing results of the versions + py::dict res; + target_map->set_log_changes(true); + for(const auto& py_pkey: py_partial_keys) { + // For each version, outputs the version_id which was written in the dest if no error otherwise error string + py::dict sym_data; + std::unordered_map> version_to_snapshot_map; + auto sym = py_pkey.attr("id").cast(); + // Can be either numeric(version id) or string(snapshot_id) + auto ids = py_pkey.attr("versions").cast>>(); + std::vector index_keys; + for(const auto& id: ids) { + util::variant_match(id, + [&](const VersionId& numeric_id) { + auto index_key = get_specific_version(source_store_, source_map, sym, numeric_id); + if (!index_key) { + sym_data[py::int_(numeric_id)] = + fmt::format("Sym:{},Version:{},Ex:{}", sym, numeric_id, "Numeric Id not found"); + } + else { + index_keys.emplace_back(index_key.value()); + } + }, + [&](const StringId& snap_name) { + auto opt_snapshot = get_snapshot(source_store_, snap_name); + if (!opt_snapshot) { + sym_data[py::str(snap_name)] = + fmt::format("Sym:{},SnapId:{},Ex:{}", sym, snap_name, "Snapshot not found in source"); + return; + } + // A snapshot will normally be in a ref key, but for old libraries it still needs to fall back to iteration of + // atom keys. 
+ auto variant_snap_key = opt_snapshot.value().first; + auto snapshot_segment = opt_snapshot.value().second; + auto opt_idx_for_stream_id = row_id_for_stream_in_snapshot_segment( + snapshot_segment, variant_key_type(variant_snap_key) == KeyType::SNAPSHOT_REF, sym); + if (opt_idx_for_stream_id) { + auto stream_idx = opt_idx_for_stream_id.value(); + auto index_key = read_key_row(snapshot_segment, stream_idx); + version_to_snapshot_map[index_key.version_id()].push_back(snap_name); + index_keys.emplace_back(std::move(index_key)); + } + else { + sym_data[py::str(snap_name)] = + fmt::format("Sym:{},SnapId:{},Ex:{}", sym, snap_name, "Symbol not found in source snapshot"); + } + } + ); + } + // Remove duplicate keys + rng::sort(index_keys, [&](const auto& k1, const auto& k2) {return k1.version_id() < k2.version_id();}); + auto to_erase = rng::unique(index_keys, std::equal_to{}, [](const auto& k){ return k.version_id();}); + index_keys.erase(to_erase.begin(), to_erase.end()); + for(const auto& index_key: index_keys) { + VersionId v_id = index_key.version_id(); + try { + std::optional new_version_id; + std::optional previous_key; + if (append_versions) { + auto [maybe_prev, _] = get_latest_version(target_store_, target_map, sym); + if (maybe_prev){ + new_version_id = std::make_optional(maybe_prev.value().version_id() + 1); + previous_key = std::move(maybe_prev); + } + } else { + if (auto target_index_key = get_specific_version(target_store_, target_map, sym, v_id)) { + throw storage::DuplicateKeyException(target_index_key.value()); + } + } + const auto new_index_key = copy_index_key_recursively(source_store_, target_store_, index_key, new_version_id); + target_map->write_version(target_store_, new_index_key, previous_key); + if(symbol_list) + symbol_list->add_symbol(target_store_, new_index_key.id(), new_version_id.value_or(0)); + + // Change the version in the result map + sym_data[py::int_(v_id)] = new_version_id ? 
new_version_id.value() : v_id; + // Give the new version id to the snapshots + if (version_to_snapshot_map.contains(v_id)) { + for(const auto& snap_name: version_to_snapshot_map[v_id]) { + sym_data[py::str(snap_name)] = sym_data[py::int_(v_id)]; + } + } + } + catch (std::exception &e) { + auto key = py::int_(v_id); + auto error = fmt::format("Sym:{},Version:{},Ex:{}", sym, v_id, e.what()); + sym_data[key] = error; + // Give the error to snapshots which also had the same version_id + if (version_to_snapshot_map.contains(v_id)) { + for(const auto& snap_name: version_to_snapshot_map[v_id]) { + sym_data[py::str(snap_name)] = error; + } + } + } + } + util::variant_match(sym, + [&sym_data, &res](const NumericId& numeric_id) { + res[py::int_(numeric_id)] = sym_data; + }, + [&sym_data, &res](const StringId& string_id) { + res[py::str(string_id)] = sym_data; + } + ); + } + return res; + } + +private: + std::shared_ptr source_store_; + std::shared_ptr target_store_; + proto::storage::VersionStoreConfig cfg_; + bool target_symbol_list_; + bool source_symbol_list_; +}; + +} diff --git a/cpp/arcticdb/util/error_code.hpp b/cpp/arcticdb/util/error_code.hpp index 5f0b6b6486..9a229e5d66 100644 --- a/cpp/arcticdb/util/error_code.hpp +++ b/cpp/arcticdb/util/error_code.hpp @@ -103,7 +103,8 @@ inline std::unordered_map get_error_category_names() ERROR_CODE(9001, E_UNKNOWN_CODEC) \ ERROR_CODE(9002, E_ZSDT_ENCODING) \ ERROR_CODE(9003, E_LZ4_ENCODING) \ - ERROR_CODE(9004, E_INPUT_TOO_LARGE) + ERROR_CODE(9004, E_INPUT_TOO_LARGE) \ + ERROR_CODE(9005, E_ENCODING_VERSION_MISMATCH) enum class ErrorCode : detail::BaseType { #define ERROR_CODE(code, Name, ...) Name = code, diff --git a/cpp/arcticdb/version/local_versioned_engine.cpp b/cpp/arcticdb/version/local_versioned_engine.cpp index c3edc934f5..cd2ee0e5a1 100644 --- a/cpp/arcticdb/version/local_versioned_engine.cpp +++ b/cpp/arcticdb/version/local_versioned_engine.cpp @@ -1654,7 +1654,7 @@ std::unordered_map LocalVersionedEngine::scan_object_size auto& sizes_info = sizes[key_type]; ++sizes_info.count; key_size_calculators.emplace_back(std::forward(k), [&sizes_info] (auto&& ks) { - auto key_seg = std::move(ks); + auto key_seg = std::forward(ks); sizes_info.compressed_size += key_seg.segment().size(); const auto& desc = key_seg.segment().descriptor(); sizes_info.uncompressed_size += desc.uncompressed_bytes(); @@ -1685,7 +1685,7 @@ std::unordered_map> LocalVer store->iterate_type(key_type, [&keys, &mutex, &sizes, key_type](const VariantKey&& k){ keys.emplace_back(std::forward(k), [key_type, &sizes, &mutex] (auto&& ks) { - auto key_seg = std::move(ks); + auto key_seg = std::forward(ks); auto variant_key = key_seg.variant_key(); auto stream_id = variant_key_id(variant_key); auto compressed_size = key_seg.segment().size(); diff --git a/cpp/arcticdb/version/python_bindings.cpp b/cpp/arcticdb/version/python_bindings.cpp index 00a5398629..bb8c8518cb 100644 --- a/cpp/arcticdb/version/python_bindings.cpp +++ b/cpp/arcticdb/version/python_bindings.cpp @@ -7,12 +7,10 @@ #include #include -#include #include #include #include #include -#include #include #include #include @@ -26,6 +24,7 @@ #include #include + namespace arcticdb::version_store { [[nodiscard]] static std::pair compute_first_last_dates( @@ -111,6 +110,7 @@ void register_bindings(py::module &version, py::exception>(version, "RefKey") diff --git a/cpp/arcticdb/version/version_core.cpp b/cpp/arcticdb/version/version_core.cpp index 7d2032a60c..b5e5be90eb 100644 --- a/cpp/arcticdb/version/version_core.cpp +++ 
b/cpp/arcticdb/version/version_core.cpp @@ -1657,7 +1657,7 @@ VersionedItem compact_incomplete_impl( }); return util::variant_match(std::move(result), - [&slices, &pipeline_context, &store, &options, &user_meta](CompactionWrittenKeys& written_keys) -> VersionedItem { + [&slices, &pipeline_context, &store, &user_meta](CompactionWrittenKeys& written_keys) -> VersionedItem { auto vit = collate_and_write( store, pipeline_context, diff --git a/cpp/arcticdb/version/version_store_api.cpp b/cpp/arcticdb/version/version_store_api.cpp index bbb1d10b5e..ab8c369371 100644 --- a/cpp/arcticdb/version/version_store_api.cpp +++ b/cpp/arcticdb/version/version_store_api.cpp @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -1167,5 +1166,6 @@ ReadResult read_dataframe_from_file( void PythonVersionStore::force_delete_symbol(const StreamId& stream_id) { version_map()->delete_all_versions(store(), stream_id); delete_all_for_stream(store(), stream_id, true); + version_map()->flush(); } } //namespace arcticdb::version_store diff --git a/python/arcticdb/toolbox/storage.py b/python/arcticdb/toolbox/storage.py new file mode 100644 index 0000000000..21753cd416 --- /dev/null +++ b/python/arcticdb/toolbox/storage.py @@ -0,0 +1,3 @@ +from typing import NamedTuple, Union, List + +SymbolVersionsPair = NamedTuple("SymbolVersionsPair", [("id", Union[int, str]), ("versions", List[Union[int, str]])]) diff --git a/python/tests/integration/toolbox/test_storage_mover.py b/python/tests/integration/toolbox/test_storage_mover.py new file mode 100644 index 0000000000..0d7c1ae861 --- /dev/null +++ b/python/tests/integration/toolbox/test_storage_mover.py @@ -0,0 +1,330 @@ +import numpy as np +import pytest + +from hypothesis import given, strategies as st, settings +from arcticdb.config import Defaults +from arcticdb.version_store.helper import ArcticMemoryConfig, get_lib_cfg, add_lmdb_library_to_env +from arcticdb.toolbox.library_tool import KeyType +from arcticdb.toolbox.storage import SymbolVersionsPair +from arcticdb_ext.tools import StorageMover +from pandas import DataFrame +from pandas.testing import assert_frame_equal +from arcticdb.util.test import sample_dataframe +from arcticc.pb2.storage_pb2 import EnvironmentConfigsMap +import hypothesis +import sys + + +# configure_test_logger("DEBUG") + +def create_local_lmdb_cfg(lib_name=Defaults.LIB, db_dir=Defaults.DATA_DIR, description=None): + cfg = EnvironmentConfigsMap() + add_lmdb_library_to_env( + cfg, + lib_name=lib_name, + env_name=Defaults.ENV, + db_dir=db_dir, + description=description + ) + return cfg + +@pytest.fixture +def arctidb_native_local_lib_cfg_extra(tmpdir): + def create(): + return create_local_lmdb_cfg(lib_name="local.extra", db_dir=str(tmpdir)) + + return create + +@pytest.fixture +def arctidb_native_local_lib_cfg(tmpdir): + def create(lib_name): + return create_local_lmdb_cfg(lib_name=lib_name, db_dir=str(tmpdir)) + return create + +def create_default_config(): + return create_local_lmdb_cfg() + +def add_data(version_store): + version_store.write("symbol", sample_dataframe()) + version_store.write("pickled", {"a": 1}, pickle_on_failure=True) + version_store.snapshot("mysnap") + version_store.write("rec_norm", data={"a": np.arange(5), "b": np.arange(8), "c": None}, recursive_normalizers=True) + version_store.write("symbol", sample_dataframe()) + version_store.snapshot("mysnap2") + + +def compare_two_libs(lib1, lib2): + ver1 = lib1.list_versions() + ver2 = lib2.list_versions() + + print(ver1) + print(ver2) + + assert 
len(lib1.list_versions()) == len(lib2.list_versions()) + assert lib1.list_versions() == lib2.list_versions() + assert lib1.list_snapshots() == lib2.list_snapshots() + + assert_frame_equal(lib1.read("symbol", as_of=0).data, lib2.read("symbol", as_of=0).data) + assert_frame_equal(lib1.read("symbol", as_of=1).data, lib2.read("symbol", as_of=1).data) + assert_frame_equal(lib1.read("symbol", as_of="mysnap").data, lib2.read("symbol", as_of="mysnap").data) + assert_frame_equal(lib1.read("symbol", as_of="mysnap2").data, lib2.read("symbol", as_of="mysnap2").data) + + assert lib1.read("pickled").data == lib2.read("pickled").data + assert lib1.read("pickled", as_of="mysnap").data == lib2.read("pickled", as_of="mysnap").data + assert lib1.read("pickled", as_of="mysnap2").data == lib2.read("pickled", as_of="mysnap2").data + + assert lib1.read("rec_norm").data.keys() == lib2.read("rec_norm").data.keys() + assert all(lib1.read("rec_norm").data["a"] == lib2.read("rec_norm").data["a"]) + assert all(lib1.read("rec_norm").data["b"] == lib2.read("rec_norm").data["b"]) + assert lib1.read("rec_norm").data["c"] == lib2.read("rec_norm").data["c"] + assert lib1.read("rec_norm", as_of="mysnap2").data.keys() == lib2.read("rec_norm", as_of="mysnap2").data.keys() + + +def test_storage_mover_single_go(lmdb_version_store_v1, arctidb_native_local_lib_cfg_extra): + add_data(lmdb_version_store_v1) + arctic = ArcticMemoryConfig(arctidb_native_local_lib_cfg_extra(), env=Defaults.ENV) + lib_cfg = get_lib_cfg(arctic, Defaults.ENV, "local.extra") + lib_cfg.version.symbol_list = True + dst_lib = arctic["local.extra"] + + s = StorageMover(lmdb_version_store_v1._library, dst_lib._library) + s.go() + + compare_two_libs(lmdb_version_store_v1, dst_lib) + + +def test_storage_mover_key_by_key(lmdb_version_store_v1, arctidb_native_local_lib_cfg_extra): + add_data(lmdb_version_store_v1) + arctic = ArcticMemoryConfig(arctidb_native_local_lib_cfg_extra(), env=Defaults.ENV) + lib_cfg = get_lib_cfg(arctic, Defaults.ENV, "local.extra") + lib_cfg.version.symbol_list = True + dst_lib = arctic["local.extra"] + + s = StorageMover(lmdb_version_store_v1._library, dst_lib._library) + all_keys = s.get_all_source_keys() + for key in all_keys: + s.write_keys_from_source_to_target([key], 2) + + compare_two_libs(lmdb_version_store_v1, dst_lib) + +@pytest.mark.xfail(sys.platform == "win32", reason="Numpy strings are not implemented for Windows") +def test_storage_mover_symbol_tree(arctidb_native_local_lib_cfg_extra, arctidb_native_local_lib_cfg, lib_name): + col_per_group = 5 + row_per_segment = 10 + local_lib_cfg = arctidb_native_local_lib_cfg(lib_name) + lib = local_lib_cfg.env_by_id[Defaults.ENV].lib_by_path[lib_name] + lib.version.write_options.column_group_size = col_per_group + lib.version.write_options.segment_row_size = row_per_segment + lib.version.symbol_list = True + lmdb_version_store_symbol_list = ArcticMemoryConfig(local_lib_cfg, Defaults.ENV)[lib_name] + + lmdb_version_store_symbol_list.write("symbol", sample_dataframe(), metadata="yolo") + lmdb_version_store_symbol_list.write("symbol", sample_dataframe(), metadata="yolo2") + lmdb_version_store_symbol_list.write("snapshot_test", 1) + lmdb_version_store_symbol_list.snapshot("my_snap") + lmdb_version_store_symbol_list.snapshot("my_snap2") + lmdb_version_store_symbol_list.snapshot("snapshot_test", 2) + lmdb_version_store_symbol_list.delete_version("snapshot_test", 0) + lmdb_version_store_symbol_list.write("pickled", {"a": 1}, metadata="cantyolo", pickle_on_failure=True) + 
lmdb_version_store_symbol_list.write("pickled", {"b": 1}, metadata="cantyolo2", pickle_on_failure=True) + lmdb_version_store_symbol_list.write("pickled", {"c": 1}, metadata="yoloded", pickle_on_failure=True) + lmdb_version_store_symbol_list.write( + "rec_norm", + data={"a": np.arange(1000), "b": np.arange(8000), "c": None}, + metadata="realyolo", + recursive_normalizers=True, + ) + lmdb_version_store_symbol_list.write( + "rec_norm", + data={"e": np.arange(1000), "f": np.arange(8000), "g": None}, + metadata="realyolo2", + recursive_normalizers=True, + ) + + lmdb_version_store_symbol_list.write("dup_data", np.array(["YOLO"] * 10000)) + + arctic = ArcticMemoryConfig(arctidb_native_local_lib_cfg_extra(), env=Defaults.ENV) + lib_cfg = get_lib_cfg(arctic, Defaults.ENV, "local.extra") + lib_cfg.version.symbol_list = True + dst_lib = arctic["local.extra"] + + s = StorageMover(lmdb_version_store_symbol_list._library, dst_lib._library) + sv1 = SymbolVersionsPair("symbol", [1, 0]) + sv2 = SymbolVersionsPair("pickled", [2, 0]) + sv3 = SymbolVersionsPair("rec_norm", [1, 0]) + sv4 = SymbolVersionsPair("dup_data", [0]) + sv5 = SymbolVersionsPair("snapshot_test", ["my_snap", "my_snap2"]) + res = s.write_symbol_trees_from_source_to_target([sv1, sv2, sv3, sv4, sv5], False) + assert len(res) == 5 + for r in res: + for v in res[r]: + assert type(res[r][v]) == int + + assert len(dst_lib.list_versions()) == 8 + assert_frame_equal(lmdb_version_store_symbol_list.read("symbol").data, dst_lib.read("symbol").data) + assert_frame_equal(lmdb_version_store_symbol_list.read("symbol", 0).data, dst_lib.read("symbol", 0).data) + assert lmdb_version_store_symbol_list.read("symbol").metadata == dst_lib.read("symbol").metadata + assert lmdb_version_store_symbol_list.read("symbol", 0).metadata == dst_lib.read("symbol", 0).metadata + + assert lmdb_version_store_symbol_list.read("pickled").data == dst_lib.read("pickled").data + assert lmdb_version_store_symbol_list.read("pickled", 0).data == dst_lib.read("pickled", 0).data + assert lmdb_version_store_symbol_list.read("pickled").metadata == dst_lib.read("pickled").metadata + assert lmdb_version_store_symbol_list.read("pickled", 0).metadata == dst_lib.read("pickled", 0).metadata + + def comp_dict(d1, d2): + assert len(d1) == len(d2) + for k in d1: + if isinstance(d1[k], np.ndarray): + assert (d1[k] == d2[k]).all() + else: + assert d1[k] == d2[k] + + comp_dict(lmdb_version_store_symbol_list.read("rec_norm").data, dst_lib.read("rec_norm").data) + comp_dict(lmdb_version_store_symbol_list.read("rec_norm", 0).data, dst_lib.read("rec_norm", 0).data) + assert lmdb_version_store_symbol_list.read("rec_norm").metadata == dst_lib.read("rec_norm").metadata + assert lmdb_version_store_symbol_list.read("rec_norm", 0).metadata == dst_lib.read("rec_norm", 0).metadata + + np.testing.assert_equal(lmdb_version_store_symbol_list.read("dup_data").data, dst_lib.read("dup_data").data) + assert lmdb_version_store_symbol_list.read("dup_data").metadata == dst_lib.read("dup_data").metadata + + assert lmdb_version_store_symbol_list.read("snapshot_test", "my_snap").data, dst_lib.read("snapshot_test", 0).data + + lmdb_version_store_symbol_list.write("new_symbol", 1) + lmdb_version_store_symbol_list.snapshot("new_snap") + lmdb_version_store_symbol_list.write("new_symbol", 2) + lmdb_version_store_symbol_list.snapshot("new_snap2") + lmdb_version_store_symbol_list.write("new_symbol", 3) + lmdb_version_store_symbol_list.delete_version("new_symbol", 1) + sv6 = SymbolVersionsPair("new_symbol", [2, 0, "new_snap", 
"new_snap2"]) + dst_lib.write("new_symbol", 0) + + res = s.write_symbol_trees_from_source_to_target([sv6], True) + assert len(res) == 1 + assert "new_symbol" in res + assert res["new_symbol"][2] == 3 + assert res["new_symbol"][0] == 1 + assert res["new_symbol"]["new_snap"] == 1 + assert res["new_symbol"]["new_snap2"] == 2 + + assert dst_lib.read("new_symbol", 0).data == 0 + assert dst_lib.read("new_symbol", 1).data == 1 + assert dst_lib.read("new_symbol", 2).data == 2 + assert dst_lib.read("new_symbol", 3).data == 3 + + +def test_storage_mover_and_key_checker(lmdb_version_store_v1, arctidb_native_local_lib_cfg_extra): + add_data(lmdb_version_store_v1) + arctic = ArcticMemoryConfig(arctidb_native_local_lib_cfg_extra(), env=Defaults.ENV) + lib_cfg = get_lib_cfg(arctic, Defaults.ENV, "local.extra") + lib_cfg.version.symbol_list = True + dst_lib = arctic["local.extra"] + + s = StorageMover(lmdb_version_store_v1._library, dst_lib._library) + s.go() + + keys = s.get_keys_in_source_only() + assert len(keys) == 0 + + +def test_storage_mover_clone_keys_for_symbol(lmdb_version_store_v1, arctidb_native_local_lib_cfg_extra): + add_data(lmdb_version_store_v1) + lmdb_version_store_v1.write("a", 1) + lmdb_version_store_v1.write("a", 2) + lmdb_version_store_v1.write("b", 1) + arctic = ArcticMemoryConfig(arctidb_native_local_lib_cfg_extra(), env=Defaults.ENV) + lib_cfg = get_lib_cfg(arctic, Defaults.ENV, "local.extra") + lib_cfg.version.symbol_list = True + dst_lib = arctic["local.extra"] + + s = StorageMover(lmdb_version_store_v1._library, dst_lib._library) + s.clone_all_keys_for_symbol("a", 1000) + assert dst_lib.read("a").data == 2 + + +@pytest.fixture() +def lib_with_gaps_and_reused_keys(version_store_factory): + lib = version_store_factory(name="source", de_duplication=True, col_per_group=2, segment_row_size=2) + + lib.write("x", 0) + lib.write("x", 1) + lib.write("x", 2) + lib.snapshot("s2") + lib.write("x", DataFrame({"c": [0, 1]}, index=[0, 1])) + lib.write("x", DataFrame({"c": list(range(5))}, index=list(range(5))), prune_previous_version=True) # 2 slices + lib.write("x", 5) + lib.delete_version("x", 5) + lib.write("x", 6) + + return lib + + +@pytest.mark.parametrize("mode", ("check assumptions", "go", "no force")) +def test_correct_versions_in_destination(mode, lib_with_gaps_and_reused_keys, lmdb_version_store_v1): + s = StorageMover(lib_with_gaps_and_reused_keys._library, lmdb_version_store_v1._library) + if mode == "check assumptions": + check = lib_with_gaps_and_reused_keys + elif mode == "go": + s.go() + check = lmdb_version_store_v1 + else: + s.write_symbol_trees_from_source_to_target([SymbolVersionsPair("x", ["s2", 4, 6])], False) + check = lmdb_version_store_v1 + + lt = check.library_tool() + + assert {vi["version"] for vi in check.list_versions("x")} == {2, 4, 6} + assert len(lt.find_keys(KeyType.TABLE_INDEX)) == 3 + assert [k.version_id for k in lt.find_keys(KeyType.TABLE_DATA)] == [2, 3, 4, 4, 6] + + +@settings(deadline=None, suppress_health_check=(hypothesis.HealthCheck.function_scoped_fixture,)) +@given(to_copy=st.permutations(["s2", 4, 6]), existing=st.booleans()) +def test_correct_versions_in_destination_force(to_copy, existing, lib_with_gaps_and_reused_keys, version_store_factory): + try: + _tmp_test_body(to_copy, existing, lib_with_gaps_and_reused_keys, version_store_factory) + except: + import traceback + + traceback.print_exc() + raise + + +def _tmp_test_body(to_copy, existing, lib_with_gaps_and_reused_keys, version_store_factory): + # mongoose_copy_data's force mode rewrite 
version numbers in the target + source = lib_with_gaps_and_reused_keys + target = version_store_factory(name="_unique_") + + if existing: + target.write("x", 0) + + s = StorageMover(source._library, target._library) + s.write_symbol_trees_from_source_to_target([SymbolVersionsPair("x", to_copy)], True) + + actual_vers = sorted(vi["version"] for vi in target.list_versions("x")) + print(to_copy, existing, "->", actual_vers) + + lt = target.library_tool() + start = 0 if existing else 2 # mover starts at the first input version if target is empty.... + n = int(existing) + len(to_copy) + assert actual_vers == list(range(start, start + n)) + assert len(lt.find_keys(KeyType.TABLE_INDEX)) == n + + source_keys = source.library_tool().find_keys(KeyType.TABLE_DATA) + expected_target = [] + for item in to_copy: + if item == "s2": + expected_target.append(source_keys[0]) + elif item == 4: + expected_target.extend(source_keys[1:4]) + else: + expected_target.append(source_keys[-1]) + expected_target.sort() # key=lambda k: (k.version_id, k.start_index)) + + target_keys = lt.find_keys(KeyType.TABLE_DATA) + target_keys.sort() + if existing: + target_keys.pop(0) + + for a, e in zip(target_keys, expected_target): + assert a.content_hash == e.content_hash + assert a.creation_ts >= source_keys[-1].creation_ts From f9d955f71fbb7439ddce8724dda5ec8e23f6c171 Mon Sep 17 00:00:00 2001 From: phoebusm Date: Mon, 16 Dec 2024 16:20:30 +0000 Subject: [PATCH 10/11] Add s3 sts proxy support --- cpp/arcticdb/storage/s3/s3_storage.cpp | 13 +++++++++++-- cpp/arcticdb/storage/s3/s3_storage.hpp | 6 ++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/cpp/arcticdb/storage/s3/s3_storage.cpp b/cpp/arcticdb/storage/s3/s3_storage.cpp index 4f6facb6ce..5964161e80 100644 --- a/cpp/arcticdb/storage/s3/s3_storage.cpp +++ b/cpp/arcticdb/storage/s3/s3_storage.cpp @@ -10,6 +10,7 @@ #include #include +#include #include #include @@ -100,12 +101,20 @@ S3Storage::S3Storage(const LibraryPath &library_path, OpenMode mode, const S3Set } else if (conf.aws_auth() == AWSAuthMethod::STS_PROFILE_CREDENTIALS_PROVIDER){ Aws::Config::ReloadCachedConfigFile(); // config files loaded in Aws::InitAPI; It runs once at first S3Storage object construct; reload to get latest + auto client_config = get_s3_config(conf); + auto sts_client_factory = [&](const Aws::Auth::AWSCredentials& creds) { // Get default allocation tag + auto sts_config = get_proxy_config(conf.https() ? 
Aws::Http::Scheme::HTTPS : Aws::Http::Scheme::HTTP); + auto allocation_tag = Aws::STS::STSClient::GetAllocationTag(); + sts_client_ = std::make_unique(creds, Aws::MakeShared(allocation_tag), sts_config); + return sts_client_.get(); + }; auto cred_provider = Aws::MakeShared( "DefaultAWSCredentialsProviderChain", conf.aws_profile(), - std::chrono::minutes(static_cast(ConfigsMap::instance()->get_int("S3Storage.STSTokenExpiryMin", 60))) + std::chrono::minutes(static_cast(ConfigsMap::instance()->get_int("S3Storage.STSTokenExpiryMin", 60))), + sts_client_factory ); - s3_client_ = std::make_unique(cred_provider, get_s3_config(conf), Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, conf.use_virtual_addressing()); + s3_client_ = std::make_unique(cred_provider, client_config, Aws::Client::AWSAuthV4Signer::PayloadSigningPolicy::Never, conf.use_virtual_addressing()); } else if (creds.GetAWSAccessKeyId() == USE_AWS_CRED_PROVIDERS_TOKEN && creds.GetAWSSecretKey() == USE_AWS_CRED_PROVIDERS_TOKEN){ ARCTICDB_RUNTIME_DEBUG(log::storage(), "Using AWS auth mechanisms"); diff --git a/cpp/arcticdb/storage/s3/s3_storage.hpp b/cpp/arcticdb/storage/s3/s3_storage.hpp index 9be7f11b98..50e7faff07 100644 --- a/cpp/arcticdb/storage/s3/s3_storage.hpp +++ b/cpp/arcticdb/storage/s3/s3_storage.hpp @@ -7,6 +7,8 @@ #pragma once +#include + #include #include #include @@ -79,6 +81,10 @@ class S3Storage final : public Storage { std::shared_ptr s3_api_; std::unique_ptr s3_client_; + //aws sdk annoyingly requires raw pointer being passed in the sts client factory to the s3 client + //thus sts_client_ should have same life span as s3_client_ + std::unique_ptr sts_client_; + std::string root_folder_; std::string bucket_name_; std::string region_; From 6931d3f67487dcf9f94b3cbd5bf6502818a210ff Mon Sep 17 00:00:00 2001 From: Vasil Danielov Pashov Date: Thu, 19 Dec 2024 16:22:48 +0200 Subject: [PATCH 11/11] Implement origin for pandas resampling (#1962) #### Reference Issues/PRs Resolve: #1740 Resolve: #1751 #### What does this implement or fix? Implements the equivalent of pandas' `origin` in ArcticDB's resample. The default is set to `epoch`, which matches ArcticDB's behavior up to now but differs from Pandas, which uses `start_day`; changing the default to `start_day` would be a breaking change. Also fixes how the min and max aggregators handle infinity when a column contains only infinities: previously the result was `NaN`, which is both wrong and inconsistent with Pandas; now the proper infinity is returned. Adds a hypothesis test using combinations of all supported resampling parameters. The origins `start`, `end`, `start_day` and `end_day` are not supported in conjunction with the `date_range` clause. A usage sketch follows the checklist below. #### Any other comments? #### Checklist
Checklist for code changes...
- [ ] Have you updated the relevant docstrings, documentation and copyright notice?
- [ ] Is this contribution tested against [all ArcticDB's features](../docs/mkdocs/docs/technical/contributing.md)?
- [ ] Do all exceptions introduced raise appropriate [error messages](https://docs.arcticdb.io/error_messages/)?
- [ ] Are API changes highlighted in the PR description?
- [ ] Is the PR labelled as enhancement or bug so it appears in autogenerated release notes?
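Usage sketch (illustrative, not taken from this patch's diff): the new `origin` argument is exposed through `QueryBuilder.resample` alongside the existing `closed`, `label` and `offset` arguments, and accepts `"epoch"` (the default), `"start"`, `"start_day"`, `"end"`, `"end_day"` or a nanosecond timestamp. The library URI, library name, symbol and column names below are assumptions made for the example.

```python
# Minimal sketch of resampling with an explicit origin. Assumes "my_symbol"
# has already been written with a numeric "price" column.
from arcticdb import Arctic, QueryBuilder

ac = Arctic("lmdb:///tmp/arcticdb_origin_demo")  # illustrative storage URI
lib = ac.get_library("demo", create_if_missing=True)

q = QueryBuilder()
# Weekly buckets anchored at midnight of the first day of the data ("start_day")
# rather than at the Unix epoch (the default, matching previous behavior).
q = q.resample("7D", closed="left", label="left", origin="start_day").agg(
    {
        "price_mean": ("price", "mean"),  # output column <- (input column, aggregator)
        "price_max": ("price", "max"),
    }
)
# The "start", "end", "start_day" and "end_day" origins cannot be combined with
# a date_range on the same read.
df = lib.read("my_symbol", query_builder=q).data
```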
--------- Co-authored-by: Vasil Pashov --- cpp/arcticdb/processing/clause.cpp | 83 ++++-- cpp/arcticdb/processing/clause.hpp | 40 +-- cpp/arcticdb/processing/clause_utils.cpp | 72 ++++- cpp/arcticdb/processing/clause_utils.hpp | 50 +--- .../processing/sorted_aggregation.hpp | 4 +- .../processing/test/rapidcheck_resample.cpp | 6 +- .../processing/test/test_resample.cpp | 30 +- cpp/arcticdb/python/python_utils.hpp | 1 - cpp/arcticdb/version/python_bindings.cpp | 93 ++++-- python/arcticdb/util/test.py | 78 +++++ python/arcticdb/version_store/processing.py | 33 ++- .../hypothesis/arcticdb/test_resample.py | 110 ++++++++ .../hypothesis/arcticdb/test_sort_merge.py | 2 +- .../arcticdb/version_store/test_resample.py | 267 ++++++++++++++++-- 14 files changed, 694 insertions(+), 175 deletions(-) create mode 100644 python/tests/hypothesis/arcticdb/test_resample.py diff --git a/cpp/arcticdb/processing/clause.cpp b/cpp/arcticdb/processing/clause.cpp index 82ce052969..308bb0d2f0 100644 --- a/cpp/arcticdb/processing/clause.cpp +++ b/cpp/arcticdb/processing/clause.cpp @@ -8,8 +8,6 @@ #include #include -#include - #include #include #include @@ -21,9 +19,13 @@ #include #include #include +#include + + namespace arcticdb { +namespace ranges = std::ranges; using namespace pipelines; class GroupingMap { @@ -431,6 +433,53 @@ std::vector AggregationClause::process(std::vector&& entity_ return str_; } +template +ResampleClause::ResampleClause(std::string rule, + ResampleBoundary label_boundary, + BucketGeneratorT&& generate_bucket_boundaries, + timestamp offset, + ResampleOrigin origin) : + rule_(std::move(rule)), + label_boundary_(label_boundary), + generate_bucket_boundaries_(std::move(generate_bucket_boundaries)), + offset_(offset), + origin_(std::move(origin)) { + clause_info_.input_structure_ = ProcessingStructure::TIME_BUCKETED; + clause_info_.can_combine_with_column_selection_ = false; + clause_info_.modifies_output_descriptor_ = true; + clause_info_.index_ = KeepCurrentTopLevelIndex(); +} + +template +const ClauseInfo& ResampleClause::clause_info() const { + return clause_info_; +} + +template +void ResampleClause::set_component_manager(std::shared_ptr component_manager) { + component_manager_ = std::move(component_manager); +} + +template +std::string ResampleClause::rule() const { + return rule_; +} + +template +void ResampleClause::set_date_range(timestamp date_range_start, timestamp date_range_end) { + // Start and end need to read the first and last segments of the date range. At the moment buckets are set up before + // reading and processing the data. 
+ constexpr static std::array unsupported_origin{ "start", "end", "start_day", "end_day" }; + user_input::check( + util::variant_match(origin_, + [&](const std::string& origin) { return ranges::none_of(unsupported_origin, [&](std::string_view el) { return el == origin; }); }, + [](const auto&) { return true;} + ), + "Resampling origins {} are not supported in conjunction with date range", unsupported_origin + ); + date_range_.emplace(date_range_start, date_range_end); +} + template void ResampleClause::set_aggregations(const std::vector& named_aggregators) { clause_info_.input_columns_ = std::make_optional>(); @@ -475,16 +524,15 @@ std::vector> ResampleClause::structure_for_ if (ranges_and_keys.empty()) { return {}; } - TimestampRange index_range( - std::min_element(ranges_and_keys.begin(), ranges_and_keys.end(), - [](const RangesAndKey& left, const RangesAndKey& right) { - return left.start_time() < right.start_time(); - })->start_time(), - std::max_element(ranges_and_keys.begin(), ranges_and_keys.end(), - [](const RangesAndKey& left, const RangesAndKey& right) { - return left.end_time() < right.end_time(); - })->end_time() - ); + + // Iterate over ranges_and_keys and create a pair with first element equal to the smallest start time and second + // element equal to the largest end time. + const TimestampRange index_range = std::accumulate( + std::next(ranges_and_keys.begin()), + ranges_and_keys.end(), + TimestampRange{ ranges_and_keys.begin()->start_time(), ranges_and_keys.begin()->end_time() }, + [](const TimestampRange& rng, const RangesAndKey& el) { return TimestampRange{std::min(rng.first, el.start_time()), std::max(rng.second, el.end_time())};}); + if (date_range_.has_value()) { date_range_->first = std::max(date_range_->first, index_range.first); date_range_->second = std::min(date_range_->second, index_range.second); @@ -492,11 +540,11 @@ std::vector> ResampleClause::structure_for_ date_range_ = index_range; } - bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_); + bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_, origin_); if (bucket_boundaries_.size() < 2) { return {}; } - debug::check(std::is_sorted(bucket_boundaries_.begin(), bucket_boundaries_.end()), + debug::check(ranges::is_sorted(bucket_boundaries_), "Resampling expects provided bucket boundaries to be strictly monotonically increasing"); return structure_by_time_bucket(ranges_and_keys, bucket_boundaries_); } @@ -521,12 +569,11 @@ std::vector> ResampleClause::structure_fo } date_range_ = std::make_optional(min_start_ts, max_end_ts); - - bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_); + bucket_boundaries_ = generate_bucket_boundaries_(date_range_->first, date_range_->second, rule_, closed_boundary, offset_, origin_); if (bucket_boundaries_.size() < 2) { return {}; } - debug::check(std::is_sorted(bucket_boundaries_.begin(), bucket_boundaries_.end()), + debug::check(ranges::is_sorted(bucket_boundaries_), "Resampling expects provided bucket boundaries to be strictly monotonically increasing"); auto new_structure_offsets = structure_by_time_bucket(ranges_and_entities, bucket_boundaries_); @@ -541,7 +588,7 @@ std::vector> ResampleClause::structure_fo } } internal::check( - std::all_of(expected_fetch_counts.begin(), expected_fetch_counts.end(), [](EntityFetchCount fetch_count) { + 
ranges::all_of(expected_fetch_counts, [](EntityFetchCount fetch_count) { return fetch_count == 1 || fetch_count == 2; }), "ResampleClause::structure_for_processing: invalid expected entity fetch count (should be 1 or 2)" diff --git a/cpp/arcticdb/processing/clause.hpp b/cpp/arcticdb/processing/clause.hpp index 901b622b79..134780d8db 100644 --- a/cpp/arcticdb/processing/clause.hpp +++ b/cpp/arcticdb/processing/clause.hpp @@ -36,6 +36,8 @@ namespace arcticdb { +using ResampleOrigin = std::variant; + using RangesAndKey = pipelines::RangesAndKey; using SliceAndKey = pipelines::SliceAndKey; @@ -317,6 +319,7 @@ struct AggregationClause { template struct ResampleClause { + using BucketGeneratorT = std::function(timestamp, timestamp, std::string_view, ResampleBoundary, timestamp, const ResampleOrigin&)>; ClauseInfo clause_info_; std::shared_ptr component_manager_; ProcessingConfig processing_config_; @@ -325,29 +328,22 @@ struct ResampleClause { // This will contain the data range specified by the user (if any) intersected with the range of timestamps for the symbol std::optional date_range_; // Inject this as a callback in the ctor to avoid language-specific dependencies this low down in the codebase - std::function(timestamp, timestamp, std::string_view, ResampleBoundary, timestamp)> generate_bucket_boundaries_; + BucketGeneratorT generate_bucket_boundaries_; std::vector bucket_boundaries_; std::vector aggregators_; std::string str_; timestamp offset_; + ResampleOrigin origin_; ResampleClause() = delete; ARCTICDB_MOVE_COPY_DEFAULT(ResampleClause) - ResampleClause(const std::string& rule, - ResampleBoundary label_boundary, - std::function(timestamp, timestamp, std::string_view, ResampleBoundary, timestamp)>&& generate_bucket_boundaries, - timestamp offset): - rule_(rule), - label_boundary_(label_boundary), - generate_bucket_boundaries_(std::move(generate_bucket_boundaries)), - offset_(offset) { - clause_info_.input_structure_ = ProcessingStructure::TIME_BUCKETED; - clause_info_.can_combine_with_column_selection_ = false; - clause_info_.modifies_output_descriptor_ = true; - clause_info_.index_ = KeepCurrentTopLevelIndex(); - } + ResampleClause(std::string rule, + ResampleBoundary label_boundary, + BucketGeneratorT&& generate_bucket_boundaries, + timestamp offset, + ResampleOrigin origin); [[nodiscard]] std::vector> structure_for_processing( std::vector& ranges_and_keys); @@ -356,27 +352,19 @@ struct ResampleClause { [[nodiscard]] std::vector process(std::vector&& entity_ids) const; - [[nodiscard]] const ClauseInfo& clause_info() const { - return clause_info_; - } + [[nodiscard]] const ClauseInfo& clause_info() const; void set_processing_config(const ProcessingConfig& processing_config); - void set_component_manager(std::shared_ptr component_manager) { - component_manager_ = component_manager; - } + void set_component_manager(std::shared_ptr component_manager); [[nodiscard]] std::string to_string() const; - [[nodiscard]] std::string rule() const { - return rule_; - } + [[nodiscard]] std::string rule() const; void set_aggregations(const std::vector& named_aggregators); - void set_date_range(timestamp date_range_start, timestamp date_range_end) { - date_range_.emplace(date_range_start, date_range_end); - } + void set_date_range(timestamp date_range_start, timestamp date_range_end); std::vector generate_bucket_boundaries(timestamp first_ts, timestamp last_ts, diff --git a/cpp/arcticdb/processing/clause_utils.cpp b/cpp/arcticdb/processing/clause_utils.cpp index 6b40a64e67..910dfa4c84 100644 --- 
a/cpp/arcticdb/processing/clause_utils.cpp +++ b/cpp/arcticdb/processing/clause_utils.cpp @@ -6,9 +6,12 @@ */ #include +#include + -namespace arcticdb { +namespace arcticdb { +namespace ranges = std::ranges; using namespace pipelines; std::vector> structure_by_row_slice(ComponentManager& component_manager, std::vector>&& entity_ids_vec) { @@ -101,9 +104,74 @@ std::shared_ptr> generate_segment_fetch_counts( } } debug::check( - std::all_of(res->begin(), res->end(), [](const size_t& val) { return val != 0; }), + ranges::none_of(*res, [](size_t val) { return val == 0; }), "All segments should be needed by at least one ProcessingUnit"); return res; } +template +requires std::is_same_v || std::is_same_v +std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries) { + std::erase_if(ranges, [&bucket_boundaries](const T &range) { + auto start_index = range.start_time(); + auto end_index = range.end_time(); + return index_range_outside_bucket_range(start_index, end_index, bucket_boundaries); + }); + auto res = structure_by_row_slice(ranges); + // Element i of res also needs the values from element i+1 if there is a bucket which incorporates the last index + // value of row-slice i and the first value of row-slice i+1 + // Element i+1 should be removed if the last bucket involved in element i covers all the index values in element i+1 + auto bucket_boundaries_it = std::cbegin(bucket_boundaries); + // Exit if res_it == std::prev(res.end()) as this implies the last row slice was not incorporated into an earlier processing unit + for (auto res_it = res.begin(); res_it != res.end() && res_it != std::prev(res.end());) { + auto last_index_value_in_row_slice = ranges[res_it->at(0)].end_time(); + advance_boundary_past_value(bucket_boundaries, bucket_boundaries_it, last_index_value_in_row_slice); + // bucket_boundaries_it now contains the end value of the last bucket covering the row-slice in res_it, or an end iterator if the last bucket ends before the end of this row-slice + if (bucket_boundaries_it != bucket_boundaries.end()) { + Bucket current_bucket{ *std::prev(bucket_boundaries_it), *bucket_boundaries_it }; + auto next_row_slice_it = std::next(res_it); + while (next_row_slice_it != res.end()) { + // end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice + TimestampRange next_row_slice_timestamp_range{ + ranges[next_row_slice_it->at(0)].start_time(), + ranges[next_row_slice_it->at(0)].end_time() }; + if (current_bucket.contains(next_row_slice_timestamp_range.first)) { + // The last bucket in the current processing unit overlaps with the first index value in the next row slice, so add segments into current processing unit + res_it->insert(res_it->end(), next_row_slice_it->begin(), next_row_slice_it->end()); + if (current_bucket.contains(next_row_slice_timestamp_range.second)) { + // The last bucket in the current processing unit wholly contains the next row slice, so remove it from the result + next_row_slice_it = res.erase(next_row_slice_it); + } else { + break; + } + } else { + break; + } + } + // This is the last bucket, and all the required row-slices have been incorporated into the current processing unit, so erase the rest + if (bucket_boundaries_it == std::prev(bucket_boundaries.end())) { + res.erase(next_row_slice_it, res.end()); + break; + } + res_it = next_row_slice_it; + } + } + return res; +} + +template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); 
+template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); +template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); +template std::vector> structure_by_time_bucket( + std::vector& ranges, + const std::vector& bucket_boundaries); + } diff --git a/cpp/arcticdb/processing/clause_utils.hpp b/cpp/arcticdb/processing/clause_utils.hpp index b0d7c044ec..7d0942c23d 100644 --- a/cpp/arcticdb/processing/clause_utils.hpp +++ b/cpp/arcticdb/processing/clause_utils.hpp @@ -157,54 +157,8 @@ void advance_boundary_past_value(const std::vector& bucket_boundaries template requires std::is_same_v || std::is_same_v std::vector> structure_by_time_bucket( - std::vector& ranges, - const std::vector& bucket_boundaries) { - std::erase_if(ranges, [&bucket_boundaries](const T &range) { - auto start_index = range.start_time(); - auto end_index = range.end_time(); - return index_range_outside_bucket_range(start_index, end_index, bucket_boundaries); - }); - auto res = structure_by_row_slice(ranges); - // Element i of res also needs the values from element i+1 if there is a bucket which incorporates the last index - // value of row-slice i and the first value of row-slice i+1 - // Element i+1 should be removed if the last bucket involved in element i covers all the index values in element i+1 - auto bucket_boundaries_it = std::cbegin(bucket_boundaries); - // Exit if res_it == std::prev(res.end()) as this implies the last row slice was not incorporated into an earlier processing unit - for (auto res_it = res.begin(); res_it != res.end() && res_it != std::prev(res.end());) { - auto last_index_value_in_row_slice = ranges[res_it->at(0)].end_time(); - advance_boundary_past_value(bucket_boundaries, bucket_boundaries_it, last_index_value_in_row_slice); - // bucket_boundaries_it now contains the end value of the last bucket covering the row-slice in res_it, or an end iterator if the last bucket ends before the end of this row-slice - if (bucket_boundaries_it != bucket_boundaries.end()) { - Bucket current_bucket{*std::prev(bucket_boundaries_it), *bucket_boundaries_it}; - auto next_row_slice_it = std::next(res_it); - while (next_row_slice_it != res.end()) { - // end_index from the key is 1 nanosecond larger than the index value of the last row in the row-slice - TimestampRange next_row_slice_timestamp_range{ - ranges[next_row_slice_it->at(0)].start_time(), - ranges[next_row_slice_it->at(0)].end_time()}; - if (current_bucket.contains(next_row_slice_timestamp_range.first)) { - // The last bucket in the current processing unit overlaps with the first index value in the next row slice, so add segments into current processing unit - res_it->insert(res_it->end(), next_row_slice_it->begin(), next_row_slice_it->end()); - if (current_bucket.contains(next_row_slice_timestamp_range.second)) { - // The last bucket in the current processing unit wholly contains the next row slice, so remove it from the result - next_row_slice_it = res.erase(next_row_slice_it); - } else { - break; - } - } else { - break; - } - } - // This is the last bucket, and all the required row-slices have been incorporated into the current processing unit, so erase the rest - if (bucket_boundaries_it == std::prev(bucket_boundaries.end())) { - res.erase(next_row_slice_it, res.end()); - break; - } - res_it = next_row_slice_it; - } - } - return res; -} + std::vector& ranges, + const std::vector& bucket_boundaries); std::vector> 
structure_by_row_slice(ComponentManager& component_manager, std::vector>&& entity_ids_vec); diff --git a/cpp/arcticdb/processing/sorted_aggregation.hpp b/cpp/arcticdb/processing/sorted_aggregation.hpp index 07ba30524a..440d58cfd7 100644 --- a/cpp/arcticdb/processing/sorted_aggregation.hpp +++ b/cpp/arcticdb/processing/sorted_aggregation.hpp @@ -160,7 +160,7 @@ class MinAggregatorSorted { void push(T value) { if constexpr (std::is_floating_point_v) { if (ARCTICDB_LIKELY(!std::isnan(value))) { - min_ = std::min(min_.value_or(std::numeric_limits::max()), value); + min_ = std::min(min_.value_or(std::numeric_limits::infinity()), value); } } else if constexpr (std::is_same_v && TimeType) { if (ARCTICDB_LIKELY(value != NaT)) { @@ -202,7 +202,7 @@ class MaxAggregatorSorted { void push(T value) { if constexpr (std::is_floating_point_v) { if (ARCTICDB_LIKELY(!std::isnan(value))) { - max_ = std::max(max_.value_or(std::numeric_limits::lowest()), value); + max_ = std::max(max_.value_or(-std::numeric_limits::infinity()), value); } } else if constexpr (std::is_same_v && TimeType) { if (ARCTICDB_LIKELY(value != NaT)) { diff --git a/cpp/arcticdb/processing/test/rapidcheck_resample.cpp b/cpp/arcticdb/processing/test/rapidcheck_resample.cpp index 14285d7a91..9f7c3fe2b2 100644 --- a/cpp/arcticdb/processing/test/rapidcheck_resample.cpp +++ b/cpp/arcticdb/processing/test/rapidcheck_resample.cpp @@ -17,7 +17,7 @@ using namespace arcticdb; auto generate_bucket_boundaries(std::vector&& bucket_boundaries) { - return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp) mutable { + return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp, ResampleOrigin) mutable { return bucket_boundaries; }; } @@ -113,11 +113,11 @@ RC_GTEST_PROP(Resample, StructureForProcessing, ()) { } if (left_boundary_closed) { - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0, 0}; auto result = resample_clause.structure_for_processing(ranges_and_keys); RC_ASSERT(expected_result == result); } else { - ResampleClause resample_clause{"dummy", ResampleBoundary::RIGHT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::RIGHT, generate_bucket_boundaries(std::move(bucket_boundaries)), 0, 0}; auto result = resample_clause.structure_for_processing(ranges_and_keys); RC_ASSERT(expected_result == result); } diff --git a/cpp/arcticdb/processing/test/test_resample.cpp b/cpp/arcticdb/processing/test/test_resample.cpp index c2bfa40043..4279540b01 100644 --- a/cpp/arcticdb/processing/test/test_resample.cpp +++ b/cpp/arcticdb/processing/test/test_resample.cpp @@ -14,7 +14,7 @@ using namespace arcticdb; auto generate_bucket_boundaries(std::vector&& bucket_boundaries) { - return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp) { + return [bucket_boundaries = std::move(bucket_boundaries)](timestamp, timestamp, std::string_view, ResampleBoundary, timestamp, ResampleOrigin) { return bucket_boundaries; }; } @@ -33,7 +33,7 @@ TEST(Resample, StructureForProcessingBasic) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, 
top}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0}; + ResampleClause resample_clause{ "dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0, 0 }; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -61,7 +61,7 @@ TEST(Resample, StructureForProcessingColumnSlicing) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{top_right, bottom_left, bottom_right, top_left}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 1500, 2500, 2999}), 0, 0}; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 4); ASSERT_EQ(ranges_and_keys[0], top_left); @@ -86,7 +86,7 @@ TEST(Resample, StructureForProcessingOverlap) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, top}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2500, 2999}), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2500, 2999}), 0, 0}; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -113,7 +113,7 @@ TEST(Resample, StructureForProcessingSubsumed) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, middle, top}; - ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 4500}), 0}; + ResampleClause resample_clause{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 4500}), 0, 0}; auto proc_unit_ids = resample_clause.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 3); ASSERT_EQ(ranges_and_keys[0], top); @@ -138,7 +138,7 @@ TEST(Resample, StructureForProcessingExactBoundary) { // Insert into vector "out of order" to ensure structure_for_processing reorders correctly std::vector ranges_and_keys{bottom, top}; - ResampleClause resample_clause_left{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0}; + ResampleClause resample_clause_left{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0, 0}; auto proc_unit_ids = resample_clause_left.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -146,7 +146,7 @@ TEST(Resample, StructureForProcessingExactBoundary) { std::vector> expected_proc_unit_ids_left{{0}, {1}}; ASSERT_EQ(expected_proc_unit_ids_left, proc_unit_ids); - ResampleClause resample_clause_right{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0}; + ResampleClause resample_clause_right{"dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({1, 500, 2000, 2500, 2999}), 0, 0}; proc_unit_ids = resample_clause_right.structure_for_processing(ranges_and_keys); ASSERT_EQ(ranges_and_keys.size(), 2); ASSERT_EQ(ranges_and_keys[0], top); @@ -157,11 +157,11 @@ TEST(Resample, 
StructureForProcessingExactBoundary) { TEST(Resample, FindBuckets) { // Enough bucket boundaries to test all the interesting cases - ResampleClause resample_left("left", ResampleBoundary::LEFT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0); - ResampleClause resample_right("right", ResampleBoundary::RIGHT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0); + ResampleClause resample_left("left", ResampleBoundary::LEFT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0, 0); + ResampleClause resample_right("right", ResampleBoundary::RIGHT, generate_bucket_boundaries({0, 10, 20, 30, 40}), 0, 0); - resample_left.bucket_boundaries_ = resample_left.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0); - resample_right.bucket_boundaries_ = resample_right.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::RIGHT, 0); + resample_left.bucket_boundaries_ = resample_left.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0, 0); + resample_right.bucket_boundaries_ = resample_right.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::RIGHT, 0, 0); std::vector res; @@ -221,8 +221,8 @@ TEST(Resample, FindBuckets) { TEST(Resample, ProcessOneSegment) { auto component_manager = std::make_shared(); - ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-1, 2, 5}), 0); - resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0); + ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-1, 2, 5}), 0, 0); + resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0, 0); resample.date_range_ = {0, 5}; resample.set_component_manager(component_manager); resample.set_aggregations({{"sum", "sum_column", "sum_column"}}); @@ -266,8 +266,8 @@ TEST(Resample, ProcessOneSegment) { TEST(Resample, ProcessMultipleSegments) { auto component_manager = std::make_shared(); - ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-15, -5, 5, 6, 25, 35, 45, 46, 55, 65}), 0); - resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0); + ResampleClause resample("dummy", ResampleBoundary::LEFT, generate_bucket_boundaries({-15, -5, 5, 6, 25, 35, 45, 46, 55, 65}), 0, 0); + resample.bucket_boundaries_ = resample.generate_bucket_boundaries_(0, 0, "dummy", ResampleBoundary::LEFT, 0, 0); resample.date_range_ = {0, 51}; resample.set_component_manager(component_manager); resample.set_aggregations({{"sum", "sum_column", "sum_column"}}); diff --git a/cpp/arcticdb/python/python_utils.hpp b/cpp/arcticdb/python/python_utils.hpp index 10e33c6838..4c4ea638f0 100644 --- a/cpp/arcticdb/python/python_utils.hpp +++ b/cpp/arcticdb/python/python_utils.hpp @@ -8,7 +8,6 @@ #pragma once #include -#include #include #include #include diff --git a/cpp/arcticdb/version/python_bindings.cpp b/cpp/arcticdb/version/python_bindings.cpp index bb8c8518cb..b839159843 100644 --- a/cpp/arcticdb/version/python_bindings.cpp +++ b/cpp/arcticdb/version/python_bindings.cpp @@ -5,7 +5,7 @@ * As of the Change Date specified in that file, in accordance with the Business Source License, use of this software will be governed by the Apache License, version 2.0. 
*/ -#include +#include #include #include #include @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -23,27 +22,81 @@ #include #include #include +#include namespace arcticdb::version_store { +static consteval timestamp one_day_in_nanoseconds() { + return timestamp(24) * 60 * 60 * 1'000'000'000; +} + +template +requires std::integral +[[nodiscard]] static T python_mod(T a, T b) { + return (a % b + b) % b; +} + +/// @param ts in nanoseconds +[[nodiscard]] static timestamp start_of_day_nanoseconds(timestamp ts) { + return ts - python_mod(ts, one_day_in_nanoseconds()); +} + +/// @param ts in nanoseconds +[[nodiscard]] static timestamp end_of_day_nanoseconds(timestamp ts) { + const timestamp start_of_day = start_of_day_nanoseconds(ts); + const bool is_midnnight = start_of_day == ts; + if (is_midnnight) { + return ts; + } + return start_of_day + one_day_in_nanoseconds(); +} + [[nodiscard]] static std::pair compute_first_last_dates( timestamp start, timestamp end, timestamp rule, ResampleBoundary closed_boundary_arg, - timestamp offset + timestamp offset, + const ResampleOrigin& origin ) { - const timestamp ns_to_prev_offset_start = (start - offset) % rule; - const timestamp ns_to_prev_offset_end = (end - offset) % rule; + // Origin value formula from Pandas: + // https://github.com/pandas-dev/pandas/blob/68d9dcab5b543adb3bfe5b83563c61a9b8afae77/pandas/core/resample.py#L2564 + auto [origin_ns, origin_adjusted_start] = util::variant_match( + origin, + [start](timestamp o) -> std::pair {return {o, start}; }, + [&](const std::string& o) -> std::pair { + if (o == "epoch") { + return { 0, start }; + } else if (o == "start") { + return { start, start }; + } else if (o == "start_day") { + return { start_of_day_nanoseconds(start), start }; + } else if (o == "end_day" || o == "end") { + const timestamp origin_last = o == "end" ? end: end_of_day_nanoseconds(end); + const timestamp bucket_count = (origin_last - start) / rule + (closed_boundary_arg == ResampleBoundary::LEFT); + const timestamp origin_ns = origin_last - bucket_count * rule; + return { origin_ns, origin_ns }; + } else { + user_input::raise( + "Invalid origin value {}. Supported values are: \"start\", \"start_day\", \"end\", \"end_day\", \"epoch\" or timestamp in nanoseconds", + o); + } + } + ); + origin_ns += offset; + + const timestamp ns_to_prev_offset_start = python_mod(origin_adjusted_start - origin_ns, rule); + const timestamp ns_to_prev_offset_end = python_mod(end - origin_ns, rule); + if (closed_boundary_arg == ResampleBoundary::RIGHT) { return { - ns_to_prev_offset_start > 0 ? start - ns_to_prev_offset_start : start - rule, + ns_to_prev_offset_start > 0 ? origin_adjusted_start - ns_to_prev_offset_start : origin_adjusted_start - rule, ns_to_prev_offset_end > 0 ? end + (rule - ns_to_prev_offset_end) : end }; } else { return { - ns_to_prev_offset_start > 0 ? start - ns_to_prev_offset_start : start, + ns_to_prev_offset_start > 0 ? origin_adjusted_start - ns_to_prev_offset_start : origin_adjusted_start, ns_to_prev_offset_end > 0 ? 
end + (rule - ns_to_prev_offset_end) : end + rule }; } @@ -54,14 +107,14 @@ std::vector generate_buckets( timestamp end, std::string_view rule, ResampleBoundary closed_boundary_arg, - timestamp offset + timestamp offset, + const ResampleOrigin& origin ) { - timestamp rule_ns; - { + const timestamp rule_ns = [](std::string_view rule) { py::gil_scoped_acquire acquire_gil; - rule_ns = python_util::pd_to_offset(rule); - } - const auto [start_with_offset, end_with_offset] = compute_first_last_dates(start, end, rule_ns, closed_boundary_arg, offset); + return python_util::pd_to_offset(rule); + }(rule); + const auto [start_with_offset, end_with_offset] = compute_first_last_dates(start, end, rule_ns, closed_boundary_arg, offset, origin); const auto bucket_boundary_count = (end_with_offset - start_with_offset) / rule_ns + 1; std::vector res; res.reserve(bucket_boundary_count); @@ -73,16 +126,10 @@ std::vector generate_buckets( template void declare_resample_clause(py::module& version) { - std::string class_name; - if constexpr (closed_boundary == ResampleBoundary::LEFT) { - class_name = "ResampleClauseLeftClosed"; - } else { - // closed_boundary == ResampleBoundary::RIGHT - class_name = "ResampleClauseRightClosed"; - } - py::class_, std::shared_ptr>>(version, class_name.c_str()) - .def(py::init([](std::string rule, ResampleBoundary label_boundary, timestamp offset){ - return ResampleClause(rule, label_boundary, generate_buckets, offset); + const char* class_name = closed_boundary == ResampleBoundary::LEFT ? "ResampleClauseLeftClosed" : "ResampleClauseRightClosed"; + py::class_, std::shared_ptr>>(version, class_name) + .def(py::init([](std::string rule, ResampleBoundary label_boundary, timestamp offset, ResampleOrigin origin){ + return ResampleClause(std::move(rule), label_boundary, generate_buckets, offset, std::move(origin)); })) .def_property_readonly("rule", &ResampleClause::rule) .def("set_aggregations", [](ResampleClause& self, diff --git a/python/arcticdb/util/test.py b/python/arcticdb/util/test.py index 61440572a6..fdc922665c 100644 --- a/python/arcticdb/util/test.py +++ b/python/arcticdb/util/test.py @@ -35,6 +35,7 @@ from arcticdb.version_store.helper import ArcticFileConfig from arcticdb.config import _DEFAULT_ENVS_PATH from arcticdb_ext import set_config_int, get_config_int, unset_config_int +from packaging.version import Version from arcticdb import log @@ -756,3 +757,80 @@ def generic_named_aggregation_test(lib, symbol, df, grouping_column, aggs_dict): f"""\nPandas result:\n{expected}\n"ArcticDB result:\n{received}""" ) raise e + +def drop_inf_and_nan(df: pd.DataFrame) -> pd.DataFrame: + return df[~df.isin([np.nan, np.inf, -np.inf]).any(axis=1)] + + +def assert_dfs_approximate(left: pd.DataFrame, right: pd.DataFrame): + """ + Checks if integer columns are exactly the same. For float columns, checks if they are approximately the same. + We can't guarantee the same order of operations for the floats, thus numerical errors might appear. + """ + assert left.shape == right.shape + assert left.columns.equals(right.columns) + # To avoid checking the freq member of the index as arctic does not fill it in + assert left.index.equals(right.index) + + # Drop NaN and inf values because Pandas uses the Kahan summation algorithm to improve numerical stability. + # Thus they don't consistently overflow to infinity.
Discussion: https://github.com/pandas-dev/pandas/issues/60303 + left_no_inf_and_nan = drop_inf_and_nan(left) + right_no_inf_and_nan = drop_inf_and_nan(right) + + check_equals_flags = {"check_dtype": False} + if PANDAS_VERSION >= Version("1.1"): + check_equals_flags["check_freq"] = False + if PANDAS_VERSION >= Version("1.2"): + check_equals_flags["check_flags"] = False + for col in left_no_inf_and_nan.columns: + if pd.api.types.is_integer_dtype(left_no_inf_and_nan[col].dtype) and pd.api.types.is_integer_dtype(right_no_inf_and_nan[col].dtype): + pd.testing.assert_series_equal(left_no_inf_and_nan[col], right_no_inf_and_nan[col], **check_equals_flags) + else: + if PANDAS_VERSION >= Version("1.1"): + check_equals_flags["atol"] = 1e-8 + pd.testing.assert_series_equal(left_no_inf_and_nan[col], right_no_inf_and_nan[col], **check_equals_flags) + + +def generic_resample_test(lib, sym, rule, aggregations, date_range=None, closed=None, label=None, offset=None, origin=None, drop_empty_buckets_for=None): + """ + Perform a resampling in ArcticDB and compare it against the same query in Pandas. + + :param drop_empty_buckets_for: Will add an additional aggregation column using the count aggregator. At the end of the + aggregation query, all rows for which this newly added count aggregation is 0 will be removed. Works only for int/uint + columns. There is a similar function generic_resample_test_with_empty_buckets in + python/tests/unit/arcticdb/version_store/test_resample.py which can drop empty buckets for all types of columns, + but it cannot take parameters such as origin and offset. + """ + # Pandas doesn't have a good date_range equivalent in resample, so just use read for that + expected = lib.read(sym, date_range=date_range).data + # Pandas 1.X needs None as the first argument to agg with named aggregators + + pandas_aggregations = {**aggregations, "_bucket_size_": (drop_empty_buckets_for, "count")} if drop_empty_buckets_for else aggregations + resample_args = {} + if origin: + resample_args['origin'] = origin + if offset: + resample_args['offset'] = offset + + if PANDAS_VERSION >= Version("1.1.0"): + expected = expected.resample(rule, closed=closed, label=label, **resample_args).agg(None, **pandas_aggregations) + else: + expected = expected.resample(rule, closed=closed, label=label).agg(None, **pandas_aggregations) + if drop_empty_buckets_for: + expected = expected[expected["_bucket_size_"] > 0] + expected.drop(columns=["_bucket_size_"], inplace=True) + expected = expected.reindex(columns=sorted(expected.columns)) + + q = QueryBuilder() + if origin: + q = q.resample(rule, closed=closed, label=label, offset=offset, origin=origin).agg(aggregations) + else: + q = q.resample(rule, closed=closed, label=label, offset=offset).agg(aggregations) + received = lib.read(sym, date_range=date_range, query_builder=q).data + received = received.reindex(columns=sorted(received.columns)) + + has_float_column = any(pd.api.types.is_float_dtype(col_type) for col_type in list(expected.dtypes)) + if has_float_column: + assert_dfs_approximate(expected, received) + else: + assert_frame_equal(expected, received, check_dtype=False) \ No newline at end of file diff --git a/python/arcticdb/version_store/processing.py b/python/arcticdb/version_store/processing.py index 46a821d5fe..5f77cda66d 100644 --- a/python/arcticdb/version_store/processing.py +++ b/python/arcticdb/version_store/processing.py @@ -320,6 +320,7 @@ class PythonResampleClause: aggregations: Dict[str, Union[str, Tuple[str, str]]] = None # In nanosecods offset: int = 0 +
origin: Union[str, pd.Timestamp] = "epoch" class QueryBuilder: @@ -590,7 +591,8 @@ def resample( rule: Union[str, pd.DateOffset], closed: Optional[str] = None, label: Optional[str] = None, - offset: Optional[Union[str, pd.Timedelta]] = None + offset: Optional[Union[str, pd.Timedelta]] = None, + origin: Union[str, pd.Timestamp] = 'epoch' ): """ Resample a symbol on the index. The symbol must be datetime indexed. Resample operations must be followed by @@ -637,7 +639,16 @@ def resample( offset: Optional[Union[str, pd.Timedelta]] default=None Offset the start of each bucket. Supported strings are the same as in `pd.Timedelta`. If offset is larger than rule then `offset` modulo `rule` is used as an offset. + origin: Optional[Union[str, pd.Timestamp]] default='epoch' + The timestamp on which to adjust the grouping. Supported strings are: + * epoch: origin is 1970-01-01 + * start: origin is the first value of the timeseries + * start_day: origin is the first day at midnight of the timeseries + * end: origin is the last value of the timeseries + * end_day: origin is the ceiling midnight of the last day + + `start`, `start_day`, `end`, `end_day` origin values are not supported in conjunction with `date_range`. Returns ------- QueryBuilder @@ -658,6 +669,10 @@ def resample( * The library has dynamic schema enabled, and at least one of the columns being aggregated is missing from at least one row-slice. * At least one of the columns being aggregated contains sparse data. + UserInputException + + * `start`, `start_day`, `end`, `end_day` are used in conjunction with `date_range` + * `origin` is not one of `start`, `start_day`, `end`, `end_day`, `epoch` or a `pd.Timestamp` Examples -------- @@ -742,6 +757,10 @@ def resample( else: offset_ns = 0 + if not (isinstance(origin, pd.Timestamp) or origin in ["start", "end", "start_day", "end_day", "epoch"]): + raise UserInputException(f'Argument origin must be either of type pd.Timestamp or one of ["start", "end", "start_day", "end_day", "epoch"].
Got {origin} instead') + if type(origin) is pd.Timestamp: + origin = origin.value # This set is documented here: # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.resample.html#pandas.Series.resample # and lifted directly from pandas.core.resample.TimeGrouper.__init__, and so is inherently fragile to upstream @@ -750,15 +769,15 @@ def resample( boundary_map = { "left": _ResampleBoundary.LEFT, "right": _ResampleBoundary.RIGHT, - None: _ResampleBoundary.RIGHT if rule in end_types else _ResampleBoundary.LEFT + None: _ResampleBoundary.RIGHT if rule in end_types or origin in ["end", "end_day"] else _ResampleBoundary.LEFT } check(closed in boundary_map.keys(), f"closed kwarg to resample must be `left`, 'right', or None, but received '{closed}'") check(label in boundary_map.keys(), f"label kwarg to resample must be `left`, 'right', or None, but received '{closed}'") if boundary_map[closed] == _ResampleBoundary.LEFT: - self.clauses = self.clauses + [_ResampleClauseLeftClosed(rule, boundary_map[label], offset_ns)] + self.clauses = self.clauses + [_ResampleClauseLeftClosed(rule, boundary_map[label], offset_ns, origin)] else: - self.clauses = self.clauses + [_ResampleClauseRightClosed(rule, boundary_map[label], offset_ns)] - self._python_clauses = self._python_clauses + [PythonResampleClause(rule=rule, closed=boundary_map[closed], label=boundary_map[label], offset=offset_ns)] + self.clauses = self.clauses + [_ResampleClauseRightClosed(rule, boundary_map[label], offset_ns, origin)] + self._python_clauses = self._python_clauses + [PythonResampleClause(rule=rule, closed=boundary_map[closed], label=boundary_map[label], offset=offset_ns, origin=origin)] return self @@ -936,9 +955,9 @@ def __setstate__(self, state): self.clauses = self.clauses + [_AggregationClause(self.clauses[-1].grouping_column, python_clause.aggregations)] elif isinstance(python_clause, PythonResampleClause): if python_clause.closed == _ResampleBoundary.LEFT: - self.clauses = self.clauses + [_ResampleClauseLeftClosed(python_clause.rule, python_clause.label, python_clause.offset)] + self.clauses = self.clauses + [_ResampleClauseLeftClosed(python_clause.rule, python_clause.label, python_clause.offset, python_clause.origin)] else: - self.clauses = self.clauses + [_ResampleClauseRightClosed(python_clause.rule, python_clause.label, python_clause.offset)] + self.clauses = self.clauses + [_ResampleClauseRightClosed(python_clause.rule, python_clause.label, python_clause.offset, python_clause.origin)] if python_clause.aggregations is not None: self.clauses[-1].set_aggregations(python_clause.aggregations) elif isinstance(python_clause, PythonRowRangeClause): diff --git a/python/tests/hypothesis/arcticdb/test_resample.py b/python/tests/hypothesis/arcticdb/test_resample.py new file mode 100644 index 0000000000..e263211e92 --- /dev/null +++ b/python/tests/hypothesis/arcticdb/test_resample.py @@ -0,0 +1,110 @@ +import pandas as pd +import numpy as np +import pytest +from hypothesis import given, settings, assume +import hypothesis.extra.pandas as hs_pd +import hypothesis.extra.numpy as hs_np +import hypothesis.strategies as st +from arcticdb.util.hypothesis import use_of_function_scoped_fixtures_in_hypothesis_checked +from arcticdb import QueryBuilder +from arcticdb.util.test import assert_frame_equal, generic_resample_test +from arcticdb.util._versions import IS_PANDAS_TWO + + +COLUMN_DTYPE = ["float", "int", "uint"] +ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"] +MIN_DATE =
np.datetime64('1969-01-01') +MAX_DATE = np.datetime64('2000-01-01') + +@st.composite +def date(draw, min_date, max_date): + # Bound the start and end date so that we don't end up with too many buckets eating all RAM + # Use some pre-epoch dates. + # hs_np.from_dtype's min_value and max_value do not work with dates + res = draw(hs_np.from_dtype(np.dtype("datetime64[ns]"))) + assume(min_date <= res and res <= max_date) + return res + +@st.composite +def dataframe(draw): + index = hs_pd.indexes(elements=date(min_date=MIN_DATE, max_date=MAX_DATE).filter(lambda d: d is not pd.NaT), min_size=1) + columns = [hs_pd.column(name=f"col_{dtype}", dtype=dtype) for dtype in COLUMN_DTYPE] + result = draw(hs_pd.data_frames(columns, index=index)) + result.sort_index(inplace=True) + return result + +@st.composite +def origin(draw): + selected_origin = draw(st.sampled_from(["start", "end", "start_day", "end_day", "epoch", "timestamp"])) + # Hypothesis may generate dates for year > 2200 and some of the arithmetic operations will overflow. + if selected_origin == "timestamp": + min_date = MIN_DATE - np.timedelta64(365, 'D') + max_date = MAX_DATE + np.timedelta64(365, 'D') + return pd.Timestamp(draw(date(min_date=min_date, max_date=max_date))) + else: + return selected_origin + +def freq_fits_in_64_bits(count, unit): + """ + This is used to check if a frequency is usable by Arctic. ArcticDB converts the frequency to a signed 64-bit integer. + """ + billion = 1_000_000_000 + mult = {'h': 3600 * billion, 'min': 60 * billion, 's': billion} + return (mult[unit] * count).bit_length() <= 63 + +@st.composite +def rule(draw): + count = draw(st.integers(min_value=1)) + unit = draw(st.sampled_from(['min', 'h'])) + result = f"{count}{unit}" + assume(freq_fits_in_64_bits(count=count, unit=unit)) + return result + +@st.composite +def offset(draw): + unit = draw(st.sampled_from(['s', 'min', 'h', None])) + if unit is None: + return None + count = draw(st.integers(min_value=1)) + result = f"{count}{unit}" + assume(freq_fits_in_64_bits(count=count, unit=unit)) + return result + + +@pytest.mark.skipif(not IS_PANDAS_TWO, reason="Some resampling parameters don't exist in Pandas < 2") +@use_of_function_scoped_fixtures_in_hypothesis_checked +@settings(deadline=None) +@given( + df=dataframe(), + rule=rule(), + origin=origin(), + offset=offset() +) +def test_resample(lmdb_version_store_v1, df, rule, origin, offset): + lib = lmdb_version_store_v1 + sym = "sym" + lib.write(sym, df) + for closed in ["left", "right"]: + for label in ["left", "right"]: + columns = list(df.columns) + agg = {f"{name}_{op}": (name, op) for name in columns for op in ALL_AGGREGATIONS} + try: + generic_resample_test( + lib, + sym, + rule, + agg, + origin=origin, + offset=offset, + closed=closed, + label=label, + # Must be int or uint column otherwise dropping of empty buckets will not work + drop_empty_buckets_for="col_uint") + except ValueError as pandas_error: + # This is to avoid a bug in pandas related to how end and end_day work. It's possible, when end/end_day are used, + # for the first value of the data frame to fall outside of the computed resampling range. In ArcticDB this is not a problem + # as we allow this by design.
+ if str(pandas_error) != "Values falls before first bin": + raise pandas_error + else: + return \ No newline at end of file diff --git a/python/tests/hypothesis/arcticdb/test_sort_merge.py b/python/tests/hypothesis/arcticdb/test_sort_merge.py index d983ca97dd..2417d1bf08 100644 --- a/python/tests/hypothesis/arcticdb/test_sort_merge.py +++ b/python/tests/hypothesis/arcticdb/test_sort_merge.py @@ -38,7 +38,7 @@ def generate_single_dataframe(draw, column_list, min_size=0, allow_nat_in_index= if not IS_PANDAS_TWO: # Due to https://github.com/man-group/ArcticDB/blob/7479c0b0caa8121bc2ca71a73e29769bbc41c66a/python/arcticdb/version_store/_normalization.py#L184 # we change the dtype of empty float columns. This makes hypothesis tests extremely hard to write as we must - # keep addional state about is there a mix of empty/non-empty float columns in the staging area, did we write + # keep additional state about is there a mix of empty/non-empty float columns in the staging area, did we write # empty float column (if so it's type would be object). These edge cases are covered in the unit tests. index = hs_pd.indexes(dtype="datetime64[ns]", min_size=1 if min_size <= 0 else min_size).filter(lambda x: allow_nat_in_index or not pd.NaT in x) else: diff --git a/python/tests/unit/arcticdb/version_store/test_resample.py b/python/tests/unit/arcticdb/version_store/test_resample.py index 965f679340..6b41090f87 100644 --- a/python/tests/unit/arcticdb/version_store/test_resample.py +++ b/python/tests/unit/arcticdb/version_store/test_resample.py @@ -12,16 +12,20 @@ import pytest from arcticdb import QueryBuilder -from arcticdb.exceptions import ArcticDbNotYetImplemented, SchemaException -from arcticdb.util.test import assert_frame_equal +from arcticdb.exceptions import ArcticDbNotYetImplemented, SchemaException, UserInputException +from arcticdb.util.test import assert_frame_equal, generic_resample_test from packaging.version import Version from arcticdb.util._versions import IS_PANDAS_TWO, PANDAS_VERSION +import itertools pytestmark = pytest.mark.pipeline ALL_AGGREGATIONS = ["sum", "mean", "min", "max", "first", "last", "count"] +def all_aggregations_dict(col): + return {f"to_{agg}": (col, agg) for agg in ALL_AGGREGATIONS} + # Pandas recommended way to resample and exclude buckets with no index values, which is our behaviour # See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#sparse-resampling def round(t, freq): @@ -30,31 +34,22 @@ def round(t, freq): return pd.Timestamp((t.value // td.value) * td.value) def generic_resample_test_with_empty_buckets(lib, sym, rule, aggregations, date_range=None): - # Pandas doesn't have a good date_range equivalent in resample, so just use read for that - expected = lib.read(sym, date_range=date_range).data - # Pandas 1.X needs None as the first argument to agg with named aggregators - expected = expected.groupby(partial(round, freq=rule)).agg(None, **aggregations) - expected = expected.reindex(columns=sorted(expected.columns)) - - q = QueryBuilder() - q = q.resample(rule).agg(aggregations) - received = lib.read(sym, date_range=date_range, query_builder=q).data - received = received.reindex(columns=sorted(received.columns)) + """ + Perform a resampling in ArcticDB and compare it against the same query in Pandas. - assert_frame_equal(expected, received, check_dtype=False) + This will remove all empty buckets mirroring ArcticDB's behavior. It cannot take additional parameters such as + origin and offset.
In case such parameters are needed, arcticdb.util.test.generic_resample_test can be used. -def generic_resample_test(lib, sym, rule, aggregations, date_range=None, closed=None, label=None, offset=None): + This can drop buckets even if all columns are of float type, while generic_resample_test needs at least one non-float + column. + """ # Pandas doesn't have a good date_range equivalent in resample, so just use read for that expected = lib.read(sym, date_range=date_range).data # Pandas 1.X needs None as the first argument to agg with named aggregators - if PANDAS_VERSION >= Version("1.1.0"): - expected = expected.resample(rule, closed=closed, label=label, offset=offset).agg(None, **aggregations) - else: - expected = expected.resample(rule, closed=closed, label=label).agg(None, **aggregations) + expected = expected.groupby(partial(round, freq=rule)).agg(None, **aggregations) expected = expected.reindex(columns=sorted(expected.columns)) - q = QueryBuilder() - q = q.resample(rule, closed=closed, label=label, offset=offset).agg(aggregations) + q = q.resample(rule).agg(aggregations) received = lib.read(sym, date_range=date_range, query_builder=q).data received = received.reindex(columns=sorted(received.columns)) @@ -548,10 +543,6 @@ def test_resampling_empty_type_column(lmdb_version_store_empty_types_v1): @pytest.mark.parametrize("closed", ["left", "right"]) class TestResamplingOffset: - @staticmethod - def all_aggregations_dict(col): - return {f"to_{agg}": (col, agg) for agg in ALL_AGGREGATIONS} - @pytest.mark.parametrize("offset", ("30s", pd.Timedelta(seconds=30))) def test_offset_smaller_than_freq(self, lmdb_version_store_v1, closed, offset): lib = lmdb_version_store_v1 @@ -564,7 +555,7 @@ def test_offset_smaller_than_freq(self, lmdb_version_store_v1, closed, offset): lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset="30s" ) @@ -581,7 +572,7 @@ def test_offset_larger_than_freq(self, lmdb_version_store_v1, closed, offset): lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset=offset ) @@ -603,7 +594,7 @@ def test_values_on_offset_boundary(self, lmdb_version_store_v1, closed, offset): lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset=offset ) @@ -630,8 +621,226 @@ def test_with_date_range(self, lmdb_version_store_v1, closed, date_range, offset lib, sym, "2min", - self.all_aggregations_dict("col"), + all_aggregations_dict("col"), closed=closed, offset=offset, date_range=date_range - ) \ No newline at end of file + ) + +@pytest.mark.skipif(PANDAS_VERSION < Version("1.1.0"), reason="Pandas < 1.1.0 do not have offset param") +@pytest.mark.parametrize("closed", ["left", "right"]) +class TestResamplingOrigin: + + # Timestamps: pre start, between start and end, post end, first date in the index, last date in the index + @pytest.mark.parametrize( + "origin", + [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + "epoch", + pd.Timestamp("2024-01-01"), + pd.Timestamp("2025-01-01 15:00:00"), + pd.Timestamp("2025-01-03 15:00:00"), + pd.Timestamp("2025-01-01 10:00:33"), + pd.Timestamp("2025-01-02 12:00:13") + ] + ) + def test_origin(self, lmdb_version_store_v1, closed, origin): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + #
Start and end are picked so that #bins * rule + start != end on purpose to test + # the bin generation in case of end and end_day + start = pd.Timestamp("2025-01-01 10:00:33") + end = pd.Timestamp("2025-01-02 12:00:20") + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin + ) + + @pytest.mark.parametrize("origin", [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")) + ]) + @pytest.mark.parametrize("date_range", [ + (pd.Timestamp("2025-01-01 10:00:00"), pd.Timestamp("2025-01-02 12:00:00")), # start and end are multiples of rule + (pd.Timestamp("2025-01-01 10:00:00"), pd.Timestamp("2025-01-02 12:00:03")), # start is multiple of rule + (pd.Timestamp("2025-01-01 10:00:03"), pd.Timestamp("2025-01-02 12:00:00")) # end is multiple of rule + ]) + def test_origin_is_multiple_of_freq(self, lmdb_version_store_v1, closed, origin, date_range): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + start, end = date_range + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + drop_empty_buckets_for="col" + ) + + @pytest.mark.parametrize("origin", [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + "epoch" + ]) + def test_pre_epoch_data(self, lmdb_version_store_v1, closed, origin): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + start = pd.Timestamp("1800-01-01 10:00:00") + end = pd.Timestamp("1800-01-02 10:00:00") + idx = pd.date_range(start, end, freq='30s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + drop_empty_buckets_for="col" + ) + + @pytest.mark.parametrize("origin", [ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + ]) + @pytest.mark.parametrize("date_range", + list(itertools.product( + [pd.Timestamp("2024-01-01") - pd.Timedelta(1), pd.Timestamp("2024-01-01") + pd.Timedelta(1)], + [pd.Timestamp("2024-01-02") - pd.Timedelta(1), pd.Timestamp("2024-01-02") + pd.Timedelta(1)])) + ) + def test_origin_off_by_one_on_boundary(self, lmdb_version_store_v1, closed, origin, date_range): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + start, end = date_range + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + 
origin=origin, + drop_empty_buckets_for="col" + ) + + @pytest.mark.parametrize("origin", [ + "start_day", + "start", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")) + ]) + def test_non_epoch_origin_throws_with_daterange(self, lmdb_version_store_v1, origin, closed): + lib = lmdb_version_store_v1 + sym = "test_origin_start_throws_with_daterange" + + lib.write(sym, pd.DataFrame({"col": [1, 2, 3]}, index=pd.DatetimeIndex([pd.Timestamp("2024-01-01"), pd.Timestamp("2024-01-02"), pd.Timestamp("2024-01-03")]))) + q = QueryBuilder() + q = q.resample('1min', origin=origin, closed=closed).agg({"col_min":("col", "min")}) + with pytest.raises(UserInputException) as exception_info: + lib.read(sym, query_builder=q, date_range=(pd.Timestamp("2024-01-02"), pd.Timestamp("2024-01-03"))) + assert all(w in str(exception_info.value) for w in [origin, "origin"]) + + @pytest.mark.parametrize("origin", ["epoch", pd.Timestamp("2025-01-03 12:00:00")]) + def test_epoch_and_ts_origin_works_with_date_range(self, lmdb_version_store_v1, closed, origin): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + # Start and end are picked so that #bins * rule + start != end on purpose to test + # the bin generation in case of end and end_day + start = pd.Timestamp("2025-01-01 00:00:00") + end = pd.Timestamp("2025-01-04 00:00:00") + idx = pd.date_range(start, end, freq='3s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": rng.integers(0, 100, len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + date_range=(pd.Timestamp("2025-01-02 00:00:00"), pd.Timestamp("2025-01-03 00:00:00")) + ) + +@pytest.mark.skipif(PANDAS_VERSION < Version("1.1.0"), reason="Pandas < 1.1.0 do not have offset param") +@pytest.mark.parametrize("closed", ["left", "right"]) +@pytest.mark.parametrize("label", ["left", "right"]) +@pytest.mark.parametrize("origin",[ + "start", + "start_day", + pytest.param("end", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + pytest.param("end_day", marks=pytest.mark.skipif(PANDAS_VERSION < Version("1.3.0"), reason="Not supported")), + "epoch", + pd.Timestamp("2024-01-01"), + pd.Timestamp("2025-01-01 15:00:00"), + pd.Timestamp("2025-01-03 15:00:00") +]) +@pytest.mark.parametrize("offset", ['10s', '13s', '2min']) +def test_origin_offset_combined(lmdb_version_store_v1, closed, origin, label, offset): + lib = lmdb_version_store_v1 + sym = "test_origin_special_values" + # Start and end are picked so that #bins * rule + start != end on purpose to test + # the bin generation in case of end and end_day + start = pd.Timestamp("2025-01-01 10:00:33") + end = pd.Timestamp("2025-01-02 12:00:20") + idx = pd.date_range(start, end, freq='10s') + rng = np.random.default_rng() + df = pd.DataFrame({"col": range(len(idx))}, index=idx) + lib.write(sym, df) + generic_resample_test( + lib, + sym, + "2min", + all_aggregations_dict("col"), + closed=closed, + origin=origin, + drop_empty_buckets_for="col", + label=label, + offset=offset + ) + +def test_max_with_one_infinity_element(lmdb_version_store_v1): + lib = lmdb_version_store_v1 + sym = "test_max_with_one_infinity_element" + + lib.write(sym, pd.DataFrame({"col": [np.inf]}, index=pd.DatetimeIndex([pd.Timestamp("2024-01-01")]))) + q = QueryBuilder() + q = 
q.resample('1min').agg({"col_max":("col", "max")}) + assert np.isinf(lib.read(sym, query_builder=q).data['col_max'][0]) + +def test_min_with_one_infinity_element(lmdb_version_store_v1): + lib = lmdb_version_store_v1 + sym = "test_min_with_one_infinity_element" + + lib.write(sym, pd.DataFrame({"col": [-np.inf]}, index=pd.DatetimeIndex([pd.Timestamp("2024-01-01")]))) + q = QueryBuilder() + q = q.resample('1min').agg({"col_min":("col", "min")}) + assert np.isneginf(lib.read(sym, query_builder=q).data['col_min'][0]) \ No newline at end of file
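
For reference, the origin handling added in compute_first_last_dates above can be summarised with a small standalone Python sketch. It is not part of the patch: the helper names mirror the C++ ones but are otherwise illustrative, it covers only the "epoch", "start", "start_day" and timestamp origins for the left-closed case, and it ignores the bucket offset.

# Standalone sketch (assumed names, not part of the patch): derive the first
# left-closed bucket boundary from `origin`, mirroring compute_first_last_dates.
# All values are integer nanoseconds since the epoch; `offset` is ignored here.

ONE_DAY_NS = 24 * 60 * 60 * 1_000_000_000

def python_mod(a: int, b: int) -> int:
    # C++ `%` truncates towards zero; this reproduces Python/Pandas flooring
    # behaviour, which matters for pre-epoch (negative) timestamps.
    return (a % b + b) % b

def start_of_day_ns(ts: int) -> int:
    # Round a timestamp down to midnight of the same day.
    return ts - python_mod(ts, ONE_DAY_NS)

def first_bucket_start(start: int, rule: int, origin) -> int:
    # Resolve `origin` to a concrete timestamp. The "end"/"end_day" cases, which
    # also need the end of the date range and the closed boundary, are omitted.
    if origin == "epoch":
        origin_ns = 0
    elif origin == "start":
        origin_ns = start
    elif origin == "start_day":
        origin_ns = start_of_day_ns(start)
    else:  # assumed to already be an integer timestamp in nanoseconds
        origin_ns = origin
    ns_to_prev = python_mod(start - origin_ns, rule)
    # Left-closed: snap `start` back to the nearest origin-aligned boundary at or before it.
    return start - ns_to_prev if ns_to_prev > 0 else start

End to end, the tests above exercise the same behaviour through the query builder, e.g. q = QueryBuilder().resample("2min", closed="left", origin="start_day").agg({"col_sum": ("col", "sum")}) followed by lib.read(sym, query_builder=q).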