From 583a0a71584617e632aba0f0613aed4d85b7de56 Mon Sep 17 00:00:00 2001 From: github-actions Date: Wed, 1 Nov 2023 01:31:47 +0000 Subject: [PATCH 01/18] Snapshot: 23.11.0-dev --- package.json | 2 +- pyproject.toml | 2 +- redash/__init__.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package.json b/package.json index b9782af0ca..b3a1be3b58 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "redash-client", - "version": "23.09.0-dev", + "version": "23.11.0-dev", "description": "The frontend part of Redash.", "main": "index.js", "scripts": { diff --git a/pyproject.toml b/pyproject.toml index 4e9d3151fd..030dfb20b9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ force-exclude = ''' [tool.poetry] name = "redash" -version = "23.09.0-dev" +version = "23.11.0-dev" description = "Make Your Company Data Driven. Connect to any data source, easily visualize, dashboard and share your data." authors = ["Arik Fraimovich "] # to be added to/removed from the mailing list, please reach out to Arik via the above email or Discord diff --git a/redash/__init__.py b/redash/__init__.py index 1b9bd4b5d8..3f7737123f 100644 --- a/redash/__init__.py +++ b/redash/__init__.py @@ -14,7 +14,7 @@ from redash.destinations import import_destinations from redash.query_runner import import_query_runners -__version__ = "23.09.0-dev" +__version__ = "23.11.0-dev" if os.environ.get("REMOTE_DEBUG"): From 3226cd7a17a0ce70f6a8b122761ab9822a91c048 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Wed, 15 Nov 2023 00:12:28 +0300 Subject: [PATCH 02/18] dataframe_to_result function moved outside python query runner --- redash/query_runner/__init__.py | 34 +++++++++++++++++++++++++++++++++ redash/query_runner/python.py | 34 ++++++++------------------------- 2 files changed, 42 insertions(+), 26 deletions(-) diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py index 9e75390422..6fd72db23e 100644 --- a/redash/query_runner/__init__.py +++ b/redash/query_runner/__init__.py @@ -3,6 +3,8 @@ from contextlib import ExitStack from functools import wraps +import numpy as np +import pandas as pd import sqlparse from dateutil import parser from rq.timeouts import JobTimeoutException @@ -35,6 +37,7 @@ "get_query_runner", "import_query_runners", "guess_type", + "pandas_to_result", ] # Valid types of columns returned in results: @@ -106,6 +109,37 @@ def find_last_keyword_idx(parsed_query): return -1 +def get_column_types_from_dataframe(df: pd.DataFrame) -> list: + columns = [] + + for column_name, column_type in df.dtypes.items(): + if column_type == np.bool_: + redash_type = TYPE_BOOLEAN + elif column_type == np.inexact: + redash_type = TYPE_FLOAT + elif column_type == np.integer: + redash_type = TYPE_INTEGER + elif column_type in (np.datetime64, np.dtype(" 10: + redash_type = TYPE_DATETIME + else: + redash_type = TYPE_DATE + else: + redash_type = TYPE_STRING + + columns.append({"name": column_name, "friendly_name": column_name, "type": redash_type}) + + return columns + + +def pandas_to_result(df: pd.DataFrame) -> dict: + columns = get_column_types_from_dataframe(df) + rows = df.to_dict("records") + return {"columns": columns, "rows": rows} + + class InterruptException(Exception): pass diff --git a/redash/query_runner/python.py b/redash/query_runner/python.py index 28219a7f89..1353a6fd24 100644 --- a/redash/query_runner/python.py +++ b/redash/query_runner/python.py @@ -9,6 +9,7 @@ guarded_unpack_sequence, safe_builtins, ) +from RestrictedPython.transformer import IOPERATOR_TO_STR from redash import models from redash.query_runner import ( @@ -20,19 +21,15 @@ TYPE_INTEGER, TYPE_STRING, BaseQueryRunner, + pandas_to_result, register, ) from redash.utils import json_dumps, json_loads -try: - import numpy as np +pandas_installed = importlib.util.find_spec("pandas") and importlib.util.find_spec("numpy") +if pandas_installed: import pandas as pd - pandas_installed = True -except ImportError: - pandas_installed = False - -from RestrictedPython.transformer import IOPERATOR_TO_STR logger = logging.getLogger(__name__) @@ -271,26 +268,11 @@ def get_query_result(query_id): return query.latest_query_data.data def dataframe_to_result(self, result, df): - result["rows"] = df.to_dict("records") - - for column_name, column_type in df.dtypes.items(): - if column_type == np.bool_: - redash_type = TYPE_BOOLEAN - elif column_type == np.inexact: - redash_type = TYPE_FLOAT - elif column_type == np.integer: - redash_type = TYPE_INTEGER - elif column_type in (np.datetime64, np.dtype(" 10: - redash_type = TYPE_DATETIME - else: - redash_type = TYPE_DATE - else: - redash_type = TYPE_STRING + converted_result = pandas_to_result(df) - self.add_result_column(result, column_name, column_name, redash_type) + result["rows"] = converted_result["rows"] + for column in converted_result["columns"]: + self.add_result_column(result, column["name"], column["friendly_name"], column["type"]) def get_current_user(self): return self._current_user.to_dict() From 433d2a9e2e26ef0cb93f819832ad6c5d37ca8abe Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Wed, 15 Nov 2023 00:12:42 +0300 Subject: [PATCH 03/18] added yandex disk query runner --- .../assets/images/db-logos/yandex_disk.png | Bin 0 -> 8745 bytes redash/query_runner/yandex_disk.py | 148 ++++++++++++++++++ redash/settings/__init__.py | 1 + tests/query_runner/test_yandex_disk.py | 46 ++++++ 4 files changed, 195 insertions(+) create mode 100644 client/app/assets/images/db-logos/yandex_disk.png create mode 100644 redash/query_runner/yandex_disk.py create mode 100644 tests/query_runner/test_yandex_disk.py diff --git a/client/app/assets/images/db-logos/yandex_disk.png b/client/app/assets/images/db-logos/yandex_disk.png new file mode 100644 index 0000000000000000000000000000000000000000..7b375648dfadd47f0e2e6a73794e9839ac360959 GIT binary patch literal 8745 zcmeHt^-~*6@OJ{iiWPTv*Fp&vAh>&rLn+eYPO(tj3k50^FD=DNDON04kzxUg76=pz zP#l6K;pKVfedqn{{R_S~yR*A@pPk*=yV<$9xfBy4ZSs4}_W%F@xvmb>3;+P$sXzcR z;hnGxsC4_!3N+JJ2h@$T?%WyBr;l{q3=ILocRDcunB)ln{Rg>Y<~s%e@JoOI{5uK! zZ>|LI|E>Z7CHVhO{}1TC%d86kaJ|-rs#$~q_X~+W&~9E07XQG{94ASBnTx+uLTn~R z3$*P|oF_7%$&cqpYxFtHwt7y-Pz zar4Z^>YyeOYbH@6H&;cq8rNIu$d_F}ER`M?kXi@U-8CP|YC-NnBui~FgHA|5=$ZuE z(Nc59GJ=?0I+?K%Cf>QERg<0L%v`sF(Tum$()qS zYAK~DS)4W{($nPv(!XF0!A{S+LMFrTJ^7;9!nyA3u_3 zSZWq~ZrXi?HuYPQ+Td5=m+R4`{aj}}V+I$g1v3OaPpOi**NFgrJfJ^}GruKyL6Vzz zsNyb0^`)n3ZMT6r>`UnrDbLBt>|L_olNh_Qz2oYt<$~RKi>7X4vY_OXaLylhk z(m4&5_Lvt%lS!aPDr91k}v6 zoK{+_7ToxeBFBc{JmU@#C=%fR{tJS%)OD9)y1s~ygQ)-?3%KS#6YyA!r+j{bGX5!a zkl3t)vry7Vq(bb*=2JMtbrEAFa;fj*jb)EBlq>=?`S9h7fcwXY=5qW}S`M0r%%0Qv z&hkFs3AIeiVxSmP2e~pqRVRxOjTMkuqab0~7lvc18p*Avu8dVnIhTwt7vFN5@qC7x z{poAOll}gDa%~=viewHx=Iz!>qthTG^Pozk|F`V@jOvAOXsNva_SZ|ztTLRBZ5qYE z3^DvAx*Km_&Eh6YXxMBSLbG%B%D&bG^Kw7l??n5wMR ziOc^a*Z-BdlVSfygwr%!lx`uJy`LOWjNq+DtNPJ`F(o-3DducF_9k15B+@^uUt@LZ)WnCy(p!w?(y*Bp=Vnd>#bZ%Fb`G{Q}J%u#HB$o^hv z>PETlYPA3~e?qRBe(UYa)9fa?Vb?5*?)NJouOd7?f#*gmzLtGHlMKK~iyO6@x95R} z=70C;LcM6O{;G{;w$i*Fidjrd%Sv)dkXegkD=Y|jN7yNR?N)c55Ae}8r$ySb$Lh^z zZ~68q`MzFlsir%C@q|6vD2H?V{2uuHN{L)Bj()H|o}vUSA-1bZsQmewzwS5@z&_$e z#MO0dDtXx>dD*AWyxH;NMM+%)ekXC@Z8C6>GUg?JEKPREzE!Fo$gb2_{5PNBN*!GwjGxYsFBz+{hX2 z1}V++;awdniOOWMy9YNLgaDoZwc2k4jfhbj4Bp=kU(l!HHbxoa*W=B>SqtB5O+xCt z_%4Dq`XE6)_MNeQMqM5E=vyAyiK)jWfjE>f2*{_qKlKYX^~(`BqtVQ?sFQ!-li-xr#aR}eBh=MaG4 z>8~BB6SnagFuY413|=gMJE7I-xAH{bhqMb3F-bLkC5cg}qBC9VlOTg=_sbt$);#yb zx!25eN`zyRoud7M?jVroZ#Yw3lQrRbfky<~QMm54A6#8yb*^Ktn!b(GXc3jJTfmKdTnjEC+1Wy% ze!;1C8p26^^L^bp5O9O%+nRW@JLhxV_c^%FONp&hn=2Md!gvy1qtUwsKv45DFDhP^ zAsbj=PMU=itKEgG$g!X;r@|!{(TqHpqyix7=1ISPx2CFV_@(20hdtSlO#+P7HSg7- zZ3T}31%Pe?0LPxEH6C`geqUmF3mzTS2qb`~)4VF6N4UBTkA?|R#^ECpRfF+x^lU%oSjy2sL*fFc z%NvITo?rqq3)b5yX?e9!KybFA81BZpF$}q`p3TG0l`f}sY^u}Ef+w?K^(3%EpI{ki z7f<%=qFA*8OTr!ZIaQYAq7f9+J8@*8xuK>AdWo?IkUzd*I|C>(od*?TBT~rTxkYcU zOp~6v@R}H`%5-2O{y0-@Py+@Au17ET2xt>UvE#rR&BVvCqp@$p`zMtq{W)qaDd;o_6ztXmM>U zN{6!vbQf~&@&T7x5CCi^jy=T&qYnH)Y|mswo8)ud1%Nl8lZs}gXE7H};JbYb)!}_z zcVJ6{PeB=XJISs3dAlt&UqC+xl5fyE1Yb`1z~|u&xJEnT8D`<5f>MVNicsc8qj;$c zKA;4#sM_*v2{iTgePxLya9rbkr+joR^0_~J+23JF#~}_xE%1tuhUP(Zr_L)qKvVXL zZ%M7}T9FR@OVSHXQ%8Y)NwPNrL*Z1~pcOUv?W9pAnHkBdv@1e!-FL-$V?OCfvnQ_5 zZ&QkQl`d9&I6#F}MUaA%K~^eZ9RJG<#C6p*u$dh{FbweFjZRK?C-K#J({v!{C(xtC z@yX|RjZt>Cxn*ay$X6MW&F}mN&!{9_AGBe8boha49{3N5ne0G@h2MXv0iOtVaaAFw z%3neJh(Dya-Hr_N$_H!Z8C3nsvLp(YkPY5ZI+73DQ{^;6#ssGRlJZLdRZ+dot2y3d zGIxJ8GqvaO(}OkmV>!<{TW%a9ci#b2r%vh^y%9ov#1iYsYa4y2Rfe%6=>FaISK_@# zw{d92*!}Ev4tIN`eU(yyHHb{X{1cHqpF>M+fH;$TJ%baG9 zXu0VVmDcl*Md;vQ*s;zZNo(H%!kvd}LeTA<-CfUc=}?l5UTuykywnp%qSjk9f+D$m zk$JyyLGldx)Vg$N>YSV7mh*%fJfdR9)ezxZ=6*bkORQ1T6AzYo5FJU5)+XFmmm-@W zn3V4{T4G9;R~0yH1^n(b-&j4L{><0Qw^eGAC^zpQiD{NZTd3p~m`I0tl@57_^+qr4 zIyfjjG^Y}EX zf4#SU?SxUggBG1HXZ;He*zhhK<_ZG}zxP8Gc2}~FO;fDPypE=YHB_KyBhE(rH~k}t zj;w(9p&L9N7hxos?e+pk9(F2){jaBbU6*oQHz=+l=iYqM)O{^RyWCTgg$;Ikj~%e@ zhxSj+UZl7h;l4vR?!S9hW4OipE%W@Y<%ITnd7NyXC5D|Cy`Ufane_9Q#N1~XgubW@ z=nXZByJ08su8@((<5fg=n=bP=yhuBwwoeY7m-09dZCY`ZdgECYo5#n+qHnDBT0Xk4 zI;QtEckt4_f~dX_LlqS&@$f_X0bhWTalHGiv>l{^!aREec)QxUzV`x3Pr#LEkC|okq`v+wjMv+6q=S=jjJa2^YV0Tu7*q&&Y{PDqL zSCY0g%}aeOz_p9-Gl}#T+}bhTx;uR8*C#4zs1M*iBvW;V%bq2VEY=pG)07nH1YogS9cxkM!eXk<`klewgt?s_b%LcIW+o|m-*(X zt&CT*gv93T+&e~rP*;w7TsogwW3^e~Y0QhxBZ{`0ac!@AAGwuto)P&;Hk?yxKi-RI zT38)KEM*?$v0K3Nhb$|RSscj#oy$~(3h7v;Te`=XC!4`Vn5RRL&@<%+lfG9!3gsHH zpA4ASMoCNeY1&>gW9r*STQMYTLxm#L2x9QMWXJi0h9J?##M)cj zetqH4CFp`JK?;}rIM+eMMR};O>oBeVcQvHVy;O*Ay!S>A; zb6!wBdMzFo3~oJCsN8-&f-6S!EW*emn~9%65T>_hOP6o-*%)tmpET7%U$nmdL$S*L zk?nhsmtI`HB<|v92A(NLx$08^B5a}yU$u~Z^Ut;|5gtOEUce_UfgCTY+ds|trxk~+ z9bldtNBvD%3IZ^XT~Ibp&k}RFjzeqWmO`2-*D|+O3%?9%(8>>Xw36#d6sSiuxzxs&5{5&Uer(`q{syz-wYwP7l#lYj`nje9+5xC zrN&aVR|INd$`c!b3nGKRWS?}pTL<}ZwdeulR~OOgj_YX8Z^Z+?Hz8koRAr|=)Vkkr z9z4=oB^mwN&d6p^#02L$_4`I88GWeDn7g^S@{)xQ#@_l{-6PVyj6B+XLQP?I;R9#H z<9l-spIva%*vh@+s;ie7*XF4BrHhKQCcyD5>aTOFB z>>ij?kI3k#)9K4<>GbQCXzo36+KUHh_Mc3#gU2?^NiOiDf!lNp_md5^Ct2TVBVsHf z`ctDRPfY|a?QRm2enw}* z*T-VW)8T@ekM)Kk{fx;W1NHD!6)S2>qc(+^aS0Oi)}j`#wr*^wDL9Xw7L5v4YVU*hW@L9k4r zE4=q?GM3&+>l*$ZXCnc-jxI$CvMjRqzRsQtP*51=PvubUuiwJoK+kTX*@sye=VJKV zt~vtcg+|}fkan`~qmjH+qer5}vSpNVy`+hOt5Jg^D}cQr+%~GGk0WqDJR-HgE1!;s%uO zQ0AH?4D29NR9rcyC1PwJ^$f zFNeDL6Z%Z`D8K3CE?etR%9)YGO*>)2z9$${`vZtLZ?*@Zh)^Ys@SpKKQ%ut#$S!~w z_Qzem<_36sEl-qG!G9buj-&QVNT;DBGeP%~ERLs{etO&#tK6KB{RgSChdk1#d_k60 zJ(3^~?h_T1=LY-Tsdm6Eei;6o9jTPNgW7%%8|YG}{JL{h@vj-^AuvN}GP?DJ+vbp? zJ^2Zy+DQoe-Tq0o0-VNkSOsUs+Z-c*2!*9sEBm6RXR-cw_a1k}QsBy@56LR+#<#ZP z=L1R0hU;0!fJQC<+fIL39AlWM^~5!`qo9^Oa=kSw4FQiK>FV1b$E{PCaLg$tT#C;_SR;m~L0gG}5o)s4!v zXx<-C4y$-EhkW%1y8C<(P$3+vXnbFx`N>QZMv*>Nfzc#_VE5V)!}vUlwsvkvnehkS z`h@W$U1W@nA_d^dY}F7DtNFt2(Nn%#3%qSKlnO1`P_N;{mwGeT!GFTKd&8ba!CJ(N z9Ika{p4pCfRw9DQdBrwa-^ZTBN4Hbf^5y;lEz0-2F=_gd>!J%uI)2uj`NhN@{iv&_ zJVKoLnm#Pj-qAt42%j<2!de^`$`L1VS%NG!OMw|3gp=j*VVNcPz_IGTPdO46mXnA4 z*vFSd3=KfBV2)SZ>6^uFUJ~~2cHqaX-BNjg8OSucyFj@1F9$y`V>$-P?g{0AaVnqS zrCwc2jXz~6+t2QI!tiM&tXSd!5te$;pF$Tu!0EOv~Xlil7gF*)ZWCoHkGB-i@H7lP&ujhN802X%^~^ipRsr@4dU?h zL9I1T{*5Hyo`z>*rRv3v>?ES4t0clV_1NdV+5O}Z8wJ=(UH~GhQKyqW&+3>GdIIbj zncsCp3wB&(3?ZE^IjXNJw9#eRhwSO=aQ5}f&O+r^MHv2IOA#VO_0+ovd#fE|oHO;q zFCwaorP*P@8!H=>Q&ztvqF5RFE+t$@V}vp<;Z_tS_6uvZMC(GTmH#_4#&+_tNYPF7LxrZ3KU>MF;>{W^Qsx)#S`d9(h(V*|w5yO;PMH)A<>^fsuij7dd zH+z94x=tCI*f}tfag~EO77dqQ<%A<@SRn;}Y4(zo!5==1K2VRS8915v=!+J!%ZN3S zjLc~NDs?$6I&wS<$4ksYeW(2lx^wV3*8RgSRz?#eI72L(h)>*((&=<}KDvcKM1uh; z_r(I2(%(h2xG#=&TGY1$0Jhd^DPYuju%yz;atGBs5jchlDe}l;_E>&jXETrbVQ9Wm zQVVV-c@QofTi#kt02CoI`Fz=G6(>qD@z3zNR3r3rog73=h^&mOr7`R)c|_XWwV-z5 zsP-N%jP^qH#I#C1n^uIV-ul_Lo9^^M>5zx8bZOu`QXPKb2|OSXoX-&NsNgp4K|PQF zJ0P`lvKs9K%v}lji%Y*}|CPIoR{t2iGj9=u=HTOx{WD1*N39~Q^S1G~G>g`)(!e(S zo2Zz~iOY%TLOqi&X1KxVAb&8U@Hk{EWc#id-BzAUrC(0X-3PL79}5Kt-1`J(ou6o+ zn8}lO{wKlJTi3M{g-GQfw+33R#J#{-&|b)E{fXUM;&a*rSm|NuHv@r7oj#T0ArHW1 z0h2D$Xp>u4{j`(a55H08Vz&yrXDZ?vhgyHx2j)V2d()L=Rn0oM?_BfsGQeKIMt^}u zu~p1F$%pAN#Q`67$h=s$cEuymWd%MNLmh{P*wC&a{#%Bk;3b?Y(1p6!kLM@tSCU5x zFuol7k-am}^$W^KVd*`QzyJi}=tqz0<6JXNFgjY{-cXv;(Y2dn{bJ;w&s=Sp(@S){ zN5zRNFUwSHcm#L2oo0GNE6>>ACW!|6){A&C%j{#CM*oDTZUET_$+a~UT-gJ?VqtMH zx7D8ikUsaXD74`J+-1VH^6i2BG}+3PNL8+g$-0<=SRG()5KX->FjBl-Zr%TN)O6uc ziGySLl2v+8buq!*MBD?dh#nP2w~AP3>XF=>aQy5-XelEcxV{t=*|}SHM%fZfiZ$xk z=lD77^V2}N(ZB?w-u(+mJ3Q`)R9Bxk*LR64qQI8b(l5*CL?^5vvs!S=UQ4Ij_AOO- zoQs;T%QtfJ4s{I!E$UUjcUxS&f~cfNgFkJn{3>Bp=}@H0aoq>VqA%Vih&;X3u(f)a zn8tg*5XMDXzZSB?_kAM>pN=^dN%F`ND7#O!(CG<|_PvPEdfU}VHJ>rpZ-DB&d(Pzb z{<50DMwM)h2dPZ(4g45VV3}k2IwLmoeiE89NQHYmx>+gy8f#oS_H;;w9tjkLr6fGz z%fUVxdMp`nOj7byeWmXY4hUL~e7Dx}7ytBz9nA{ib#XG9)vmlxb`+8Lwu|<@By=08 zN)bi79CmeC5g!jzCMtQ?DL2J@8$-AW;@YH&o@~Qb`(0-Z-3wQoRxM+i{o{QXDK;hB zIgHbxk9v?e=`Kp+jALj!E?Q6T@LSnP{h>DsL}kxuX|3KqHUR@SsR*T9J-ptl7e9SE9UQkyt)_P1Qs6X2~pgr$;n@=pON(Zi1(;2D*2 zM9YLivbmlk@~#P1{-v3!T(hk($T8&>)@K=)P!@?GfvwpAT@HtD)u6LWaq*Fs`N~ zQjfTtmuS(-=9hAD&q}B*GkOn0G}bBX0`Xa2C7vujYUp4edH&6ZysnvWZ?8Zrtdb25dwNjG20{jpCp9;hmcHiR5a4i-)x&Jc#@0*6MrV+GG I-Rb530x{GtcmMzZ literal 0 HcmV?d00001 diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py new file mode 100644 index 0000000000..1c44159cb4 --- /dev/null +++ b/redash/query_runner/yandex_disk.py @@ -0,0 +1,148 @@ +import logging + +import requests +import yaml + +from redash.query_runner import BaseSQLQueryRunner, pandas_to_result, register +from redash.utils import json_dumps + +try: + import openpyxl # noqa: F401 + import pandas as pd + + enabled = True +except ImportError: + enabled = False + +logger = logging.getLogger(__name__) + +EXTENSIONS_READERS = { + "csv": pd.read_csv, + "tsv": pd.read_table, + "xls": pd.read_excel, + "xlsx": pd.read_excel, +} + + +class YandexDisk(BaseSQLQueryRunner): + should_annotate_query = False + + @classmethod + def type(cls): + return "yandex_disk" + + @classmethod + def name(cls): + return "Yandex Disk" + + @classmethod + def configuration_schema(cls): + return { + "type": "object", + "properties": { + "token": {"type": "string", "title": "OAuth Token"}, + }, + "secret": ["token"], + "required": ["token"], + } + + def __init__(self, configuration): + super(YandexDisk, self).__init__(configuration) + self.syntax = "yaml" + self.base_url = "https://cloud-api.yandex.net/v1/disk" + self.list_path = "counters" + + def _get_tables(self, schema): + offset = 0 + limit = 100 + + while True: + temp_items = self._send_query( + "resources/public", media_type="spreadsheet,text", limit=limit, offset=offset + ) + + temp_items = temp_items["items"] + + for i in temp_items: + file_name = i["name"] + file_path = i["path"].replace("disk:", "") + schema[file_name] = {"name": file_name, "columns": [file_path]} + + if len(temp_items) < limit: + break + + offset += limit + + return list(schema.values()) + + def test_connection(self): + self._send_query() + + def _send_query(self, url_path="", **kwargs): + token = kwargs.pop("oauth_token", self.configuration["token"]) + r = requests.get( + f"{self.base_url}/{url_path}", + headers={"Authorization": f"OAuth {token}"}, + params=kwargs, + ) + + response_data = r.json() + + if not r.ok: + error_message = f"Code: {r.status_code}, message: {r.text}" + raise Exception(error_message) + return response_data + + def run_query(self, query, user): + logger.debug("Yandex Disk is about to execute query: %s", query) + data = None + + if not query: + error = "Query is empty" + return data, error + + try: + params = yaml.safe_load(query) + except ValueError as e: + logger.exception(e) + error = str(e) + return data, error + + if not isinstance(params, dict): + error = "The query format must be JSON or YAML" + return data, error + + if "path" not in params: + error = "The query must contain path" + return data, error + + file_extension = params["path"].split(".")[-1].lower() + + if file_extension not in EXTENSIONS_READERS: + error = f"Unsupported file extension: {file_extension}" + return data, error + + file_url = self._send_query("resources/download", path=params["path"])["href"] + + read_params = {} + if file_extension in ["xls", "xlsx"]: + read_params["sheet_name"] = params.get("sheet_name", 0) + + try: + df = EXTENSIONS_READERS[file_extension](file_url, **read_params) + except Exception as e: + logger.exception(e) + error = f"Read file error: {str(e)}" + return data, error + + try: + data = json_dumps(pandas_to_result(df)) + error = None + except Exception as e: + logger.exception(e) + error = str(e) + + return data, error + + +register(YandexDisk) diff --git a/redash/settings/__init__.py b/redash/settings/__init__.py index 74842fd769..feea99f287 100644 --- a/redash/settings/__init__.py +++ b/redash/settings/__init__.py @@ -298,6 +298,7 @@ def email_server_is_configured(): "redash.query_runner.clickhouse", "redash.query_runner.tinybird", "redash.query_runner.yandex_metrica", + "redash.query_runner.yandex_disk", "redash.query_runner.rockset", "redash.query_runner.treasuredata", "redash.query_runner.sqlite", diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py new file mode 100644 index 0000000000..c40258d606 --- /dev/null +++ b/tests/query_runner/test_yandex_disk.py @@ -0,0 +1,46 @@ +from io import BytesIO +from unittest import TestCase + +import pandas as pd + +from redash.query_runner.yandex_disk import EXTENSIONS_READERS + +test_df = pd.DataFrame( + [ + {"id": 1, "name": "Alice", "age": 20}, + {"id": 2, "name": "Bob", "age": 21}, + {"id": 3, "name": "Charlie", "age": 22}, + {"id": 4, "name": "Dave", "age": 23}, + {"id": 5, "name": "Eve", "age": 24}, + ] +) + +test_token = "AAAAQAA" +test_path = "/filename.{format}" + + +class TestYandexDisk(TestCase): + def test_xlsx(self): + output = BytesIO() + writer = pd.ExcelWriter(output) + test_df.to_excel(writer, index=False) + writer.save() + + is_equals = test_df.equals(EXTENSIONS_READERS["xlsx"](output)) + self.assertTrue(is_equals) + + def test_csv(self): + output = BytesIO() + test_df.to_csv(output, index=False) + output.seek(0) + + is_equals = test_df.equals(EXTENSIONS_READERS["csv"](output)) + self.assertTrue(is_equals) + + def test_tsv(self): + output = BytesIO() + test_df.to_csv(output, index=False, sep="\t") + output.seek(0) + + is_equals = test_df.equals(EXTENSIONS_READERS["tsv"](output)) + self.assertTrue(is_equals) From 83e0efdd5164858ecfca00c4f9dbf460f5acdfcf Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Wed, 15 Nov 2023 00:19:16 +0300 Subject: [PATCH 04/18] moved file_extension check --- redash/query_runner/yandex_disk.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py index 1c44159cb4..d221fa16c9 100644 --- a/redash/query_runner/yandex_disk.py +++ b/redash/query_runner/yandex_disk.py @@ -118,16 +118,16 @@ def run_query(self, query, user): file_extension = params["path"].split(".")[-1].lower() + read_params = {} + if file_extension not in EXTENSIONS_READERS: error = f"Unsupported file extension: {file_extension}" return data, error + elif file_extension in ("xls", "xlsx"): + read_params["sheet_name"] = params.get("sheet_name", 0) file_url = self._send_query("resources/download", path=params["path"])["href"] - read_params = {} - if file_extension in ["xls", "xlsx"]: - read_params["sheet_name"] = params.get("sheet_name", 0) - try: df = EXTENSIONS_READERS[file_extension](file_url, **read_params) except Exception as e: From 54a0e3c1e012a88b4bb382b2f652d08bbefdcfd3 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Wed, 15 Nov 2023 09:12:29 +0300 Subject: [PATCH 05/18] skip unsupported extensions in schema --- redash/query_runner/yandex_disk.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py index d221fa16c9..2e6d5b4497 100644 --- a/redash/query_runner/yandex_disk.py +++ b/redash/query_runner/yandex_disk.py @@ -66,6 +66,11 @@ def _get_tables(self, schema): for i in temp_items: file_name = i["name"] file_path = i["path"].replace("disk:", "") + + file_extension = file_name.split(".")[-1].lower() + if file_extension not in EXTENSIONS_READERS: + continue + schema[file_name] = {"name": file_name, "columns": [file_path]} if len(temp_items) < limit: From 8d6a8131d03af8d96a14f859402e80b5fb84a660 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Wed, 15 Nov 2023 13:31:40 +0300 Subject: [PATCH 06/18] removed unused variable --- tests/query_runner/test_yandex_disk.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index c40258d606..2d77d243a4 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -16,7 +16,6 @@ ) test_token = "AAAAQAA" -test_path = "/filename.{format}" class TestYandexDisk(TestCase): From 97be93323742d449d8af3ab0c2d3946af33b45c8 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Mon, 20 Nov 2023 14:57:36 +0300 Subject: [PATCH 07/18] added support for xlsx with multiple sheets --- redash/query_runner/yandex_disk.py | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py index 2e6d5b4497..3700409603 100644 --- a/redash/query_runner/yandex_disk.py +++ b/redash/query_runner/yandex_disk.py @@ -57,15 +57,15 @@ def _get_tables(self, schema): limit = 100 while True: - temp_items = self._send_query( + tmp_response = self._send_query( "resources/public", media_type="spreadsheet,text", limit=limit, offset=offset ) - temp_items = temp_items["items"] + tmp_items = tmp_response["items"] - for i in temp_items: - file_name = i["name"] - file_path = i["path"].replace("disk:", "") + for file_info in tmp_items: + file_name = file_info["name"] + file_path = file_info["path"].replace("disk:", "") file_extension = file_name.split(".")[-1].lower() if file_extension not in EXTENSIONS_READERS: @@ -73,7 +73,7 @@ def _get_tables(self, schema): schema[file_name] = {"name": file_name, "columns": [file_path]} - if len(temp_items) < limit: + if len(tmp_items) < limit: break offset += limit @@ -124,12 +124,15 @@ def run_query(self, query, user): file_extension = params["path"].split(".")[-1].lower() read_params = {} + is_multiple_sheets = False if file_extension not in EXTENSIONS_READERS: error = f"Unsupported file extension: {file_extension}" return data, error elif file_extension in ("xls", "xlsx"): read_params["sheet_name"] = params.get("sheet_name", 0) + if read_params["sheet_name"] is None: + is_multiple_sheets = True file_url = self._send_query("resources/download", path=params["path"])["href"] @@ -140,6 +143,14 @@ def run_query(self, query, user): error = f"Read file error: {str(e)}" return data, error + if is_multiple_sheets: + new_df = [] + for sheet_name, sheet_df in df.items(): + sheet_df["sheet_name"] = sheet_name + new_df.append(sheet_df) + new_df = pd.concat(new_df, ignore_index=True) + df = new_df.copy() + try: data = json_dumps(pandas_to_result(df)) error = None From f102acbd3e439e67d64fd841e737247aed572688 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Mon, 20 Nov 2023 15:08:01 +0300 Subject: [PATCH 08/18] moved pandas-converters to utils file --- redash/query_runner/__init__.py | 34 ----------------------- redash/query_runner/python.py | 2 +- redash/query_runner/utils.py | 44 ++++++++++++++++++++++++++++++ redash/query_runner/yandex_disk.py | 3 +- 4 files changed, 47 insertions(+), 36 deletions(-) create mode 100644 redash/query_runner/utils.py diff --git a/redash/query_runner/__init__.py b/redash/query_runner/__init__.py index 6fd72db23e..9e75390422 100644 --- a/redash/query_runner/__init__.py +++ b/redash/query_runner/__init__.py @@ -3,8 +3,6 @@ from contextlib import ExitStack from functools import wraps -import numpy as np -import pandas as pd import sqlparse from dateutil import parser from rq.timeouts import JobTimeoutException @@ -37,7 +35,6 @@ "get_query_runner", "import_query_runners", "guess_type", - "pandas_to_result", ] # Valid types of columns returned in results: @@ -109,37 +106,6 @@ def find_last_keyword_idx(parsed_query): return -1 -def get_column_types_from_dataframe(df: pd.DataFrame) -> list: - columns = [] - - for column_name, column_type in df.dtypes.items(): - if column_type == np.bool_: - redash_type = TYPE_BOOLEAN - elif column_type == np.inexact: - redash_type = TYPE_FLOAT - elif column_type == np.integer: - redash_type = TYPE_INTEGER - elif column_type in (np.datetime64, np.dtype(" 10: - redash_type = TYPE_DATETIME - else: - redash_type = TYPE_DATE - else: - redash_type = TYPE_STRING - - columns.append({"name": column_name, "friendly_name": column_name, "type": redash_type}) - - return columns - - -def pandas_to_result(df: pd.DataFrame) -> dict: - columns = get_column_types_from_dataframe(df) - rows = df.to_dict("records") - return {"columns": columns, "rows": rows} - - class InterruptException(Exception): pass diff --git a/redash/query_runner/python.py b/redash/query_runner/python.py index 1353a6fd24..08baad5531 100644 --- a/redash/query_runner/python.py +++ b/redash/query_runner/python.py @@ -21,9 +21,9 @@ TYPE_INTEGER, TYPE_STRING, BaseQueryRunner, - pandas_to_result, register, ) +from redash.query_runner.utils import pandas_to_result from redash.utils import json_dumps, json_loads pandas_installed = importlib.util.find_spec("pandas") and importlib.util.find_spec("numpy") diff --git a/redash/query_runner/utils.py b/redash/query_runner/utils.py new file mode 100644 index 0000000000..12469ab4b1 --- /dev/null +++ b/redash/query_runner/utils.py @@ -0,0 +1,44 @@ +import numpy as np +import pandas as pd + +from . import ( + TYPE_BOOLEAN, + TYPE_DATE, + TYPE_DATETIME, + TYPE_FLOAT, + TYPE_INTEGER, + TYPE_STRING, +) + +column_type_mappings = { + np.bool_: TYPE_BOOLEAN, + np.inexact: TYPE_FLOAT, + np.integer: TYPE_INTEGER, +} + + +def get_column_types_from_dataframe(df: pd.DataFrame) -> list: + columns = [] + + for column_name, column_type in df.dtypes.items(): + if column_type in column_type_mappings: + redash_type = column_type_mappings[column_type] + elif column_type in (np.datetime64, np.dtype(" 10: + redash_type = TYPE_DATETIME + else: + redash_type = TYPE_DATE + else: + redash_type = TYPE_STRING + + columns.append({"name": column_name, "friendly_name": column_name, "type": redash_type}) + + return columns + + +def pandas_to_result(df: pd.DataFrame) -> dict: + columns = get_column_types_from_dataframe(df) + rows = df.to_dict("records") + return {"columns": columns, "rows": rows} diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py index 3700409603..a1b8e711b2 100644 --- a/redash/query_runner/yandex_disk.py +++ b/redash/query_runner/yandex_disk.py @@ -3,7 +3,8 @@ import requests import yaml -from redash.query_runner import BaseSQLQueryRunner, pandas_to_result, register +from redash.query_runner import BaseSQLQueryRunner, register +from redash.query_runner.utils import pandas_to_result from redash.utils import json_dumps try: From 2e0045dd902cbd6a081842879aeabd781f441506 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Mon, 20 Nov 2023 15:33:21 +0300 Subject: [PATCH 09/18] added tests --- tests/query_runner/test_yandex_disk.py | 84 +++++++++++++++++++------- 1 file changed, 62 insertions(+), 22 deletions(-) diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index 2d77d243a4..c4f148e54c 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -1,9 +1,10 @@ from io import BytesIO -from unittest import TestCase +from unittest import mock import pandas as pd +import pytest -from redash.query_runner.yandex_disk import EXTENSIONS_READERS +from redash.query_runner.yandex_disk import EXTENSIONS_READERS, YandexDisk test_df = pd.DataFrame( [ @@ -18,28 +19,67 @@ test_token = "AAAAQAA" -class TestYandexDisk(TestCase): - def test_xlsx(self): - output = BytesIO() - writer = pd.ExcelWriter(output) - test_df.to_excel(writer, index=False) - writer.save() +def test_yandex_disk_type(): + assert YandexDisk.type() == "yandex_disk" - is_equals = test_df.equals(EXTENSIONS_READERS["xlsx"](output)) - self.assertTrue(is_equals) - def test_csv(self): - output = BytesIO() - test_df.to_csv(output, index=False) - output.seek(0) +def test_yandex_disk_name(): + assert YandexDisk.name() == "Yandex Disk" - is_equals = test_df.equals(EXTENSIONS_READERS["csv"](output)) - self.assertTrue(is_equals) - def test_tsv(self): - output = BytesIO() - test_df.to_csv(output, index=False, sep="\t") - output.seek(0) +@mock.patch("requests.get") +def test__send_query(mock_requests_get): + mock_requests_get.return_value.ok = True + mock_requests_get.return_value.json.return_value = {"foo": "bar"} - is_equals = test_df.equals(EXTENSIONS_READERS["tsv"](output)) - self.assertTrue(is_equals) + configuration = {"token": test_token} + disk = YandexDisk(configuration) + response = disk._send_query("test_url") + + assert response == {"foo": "bar"} + mock_requests_get.assert_called_once() + + +@pytest.mark.parametrize( + "configuration, error_message", + [({"token": test_token}, None), ({"token": ""}, "Code: 400, message: Unauthorized")], +) +@mock.patch("requests.get") +def test_test_connection(mock_requests_get, configuration, error_message): + if error_message: + mock_requests_get.return_value.ok = False + mock_requests_get.return_value.status_code = 400 + mock_requests_get.return_value.text = "Unauthorized" + else: + mock_requests_get.return_value.ok = True + + disk = YandexDisk(configuration) + if error_message: + with pytest.raises(Exception, match=error_message): + disk.test_connection() + else: + assert disk.test_connection() is None + + +def test_read_xlsx(): + output = BytesIO() + writer = pd.ExcelWriter(output) + test_df.to_excel(writer, index=False) + writer.save() + assert test_df.equals(EXTENSIONS_READERS["xlsx"](output)) + + +def test_read_csv(): + output = BytesIO() + test_df.to_csv(output, index=False) + output.seek(0) + + assert test_df.equals(EXTENSIONS_READERS["csv"](output)) + + +def test_tsv(): + output = BytesIO() + test_df.to_csv(output, index=False, sep="\t") + output.seek(0) + + assert test_df.equals(EXTENSIONS_READERS["tsv"](output)) From d515fc0a5268a7fa92b4c6e3df497cbb05ccfbcb Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Mon, 20 Nov 2023 19:07:05 +0300 Subject: [PATCH 10/18] fixed backend tests --- redash/query_runner/python.py | 10 ++++- redash/query_runner/yandex_disk.py | 25 ++++++++----- .../utils.py => utils/pandas.py} | 2 +- tests/query_runner/test_yandex_disk.py | 37 +++++++++++++------ 4 files changed, 49 insertions(+), 25 deletions(-) rename redash/{query_runner/utils.py => utils/pandas.py} (97%) diff --git a/redash/query_runner/python.py b/redash/query_runner/python.py index 08baad5531..807b44a487 100644 --- a/redash/query_runner/python.py +++ b/redash/query_runner/python.py @@ -2,6 +2,7 @@ import importlib import logging import sys +from importlib.util import find_spec from RestrictedPython import compile_restricted from RestrictedPython.Guards import ( @@ -23,13 +24,18 @@ BaseQueryRunner, register, ) -from redash.query_runner.utils import pandas_to_result from redash.utils import json_dumps, json_loads -pandas_installed = importlib.util.find_spec("pandas") and importlib.util.find_spec("numpy") +pandas_installed = find_spec("pandas") and find_spec("numpy") if pandas_installed: import pandas as pd + from redash.utils.pandas import pandas_to_result + + enabled = True +else: + enabled = False + logger = logging.getLogger(__name__) diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py index a1b8e711b2..7ae595733d 100644 --- a/redash/query_runner/yandex_disk.py +++ b/redash/query_runner/yandex_disk.py @@ -1,29 +1,34 @@ import logging +from importlib.util import find_spec import requests import yaml from redash.query_runner import BaseSQLQueryRunner, register -from redash.query_runner.utils import pandas_to_result from redash.utils import json_dumps -try: +pandas_installed = find_spec("pandas") and find_spec("numpy") +openpyxl_installed = find_spec("openpyxl") + +if pandas_installed and openpyxl_installed: import openpyxl # noqa: F401 import pandas as pd + from redash.utils.pandas import pandas_to_result + enabled = True -except ImportError: + + EXTENSIONS_READERS = { + "csv": pd.read_csv, + "tsv": pd.read_table, + "xls": pd.read_excel, + "xlsx": pd.read_excel, + } +else: enabled = False logger = logging.getLogger(__name__) -EXTENSIONS_READERS = { - "csv": pd.read_csv, - "tsv": pd.read_table, - "xls": pd.read_excel, - "xlsx": pd.read_excel, -} - class YandexDisk(BaseSQLQueryRunner): should_annotate_query = False diff --git a/redash/query_runner/utils.py b/redash/utils/pandas.py similarity index 97% rename from redash/query_runner/utils.py rename to redash/utils/pandas.py index 12469ab4b1..eec815ba97 100644 --- a/redash/query_runner/utils.py +++ b/redash/utils/pandas.py @@ -1,7 +1,7 @@ import numpy as np import pandas as pd -from . import ( +from redash.query_runner import ( TYPE_BOOLEAN, TYPE_DATE, TYPE_DATETIME, diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index c4f148e54c..0e58d0d2a1 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -1,32 +1,41 @@ from io import BytesIO from unittest import mock -import pandas as pd -import pytest +from redash.query_runner.yandex_disk import enabled -from redash.query_runner.yandex_disk import EXTENSIONS_READERS, YandexDisk +if enabled: + import pandas as pd -test_df = pd.DataFrame( - [ - {"id": 1, "name": "Alice", "age": 20}, - {"id": 2, "name": "Bob", "age": 21}, - {"id": 3, "name": "Charlie", "age": 22}, - {"id": 4, "name": "Dave", "age": 23}, - {"id": 5, "name": "Eve", "age": 24}, - ] -) + from redash.query_runner.yandex_disk import EXTENSIONS_READERS, YandexDisk + + test_df = pd.DataFrame( + [ + {"id": 1, "name": "Alice", "age": 20}, + {"id": 2, "name": "Bob", "age": 21}, + {"id": 3, "name": "Charlie", "age": 22}, + {"id": 4, "name": "Dave", "age": 23}, + {"id": 5, "name": "Eve", "age": 24}, + ] + ) + + +import pytest test_token = "AAAAQAA" +skip_condition = pytest.mark.skipif(not enabled, reason="pandas and/or openpyxl are not installed") +@skip_condition def test_yandex_disk_type(): assert YandexDisk.type() == "yandex_disk" +@skip_condition def test_yandex_disk_name(): assert YandexDisk.name() == "Yandex Disk" +@skip_condition @mock.patch("requests.get") def test__send_query(mock_requests_get): mock_requests_get.return_value.ok = True @@ -40,6 +49,7 @@ def test__send_query(mock_requests_get): mock_requests_get.assert_called_once() +@skip_condition @pytest.mark.parametrize( "configuration, error_message", [({"token": test_token}, None), ({"token": ""}, "Code: 400, message: Unauthorized")], @@ -61,6 +71,7 @@ def test_test_connection(mock_requests_get, configuration, error_message): assert disk.test_connection() is None +@skip_condition def test_read_xlsx(): output = BytesIO() writer = pd.ExcelWriter(output) @@ -69,6 +80,7 @@ def test_read_xlsx(): assert test_df.equals(EXTENSIONS_READERS["xlsx"](output)) +@skip_condition def test_read_csv(): output = BytesIO() test_df.to_csv(output, index=False) @@ -77,6 +89,7 @@ def test_read_csv(): assert test_df.equals(EXTENSIONS_READERS["csv"](output)) +@skip_condition def test_tsv(): output = BytesIO() test_df.to_csv(output, index=False, sep="\t") From df42f1f57d3b747bfe721566663a61b2a2325bf7 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Mon, 20 Nov 2023 21:00:26 +0300 Subject: [PATCH 11/18] fixed pandas to redash type conversion --- redash/utils/pandas.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/redash/utils/pandas.py b/redash/utils/pandas.py index eec815ba97..fc33fc8789 100644 --- a/redash/utils/pandas.py +++ b/redash/utils/pandas.py @@ -1,3 +1,5 @@ +import logging + import numpy as np import pandas as pd @@ -10,19 +12,18 @@ TYPE_STRING, ) -column_type_mappings = { - np.bool_: TYPE_BOOLEAN, - np.inexact: TYPE_FLOAT, - np.integer: TYPE_INTEGER, -} +logger = logging.getLogger(__name__) def get_column_types_from_dataframe(df: pd.DataFrame) -> list: columns = [] - for column_name, column_type in df.dtypes.items(): - if column_type in column_type_mappings: - redash_type = column_type_mappings[column_type] + if column_type in (np.bool_, "bool"): + redash_type = TYPE_BOOLEAN + elif column_type in (np.int64, "int64", np.integer): + redash_type = TYPE_INTEGER + elif column_type in (np.inexact, np.floating, "float64"): + redash_type = TYPE_FLOAT elif column_type in (np.datetime64, np.dtype(" Date: Mon, 20 Nov 2023 21:00:44 +0300 Subject: [PATCH 12/18] added more tests --- tests/query_runner/test_yandex_disk.py | 63 ++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index 0e58d0d2a1..02ab267611 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -1,7 +1,10 @@ from io import BytesIO from unittest import mock +import yaml + from redash.query_runner.yandex_disk import enabled +from redash.utils import json_dumps if enabled: import pandas as pd @@ -25,6 +28,11 @@ skip_condition = pytest.mark.skipif(not enabled, reason="pandas and/or openpyxl are not installed") +@pytest.fixture +def mock_yandex_disk(): + return YandexDisk(configuration={"token": test_token}) + + @skip_condition def test_yandex_disk_type(): assert YandexDisk.type() == "yandex_disk" @@ -71,6 +79,61 @@ def test_test_connection(mock_requests_get, configuration, error_message): assert disk.test_connection() is None +@skip_condition +def test_get_tables(mock_yandex_disk): + mock_files = { + "items": [ + {"name": "test_file.csv", "path": "disk:/test_path/test_file.csv"}, + {"name": "invalid_file.txt", "path": "disk:/test_path/invalid_file.txt"}, + ] + } + mock_yandex_disk._send_query = mock.MagicMock(return_value=mock_files) + + tables = mock_yandex_disk._get_tables({}) + assert len(tables) == 1 + assert tables[0]["name"] == "test_file.csv" + assert tables[0]["columns"] == ["/test_path/test_file.csv"] + + +def mock_ext_readers_return(url, **params): + return test_df + + +@skip_condition +@mock.patch("requests.get") +def test_run_query(mocked_requests, mock_yandex_disk): + mocked_response = mock.MagicMock() + mocked_response.ok = True + mocked_response.json.return_value = {"href": "test_file.csv"} + mocked_requests.return_value = mocked_response + + mock_readers = EXTENSIONS_READERS.copy() + mock_readers["csv"] = mock_ext_readers_return + + expected_data = json_dumps( + { + "columns": [ + {"name": "id", "friendly_name": "id", "type": "integer"}, + {"name": "name", "friendly_name": "name", "type": "string"}, + {"name": "age", "friendly_name": "age", "type": "integer"}, + ], + "rows": [ + {"id": 1, "name": "Alice", "age": 20}, + {"id": 2, "name": "Bob", "age": 21}, + {"id": 3, "name": "Charlie", "age": 22}, + {"id": 4, "name": "Dave", "age": 23}, + {"id": 5, "name": "Eve", "age": 24}, + ], + } + ) + + with mock.patch.dict("redash.query_runner.yandex_disk.EXTENSIONS_READERS", mock_readers, clear=True): + data, error = mock_yandex_disk.run_query(yaml.dump({"path": "/tmp/file.csv"}), "user") + + assert error is None + assert data == expected_data + + @skip_condition def test_read_xlsx(): output = BytesIO() From 7f4286645598e12d02e1fcf93d9de2e2911037ed Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Tue, 21 Nov 2023 13:27:19 +0300 Subject: [PATCH 13/18] added tests for pandas --- redash/query_runner/python.py | 3 +- redash/query_runner/yandex_disk.py | 2 +- redash/utils/pandas.py | 9 +++-- tests/test_utils.py | 57 ++++++++++++++++++++++++++++++ 4 files changed, 65 insertions(+), 6 deletions(-) diff --git a/redash/query_runner/python.py b/redash/query_runner/python.py index 807b44a487..939bcfbf75 100644 --- a/redash/query_runner/python.py +++ b/redash/query_runner/python.py @@ -2,7 +2,6 @@ import importlib import logging import sys -from importlib.util import find_spec from RestrictedPython import compile_restricted from RestrictedPython.Guards import ( @@ -25,8 +24,8 @@ register, ) from redash.utils import json_dumps, json_loads +from redash.utils.pandas import pandas_installed -pandas_installed = find_spec("pandas") and find_spec("numpy") if pandas_installed: import pandas as pd diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py index 7ae595733d..9d9c70eeea 100644 --- a/redash/query_runner/yandex_disk.py +++ b/redash/query_runner/yandex_disk.py @@ -6,8 +6,8 @@ from redash.query_runner import BaseSQLQueryRunner, register from redash.utils import json_dumps +from redash.utils.pandas import pandas_installed -pandas_installed = find_spec("pandas") and find_spec("numpy") openpyxl_installed = find_spec("openpyxl") if pandas_installed and openpyxl_installed: diff --git a/redash/utils/pandas.py b/redash/utils/pandas.py index fc33fc8789..51cae6cd3e 100644 --- a/redash/utils/pandas.py +++ b/redash/utils/pandas.py @@ -1,4 +1,5 @@ import logging +from importlib.util import find_spec import numpy as np import pandas as pd @@ -14,15 +15,17 @@ logger = logging.getLogger(__name__) +pandas_installed = find_spec("pandas") and find_spec("numpy") + def get_column_types_from_dataframe(df: pd.DataFrame) -> list: columns = [] for column_name, column_type in df.dtypes.items(): - if column_type in (np.bool_, "bool"): + if column_type in (np.bool_,): redash_type = TYPE_BOOLEAN - elif column_type in (np.int64, "int64", np.integer): + elif column_type in (np.int64, np.int32): redash_type = TYPE_INTEGER - elif column_type in (np.inexact, np.floating, "float64"): + elif column_type in (np.float64,): redash_type = TYPE_FLOAT elif column_type in (np.datetime64, np.dtype(" Date: Tue, 21 Nov 2023 15:25:08 +0300 Subject: [PATCH 14/18] added tests for pandas converter and yandex disk --- tests/query_runner/test_yandex_disk.py | 27 ++++++++++++++++++++++++++ tests/test_utils.py | 3 +++ 2 files changed, 30 insertions(+) diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index 02ab267611..ca8c97e473 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -134,6 +134,33 @@ def test_run_query(mocked_requests, mock_yandex_disk): assert data == expected_data +@skip_condition +def test_run_query_with_empty_query(mock_yandex_disk): + result = mock_yandex_disk.run_query("", "user") + assert result == (None, "Query is empty") + + +@skip_condition +def test_run_query_bad_yaml(mock_yandex_disk): + bad_yaml_query = "unparseable = yaml" + result = mock_yandex_disk.run_query(bad_yaml_query, "user") + assert result == (None, "The query format must be JSON or YAML") + + +@skip_condition +def test_run_query_without_path(mock_yandex_disk): + bad_yaml_query = "without: path" + result = mock_yandex_disk.run_query(bad_yaml_query, "user") + assert result == (None, "The query must contain path") + + +@skip_condition +def test_run_query_unsupported_extension(mock_yandex_disk): + bad_yaml_query = "path: /tmp/file.txt" + result = mock_yandex_disk.run_query(bad_yaml_query, "user") + assert result == (None, "Unsupported file extension: txt") + + @skip_condition def test_read_xlsx(): output = BytesIO() diff --git a/tests/test_utils.py b/tests/test_utils.py index aca28eace5..78d193ab8f 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -122,6 +122,7 @@ def test_render(self): @pytest.fixture +@skip_condition def mock_dataframe(): df = pd.DataFrame( { @@ -136,6 +137,7 @@ def mock_dataframe(): return df +@skip_condition def test_get_column_types_from_dataframe(mock_dataframe): result = get_column_types_from_dataframe(mock_dataframe) expected_output = [ @@ -150,6 +152,7 @@ def test_get_column_types_from_dataframe(mock_dataframe): assert result == expected_output +@skip_condition def test_pandas_to_result(mock_dataframe): result = pandas_to_result(mock_dataframe) From fe4077b3b5ab58ec700ccdad6db143f0e3a7e25a Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Tue, 21 Nov 2023 16:07:02 +0300 Subject: [PATCH 15/18] added tests for read_file and multiple sheets --- tests/query_runner/test_yandex_disk.py | 54 ++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index ca8c97e473..bdd5ce8b3e 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -99,6 +99,10 @@ def mock_ext_readers_return(url, **params): return test_df +def mock_ext_readers_return_multiple_sheets(url, **params): + return {"sheet1": test_df} + + @skip_condition @mock.patch("requests.get") def test_run_query(mocked_requests, mock_yandex_disk): @@ -161,6 +165,56 @@ def test_run_query_unsupported_extension(mock_yandex_disk): assert result == (None, "Unsupported file extension: txt") +@skip_condition +def test_run_query_read_file_error(mock_yandex_disk): + mock_yandex_disk._send_query = mock.MagicMock(return_value={"href": "test_file.csv"}) + mock_yandex_disk._get_tables = mock.MagicMock(return_value=[{"name": "test_file.csv", "columns": []}]) + mock_yandex_disk._read_file = mock.MagicMock(side_effect=Exception("Read file error")) + + data, error = mock_yandex_disk.run_query(yaml.dump({"path": "/tmp/file.csv"}), "user") + assert data is None + assert error is not None and error.startswith("Read file error") + + +@skip_condition +@mock.patch("requests.get") +def test_run_query_multiple_sheets(mocked_requests, mock_yandex_disk): + mocked_response = mock.MagicMock() + mocked_response.ok = True + mocked_response.json.return_value = {"href": "test_file.xlsx"} + mocked_requests.return_value = mocked_response + + query = """ + path: /tmp/file.xlsx + sheet_name: null + """ + + mock_readers = EXTENSIONS_READERS.copy() + mock_readers["xlsx"] = mock_ext_readers_return_multiple_sheets + + with mock.patch.dict("redash.query_runner.yandex_disk.EXTENSIONS_READERS", mock_readers, clear=True): + data, error = mock_yandex_disk.run_query(query, "user") + + assert error is None + assert data == json_dumps( + { + "columns": [ + {"name": "id", "friendly_name": "id", "type": "integer"}, + {"name": "name", "friendly_name": "name", "type": "string"}, + {"name": "age", "friendly_name": "age", "type": "integer"}, + {"name": "sheet_name", "friendly_name": "sheet_name", "type": "string"}, + ], + "rows": [ + {"id": 1, "name": "Alice", "age": 20, "sheet_name": "sheet1"}, + {"id": 2, "name": "Bob", "age": 21, "sheet_name": "sheet1"}, + {"id": 3, "name": "Charlie", "age": 22, "sheet_name": "sheet1"}, + {"id": 4, "name": "Dave", "age": 23, "sheet_name": "sheet1"}, + {"id": 5, "name": "Eve", "age": 24, "sheet_name": "sheet1"}, + ], + } + ) + + @skip_condition def test_read_xlsx(): output = BytesIO() From 2f9eb7f41c9e979b1336b6c897f2ff61e55c5807 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Tue, 21 Nov 2023 20:51:53 +0300 Subject: [PATCH 16/18] pandas: do not load if lib is not installed --- redash/utils/pandas.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/redash/utils/pandas.py b/redash/utils/pandas.py index 51cae6cd3e..0a46461983 100644 --- a/redash/utils/pandas.py +++ b/redash/utils/pandas.py @@ -1,9 +1,6 @@ import logging from importlib.util import find_spec -import numpy as np -import pandas as pd - from redash.query_runner import ( TYPE_BOOLEAN, TYPE_DATE, @@ -17,6 +14,10 @@ pandas_installed = find_spec("pandas") and find_spec("numpy") +if pandas_installed: + import numpy as np + import pandas as pd + def get_column_types_from_dataframe(df: pd.DataFrame) -> list: columns = [] From 00a441ba3dc412062dcfb9313462f15b71b4d630 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Tue, 21 Nov 2023 20:52:03 +0300 Subject: [PATCH 17/18] added test for yaml read --- tests/query_runner/test_yandex_disk.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index bdd5ce8b3e..89d2f7a3d6 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -144,6 +144,14 @@ def test_run_query_with_empty_query(mock_yandex_disk): assert result == (None, "Query is empty") +@skip_condition +def test_run_query_nonstring_yaml(mock_yandex_disk): + bad_yaml_query = None + data, error = mock_yandex_disk.run_query(bad_yaml_query, "user") + assert data is None + assert error is not None + + @skip_condition def test_run_query_bad_yaml(mock_yandex_disk): bad_yaml_query = "unparseable = yaml" From f377b9d0329f9d2bd463ef508ab642eea18b3da8 Mon Sep 17 00:00:00 2001 From: Vladislav Denisov Date: Tue, 21 Nov 2023 21:53:46 +0300 Subject: [PATCH 18/18] fixed test for yaml read --- redash/query_runner/yandex_disk.py | 12 +++---- redash/utils/pandas.py | 48 ++++++++++++-------------- tests/query_runner/test_yandex_disk.py | 4 +-- 3 files changed, 29 insertions(+), 35 deletions(-) diff --git a/redash/query_runner/yandex_disk.py b/redash/query_runner/yandex_disk.py index 9d9c70eeea..145c52b212 100644 --- a/redash/query_runner/yandex_disk.py +++ b/redash/query_runner/yandex_disk.py @@ -114,9 +114,9 @@ def run_query(self, query, user): try: params = yaml.safe_load(query) - except ValueError as e: + except (ValueError, AttributeError) as e: logger.exception(e) - error = str(e) + error = f"YAML read error: {str(e)}" return data, error if not isinstance(params, dict): @@ -157,12 +157,8 @@ def run_query(self, query, user): new_df = pd.concat(new_df, ignore_index=True) df = new_df.copy() - try: - data = json_dumps(pandas_to_result(df)) - error = None - except Exception as e: - logger.exception(e) - error = str(e) + data = json_dumps(pandas_to_result(df)) + error = None return data, error diff --git a/redash/utils/pandas.py b/redash/utils/pandas.py index 0a46461983..40d74b2c5e 100644 --- a/redash/utils/pandas.py +++ b/redash/utils/pandas.py @@ -18,32 +18,30 @@ import numpy as np import pandas as pd - -def get_column_types_from_dataframe(df: pd.DataFrame) -> list: - columns = [] - for column_name, column_type in df.dtypes.items(): - if column_type in (np.bool_,): - redash_type = TYPE_BOOLEAN - elif column_type in (np.int64, np.int32): - redash_type = TYPE_INTEGER - elif column_type in (np.float64,): - redash_type = TYPE_FLOAT - elif column_type in (np.datetime64, np.dtype(" 10: - redash_type = TYPE_DATETIME + def get_column_types_from_dataframe(df: pd.DataFrame) -> list: + columns = [] + for column_name, column_type in df.dtypes.items(): + if column_type in (np.bool_,): + redash_type = TYPE_BOOLEAN + elif column_type in (np.int64, np.int32): + redash_type = TYPE_INTEGER + elif column_type in (np.float64,): + redash_type = TYPE_FLOAT + elif column_type in (np.datetime64, np.dtype(" 10: + redash_type = TYPE_DATETIME + else: + redash_type = TYPE_DATE else: - redash_type = TYPE_DATE - else: - redash_type = TYPE_STRING - - columns.append({"name": column_name, "friendly_name": column_name, "type": redash_type}) + redash_type = TYPE_STRING - return columns + columns.append({"name": column_name, "friendly_name": column_name, "type": redash_type}) + return columns -def pandas_to_result(df: pd.DataFrame) -> dict: - columns = get_column_types_from_dataframe(df) - rows = df.to_dict("records") - return {"columns": columns, "rows": rows} + def pandas_to_result(df: pd.DataFrame) -> dict: + columns = get_column_types_from_dataframe(df) + rows = df.to_dict("records") + return {"columns": columns, "rows": rows} diff --git a/tests/query_runner/test_yandex_disk.py b/tests/query_runner/test_yandex_disk.py index 89d2f7a3d6..437e8068bb 100644 --- a/tests/query_runner/test_yandex_disk.py +++ b/tests/query_runner/test_yandex_disk.py @@ -146,10 +146,10 @@ def test_run_query_with_empty_query(mock_yandex_disk): @skip_condition def test_run_query_nonstring_yaml(mock_yandex_disk): - bad_yaml_query = None + bad_yaml_query = [0, 1] data, error = mock_yandex_disk.run_query(bad_yaml_query, "user") assert data is None - assert error is not None + assert error.startswith("YAML read error: ") @skip_condition