From 91e88021ba7e9e6558b10fbc40679880998c9962 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Taavi=20P=C3=A4ll?= Date: Thu, 3 Mar 2022 16:49:04 +0200 Subject: [PATCH 1/2] feat: Zenodo remote provider for transparent storage on and retrieval from Zenodo (#125) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * added zenodo remote provider * added test * updated formatting * added sandbox, access_token is required, create new deposition when deposition id is not provided * updated test Snakefile * added initial zenodo remote documentation * added zenodo branches for ci * fixed code formatting * added zenodo sandbox pat to workflow * added zenodo remote test * fixed formatting * deleted comment * skip tibanna test for now * deleted branch * added zenodo file exception when hits api limit * moves file size assertion to remoteobject * updated test * added test file * run also too large upload test * updated docs * Delete superfluous file. * removed error handling around requests and re as unnecessary * updated error message * moved download code to remoteobject * use download url * fixed formatting * upload large files * fixed formatting * removed makedir * updated test, download to subdir * moved exception handling to request wrapper * removed zen branch from ci * fixed deposition id handling * fixed stats is not defined * fix download * fixed test download path, updated docs * Only run test if token is available. 
* fmt * envvars * fix skip criterion * fix error handling * windows secret Co-authored-by: Johannes Köster Co-authored-by: Johannes Köster --- .github/workflows/main.yml | 2 + docs/snakefiles/remote_files.rst | 36 +++- fg.pdf | Bin 0 -> 20009 bytes snakemake/exceptions.py | 5 + snakemake/remote/zenodo.py | 187 ++++++++++++++++++ tests/common.py | 8 + tests/test_remote_zenodo/Snakefile | 30 +++ .../expected-results/download.txt | 3 + tests/test_remote_zenodo/test.txt | 3 + tests/tests.py | 6 + 10 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 fg.pdf create mode 100644 snakemake/remote/zenodo.py create mode 100644 tests/test_remote_zenodo/Snakefile create mode 100644 tests/test_remote_zenodo/expected-results/download.txt create mode 100644 tests/test_remote_zenodo/test.txt diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3067b2e15..3431b755e 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -91,6 +91,7 @@ jobs: - name: Run tests env: CI: true + ZENODO_SANDBOX_PAT: ${{ secrets.ZENODO_SANDBOX_PAT }} run: | # activate conda env export PATH="/usr/share/miniconda/bin:$PATH" @@ -195,5 +196,6 @@ jobs: - name: Run tests env: CI: true + ZENODO_SANDBOX_PAT: ${{ secrets.ZENODO_SANDBOX_PAT }} run: | python -m pytest -v -x tests/tests.py diff --git a/docs/snakefiles/remote_files.rst b/docs/snakefiles/remote_files.rst index ca351e86d..806883162 100644 --- a/docs/snakefiles/remote_files.rst +++ b/docs/snakefiles/remote_files.rst @@ -798,8 +798,42 @@ Note that the filename should not include the ``.cip`` ending that is sometimes Upon download, Snakemake will automatically decrypt the file and check the MD5 hash. +Zenodo +====== + +`Zenodo <https://zenodo.org>`_ is a catch-all open data and software repository. +Snakemake allows file upload and download from Zenodo. +To access your Zenodo files you need to set up a Zenodo account and create a personal access token with at least write scope. 
+The personal access token must be supplied as the ``access_token`` argument. +You need to supply the deposition id as ``deposition`` to upload or download files from your deposition. +If no deposition id is supplied, Snakemake creates a new deposition for upload. +The Zenodo UI and REST API responses were designed with uploads of a total of 20-30 files in mind. +Avoid creating uploads with too many files, and instead group and zip them to make their distribution to end-users easier. + +.. code-block:: python + from snakemake.remote.zenodo import RemoteProvider + import os + + # let Snakemake assert the presence of the required environment variable + envvars: + "MYZENODO_PAT" + + access_token=os.environ["MYZENODO_PAT"] + zenodo = RemoteProvider(deposition="your deposition id", access_token=access_token) + + rule upload: + input: + "output/results.csv" + output: + zenodo.remote("results.csv") + shell: + "cp {input} {output}" + + +It is possible to use the `Zenodo sandbox environment <https://sandbox.zenodo.org>`_ for testing by setting the ``sandbox=True`` argument. +Using the sandbox environment requires setting up a separate sandbox account with its own personal access token. -AUTO +Auto ==== A wrapper which automatically selects an appropriate remote provider based on the url's scheme. 
diff --git a/fg.pdf b/fg.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f43927b2ad172d7c1240bd12c64e6edf01d85fda GIT binary patch literal 20009 zcmbrl1$11?t|n}b?ZnL7W@cu_n3=iFY{$%uF*9?_%osB>Gc#ji%J1Zy^X{GhzL_^` z&FbzAsw$PLO1mYMzD+7GB1X$d#|%r_w|}#LT6~)`)i(gk0$>2x8d|{e@Brwgfi|X2 zW&qZAk|Ka!%-qTe=*VDVf_A?zH(#8 zmiLtvCYo?4dUD^Z-Ot3VR=jHG7jAk{Us6W{)ie$l$6GdetKe?AwlN=fNE+ z?)}Kg!;4S9x83lv&A{a|n`eI6E2k)ky_*=H4>K)2r1j~?0W{e3NYU9@Pk7na(Cv-R zL|fC?8o|@V>0x@fw2OtZrb~XGl_oAoj9Lf6AN5yJGMSb^jo&z4nwItk9bD}R5!-P$ zbR~);m}}0Uy@ht(#c=y!zFSSX z8pAj1El)yeBQ}%4jTMHuw8~5rixd08B|77LP8S;)R2@lzik1|nH@TN2u_sGXNAVNF z>M`BJ_3&{*B!eS8fhX~&5O8- z{}KIi@7gdjmi+a*(a@zP2Tz~mQ{(Ek?*O6Sj8%l|UgV7X=J+*TPWNN9J-q&>5;0Si z_FJaOCP74oQLJ){{dPu5DAzRb2N`fM6$mem-7r{Z@=!5GXHmukeJFaiBwc=6f@$_B znyfyB&T^jmJOWL9R;|+UPN&oDLI79Y%_D0(5;vAofqDJo5zh+B#}(nigvg3}^ztT% zz3t^$If*p9m3&+yvi)7KaEU6lZ_ONK>8rKUo_Yy2q^8FXX`xPgJIi64^-T62aB)~M z%{r!S<`?USa)55U{CQS3m3FCxzCz2W%>AqJ3!8D*048h`#apwmmfo-QpC2ft26wa* z=q8-oke<6lZG`TTgqrt$z4VCw`m!+W`*tt=!X|Ox7A6Z^)oM^L#=cnB?p7`@>m?W6 zmZ6?0iCQW3vfevTDyy_sN|U;eDlf7~$FSu)N2N-&0}X=39SI|`s8C7tr`=Gu8YpeR z$_yxfC;bE9*lNs!q%rAJqpKYCMYpO=yY3jFB3W?NI(H@+y)MoU`xJQEGdOc#d`CFlN6ctWMH)&FPS`kv{xssq^iYgJ#p<(# zE096MhSTo66bbDfdDtZk5z~YR>$hr9 z`6u{8SMXc>h;4LTJV?s;LrGn<_(;gAi`k8WUNB0G^by<#?AdjH+DLiA$j6YD#IN#? 
zbbatyN>-7FRt!77r>_P!Qxa>jp}v<+J`5|jj*|u)~-Fxs-M)ns0bc4sl9v_dqrh`Auy%qrp2Z~ZlyAGluib5(38RBK#w8x{fR;y>OAk}#5eVZe zwR6;hR`80t{C9TdtQjx-CGc-fHQNnT^(3_NJ}nW6SmYCPmL z?Ze_^QjNwm!S;qSi5h7e*dV%>+Ao7?u5nh}%yW>(V#xC*2KgY3pua`+Wo7CI$k+uP0e))*6Q3kpFdu#2}#E}ekSpV?3tDVSrv^HQmSb@UVCp-;933$ zjev;jTv90pK_@cKZGk&U;WJ=a`*?}(2HXOq;l1MzXUdc#B{4EG87POIT>Uu4D-bU} zix9qqhT#}U@f*+@Ap`;XZ+S2x@rWqj(PA7yV+9FfBmqJBLPT(W3e?wHIh<4A zNTB!cPvnP;iF;SVut+n;3IN4!k|9fT#*KiMCsv@Buj^HTLUrubEL(DBKcfoiHm=?v z{u>;(2RQ*O(8l=hfAjaN-$vl~KmYGDBMUq0@6zAbO#fyel-=xr0D5@?(?3TCpp6rN z`7dLk2z0dl;$Q@H1hD-@5Vo~(dM|eb{5CM})k80fcqcHjvjA9FS#@CF z4a(pB{BHM;A(D5nHBtgP0kqyb6A=T@D+67f06GAAVOuL(2PHcLBOu_ny%Kh01hD^M z&&LO#7j<r^u_DK({;BdWss6C|N36ew|F3@Ng$3VR{9EP!oU8w}GULBj{ylU5aQ@r!GX6bTvhUMM zFZkR1+Wa05y$I0R+z6;BF7!WJ<;)}(Wu;~GkjW1Ajwxg!0O=jJI}BikTLKM2AxzR& z9xOPJ%vl6d3>Aq|)mXV3R6K;pNEG#BfWP4)%nhRaz;^`^(f##EOPI?F*4vG=*FlZV z<-?}MWz$Qq#a0mc7k`KzCv{L?mRxnzi_`Dg$S9+Ik6@UHpz!LT-PeK;1T+l1#Rji>)z%)y|R?*(Se{M!KCJ36ZOvLb_#|@A(1P zp)8pOvj0diJNqL;8iWBgRd4K&EY~WGNfZ_9BJ2hX1UjLQv)utc;wPtB7Km$+>0UTQ zn51D5(qfAF{?U-h({-KFPeVrPgkG8LP*>^6yZTRG88CdSY?DMmNj0of&0^0haL>;* z77omQb}et>dn>*24!J+8T$7zmW(h`SX>3}ZmD6{igFKLErsLIhCaQqcTSE!#Z`-UBSw(D> zV>FZd+u;+zN9}N%ZD~OwrGtl0(Z&v>c{Eu0n}Jr}!H{D>y5a0Te)1I; zfDQisnE`ae&m7wS3K1<&urvm}03lUq0TD+N+D%wco&y!UUpOoVF;3v;6xlaSs({j5 zVR_y=VkhiYXq|wkoDWj~7N}QHuRx>}uvr7RC6GqnVk1vo8H60!Ds z;7}pC1}T{616aEu8j;(Ess;;&QVjDcMk%V&7zb=7h|GRJ{R<7}Yl_s#%lVyQ*pSkr zX!}(4qxH3_lWV}249>W5;Rn01c5v;yn=!SSY*;HHnh+Kv+k8)Z;kL=HUE8TPP%r%v zzB^tSdJ=ZR`r!KDZ$=XgbdkrvXhK-}BM>G5NUTVd5wW2UpmZCD<}wy{z|;`Xm8&Gl2I!F@ z#PQ1~P>`X@N#TkLEz5FBwFtNgxCs|3!hN%uQ!h6PQFdv9j=Hk zi?vKVP4*y_OIS>HN!FvwCf6hMk$Nk7X$DjHF7RE+FTYwfS8Th!hc`%4G%YXDFw`*3 zFw8JuhqYgUv^~x*t_fMLMBuo3hP03nTWGe>ZKk-b=_yb!(V= z^OMDW?+NcoJa!s(CpH$g3U)Hf04r9SP1ZbX!brUE$adcYjf=${@N-jcjpXt@<^{NyubNLjq zE3yl^#JjY7M!Kf}EBXEFNQhUaz}WEtdd8xfbrmQ7RTYvywga2Pe(hhOvUYLQik zWlF|LMh(j-EzNBx>$gtUPkypIFe76rV{IGEt6tO4C>LAE;q2uZ^2~Qj|4IFkH$puk 
z6MY?BkLH|4iT0F+K!Z{PQr)`#r246Mr7z7=$H;3i*kaeHZlY-YprN9KcGJ1*+>1L+ zr+OW|O}1_F0sevLne3VI18<0HNC*7ml9Mw}Er z3Can$iTsbBFs?A>XfYk8?YEn89b)N>=_2)9?Q-W-RugwMPoKtQ7NZWG`>gxcmw1<7 zo1du>J|o16M+_zu#~)%(e$)+3u2PFtt7?jSG`zI9Eav0l3*(F8L+NzxEc2rOVeks} z>io2RyLff@wD>Fs!2wYYa|BxeAqJuPf$t*|3^L>wh~Dp@-`o5N=x6juwCB*lgkFT5 zgLq(AKk1-7Ibzb$u#}UXxizb}db_$n&>D)02=}!~L=VjjiAO}Ce@1r^8y4>uofnr8 z^$<-K{hX{$qfxgf7O5EtDB?vYz(MmMads$qoLT=IvU|1zyL&;$tesSMw&KvRUotvb z8Kd`I!=4HshX8LA@mCm1PszS?<(*!O%Rawl;Iv3` zpH7&OWThmU;y^@5OgnUZolIfCh^T7Ha^|L~x2I1AzI?TM}9rV2{ z`3%CXBj)oLDkymZ)3MSNjv6LyDPP~D?(ZkUP#vqu!hkPhzl()Mgd z@rG}QpS@X4SY5uDb~?WWzkFxCu!OG&TBy`sZIN}+`QEyH-+3Bs4X~D2z8V!9jRN9q z+JtY+DdZ?rcj}wfT^v^ zZfba`{ahb=2t4m7d?Z+1uJL^<6(6ZM@ zFpoBlu4npI+aXnDAwFMxQgGsj9i4X2apKW*p1Cr(y4ba8?K2A+1>XOG4<8k`-Dl`g z-m*f_y5C@JyS#VuH1DLf%hX5DN90`MI(AY#NW5^cC;H1g_uL};H}(X!o~XEinqB)r z+t;moVGoqDV|nKMmLCutgq+tqgFgr^vp(@GyM=p|9+SUjKQvBhhHK_7tSy8!7OJQ+KImXXex?c#kSG!S$h5rz@MrRK!fTE91w zSy^4VG3u9GrF-4tP_o+b{GyUvW?IRq!|j#n)o~vD8nuAmmX)M;=52o5aB1CfY*+W1 zv|I7D6VmJON#~h<QB~%nnnjV3)BBf{8=T}3prUbNi9|{b|PLOJ{kEl za^1b=uK#K@GI@44sEhKg@iB3_EZf`Vxx>_OxbMnx`ZSeaYU5ic=hMQ2^`Y_6-mHuH zWl?eXMp-BSgWqfJv(epP?D3i054ptbD1H}Dn3o$@GHa8A&Bwso=l=`?{-}e>OLE ze0TT%a?*AF-A(={Ci(5~|HV_-z{$YM*7UDH#POd9L>1`ZXl`o*pl76G`M37JefodA z?7y1-asTO+z8E_FiDr}?z5xF!_&uh-3WXGvBsGPoC4g4WKqqq}gTG=15ul@ygSnlP ztphCM-%*aJ&HJ{Pxy`#QG6qJ9w$=tV|DuSQJ2*NCn;AF&n3>-9)(rl>VPbl}Q!_Vq zGW#=Nb`IFz5y*f4VPfY1{QfZjSXtTr{Qk2ZE64Ak;m`5^!0A8R{U_yjAOF=8EGxr* zUH>fu1K_vpzuWzF{zs<2+A%XT1Ae#pE$^R__v=5OKb)AEe@pq-@jrY1zoh&>j_==k z`G4T_dyM~!g8qBp`Y#Rpm$Lk&JhBGX?}6NZqgYl3rtdoZt`omiPv}pCOv}m22%u%* zU<5F-u&@Fc8JU=LVCf~_wcgxF(8kmX_&$~Nf{sSN<8OBM_v_!C{l1}PVt&69Hn5Wb znwy%v7c#N~=#`v+)~Wzb4gkISUu0NTruXpjH(3I}!TQe0+4PS-0$5nypF|l0*T3!< ze~|5GO)aJ`5*R@=eb~315LF62nmco<4MJ=ujdVfwN=u8V%8K z#9R>30tK_x#%Xlz0p~NMHtS-?vsA$W^tiNV?;wep6N8ECyNsV*8xETuzg|5q+b%br zZ95i#qv-&s6~7?KSsbkcKTn|kL}Y4RK@BNvx&`R_E?uLl8tMT(NOk%_!QDrSO-1RK^*S_vYCIMw$cVTu$*TEsQ@TyT!tD!~o1tK0j 
zL9UtSce@Sf&j&5vI)`xT3?sTAGD&*tO7Y`IA7y!N=hS4_UrK3bHme2g9eHSj5;{>q zSiisF&nNle|KuFQHTWh+OzuZcuCI#imYU&@j0sN}`W3$|eMXmzUT8TcDOtZS2i-Ll zH-rik1~P`{N*xn>OSXly(wcI}saMhg8zw6qoorAWtqfg#q*p#3J-8-C zigDjS>T_E!*tszR&(~vfX&M>;jsav5TuN47AKcsnv6H{m*-pWhWG$GLv9^3aXEA1l zY=q*zC%1IqMKPf(xZ>9K-3;=Y@I8FIbj2ksG6a0Q_>5h+_{8rojKi6fMlTx)FtWB%g`3g$ zZFHfQeQo{ReFlbUDKt+aO_=o9q#Xr`EODBOQgI}~!;U(IvfHSf(1k-u0pg^BJH2~u zR(rCs!VbH+yU65>$Pxo~d%HGkZTg?$1J*S)dq)@rD`5;Aj?C(h<)?*X{_?e|1P;$`E+}|fs!zei zv3cKQpL=v+e`#JhDT%Gqy??qfDd?6ejRy@bE1XOkE;q`0JW2l9D*1G%2YqE5wx%<9 z*)h$0c?s3xnk6v=-F-wyd}Z%2*l99<-8$=rn4!v0>O9u%d(lm0qrO&9aZUTh(OE6< zK47E4U`uCoq;p>f=u`{5)2Z7(3~n#Wa-J-@EG_>1O~cEe?TSFwtH~*PcQ6xV)%yLf zE9D(z%+B_X8wk~mblg(Tz7O2&gS=o(-Ovj%ZNjtBearxfePr|!80{>H<-a);Tx}w` zA+SQKJb_6|>6|g-0FUlbvcrT}3b{8 z!0+64k>MXen}+fbw16Q?4{*M_aayr3vAy5UXB0wYdKOsi(p;Ajh{pB(i25#yuN;>k ztGe}uq9t4kIA_pXFn@xNltP}#2Ru^E$h>vqA9&;bG~c_GAaEeUQoOE!t44F)eF?ed z3n=aKry;5^rd}R^NF`ugU(?)JTa56O5VYpPR+&{)kH+NyJ>25ow%z+U&||Tz`Y z=Jdd@fw44t_T_9Z&8qzsYC91BJ*_wqqgAosJBeCKI4djT~ST<&+d_C6DN^yC}_VOlK4Ht7_1#vlT zhlcc>jP3MErdz{vIf&O)w(Wzwl`iaqhVOn%k)aQmv+OQ(=!V&kTnFY6VhbWzjw(Dm(2UMp|eGVPlF-BcTzd7q=4`-|OQ& ztJ{k1u1ysTH=ylb+qmF08Yq3C?Ib4l5`?}Ej5=w_YQgChOj#GOjAFJBwldNQHP3N8 zo)I#Ua~o>EQ5t368rSSE(&7@1Y^tp($!9{fS_&O2)CUj3W+!Cn90;0avOKjAHn!$R zMjZ?ar4(hiU}=Z@s6!Z3JT|*lQw^H>alS#PF-K-$f-psS6$qyZ1Q5Rl0>8xKl<>#e zix1${(>Q3UijVU1B3t`Bf1;%=oAip@|8W8eg*XB_souxDmqy&x7chiAQ(l6|b%08g zeN{>r|XCkb#O!B0QYgU4OKPWs&M4Q&QmzV8KM&P>!Q0h zo?afO?bY22(C)L5PsG-|H%xsz-JNfFpv3ODJ5EE9Zb9qvj(O4PDp?g}5B}$VS~zH> z1^EnNvHGici*-UZJhq&+R6f2>;gSvF(97BwWU&nf6fPM81nHTqC~dPP-)JcYABkgP zzrvN+lmm;}sIJEol~uLzD!IHA3Tw> zw=lo`Edx{<$>>B=+#WxeuxEmL)=u?qMuF|hp+zEnioNO8at6LyE#+v{x z{2F|S_!@dh1QY{a-?m|2OQ?hE9{<-~(f|(ar&nBf_^l|34ZkHQAbj$x051L)Zd|%9 zT6ncj`dp#CiJ^{=`&UOkxFLg|cCMB#53ia*+mVK^L(e%2-6@Q~Fo&VL4Ky7dzjI0pUT{{A9`m(SV6Uz)z$yj; z35ysx7Gx)Sv88bkbpyg48W=wyLb=H@_nfP%8DLNFV6^n{d?-JCTs6+h-ISbhXdc{MRPy^i>g9!jIR|f$9t~Zw}Q7#xroPmlHO9^S;ZU 
z94}-IL%blW)do>wIKO=KoMes?qaSsukpnW03!*YelT(W|NZZ0*!?{N-e;9g``6yzN z@lboxbRh)vMBadSuO)4R>GW}Yj3{HI!WK!a?ddj%t2gkCNTG=Q@*rGp zL}@r|Sn>l=cEr+EZACI8ETnCw*3sdMO(ua7RuiY^?&go=PJFb3(n=$Fc+!0alk+Rk z&QT1O7-iMr^l>&OjB;H5Mc!fF%L&>hDf$9ctm+NDBaJhtrwQ z`?8w{%x4Yo!s}U8mj+eC9FVa-FfaVVL}xIvV~12N<}XBpyfD-#3o&0J6R2dG=xX_l zy@uZW6GKFhN0(Vuo6MRFK~h`>2FS1z+@Lo0wJ&6q+kQ#XL~5mdyB_2_qj@)&=ee%! zvyKho&%jAEl9Hz8dhucesU;j!?sRv*=}Dm5b322(?`R2bjbUM1!c?!lGa*${_wr(OV{uu=JL>cePBtO{*dRa3R(GoL)v&)Bl^`kq2Q*KAi~zqX z1QH1~ zHq_2%|0_cR|LWIg- zQ_VSMW9VQMjGQJ`-(MTb7bzrl62IdRg0>e;f=v`rXdW|`6tiZ4={%PQ3 z+@GCbjDezF)DG)JSYRa4u~z3!B+Q{L?M`Wy07zkh zo?&&K=(JcND{gt5nziDSfYB6DVyk+EnP}*T1sUkxn$l)Wg1!?*C3Ogb?a-~HdCYKF zv?MdxMC_6fjyiG_NCA-VPUBOH{qkwHnOmPb$97fm##`Fa zW{o3?@a2S?@n&4Hi&yjZ+vlpdHGV@5SBPjHib&7yhppC9232<-J9R&kdXqF0`X>IY zM=3F~gB7Z zPvwya3DQ~3_{j}KFsYR zc-l2O?lX;x!#CJuN$hDXPwMfE-{7$eQE{-UxCXMwt7;B+H{&_hh?iRbupf!2%ED1#C{`LqjK{p_C9|X1!V%^r||pA%FdN5HB{yi4Ju+ zP_Amue-hd}?7Ok;E0Mx?Im-3oxwgy4kjZR!U1M(3tC%FvsV8-e(~0;RDuAiDaHysG zGd~|M@aFfJ1YK?3*d0Z%(&-8VKcrd*M%By)QXBFA2iNkg6pD88Kf` zWBcLCIkvL!5+8O-9Xt9Dp{8PZIp*Z?@jBhc?c1B@PLqEoZ^W2y_&P=>9LmC^O*M;p z36eqba(wLqj%@6tqvVkX$6Fs=d~LOF7DwW5`~XaSx@8XF91OYz>xB{fZfk|2DZ25^ zBodw0LLZTas!*nWSs*j7dRelFv>$fCl_j8O+BW7AqyJkLm6%NVM+oRd-csF9Ek)^K zgZVhJqM$%wVQDS|4M)2kzXR&{nP~1>wGT=IXeNCw@g3R=UsLD#vmc}{jZGfUc3K$i zfo_jCE9K}I zh!9a_?mMb7vxYQGpK@B=ZFUXoYlA!i+SRTI` z&zGBz>wqVImqP7^qYWxQb)JQEbyZVy~L_QwNN=7!g$92vNjgZAXbudLeG`ntD^G$rX zmcS$k!K$(F>`nuw8P|TxEKp@^HL2AhX(79FFIk_Pd5x+JnCNY9T-0*AQ4Yem?Hud< z`Mr&RXThcY%VGG*S>6R&mq(k|K&+CZ3m=O$f`{?#(%w!dKXKHg)5OSU%++U)))0lP zRPyHOqYrCXcRw}sq(QmUc3I+*rYW3Y8}#xFzbQ$vf#)`YmAeb4DChhTZDO?1oD2N1 ztfZ;pUipZ7!m~VF+_OmU4*rb!*ey6)7vUNqj8moaON2f73(+siVBT@2kmUP|oQF1= z8#<3joakRprmZJ8gs9MaN)e z(u-M3=2uTvNdNZ9_ zP+4>oR_a5=Co5DDhy*!!NUkTbapUFa5_z}zxD?E-`D?$=hYq_Rcgu@ZZbC~wmgoAd z4)}57*HOeR5vMGf%w!hZ`9(NMeM(q~5y8gw&cX3GaY$4aX1R0&Hm=4o8o%4w&a)h| zkFW-lc&9$J-Hnwz;k!w(hVgEJf-IVS4oJJLRf4 zD}mlsmZ363REbeS(T$JcNZ4k><}9BVaR%|6pv 
zpI1<@e1opjAmruAcKcFhg|R}MT}`mTILCnZP^BU4XX$T~kq1v&{5Yvx`@<)2V;-1i zghzv*&z#9YvD9QK8e+PnpYSs*P12Ts_Hfzoc40g)B4@?vQ#ISaM7O}bxLttJN6B>3 zYa|$}P^WA0B%^$-cghFf+3DGf-KA+0nM8y_pyoxEMhql%MgDpPHi+D_i$VJFM|QJf zUc{FaPRVbqI*tjr=;aGxYP2dH1U_q)iPq^s1Jq;vPj8d%HB`(p+}Bcxn6xtka=J)& zOq6%GsET*~zHgH`!_#tnW4-LRP+%Q?-7%?d3hBZ8FJfY9#c#R+Lh#|9637xOWnK}J zceIDKHfQ?jRb4^2hF5D4)$7fjEVaRUBal6qDNqf@X}-9`w?WTtkcyNA@)n6wFD9mL zJrEqKAPUeDu!V|ypGDHs@II&}wpw;e3}@kU^{~Vt#V|jWgg)bgv!1GUNOf&~48`Gh zg$V`>*&Te}uxTuC0iU(c&wO!a{@B*|aMOlJjodQF9&diQ35h`SG3gA^8S~-ebCkz= zyBPz{ORx|yw{Dh#KFfi;OLRIi>qE2JwYkx@kngufmnIz`=#3{EN*) zF`bgK(B*khpZrp_)iMP2IxAjYbR1Dus@Qggg+5{!O;jKJ6{PZQb_%JY0DVN!f)T&+ zXLGN`Y?RMJ;$mbMZx(9Z$9VT6sSfmAw@C92uepmG+KTSk2C2tcv7^piKXT1ei#l#{ zlO`m!?pn`TU1j41sAgPXjz9Z<-RhGC7MISps zyP)L{#(Et1a$KxXx4ZleJ?wln;F*TSvd05oBIP7%IDSpAI=5^(PhS3d!j=z6wl{cQ zmx%nd;S}@PW60%sXro;_K9uV4(i1o^6}7nOF*d=z!Qt~-TTZ~(t7IYLnsl;C_kpd} z!76Q_Fw<;kBnOu&N0G0P6@AiRJ*>E|^m=bmIr3y@Ab{)JAc`rR9aJ_6=4=viE~Z8= zri;dx@ug#+xltN=6Kexq$}B~6w{$0dPsYIUG-da!ov*0}2%Ww=!mMdZXU{Q-G`jR~ z3M^yPcjbr@tXc4s1)~)|bAlppliMpTh^dK~PZ=3ttGyHka? z$_ecz5M7rLn_Z!`nIN)QA&RtH(6X0>w>X8jT7>o8@N9EUH)7r(h4tMRP8J^5nIUw! 
zAZ)O}=yAYyyEp?5;wM8l{SmT~aOXDJAapq)yzQXdEf_p?lB#}zT3vh`|8F20|KJ?h zS=gEXD?$4^LXnA_TfbZak8(EsI9v+16M5)MweIs!`5DC16VopvBG>->KA>U-q%Kw3~p>*iD8f* zti+$gad34j2(Ak6`11RY%=~*FTdFd>;kG}R7C^u$!P0znTW^CWa2MM`<8@}6)LHqO z`sBFQ=o^Cq%v&eMvbBd<(Vt@>jF-vZ+t0&{du2!FW{5;6HcMP9Y%o*|)kbe1SGd+Q zT2}Uckv3bvbZmA{N^9G*51br@Bp7*Cafx$v4E)bk|1&?o=ZuN@zi>K#mgb*%{MVxY zYw`YC%)jsm|NUD3fh7FS+W(D6fYqX9WME?l(6TeL0~p!fQ38yNY)tQX{Qn-3!1-SF z-yjL^5c+>X5@6Z?!5*l-tH8U4{EbNfu>M9a{Dnzie{cR@m;_oD&i8_UffCqQ{vDJ6 z`v;Kn8`l4i&i^6&GyH$l=uZa5DX2@;_h_IyB*<q&|RjOtQC0$zMorHzJ z-NuQ1+Xa50pqS+f&nSeNfrGXw)ZlO-%1e-d?um!;`z>t|Ffoba`8aD&UYt?TXMV#I zji2w&Z`*~heXFe8>KaVMGF^z}%#Yuw+VEIMB-^$BdeJ#iQFXG>+GMlds`9#VT5r9w zCgE&B9_CM-M{6n3mCyw8Hjk4>E9Oz|t5DWdKP<-WgBg22%t~v?j%lsNAL})+V%e1X zWHg9W6OoRc&nPUr31-UbXHdT^z13U=`tf-x^sv?J%RfBAD=Hh7a7cXtZMN z$-L@3y$r9Dp8=sLsLb^7qc}t-?`1q=ansAN(c=M)5VQCtJw$c7moXreQ`oz8;9^`N zP+40su2Lz!9^DDetVAu{h^@P)NKT>SW0sOgYF`P#@UrBuphLa?qgOGWTWk z2LmBR$1>mp8smf315R$gZ0e7ouwT|Yre{h>P!BLuU@w@VZzVoJ0_Vf;_b1c(3@iQ) z(TFtxXTCO9HYfk(%<2A3@uA?7Q>GmS_5O!00~X zA~TeEG!+niFGUXOSDELu!lj88dUTcBDLjL8Y-sM6l2H$s{;m>>%(_0|UpjM0 zVg<%UDRt~=PL@Zeh9SjlMm2;0>rXTDX_z!4rYK#Vuw|+)4?*Tk$T<4b&dX?Do$B7? 
zhp^FwM%=hx9l3 zL|HFx8&$jf>CJjSyaR2wwfdERs5(-;a2Zi~#;1hqKDm)tv&0RJ&sOMVVk(rYsgez3 z7OSZqxzE#i2USu?eb5Va;LsD;70ht>VN3;T-&{-vyVknlw*KHxWt>h_d#=HkG-}w= zk~|6%@x*jC%pp9GgPLA87gJlcTy-DBGn@rTZ9jwXAY$C0CSMNg!?abomw3=S)8Vz; zV7%s+@m%d|T1s#AV_(K`?_yuF9HOg0kQvZ5tvVk2HI;f=db?ylmamp@)=cT4l5Egt z1cM9KF?6XH@uK>?G@&K!zafFH}?F74A7zPiT92z85JJP2MI*aV(&$*y~@O%q+nPo@6$C;*_znpvx zkQl349`haZ$AFxhxpSprwRLZt;K0_?Mn*wayDx(zl*(mgTF~$&OAD8DI(Vlmt zYdbc$sVWwSeK=obg;%}ed}l;io6E84YaJ;Df9W+2(n6MqG-u#BTzm);GsJxi|efcJ?6K5uML@ zvblXD9qD4D;HpsvUHD~hz*-UdMD;T6W!(|yK5&7K^bA`UyXC9{!laf#9%Nvgc&|#J z+19&}e<(mW_@4o&Lt_!Vyv{Tpa8G+(b!fh~`bF3EN`rTPdSS1v!vOHZo`t-)ZH-R3 zfzHu83F_nw3gRq@Eqv;r(CSwOSF0`45V5V2jHIBg$eLW3;+e9E>1z~h1z*t2ZNORr ztB~oouAYOnKtdO^6i}FwlIKj9hG)x?5#Wr?6#r?3tfYWmhinI{fh2|D7{HtnC6L-* zPQe*3Z}>4{<`;rQqF+Lu%i`#}5 zM^mGC%v2&LC5euJV@*GSZ|=e^zVwln?l}`*^&)*Q{Rrn0=Md*K-pj}nU$18JocfN_ zlasGZ?k1L=Bfvc9UU`#2kJ_-i@U{WV!b>qO*=%;+otE5H^jk#DYVVdsJM-{2!MgW1 zHgl>?`q6SpsF8Ny<*HJeR0)ooY1jJAI|sN}f#wpvKK2sv-0`v&&|mD?_;KMaG3Guf z6#q2%QjcCH-t;hSVBPv{jgw3Ie&wt`O+2+K%cB8S%rYI`)K~{)@I$?%-#1nK(77yw zGOAqA5bzZNK$@{O7KyYA&!Bo5oK-)NE#A`^$Ok-yrE60NVCx$y%TdlpWhH9+Vx&E&>z}>Wopp z3N#aHjjB5X>q6?_Nc)0$O9=b`n_&{93v?c8j6G+-R}&lzY7WaT7Nif((eiu*X_-Kq zsY@595dyVc2&W|H8X3E8s+#h4xKt4)D^fSF*2$b_2(u7KhZdoEg4gjvqeHNiF>@*c zZVR)4(9&0z3>ZaVByx&IhM$|unA0h#`WBrwlvpbJ%5vWyV`3|Ho`>R;p<9IS9(j@f zD9sz5Un?2$sOJ+$4;9a!&DKr`f_KAB(+1c1Y%+Ic>PkK+h73Sr`4(Gh$VJsPFXX zd&284<&D(-Jep)$K!0LM{WMv4+!HZ(Hb>X@wt4Jeaj@QO(1PWjX&O8-=M7`S6(4cT z%f?QOrTvr8tqaBDh+>_2NdJS5XjA~lA|4+c%pb#A~db5iTG@ z2vQy3mboSU!eY;+l@_-Fz*K2L8+b{sIMSR7(n^P1^UI$~7Ll z8wl{xnVEho$5h?z#%`-MdxSb-&!C=l-<Kt~gqN_ONS#O1g;wBRC8V8RW6ogPM@l?~CPXGMK9eA@Z( zCT&9Lw#22!k=(JyXL3kD{X7?HUzT{%DIM@@d zDbqiYs5U!E&_k4BssI8}&QSmY;z@D2rB$~|H4nDml?R|5Ik43c`sq!|t`43(66zsP z9C%eMcFTSw0QZEgFXB_rW>q@|AERCkpT@6<10NxY^)UySd_JC*H)Ln3y5qtu_J^lev=e%z%02&IaH)SrapbK+CK7&=SXk5Hl+eJg#D zacwsz*5^}IJ<5l6uyw9NI^^g@N(UVtM$w%jkTtEqW^W!Lsgk98X|7xVk|Ej;?n&iR<~`=fT< z*830iDE?uegshrvA zJSE1j_os47irM=@c=K0FIuq4;>1>5;=A5#8k 
zg~iQZB2zQDtK7(Q+AEKsw=d%UoKuV4EA*(XD3Wi@>Z1)AI@;{Xlf<_^eR{%o;x9#2 zoqe8SM;>kR_$RS!sWMCN#{$+*rI{ShRpYK!nAYEzywa@g@aDwlMTzWMFXZ?SAF8^{ z6p$YJM{=&y)2|jQkFh&j*D6SBC z5-t%oVf9-B!M@|IXO}9nX3F(fKbp~g^O*F!V#&J66IB*Ab+S)dW6KfSu;{)#i^zH< zCHZwn1HzTwy$t5sZ(CR15}x*_?b+22WlQBAp6C~O_~Lu#hgI)IA6}L8XLq$=n;+iI zVa_L;*xUcVPh!4o(0;+7{{o7!#u`hq_ggRVm2`i2)oRD}CG91D`$aBg@ALOq|2IzY z?u3se0UqXup7Sa#IqTg1<-by9G@I*>UGMuoEbF;nuu1e@-X-Nb-;`Y|J_Bu=2DEK0 z$hIF%n1R4}Vscm38Oin^X1gXJha&nGdI@>TQ6xq|(!?}1Zbk62b9;UT|A>4W&++p=Q-j`hCM<)Uu-%0w zmX?UoPUI2p%o5<%D2?L0#O%~uAdXK-OxFbN5-iG3DNP2RWTlatnCu^{0NW^NXrO0k zs%NMWqv2GVnFHL+Xz65O5u>RPSejT=Ql$Vqcm#ZgNkxe!m%cOb%q!r&KLw5SqQruX zvdk(4BRyjaJp-WeU@73nLl@9V9U3lHMh1qKhK7a~z+GFGre;wF2I@crROg$Y0#d?- zTM1$iv!W<94Y)=gv8KycD3sd0sK2&vJ9;%q3u@P_^8mgFqp#^Y@ z8-^Hg&=+0I66kw0F%vUW40R^vn0if3%z%jp1Q1~b^e(y>(0#zMXH<2D#^#p5$qp2~ zhQ=lsVE|NTj-k%P03%$0@n#I%F$goSq$m-%xvmI&{915kRVpyf74(Dh^MRXUK^tFz z6Dpp0Y5Bk%vJkclaF48wX_7@^a#E^+rGZ(ZsZnCGp?Q*_v7v#5MXE(gim73m9T#CG akS&bGCBUQ(PHcvj#^#1xs;aL3Zd?Gms0HN! 
literal 0 HcmV?d00001 diff --git a/snakemake/exceptions.py b/snakemake/exceptions.py index e8752ce4d..288bec9c3 100644 --- a/snakemake/exceptions.py +++ b/snakemake/exceptions.py @@ -466,6 +466,11 @@ def __init__(self, msg, lineno=None, snakefile=None): super().__init__(msg, lineno=lineno, snakefile=snakefile) +class ZenodoFileException(RuleException): + def __init__(self, msg, lineno=None, snakefile=None): + super().__init__(msg, lineno=lineno, snakefile=snakefile) + + class ClusterJobException(RuleException): def __init__(self, job_info, jobid): super().__init__( diff --git a/snakemake/remote/zenodo.py b/snakemake/remote/zenodo.py new file mode 100644 index 000000000..65f97a5b6 --- /dev/null +++ b/snakemake/remote/zenodo.py @@ -0,0 +1,187 @@ +__author__ = "Taavi Päll" +__copyright__ = "Copyright 2019, Taavi Päll" +__email__ = "tapa741@gmail.com" +__license__ = "MIT" + +import os +import hashlib +from collections import namedtuple +import requests +from requests.exceptions import HTTPError +from snakemake.remote import AbstractRemoteObject, AbstractRemoteProvider +from snakemake.exceptions import ZenodoFileException, WorkflowError +from snakemake.common import lazy_property + + +ZenFileInfo = namedtuple("ZenFileInfo", ["checksum", "filesize", "id", "download"]) + + +class RemoteProvider(AbstractRemoteProvider): + def __init__(self, *args, stay_on_remote=False, **kwargs): + super(RemoteProvider, self).__init__( + *args, stay_on_remote=stay_on_remote, **kwargs + ) + self._zen = ZENHelper(*args, **kwargs) + + def remote_interface(self): + return self._zen + + @property + def default_protocol(self): + return "https://" + + @property + def available_protocols(self): + return ["http://", "https://"] + + +class RemoteObject(AbstractRemoteObject): + def __init__( + self, *args, keep_local=False, stay_on_remote=False, provider=None, **kwargs + ): + super(RemoteObject, self).__init__( + *args, + keep_local=keep_local, + stay_on_remote=stay_on_remote, + provider=provider, + 
**kwargs + ) + if provider: + self._zen = provider.remote_interface() + else: + self._zen = ZENHelper(*args, **kwargs) + + # === Implementations of abstract class members === + def _stats(self): + return self._zen.get_files()[os.path.basename(self.local_file())] + + def exists(self): + return os.path.basename(self.local_file()) in self._zen.get_files() + + def size(self): + if self.exists(): + return self._stats().filesize + else: + return self._iofile.size_local + + def mtime(self): + # There is no mtime info provided by Zenodo. + # Hence, the files are always considered to be "ancient". + return 0 + + def download(self): + stats = self._stats() + download_url = stats.download + r = self._zen._api_request(download_url) + + local_md5 = hashlib.md5() + + # Download file. + with open(self.local_file(), "wb") as rf: + for chunk in r.iter_content(chunk_size=1024 * 1024 * 10): + local_md5.update(chunk) + rf.write(chunk) + local_md5 = local_md5.hexdigest() + + if local_md5 != stats.checksum: + raise ZenodoFileException( + "File checksums do not match for remote file id: {}".format(stats.id) + ) + + def upload(self): + with open(self.local_file(), "rb") as lf: + self._zen._api_request( + self._zen.bucket + "/{}".format(os.path.basename(self.remote_file())), + method="PUT", + data=lf, + ) + + @property + def list(self): + return [i for i in self._zen.get_files()] + + @property + def name(self): + return self.local_file() + + +class ZENHelper(object): + def __init__(self, *args, **kwargs): + + try: + self._access_token = kwargs.pop("access_token") + except KeyError: + raise WorkflowError( + "Zenodo personal access token must be passed in as 'access_token' argument.\n" + "Separate registration and access token is needed for Zenodo sandbox " + "environment at https://sandbox.zenodo.org." 
+ ) + + if "sandbox" in kwargs: + self._sandbox = kwargs.pop("sandbox") + else: + self._sandbox = False + + if self._sandbox: + self._baseurl = "https://sandbox.zenodo.org" + else: + self._baseurl = "https://zenodo.org" + + if "deposition" in kwargs: + self.deposition = kwargs.pop("deposition") + self.bucket = self.get_bucket() + else: + # Creating a new deposition, as deposition id was not supplied. + self.deposition, self.bucket = self.create_deposition().values() + + def _api_request( + self, url, method="GET", data=None, headers={}, files=None, json=False + ): + + # Create a session with a hook to raise error on bad request. + session = requests.Session() + session.hooks = {"response": lambda r, *args, **kwargs: r.raise_for_status()} + session.headers["Authorization"] = "Bearer {}".format(self._access_token) + session.headers.update(headers) + + # Run query. + try: + r = session.request(method=method, url=url, data=data, files=files) + if json: + msg = r.json() + return msg + else: + return r + except HTTPError as e: + raise WorkflowError("Failed to connect to zenodo", e) + + def create_deposition(self): + resp = self._api_request( + method="POST", + url=self._baseurl + "/api/deposit/depositions", + headers={"Content-Type": "application/json"}, + data="{}", + json=True, + ) + return {"id": resp["id"], "bucket": resp["links"]["bucket"]} + + def get_bucket(self): + resp = self._api_request( + self._baseurl + "/api/deposit/depositions/{}".format(self.deposition), + headers={"Content-Type": "application/json"}, + json=True, + ) + return resp["links"]["bucket"] + + def get_files(self): + files = self._api_request( + self._baseurl + "/api/deposit/depositions/{}/files".format(self.deposition), + headers={"Content-Type": "application/json"}, + json=True, + ) + return { + os.path.basename(f["filename"]): ZenFileInfo( + f["checksum"], int(f["filesize"]), f["id"], f["links"]["download"] + ) + for f in files + } diff --git a/tests/common.py b/tests/common.py index 
c0a7b0836..8539ec876 100644 --- a/tests/common.py +++ b/tests/common.py @@ -54,6 +54,10 @@ def has_gcloud_service_key(): return "GCP_AVAILABLE" in os.environ +def has_zenodo_token(): + return "ZENODO_SANDBOX_PAT" in os.environ + + gcloud = pytest.mark.skipif( not is_connected() or not has_gcloud_service_key(), reason="Skipping GCLOUD tests because not on " @@ -66,6 +70,10 @@ def has_gcloud_service_key(): ci = pytest.mark.skipif(not is_ci(), reason="not in CI") not_ci = pytest.mark.skipif(is_ci(), reason="skipped in CI") +zenodo = pytest.mark.skipif( + not has_zenodo_token(), reason="no ZENODO_SANDBOX_PAT provided" +) + def copy(src, dst): if os.path.isdir(src): diff --git a/tests/test_remote_zenodo/Snakefile b/tests/test_remote_zenodo/Snakefile new file mode 100644 index 000000000..910f70e62 --- /dev/null +++ b/tests/test_remote_zenodo/Snakefile @@ -0,0 +1,30 @@ +import os +from snakemake.remote.zenodo import RemoteProvider + +access_token_sandbox=os.environ["ZENODO_SANDBOX_PAT"] +zen_sandbox = RemoteProvider(access_token=access_token_sandbox, sandbox=True) + +rule all: + input: "download.txt", zen_sandbox.remote("large_upload.txt") + +rule download: + input: + zen_sandbox.remote("uploaded.txt") + output: + "download.txt" + shell: + "cp {input} {output}" + +rule upload: + input: "test.txt" + output: + zen_sandbox.remote("uploaded.txt") + shell: + "cp {input} {output}" + +try: + rule too_large_upload: + output: zen_sandbox.remote("large_upload.txt") + shell: "head -c 101000000 /dev/urandom > {output}" +except ZenodoFileException: + print("Current Zenodo stable API supports <=100MB per file.") diff --git a/tests/test_remote_zenodo/expected-results/download.txt b/tests/test_remote_zenodo/expected-results/download.txt new file mode 100644 index 000000000..db302ec1d --- /dev/null +++ b/tests/test_remote_zenodo/expected-results/download.txt @@ -0,0 +1,3 @@ +Freedom of self-doubt +6 p.m. 
+Rising \ No newline at end of file diff --git a/tests/test_remote_zenodo/test.txt b/tests/test_remote_zenodo/test.txt new file mode 100644 index 000000000..db302ec1d --- /dev/null +++ b/tests/test_remote_zenodo/test.txt @@ -0,0 +1,3 @@ +Freedom of self-doubt +6 p.m. +Rising \ No newline at end of file diff --git a/tests/tests.py b/tests/tests.py index 75e9470f5..63abd0edc 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -1201,6 +1201,12 @@ def test_output_file_cache_remote(): ) +@connected +@zenodo +def test_remote_zenodo(): + run(dpath("test_remote_zenodo")) + + def test_multiext(): run(dpath("test_multiext")) From 1b164fda59d66e70cfe1b38017e68042acc32f36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20K=C3=B6ster?= Date: Fri, 4 Mar 2022 08:56:03 +0100 Subject: [PATCH 2/2] add retry handling, fix code issues --- snakemake/remote/zenodo.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/snakemake/remote/zenodo.py b/snakemake/remote/zenodo.py index 65f97a5b6..fb71ed127 100644 --- a/snakemake/remote/zenodo.py +++ b/snakemake/remote/zenodo.py @@ -8,7 +8,11 @@ from collections import namedtuple import requests from requests.exceptions import HTTPError -from snakemake.remote import AbstractRemoteObject, AbstractRemoteProvider +from snakemake.remote import ( + AbstractRemoteObject, + AbstractRemoteProvider, + AbstractRemoteRetryObject, +) from snakemake.exceptions import ZenodoFileException, WorkflowError from snakemake.common import lazy_property @@ -32,10 +36,10 @@ def default_protocol(self): @property def available_protocols(self): - return ["http://", "https://"] + return ["https://"] -class RemoteObject(AbstractRemoteObject): +class RemoteObject(AbstractRemoteRetryObject): def __init__( self, *args, keep_local=False, stay_on_remote=False, provider=None, **kwargs ): @@ -69,7 +73,7 @@ def mtime(self): # Hence, the files are always considered to be "ancient". 
return 0 - def download(self): + def _download(self): stats = self._stats() download_url = stats.download r = self._zen._api_request(download_url) @@ -88,7 +92,7 @@ def download(self): "File checksums do not match for remote file id: {}".format(stats.id) ) - def upload(self): + def _upload(self): with open(self.local_file(), "rb") as lf: self._zen._api_request( self._zen.bucket + "/{}".format(os.path.basename(self.remote_file())),