From 6b0d80507c9726c2a59afe5dfa87d6c93cc49145 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 6 Jun 2023 09:47:32 +0000 Subject: [PATCH 001/651] Add datatypes for Larch XAS tools --- .../config/sample/datatypes_conf.xml.sample | 9 + lib/galaxy/datatypes/larch.py | 204 ++++++++++++++++++ lib/galaxy/datatypes/test/bad_atoms.inp | 44 ++++ lib/galaxy/datatypes/test/bad_potentials.inp | 24 +++ lib/galaxy/datatypes/test/no_pymatgen.inp | 51 +++++ lib/galaxy/datatypes/test/test.inp | 52 +++++ lib/galaxy/datatypes/test/test.prj | Bin 0 -> 5372 bytes 7 files changed, 384 insertions(+) create mode 100644 lib/galaxy/datatypes/larch.py create mode 100644 lib/galaxy/datatypes/test/bad_atoms.inp create mode 100644 lib/galaxy/datatypes/test/bad_potentials.inp create mode 100644 lib/galaxy/datatypes/test/no_pymatgen.inp create mode 100644 lib/galaxy/datatypes/test/test.inp create mode 100644 lib/galaxy/datatypes/test/test.prj diff --git a/lib/galaxy/config/sample/datatypes_conf.xml.sample b/lib/galaxy/config/sample/datatypes_conf.xml.sample index 49aab75ddbbd..226251828a63 100644 --- a/lib/galaxy/config/sample/datatypes_conf.xml.sample +++ b/lib/galaxy/config/sample/datatypes_conf.xml.sample @@ -946,6 +946,13 @@ + + + + + + + @@ -1204,6 +1211,8 @@ + + diff --git a/lib/galaxy/datatypes/larch.py b/lib/galaxy/datatypes/larch.py new file mode 100644 index 000000000000..80f7fb358c07 --- /dev/null +++ b/lib/galaxy/datatypes/larch.py @@ -0,0 +1,204 @@ +from typing import TYPE_CHECKING + +from galaxy.datatypes.data import ( + get_file_peek, + Text, +) +from galaxy.datatypes.metadata import MetadataElement +from galaxy.datatypes.sniff import ( + build_sniff_from_prefix, + FilePrefix, + get_headers, +) + +if TYPE_CHECKING: + from galaxy.model import DatasetInstance + + +@build_sniff_from_prefix +class AthenaProject(Text): + """ + Athena project format + """ + + file_ext = "prj" + compressed = True + compressed_format = "gzip" + + MetadataElement( + name="atsym", + desc="Atom symbol", + readonly=True, + visible=True, + ) + MetadataElement( + name="bkg_e0", + desc="Edge energy (eV)", + readonly=True, + visible=True, + ) + MetadataElement( + name="edge", + desc="Edge", + readonly=True, + visible=True, + ) + MetadataElement( + name="npts", + desc="Number of points", + readonly=True, + visible=True, + ) + MetadataElement( + name="xmax", + desc="Maximum energy (eV)", + readonly=True, + visible=True, + ) + MetadataElement( + name="xmin", + desc="Minimum energy (eV)", + readonly=True, + visible=True, + ) + + def sniff_prefix(self, file_prefix: FilePrefix) -> bool: + """ + Try to guess if the file is an Athena project file. + + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('test.prj') + >>> AthenaProject().sniff(fname) + True + """ + + return file_prefix.startswith("# Athena project file") + + def set_meta(self, dataset: "DatasetInstance", overwrite: bool = True, **kwd) -> None: + """ + Extract metadata from @args + """ + headers = get_headers(dataset.file_name, sep=" = ", count=3, comment_designator="#") + for header in headers: + if header[0] == "@args": + args = header[1][1:-2].split(",") + break + + index = args.index("'atsym'") + dataset.metadata.atsym = args[index + 1][1:-1] + index = args.index("'bkg_e0'") + dataset.metadata.bkg_e0 = args[index + 1][1:-1] + index = args.index("'edge'") + dataset.metadata.edge = args[index + 1][1:-1] + index = args.index("'npts'") + dataset.metadata.npts = args[index + 1][1:-1] + index = args.index("'xmax'") + dataset.metadata.xmax = args[index + 1][1:-1] + index = args.index("'xmin'") + dataset.metadata.xmin = args[index + 1][1:-1] + + def set_peek(self, dataset: "DatasetInstance", **kwd) -> None: + if not dataset.dataset.purged: + dataset.peek = get_file_peek(dataset.file_name) + dataset.info = ( + f"atsym: {dataset.metadata.atsym}\n" + f"bkg_e0: {dataset.metadata.bkg_e0}\n" + f"edge: {dataset.metadata.edge}\n" + f"npts: {dataset.metadata.npts}\n" + f"xmax: {dataset.metadata.xmax}\n" + f"xmin: {dataset.metadata.xmin}" + ) + dataset.blurb = f"Athena project file of {dataset.metadata.atsym} {dataset.metadata.edge} edge" + + else: + dataset.peek = "file does not exist" + dataset.blurb = "file purged from disk" + + +@build_sniff_from_prefix +class FEFFInput(Text): + """ + FEFF input format + """ + + file_ext = "inp" + + MetadataElement( + name="title_block", + desc="Title block", + readonly=True, + visible=True, + ) + + def sniff_prefix(self, file_prefix: FilePrefix) -> bool: + """ + Try to guess if the file is an FEFF input file. + + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('test.inp') + >>> FEFFInput().sniff(fname) + True + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('no_pymatgen.inp') + >>> FEFFInput().sniff(fname) + True + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('bad_atoms.inp') + >>> FEFFInput().sniff(fname) + False + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('bad_potential.inp') + >>> FEFFInput().sniff(fname) + False + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('Si.cif') + >>> FEFFInput().sniff(fname) + False + """ + # pymatgen marks generated FEFF inputs, but user might also upload from another source + if file_prefix.startswith("* This FEFF.inp file generated by pymatgen"): + return True + + generator = file_prefix.line_iterator() + line = next(generator) + while line is not None: + if line == "POTENTIALS": + line = next(generator).strip() + if line[0] == "*": + words = line[1:].split() + if words[0] in ["potential-index", "ipot"] and words[1] == "Z": + return True + return False + + elif line == "ATOMS": + line = next(generator).strip() + if line[0] == "*": + words = line[1:].split() + if words[:4] == ["x", "y", "z", "ipot"]: + return True + return False + + line = next(generator) + + return False + + def set_meta(self, dataset: "DatasetInstance", overwrite: bool = True, **kwd) -> None: + """ + Extract metadata from TITLE + """ + title_block = "" + headers = get_headers(dataset.file_name, sep=None, comment_designator="*") + for header in headers: + if header and header[0] == "TITLE": + title_block += " ".join(header[1:]) + "\n" + + dataset.metadata.title_block = title_block + + def set_peek(self, dataset: "DatasetInstance", **kwd) -> None: + if not dataset.dataset.purged: + dataset.peek = get_file_peek(dataset.file_name) + dataset.info = dataset.metadata.title_block + + else: + dataset.peek = "file does not exist" + dataset.blurb = "file purged from disk" diff --git a/lib/galaxy/datatypes/test/bad_atoms.inp b/lib/galaxy/datatypes/test/bad_atoms.inp new file mode 100644 index 000000000000..b801beb1bd16 --- /dev/null +++ b/lib/galaxy/datatypes/test/bad_atoms.inp @@ -0,0 +1,44 @@ +TITLE comment: None given +TITLE Source: +TITLE Structure Summary: Fe2 S4 +TITLE Reduced formula: FeS2 +TITLE space group: (Pnnm), space number: (58) +TITLE abc: 3.385200 4.447400 5.428700 +TITLE angles: 90.000000 90.000000 90.000000 +TITLE sites: 6 +* 1 Fe 0.000000 0.000000 0.000000 +* 2 Fe 0.500000 0.500000 0.500000 +* 3 S 0.000000 0.199900 0.378040 +* 4 S 0.000000 0.800100 0.621960 +* 5 S 0.500000 0.699900 0.121960 +* 6 S 0.500000 0.300100 0.878040 + +ATOMS + * a b c ipot Atom Distance Number +**********- ********- ******- ****** ****** ********** ******** + 0 0 0 0 Fe 0 0 + -0.889035 -2.05227 -0 2 S 2.23656 4 + 0.889035 2.05227 0 2 S 2.23656 22 + -1.33466 0.662084 -1.6926 2 S 2.2549 6 + -1.33466 0.662084 1.6926 2 S 2.2549 8 + 1.33466 -0.662084 -1.6926 2 S 2.2549 11 + 1.33466 -0.662084 1.6926 2 S 2.2549 13 + 0 0 -3.3852 1 Fe 3.3852 18 + 0 0 3.3852 1 Fe 3.3852 19 + -0.889035 3.37643 0 2 S 3.49152 9 + 0.889035 -3.37643 -0 2 S 3.49152 15 + -3.11273 -0.662084 -1.6926 2 S 3.60449 1 + -3.11273 -0.662084 1.6926 2 S 3.60449 3 + 3.11273 0.662084 -1.6926 2 S 3.60449 16 + 3.11273 0.662084 1.6926 2 S 3.60449 20 + -2.2237 -2.71435 -1.6926 1 Fe 3.89582 2 + -2.2237 -2.71435 1.6926 1 Fe 3.89582 5 + -2.2237 2.71435 -1.6926 1 Fe 3.89582 7 + -2.2237 2.71435 1.6926 1 Fe 3.89582 10 + 2.2237 -2.71435 -1.6926 1 Fe 3.89582 12 + 2.2237 -2.71435 1.6926 1 Fe 3.89582 14 + 2.2237 2.71435 -1.6926 1 Fe 3.89582 17 + 2.2237 2.71435 1.6926 1 Fe 3.89582 21 +END + + diff --git a/lib/galaxy/datatypes/test/bad_potentials.inp b/lib/galaxy/datatypes/test/bad_potentials.inp new file mode 100644 index 000000000000..e0c3a4f13f8c --- /dev/null +++ b/lib/galaxy/datatypes/test/bad_potentials.inp @@ -0,0 +1,24 @@ +TITLE comment: None given +TITLE Source: +TITLE Structure Summary: Fe2 S4 +TITLE Reduced formula: FeS2 +TITLE space group: (Pnnm), space number: (58) +TITLE abc: 3.385200 4.447400 5.428700 +TITLE angles: 90.000000 90.000000 90.000000 +TITLE sites: 6 +* 1 Fe 0.000000 0.000000 0.000000 +* 2 Fe 0.500000 0.500000 0.500000 +* 3 S 0.000000 0.199900 0.378040 +* 4 S 0.000000 0.800100 0.621960 +* 5 S 0.500000 0.699900 0.121960 +* 6 S 0.500000 0.300100 0.878040 + +POTENTIALS + * pot Z tag lmax1 lmax2 xnatph(stoichometry) spinph +******- **- ****- ******- ******- ********************** ******** + 0 26 Fe -1 -1 0.0001 0 + 1 26 Fe -1 -1 2 0 + 2 16 S -1 -1 4 0 +END + + diff --git a/lib/galaxy/datatypes/test/no_pymatgen.inp b/lib/galaxy/datatypes/test/no_pymatgen.inp new file mode 100644 index 000000000000..d75015dee1cf --- /dev/null +++ b/lib/galaxy/datatypes/test/no_pymatgen.inp @@ -0,0 +1,51 @@ +TITLE comment: None given +TITLE Source: +TITLE Structure Summary: Fe2 S4 +TITLE Reduced formula: FeS2 +TITLE space group: (Pnnm), space number: (58) +TITLE abc: 3.385200 4.447400 5.428700 +TITLE angles: 90.000000 90.000000 90.000000 +TITLE sites: 6 +* 1 Fe 0.000000 0.000000 0.000000 +* 2 Fe 0.500000 0.500000 0.500000 +* 3 S 0.000000 0.199900 0.378040 +* 4 S 0.000000 0.800100 0.621960 +* 5 S 0.500000 0.699900 0.121960 +* 6 S 0.500000 0.300100 0.878040 + +POTENTIALS + *ipot Z tag lmax1 lmax2 xnatph(stoichometry) spinph +******- **- ****- ******- ******- ********************** ******** + 0 26 Fe -1 -1 0.0001 0 + 1 26 Fe -1 -1 2 0 + 2 16 S -1 -1 4 0 + +ATOMS + * x y z ipot Atom Distance Number +**********- ********- ******- ****** ****** ********** ******** + 0 0 0 0 Fe 0 0 + -0.889035 -2.05227 -0 2 S 2.23656 4 + 0.889035 2.05227 0 2 S 2.23656 22 + -1.33466 0.662084 -1.6926 2 S 2.2549 6 + -1.33466 0.662084 1.6926 2 S 2.2549 8 + 1.33466 -0.662084 -1.6926 2 S 2.2549 11 + 1.33466 -0.662084 1.6926 2 S 2.2549 13 + 0 0 -3.3852 1 Fe 3.3852 18 + 0 0 3.3852 1 Fe 3.3852 19 + -0.889035 3.37643 0 2 S 3.49152 9 + 0.889035 -3.37643 -0 2 S 3.49152 15 + -3.11273 -0.662084 -1.6926 2 S 3.60449 1 + -3.11273 -0.662084 1.6926 2 S 3.60449 3 + 3.11273 0.662084 -1.6926 2 S 3.60449 16 + 3.11273 0.662084 1.6926 2 S 3.60449 20 + -2.2237 -2.71435 -1.6926 1 Fe 3.89582 2 + -2.2237 -2.71435 1.6926 1 Fe 3.89582 5 + -2.2237 2.71435 -1.6926 1 Fe 3.89582 7 + -2.2237 2.71435 1.6926 1 Fe 3.89582 10 + 2.2237 -2.71435 -1.6926 1 Fe 3.89582 12 + 2.2237 -2.71435 1.6926 1 Fe 3.89582 14 + 2.2237 2.71435 -1.6926 1 Fe 3.89582 17 + 2.2237 2.71435 1.6926 1 Fe 3.89582 21 +END + + diff --git a/lib/galaxy/datatypes/test/test.inp b/lib/galaxy/datatypes/test/test.inp new file mode 100644 index 000000000000..522743703c32 --- /dev/null +++ b/lib/galaxy/datatypes/test/test.inp @@ -0,0 +1,52 @@ +* This FEFF.inp file generated by pymatgen +TITLE comment: None given +TITLE Source: +TITLE Structure Summary: Fe2 S4 +TITLE Reduced formula: FeS2 +TITLE space group: (Pnnm), space number: (58) +TITLE abc: 3.385200 4.447400 5.428700 +TITLE angles: 90.000000 90.000000 90.000000 +TITLE sites: 6 +* 1 Fe 0.000000 0.000000 0.000000 +* 2 Fe 0.500000 0.500000 0.500000 +* 3 S 0.000000 0.199900 0.378040 +* 4 S 0.000000 0.800100 0.621960 +* 5 S 0.500000 0.699900 0.121960 +* 6 S 0.500000 0.300100 0.878040 + +POTENTIALS + *ipot Z tag lmax1 lmax2 xnatph(stoichometry) spinph +******- **- ****- ******- ******- ********************** ******** + 0 26 Fe -1 -1 0.0001 0 + 1 26 Fe -1 -1 2 0 + 2 16 S -1 -1 4 0 + +ATOMS + * x y z ipot Atom Distance Number +**********- ********- ******- ****** ****** ********** ******** + 0 0 0 0 Fe 0 0 + -0.889035 -2.05227 -0 2 S 2.23656 4 + 0.889035 2.05227 0 2 S 2.23656 22 + -1.33466 0.662084 -1.6926 2 S 2.2549 6 + -1.33466 0.662084 1.6926 2 S 2.2549 8 + 1.33466 -0.662084 -1.6926 2 S 2.2549 11 + 1.33466 -0.662084 1.6926 2 S 2.2549 13 + 0 0 -3.3852 1 Fe 3.3852 18 + 0 0 3.3852 1 Fe 3.3852 19 + -0.889035 3.37643 0 2 S 3.49152 9 + 0.889035 -3.37643 -0 2 S 3.49152 15 + -3.11273 -0.662084 -1.6926 2 S 3.60449 1 + -3.11273 -0.662084 1.6926 2 S 3.60449 3 + 3.11273 0.662084 -1.6926 2 S 3.60449 16 + 3.11273 0.662084 1.6926 2 S 3.60449 20 + -2.2237 -2.71435 -1.6926 1 Fe 3.89582 2 + -2.2237 -2.71435 1.6926 1 Fe 3.89582 5 + -2.2237 2.71435 -1.6926 1 Fe 3.89582 7 + -2.2237 2.71435 1.6926 1 Fe 3.89582 10 + 2.2237 -2.71435 -1.6926 1 Fe 3.89582 12 + 2.2237 -2.71435 1.6926 1 Fe 3.89582 14 + 2.2237 2.71435 -1.6926 1 Fe 3.89582 17 + 2.2237 2.71435 1.6926 1 Fe 3.89582 21 +END + + diff --git a/lib/galaxy/datatypes/test/test.prj b/lib/galaxy/datatypes/test/test.prj new file mode 100644 index 0000000000000000000000000000000000000000..55b0926c8718d2fd52ddd2a2bf5c50da946b48dd GIT binary patch literal 5372 zcmYkxbyO1$!0z!eT6%OyjvgueD5XYsjP4Fe0THA}3Id}W=?>|ZkPba(T5 z&U^1Y_k8|&&hur8$HP0l+pq&ZdHVVCd%bZ51e=+8f*0LG<{L;GF-C9!0pBq|WLOTvp9DyZ4Z2 z$qrIa^A7Ka@6sWtgJ%DQTdS>=wYA$!q|4p4mKL{+)zj85G}(lB>v+s-oiF>u4Zl6t zH&!-0dIUUNLr!OU8fLBxJ=D7pGJOa9Nn#{-E1B+DLA_nK6@MQOFV}p7f;%6#_OvUu z9_Bp@AH6O{kvnUIBS+Hi#4&#zdW$u^&wJMQiG73e?QhQA+1GZS1UGHEGXfTF{cDarD1Xdy=?kVn_f4{T>k4dX8&G~y|#QDMJk9Q_Nl#jG~Z0#+wxGV8X-<1>+HT~G0> z%=Y0?Gke60V*nXGc_@RGz|6#sku8JvuN#-2QojomzqjYIQ^spjq3BN$2iLzqhAr=y z?Q~AlFHkE5?YU??0d>)trjQVc0>HGu(JZ6rxNDz)2|q-n>P1H{@uSkyB{7e z3G9!O9APh$$I7tHS}!9EolOMdN_>wh+G?$g7I&_;<@yAO@I3KaPd9}xNGIfRW#&Ni zGY+nz0`^$++<`^v#yrY9265#IPQXPg{00RbRvtv!m>LCpHApkAk5!mlS3*}p^&gXe z6#p^!NAn*GiF^r5wS9TKQQlErPOg7={^9mQzfxc}i*_PeZZHp=ha-8XQ$ zpe7sC{omOj`F?J(24(sG)&FzJ`4Lz9=;Xt9Yk#oHi~sLrfNgz}#GR9BtdSgU9pXF-xcOj~ny+%Mu* zL*8SP?M>lzSOjgnm9pd*qA)|;F90{+Z4y#NVb7jnMVFR7e+ye=nA4V{P*AE0FPs;N znshR7MqfvoRv1V9vL=?U9e}4>M8GH*a_a|tZL@KX$H<7Tb|FP_5q86p_oVMI^^718 zLfUWfk}8@OgdYea-NwK#X*0~QHd4ow25&la96`sF9MfV@7UpX+D-n`d?`Q*A@cd>r zR&7&qqXno*GiWr7xhDO$GDP&J6tg{zG)DN8AFs7lHi3@alB2Me(1}^U`{o2zSgQ!j znj=fN;&&V(fHj2!pdVrc*fm8HWy=nj|KT0?!-Kg6iBr&1@#0MGWPb*QePWEnaw~QV z%U&cYlB7v9z?J`Cj@Nh`g$7bZLD>KVUM#7CF825US<+yPFpi(_dGZo&o%|Fcuhzck zX^@F=>EM8}!gc%U0GTBvJ{kVYYxD?NK-UD$DZ3Syo|1!z;yaysTEyK;s~38XX9!iA z^kF?sr&Pt#T)i7CHAsHvxXZmFIDRrL(M!kHUU37+jsP@~gA%oaMBhazvQ(;5f&zVl zl*Ef$hegPWc7aS`*XM*yDstnU0(xL2LxwdYiF;c%Sz&4plKnlr)C!cLd^YtlC(mM# zX+%DG_p1Svs6YjKB;hlXR6Q}!g;K(SLOGFqN}(|i4_pv&Kp-VKP*6arg0RhrVkN;P zPQ>arLQ&##CAr0^2v@MSNLXZ1d~rUR&xEJx6;`_(+4pE~UI3NHw7$L$`~5R5v;syyKKr zz^I*{`NO5c-JhEEw+ezZeAL!0*n%BkQGM*w-#dU^~iJ=fZk zUy)0->g<13jA#D_j1(b8MWoTtx?|gnbnwYWMfw&+j$ZL~1g`(!I(Luxt-O<3Y?Z+? zN(Yn7z-IPsa>jeYB_nw9N2vn@>l^dJGM+c?XcV`6Zc7&?#Z@f>4L;V)#`6`H%TlG) zpkuozqDR300*}1b1aBiBxl-M%#xdU?n=0kT7h0sCHuMPAE_3NSUg6fXiw{`3ijcge z1t}U+(l7Gz*wNO|Zx^*5a8AtbiZ$11w8*aMqW(t?R#eu1<0ZPHK>kF`wg}yMYUVVIi3EI zygWTimCsh!RMpUC$5DTYlk?lz4c+DEI7 zkkS(r>Nj$GH7gM@nwAxdG|?uM5>Skl`$9(!o4B|lZF4|B>;|#B!i@B%vFZMTw@TxJ2GN8bu)n=MVjof%XGI$c(1YyXZ zAo$IehU)6S8A#&fiufB>5}-iESM(yT3&1mr3kdmE&9&riUAj|jpmlRvx=6k-L+pn% z>TQFb(3QW5UHOj?*k2`bp39YEOIm6JuuxD|2st?={${woA!mIeauolQh^BJa`h3mc zU@8zQQ)dmee;=&F+VxYC$`>SH-LjlxO$WE~F|p3u`P;SSPDC8$e}f;>jSuqhNg6+7(e7ol&%&WzRG4UnGQ=jb7oRJ!bO}HE% zSvig7sp_f%{PWW}=+JJ*n1}hJjEwUcuj;D`u#4JCWu|ef*%xOpMp0X~3~zKd01FPP z@0m+#8RMel;@P*NjN2zO6^dMbGZ5$UZcwASPC{ES@Kft@KCwMv<&6BP=rCDE zb01w}N=A?A8Wuaq&&k-}OyZF`>`5$Y@+odM62^U66yc+~vzh+&nyuU04TelMj{s1$ zdze3{zAv3(<}6cUdWDWwiH+dwH}g&i*9%H;jZP@>{a%J6%2Y-mR0Drfuu$;gR@5w1 zqc$pXf4l36>lu$PNGUIbhgoVreU|B)h` zh?EWkfBx-fT4Cp7pe_lfYltig38b=T`>pUEi^4Ixd>v69iLwOIPZ^&ha9_^r7n2*j z{^;GI2Q1)Uz1VfOnk{N{-3NkTX4RP*C6Z&B3`A7DYSYn1 zU*+%!C~#Fbxkm4WqMUhTGp^!r{C%u7 zWk~T+Zy-N~yTkTSolGx*DDD9w&4EPL48Ba@kqO!Ffi{lmCf>@jueo;eebY`Ug;&Se zejjcsVewjawF_Tf&;OgUAY@eWGb(W`!h)*N^oKNXfF=yWoi{u+|9mk65pR*n=^Sc- z;3rX~U_@x)2$U$ON~vLFH=hTMaWE29RYoVain^rem<#iK&cnPHc?^THscmIvj==;2 zy_=Wzsm6B64cq+eC3xf&HG7o8P6 zm)eY4g5+wW`M5%+i$z+Lm*~XoJz%7ZQPP5^TiUlcs>w9*#=R|4m9JpEXpHp=pLlr! zWoe51&xMZuCZvKr#1txmflgcR5q&LhZ!uIf5-<7n+A;%xqQg5_fO86geD6B-7my>0 z>x6EAslDrhZ+IjR3tLxVJ zsRl~^!8vCMQzf}KYA7L+|FM?`t*1f0z=(}eH^s`aX^~SjbJg4tuJ+3h3uAIum}+>- zS-{QC(bBzyH>577Si-ABnWjbc(U(h7vy_4ida6~N$EF4SIl_-`7FJqxMq{U+zTTd| z_F0_rV>u{J9Sg8M#{PzQU8G7b-e>mL8E{Pg1k~=NpgY|$9%&}<2Ln1}4MLV3l?PLuB z+nX@M{Ai%*d*?<`_TjtWX3|N`R0IS38;C+g5YmD$({pfBO_Y=75mBB14kbE@edSK( z9qA879DrZ3&f&N+J75fne0H3q7(t{fxsFoWrYXu#kvFh7`QGk=321bj9Qvg}z8NH( zQtd$!7+x2+=V3wYGL(Y=&5$Go!?9B~NHS$)UM?P!Qu_X)>jWR+E>IHI(nO>M;1 zo~e`xl%lE(USp-@_5h*0sB8vl%;Byu?An(4X@2QA2)`N3wzy3kAzmIU8X=497=e*2&^9u6X`D-WbmEG$mY`~-RjrEdz-)qO<`u6 zbTdZMZvm2S(+s;K{fW!5Gc(W3x41#4;nB3sSiwFaWz)~-@SY(rI!X61I#Qfa(ID7M z8LL)I!4otI9@vN$Olx_kK9EB5a6rngJgjQim)tt);<3zEuW_K10c|?kwEMe+eh|bD zR@M>DP8q0Wj0v`*--Z@WCA7>a(Un@^OzzfL$5MH+c{N3a9#WZHnhNL6iObU0wt?Xf z)g$GM(z1S*bMl@TCUuT=zJkFPt?yoQ%md$0P7h3{3m7^sJ)5&@XkI@wZGCOpBPzAf zAiwmj&ML~KoyZV?45FEo!9z?R{6V1g&W10EH8OO|b~ss^hs2Q#@m0tTx{y?SvlxAm z`kiFHy4uJxTN^q)uSv%#OM=Us7s$ZYdUyNd^~ul9kGQBjgE3w256y;15O>$SL80rL zdCF-^pP{prQ_mOgaxk4Y=u4k3*XFd@QVQ37o3AE%eq#8m8p5%sGKAasy)#$`GW5;y zXQ2sQslazClLNS*rr{P6x=5t&w0AJS^4d6i>DxiIeBh8tI|HQ>hpe54kbT(aFf5Ur zDm!zhGL&s4dyUqVn#E^d2lf^vwz@0x_99HWKH{cdWWzb3x4te}47A>&QYP-)tNgG$ zQN#*3(qP_E1sM(d`;&nvbuP>~tEC+Bj{ZBL)LgElXi#$cYr!VtrM4p0cFKNtXcg0J zhW!tfnmAVjv)b#gLed-CM$69V365$?M#7f4;8@icaq&O*`D{L9-!$BZlHXrlXwMrN zh2CFW2THnKW)(j@%7k`wonBw Date: Tue, 6 Jun 2023 12:54:55 +0000 Subject: [PATCH 002/651] Handle StopIteration in FEFFInput sniffer --- lib/galaxy/datatypes/larch.py | 59 +++++++++++-------- .../test/{no_pymatgen.inp => feff_atoms.inp} | 7 --- .../{bad_atoms.inp => feff_bad_atoms.inp} | 0 ...potentials.inp => feff_bad_potentials.inp} | 0 lib/galaxy/datatypes/test/feff_potentials.inp | 24 ++++++++ .../test/{test.inp => feff_pymatgen.inp} | 0 6 files changed, 60 insertions(+), 30 deletions(-) rename lib/galaxy/datatypes/test/{no_pymatgen.inp => feff_atoms.inp} (85%) rename lib/galaxy/datatypes/test/{bad_atoms.inp => feff_bad_atoms.inp} (100%) rename lib/galaxy/datatypes/test/{bad_potentials.inp => feff_bad_potentials.inp} (100%) create mode 100644 lib/galaxy/datatypes/test/feff_potentials.inp rename lib/galaxy/datatypes/test/{test.inp => feff_pymatgen.inp} (100%) diff --git a/lib/galaxy/datatypes/larch.py b/lib/galaxy/datatypes/larch.py index 80f7fb358c07..43a738e6ba9b 100644 --- a/lib/galaxy/datatypes/larch.py +++ b/lib/galaxy/datatypes/larch.py @@ -70,6 +70,10 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: >>> fname = get_test_fname('test.prj') >>> AthenaProject().sniff(fname) True + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('Si.cif') + >>> FEFFInput().sniff(fname) + False """ return file_prefix.startswith("# Athena project file") @@ -135,19 +139,23 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: Try to guess if the file is an FEFF input file. >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('test.inp') + >>> fname = get_test_fname('feff_pymatgen.inp') + >>> FEFFInput().sniff(fname) + True + >>> from galaxy.datatypes.sniff import get_test_fname + >>> fname = get_test_fname('feff_atoms.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('no_pymatgen.inp') + >>> fname = get_test_fname('feff_potentials.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('bad_atoms.inp') + >>> fname = get_test_fname('feff_bad_atoms.inp') >>> FEFFInput().sniff(fname) False >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('bad_potential.inp') + >>> fname = get_test_fname('feff_bad_potentials.inp') >>> FEFFInput().sniff(fname) False >>> from galaxy.datatypes.sniff import get_test_fname @@ -160,25 +168,30 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: return True generator = file_prefix.line_iterator() - line = next(generator) - while line is not None: - if line == "POTENTIALS": - line = next(generator).strip() - if line[0] == "*": - words = line[1:].split() - if words[0] in ["potential-index", "ipot"] and words[1] == "Z": - return True - return False - - elif line == "ATOMS": - line = next(generator).strip() - if line[0] == "*": - words = line[1:].split() - if words[:4] == ["x", "y", "z", "ipot"]: - return True - return False - - line = next(generator) + try: + line = next(generator).strip() + while line is not None: + if line == "POTENTIALS": + line = next(generator).strip() + if line[0] == "*": + words = line[1:].split() + if (words[0] in ["potential-index", "ipot"]) and (words[1] == "Z"): + return True + return False + + elif line == "ATOMS": + line = next(generator).strip() + if line[0] == "*": + words = line[1:].split() + if words[:4] == ["x", "y", "z", "ipot"]: + return True + return False + + else: + line = next(generator).strip() + + except StopIteration: + return False return False diff --git a/lib/galaxy/datatypes/test/no_pymatgen.inp b/lib/galaxy/datatypes/test/feff_atoms.inp similarity index 85% rename from lib/galaxy/datatypes/test/no_pymatgen.inp rename to lib/galaxy/datatypes/test/feff_atoms.inp index d75015dee1cf..6deb2fb4635e 100644 --- a/lib/galaxy/datatypes/test/no_pymatgen.inp +++ b/lib/galaxy/datatypes/test/feff_atoms.inp @@ -13,13 +13,6 @@ TITLE sites: 6 * 5 S 0.500000 0.699900 0.121960 * 6 S 0.500000 0.300100 0.878040 -POTENTIALS - *ipot Z tag lmax1 lmax2 xnatph(stoichometry) spinph -******- **- ****- ******- ******- ********************** ******** - 0 26 Fe -1 -1 0.0001 0 - 1 26 Fe -1 -1 2 0 - 2 16 S -1 -1 4 0 - ATOMS * x y z ipot Atom Distance Number **********- ********- ******- ****** ****** ********** ******** diff --git a/lib/galaxy/datatypes/test/bad_atoms.inp b/lib/galaxy/datatypes/test/feff_bad_atoms.inp similarity index 100% rename from lib/galaxy/datatypes/test/bad_atoms.inp rename to lib/galaxy/datatypes/test/feff_bad_atoms.inp diff --git a/lib/galaxy/datatypes/test/bad_potentials.inp b/lib/galaxy/datatypes/test/feff_bad_potentials.inp similarity index 100% rename from lib/galaxy/datatypes/test/bad_potentials.inp rename to lib/galaxy/datatypes/test/feff_bad_potentials.inp diff --git a/lib/galaxy/datatypes/test/feff_potentials.inp b/lib/galaxy/datatypes/test/feff_potentials.inp new file mode 100644 index 000000000000..cc96ad4cd112 --- /dev/null +++ b/lib/galaxy/datatypes/test/feff_potentials.inp @@ -0,0 +1,24 @@ +TITLE comment: None given +TITLE Source: +TITLE Structure Summary: Fe2 S4 +TITLE Reduced formula: FeS2 +TITLE space group: (Pnnm), space number: (58) +TITLE abc: 3.385200 4.447400 5.428700 +TITLE angles: 90.000000 90.000000 90.000000 +TITLE sites: 6 +* 1 Fe 0.000000 0.000000 0.000000 +* 2 Fe 0.500000 0.500000 0.500000 +* 3 S 0.000000 0.199900 0.378040 +* 4 S 0.000000 0.800100 0.621960 +* 5 S 0.500000 0.699900 0.121960 +* 6 S 0.500000 0.300100 0.878040 + +POTENTIALS + *ipot Z tag lmax1 lmax2 xnatph(stoichometry) spinph +******- **- ****- ******- ******- ********************** ******** + 0 26 Fe -1 -1 0.0001 0 + 1 26 Fe -1 -1 2 0 + 2 16 S -1 -1 4 0 +END + + diff --git a/lib/galaxy/datatypes/test/test.inp b/lib/galaxy/datatypes/test/feff_pymatgen.inp similarity index 100% rename from lib/galaxy/datatypes/test/test.inp rename to lib/galaxy/datatypes/test/feff_pymatgen.inp From 2efae4c392fb98b2ea512fcc303464e34940fe8a Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Tue, 6 Jun 2023 15:26:36 +0000 Subject: [PATCH 003/651] Rename test files to avoid guessing feff datatype --- lib/galaxy/datatypes/larch.py | 10 +++++----- .../datatypes/test/{feff_atoms.inp => inp_atoms.inp} | 0 .../test/{feff_bad_atoms.inp => inp_bad_atoms.inp} | 0 ...{feff_bad_potentials.inp => inp_bad_potentials.inp} | 0 .../test/{feff_potentials.inp => inp_potentials.inp} | 0 .../test/{feff_pymatgen.inp => inp_pymatgen.inp} | 0 6 files changed, 5 insertions(+), 5 deletions(-) rename lib/galaxy/datatypes/test/{feff_atoms.inp => inp_atoms.inp} (100%) rename lib/galaxy/datatypes/test/{feff_bad_atoms.inp => inp_bad_atoms.inp} (100%) rename lib/galaxy/datatypes/test/{feff_bad_potentials.inp => inp_bad_potentials.inp} (100%) rename lib/galaxy/datatypes/test/{feff_potentials.inp => inp_potentials.inp} (100%) rename lib/galaxy/datatypes/test/{feff_pymatgen.inp => inp_pymatgen.inp} (100%) diff --git a/lib/galaxy/datatypes/larch.py b/lib/galaxy/datatypes/larch.py index 43a738e6ba9b..6494776e8cc8 100644 --- a/lib/galaxy/datatypes/larch.py +++ b/lib/galaxy/datatypes/larch.py @@ -139,23 +139,23 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: Try to guess if the file is an FEFF input file. >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('feff_pymatgen.inp') + >>> fname = get_test_fname('inp_pymatgen.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('feff_atoms.inp') + >>> fname = get_test_fname('inp_atoms.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('feff_potentials.inp') + >>> fname = get_test_fname('inp_potentials.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('feff_bad_atoms.inp') + >>> fname = get_test_fname('inp_bad_atoms.inp') >>> FEFFInput().sniff(fname) False >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('feff_bad_potentials.inp') + >>> fname = get_test_fname('inp_bad_potentials.inp') >>> FEFFInput().sniff(fname) False >>> from galaxy.datatypes.sniff import get_test_fname diff --git a/lib/galaxy/datatypes/test/feff_atoms.inp b/lib/galaxy/datatypes/test/inp_atoms.inp similarity index 100% rename from lib/galaxy/datatypes/test/feff_atoms.inp rename to lib/galaxy/datatypes/test/inp_atoms.inp diff --git a/lib/galaxy/datatypes/test/feff_bad_atoms.inp b/lib/galaxy/datatypes/test/inp_bad_atoms.inp similarity index 100% rename from lib/galaxy/datatypes/test/feff_bad_atoms.inp rename to lib/galaxy/datatypes/test/inp_bad_atoms.inp diff --git a/lib/galaxy/datatypes/test/feff_bad_potentials.inp b/lib/galaxy/datatypes/test/inp_bad_potentials.inp similarity index 100% rename from lib/galaxy/datatypes/test/feff_bad_potentials.inp rename to lib/galaxy/datatypes/test/inp_bad_potentials.inp diff --git a/lib/galaxy/datatypes/test/feff_potentials.inp b/lib/galaxy/datatypes/test/inp_potentials.inp similarity index 100% rename from lib/galaxy/datatypes/test/feff_potentials.inp rename to lib/galaxy/datatypes/test/inp_potentials.inp diff --git a/lib/galaxy/datatypes/test/feff_pymatgen.inp b/lib/galaxy/datatypes/test/inp_pymatgen.inp similarity index 100% rename from lib/galaxy/datatypes/test/feff_pymatgen.inp rename to lib/galaxy/datatypes/test/inp_pymatgen.inp From 44bf8f5007ac7ba0f9764822e6186a483477ef7a Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 7 Jun 2023 08:39:12 +0000 Subject: [PATCH 004/651] Rename bad test files to avoid guessing inp datatype --- lib/galaxy/datatypes/test/{inp_atoms.inp => larch_atoms.inp} | 0 .../datatypes/test/{inp_bad_atoms.inp => larch_bad_atoms.txt} | 0 .../test/{inp_bad_potentials.inp => larch_bad_potentials.txt} | 0 .../datatypes/test/{inp_potentials.inp => larch_potentials.inp} | 0 .../datatypes/test/{inp_pymatgen.inp => larch_pymatgen.inp} | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename lib/galaxy/datatypes/test/{inp_atoms.inp => larch_atoms.inp} (100%) rename lib/galaxy/datatypes/test/{inp_bad_atoms.inp => larch_bad_atoms.txt} (100%) rename lib/galaxy/datatypes/test/{inp_bad_potentials.inp => larch_bad_potentials.txt} (100%) rename lib/galaxy/datatypes/test/{inp_potentials.inp => larch_potentials.inp} (100%) rename lib/galaxy/datatypes/test/{inp_pymatgen.inp => larch_pymatgen.inp} (100%) diff --git a/lib/galaxy/datatypes/test/inp_atoms.inp b/lib/galaxy/datatypes/test/larch_atoms.inp similarity index 100% rename from lib/galaxy/datatypes/test/inp_atoms.inp rename to lib/galaxy/datatypes/test/larch_atoms.inp diff --git a/lib/galaxy/datatypes/test/inp_bad_atoms.inp b/lib/galaxy/datatypes/test/larch_bad_atoms.txt similarity index 100% rename from lib/galaxy/datatypes/test/inp_bad_atoms.inp rename to lib/galaxy/datatypes/test/larch_bad_atoms.txt diff --git a/lib/galaxy/datatypes/test/inp_bad_potentials.inp b/lib/galaxy/datatypes/test/larch_bad_potentials.txt similarity index 100% rename from lib/galaxy/datatypes/test/inp_bad_potentials.inp rename to lib/galaxy/datatypes/test/larch_bad_potentials.txt diff --git a/lib/galaxy/datatypes/test/inp_potentials.inp b/lib/galaxy/datatypes/test/larch_potentials.inp similarity index 100% rename from lib/galaxy/datatypes/test/inp_potentials.inp rename to lib/galaxy/datatypes/test/larch_potentials.inp diff --git a/lib/galaxy/datatypes/test/inp_pymatgen.inp b/lib/galaxy/datatypes/test/larch_pymatgen.inp similarity index 100% rename from lib/galaxy/datatypes/test/inp_pymatgen.inp rename to lib/galaxy/datatypes/test/larch_pymatgen.inp From fb64b7d989601a0a1446922d45e1cb33e5117118 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Wed, 7 Jun 2023 09:38:19 +0000 Subject: [PATCH 005/651] Edit test filenames in larch.py --- lib/galaxy/datatypes/larch.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/lib/galaxy/datatypes/larch.py b/lib/galaxy/datatypes/larch.py index 6494776e8cc8..28715e8695c7 100644 --- a/lib/galaxy/datatypes/larch.py +++ b/lib/galaxy/datatypes/larch.py @@ -139,23 +139,23 @@ def sniff_prefix(self, file_prefix: FilePrefix) -> bool: Try to guess if the file is an FEFF input file. >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('inp_pymatgen.inp') + >>> fname = get_test_fname('larch_pymatgen.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('inp_atoms.inp') + >>> fname = get_test_fname('larch_atoms.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('inp_potentials.inp') + >>> fname = get_test_fname('larch_potentials.inp') >>> FEFFInput().sniff(fname) True >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('inp_bad_atoms.inp') + >>> fname = get_test_fname('larch_bad_atoms.txt') >>> FEFFInput().sniff(fname) False >>> from galaxy.datatypes.sniff import get_test_fname - >>> fname = get_test_fname('inp_bad_potentials.inp') + >>> fname = get_test_fname('larch_bad_potentials.txt') >>> FEFFInput().sniff(fname) False >>> from galaxy.datatypes.sniff import get_test_fname From 73a44a6bc1598b039dd6b2e265af69e918f853f5 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Mon, 19 Dec 2022 22:36:56 +0000 Subject: [PATCH 006/651] Enables resubmissions for the k8s runner --- lib/galaxy/jobs/runners/kubernetes.py | 31 +++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 1f9e984605c3..87a28d815b8c 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -95,7 +95,8 @@ def __init__(self, app, nworkers, **kwargs): map=str, valid=lambda s: s == "$gid" or isinstance(s, int) or not s or s.isdigit(), default=None ), k8s_cleanup_job=dict(map=str, valid=lambda s: s in {"onsuccess", "always", "never"}, default="always"), - k8s_pod_retries=dict(map=int, valid=lambda x: int(x) >= 0, default=3), + k8s_pod_retries=dict(map=int, valid=lambda x: int(x) >= 0, default=1), # note that if the backOffLimit is lower, this paramer will have not effect. + k8s_job_spec_back_off_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=0), # this means that it will stop retrying after 1 failure. k8s_walltime_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=172800), k8s_unschedulable_walltime_limit=dict(map=int, valid=lambda x: not x or int(x) >= 0, default=None), k8s_interactivetools_use_ssl=dict(map=bool, default=False), @@ -323,6 +324,7 @@ def __get_k8s_job_spec(self, ajs): job_ttl = self.runner_params["k8s_job_ttl_secs_after_finished"] if self.runner_params["k8s_cleanup_job"] != "never" and job_ttl is not None: k8s_job_spec["ttlSecondsAfterFinished"] = job_ttl + k8s_job_spec["backoffLimit"] = self.runner_params["k8s_job_spec_back_off_limit"] return k8s_job_spec def __force_label_conformity(self, value): @@ -526,7 +528,7 @@ def __get_k8s_containers(self, ajs): # command line execution, separated by ;, which is what Galaxy does # to assemble the command. "command": [ajs.job_wrapper.shell], - "args": ["-c", ajs.job_file], + "args": ["-c", f"{ajs.job_file}; exit $(cat {ajs.exit_code_file})"], "workingDir": ajs.job_wrapper.working_directory, "volumeMounts": deduplicate_entries(mounts), } @@ -715,6 +717,10 @@ def check_watched_item(self, job_state): else: max_pod_retries = 1 + # make sure that we don't have any conditions by which the runner + # would wait forever for a pod that never gets sent. + max_pod_retries = min(max_pod_retries, self.runner_params["k8s_job_spec_back_off_limit"]) + # Check if job.obj['status'] is empty, # return job_state unchanged if this is the case # as probably this means that the k8s API server hasn't @@ -736,7 +742,7 @@ def check_watched_item(self, job_state): job_state.running = False self.mark_as_finished(job_state) return None - elif active > 0 and failed <= max_pod_retries: + elif active > 0 and failed < max_pod_retries + 1: if not job_state.running: if self.__job_pending_due_to_unschedulable_pod(job_state): if self.runner_params.get("k8s_unschedulable_walltime_limit"): @@ -760,7 +766,10 @@ def check_watched_item(self, job_state): job_state.job_wrapper.cleanup() return None else: - return self._handle_job_failure(job, job_state) + self._handle_job_failure(job, job_state) + # changes for resubmission (removed self.mark_as_failed from handle_job_failure) + self.work_queue.put((self.mark_as_failed, job_state)) + return None elif len(jobs.response["items"]) == 0: if job_state.job_wrapper.get_job().state == model.Job.states.DELETED: @@ -798,6 +807,8 @@ def _handle_unschedulable_job(self, job, job_state): def _handle_job_failure(self, job, job_state): # Figure out why job has failed with open(job_state.error_file, "a") as error_file: + # TODO we need to remove probably these error_file.writes, as they remove the stderr / stdout capture + # from failed Galaxy k8s jobs. if self.__job_failed_due_to_low_memory(job_state): error_file.write("Job killed after running out of memory. Try with more memory.\n") job_state.fail_message = "Tool failed due to insufficient memory. Try with more memory." @@ -809,8 +820,9 @@ def _handle_job_failure(self, job, job_state): else: error_file.write("Exceeded max number of Kubernetes pod retries allowed for job\n") job_state.fail_message = "More pods failed than allowed. See stdout for pods details." - job_state.running = False - self.mark_as_failed(job_state) + # changes for resubmission, to mimick what happens in the LSF-cli runner + # job_state.running = False + # self.mark_as_failed(job_state) try: if self.__has_guest_ports(job_state.job_wrapper): self.__cleanup_k8s_guest_ports(job_state.job_wrapper, job) @@ -855,11 +867,12 @@ def __job_failed_due_to_low_memory(self, job_state): if not pods.response["items"]: return False - pod = self._get_pod_for_job(job_state) + # pod = self._get_pod_for_job(job_state) # this was always None + pod = pods.response["items"][0] if ( pod - and pod.obj["status"]["phase"] == "Failed" - and pod.obj["status"]["containerStatuses"][0]["state"]["terminated"]["reason"] == "OOMKilled" + and "terminated" in pod["status"]["containerStatuses"][0]["state"] + and pod["status"]["containerStatuses"][0]["state"]["terminated"]["reason"] == "OOMKilled" ): return True From 4ed002540cafc6ef7411cf4ca9838f2a4680d24f Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Mon, 27 Feb 2023 11:38:05 +0000 Subject: [PATCH 007/651] Fix detection of stderr / stdout and placement on UI --- lib/galaxy/jobs/runners/kubernetes.py | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 87a28d815b8c..4cb85284bfc7 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -741,10 +741,12 @@ def check_watched_item(self, job_state): if succeeded > 0 or job_state == model.Job.states.STOPPED: job_state.running = False self.mark_as_finished(job_state) + log.debug("k8s job succeeded") return None elif active > 0 and failed < max_pod_retries + 1: if not job_state.running: if self.__job_pending_due_to_unschedulable_pod(job_state): + log.debug("k8s job pending..") if self.runner_params.get("k8s_unschedulable_walltime_limit"): creation_time_str = job.obj["metadata"].get("creationTimestamp") creation_time = datetime.strptime(creation_time_str, "%Y-%m-%dT%H:%M:%SZ") @@ -756,19 +758,28 @@ def check_watched_item(self, job_state): else: pass else: + log.debug("k8s job is running..") job_state.running = True job_state.job_wrapper.change_state(model.Job.states.RUNNING) return job_state elif job_persisted_state == model.Job.states.DELETED: # Job has been deleted via stop_job and job has not been deleted, # remove from watched_jobs by returning `None` + log.debug("PP Job is DELETED..") if job_state.job_wrapper.cleanup_job in ("always", "onsuccess"): job_state.job_wrapper.cleanup() return None else: + log.debug("k8s job is failed and not deleted, looking at failure") self._handle_job_failure(job, job_state) # changes for resubmission (removed self.mark_as_failed from handle_job_failure) self.work_queue.put((self.mark_as_failed, job_state)) + # If the job was not resubmitted after being put in the failed queue, + # we mark it as finished as well for stderr / stdout detection. + # Otherwise, the user doesn't see any stdout/stderr in the UI. + if job_state.job_wrapper.get_state() != model.Job.states.RESUBMITTED: + self.mark_as_finished(job_state) + return None elif len(jobs.response["items"]) == 0: @@ -807,22 +818,21 @@ def _handle_unschedulable_job(self, job, job_state): def _handle_job_failure(self, job, job_state): # Figure out why job has failed with open(job_state.error_file, "a") as error_file: - # TODO we need to remove probably these error_file.writes, as they remove the stderr / stdout capture - # from failed Galaxy k8s jobs. + log.debug("Trying with error file in _handle_job_failure") if self.__job_failed_due_to_low_memory(job_state): + log.debug("OOM condition reached") error_file.write("Job killed after running out of memory. Try with more memory.\n") job_state.fail_message = "Tool failed due to insufficient memory. Try with more memory." job_state.runner_state = JobState.runner_states.MEMORY_LIMIT_REACHED elif self.__job_failed_due_to_walltime_limit(job): + log.debug("Walltime condition reached") error_file.write("DeadlineExceeded") job_state.fail_message = "Job was active longer than specified deadline" job_state.runner_state = JobState.runner_states.WALLTIME_REACHED else: - error_file.write("Exceeded max number of Kubernetes pod retries allowed for job\n") - job_state.fail_message = "More pods failed than allowed. See stdout for pods details." - # changes for resubmission, to mimick what happens in the LSF-cli runner - # job_state.running = False - # self.mark_as_failed(job_state) + log.debug("Runner cannot detect a specific reason for failure, must be a tool failure.") + error_file.write("Exceeded max number of job retries allowed for job\n") + job_state.fail_message = "More job retries failed than allowed. See standard output and standard error within the info section for details." try: if self.__has_guest_ports(job_state.job_wrapper): self.__cleanup_k8s_guest_ports(job_state.job_wrapper, job) @@ -962,6 +972,7 @@ def recover(self, job, job_wrapper): ajs.old_state = model.Job.states.QUEUED ajs.running = False self.monitor_queue.put(ajs) + def finish_job(self, job_state): self._handle_metadata_externally(job_state.job_wrapper, resolve_requirements=True) From e4aeae0892ad88858ed228861ad99f295e74557e Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Wed, 8 Mar 2023 18:06:44 +0000 Subject: [PATCH 008/651] Everything working --- lib/galaxy/jobs/runners/kubernetes.py | 196 ++++++++++++++++++++------ 1 file changed, 152 insertions(+), 44 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 4cb85284bfc7..da212d2a68e6 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -3,14 +3,19 @@ """ import logging +import json # for debugging of API objects import math import os import re +import time from datetime import datetime import yaml from galaxy import model +from galaxy.util import ( + unicodify, +) from galaxy.jobs.runners import ( AsynchronousJobRunner, AsynchronousJobState, @@ -214,7 +219,11 @@ def queue_job(self, job_wrapper): self.monitor_queue.put(ajs) def __has_guest_ports(self, job_wrapper): - return bool(job_wrapper.guest_ports) + # Check if job has guest ports or interactive tool entry points that would signal that + # this is an interactive tool. + log.debug(f"Checking if job {job_wrapper.get_id_tag()} has guest ports: {job_wrapper.guest_ports}") + log.debug(f"Checking if job {job_wrapper.get_id_tag()} has interactive entry points: {job_wrapper.guest_ports}") + return bool(job_wrapper.guest_ports) or bool(job_wrapper.get_job().interactivetool_entry_points) def __configure_port_routing(self, ajs): # Configure interactive tool entry points first @@ -231,9 +240,19 @@ def __configure_port_routing(self, ajs): k8s_service_obj = service_object_dict(self.runner_params, k8s_job_name, self.__get_k8s_service_spec(ajs)) k8s_ingress_obj = ingress_object_dict(self.runner_params, k8s_job_name, self.__get_k8s_ingress_spec(ajs)) + # pretty print the objects for debugging + log.debug(f"Kubernetes service object: {json.dumps(k8s_service_obj, indent=4)}") + log.debug(f"Kubernetes ingress object: {json.dumps(k8s_ingress_obj, indent=4)}") + # We avoid creating service and ingress if they already exist (e.g. when Galaxy is restarted or resubmitting a job) service = Service(self._pykube_api, k8s_service_obj) + # if service.exists(): + # log.debug(f"Service {k8s_job_name} already exists, skipping creation") + # else: service.create() ingress = Ingress(self._pykube_api, k8s_ingress_obj) + # if ingress.exists(): + # log.debug(f"Ingress {k8s_job_name} already exists, skipping creation") + # else: ingress.create() def __get_overridable_params(self, job_wrapper, param_key): @@ -456,26 +475,27 @@ def __get_k8s_ingress_spec(self, ajs): "annotations": {"app.galaxyproject.org/tool_id": ajs.job_wrapper.tool.id}, }, "spec": { - "rules": [ - { - "host": ep["domain"], - "http": { - "paths": [ - { - "backend": { - "serviceName": self.__get_k8s_job_name( - self.__produce_k8s_job_prefix(), ajs.job_wrapper - ), - "servicePort": int(ep["tool_port"]), - }, - "path": ep.get("entry_path", "/"), - "pathType": "Prefix", - } - ] - }, - } - for ep in entry_points - ] + "ingressClassName": "nginx", + "rules":[ { + "host": ep["domain"], + "http": { + "paths": [ { + "backend": { + "service": { + "name": self.__get_k8s_job_name( + self.__produce_k8s_job_prefix(), ajs.job_wrapper + ), + "port": { "number": int(ep["tool_port"])} + } + }, + "path": ep.get("entry_path", "/"), + "pathType": "Prefix" + } + ] + }, + } + for ep in entry_points + ] }, } if self.runner_params.get("k8s_interactivetools_use_ssl"): @@ -741,12 +761,12 @@ def check_watched_item(self, job_state): if succeeded > 0 or job_state == model.Job.states.STOPPED: job_state.running = False self.mark_as_finished(job_state) - log.debug("k8s job succeeded") + log.debug("Job succeeded") return None elif active > 0 and failed < max_pod_retries + 1: if not job_state.running: if self.__job_pending_due_to_unschedulable_pod(job_state): - log.debug("k8s job pending..") + log.debug("PP Job pending..") if self.runner_params.get("k8s_unschedulable_walltime_limit"): creation_time_str = job.obj["metadata"].get("creationTimestamp") creation_time = datetime.strptime(creation_time_str, "%Y-%m-%dT%H:%M:%SZ") @@ -758,39 +778,39 @@ def check_watched_item(self, job_state): else: pass else: - log.debug("k8s job is running..") + log.debug("Job set to running..") job_state.running = True job_state.job_wrapper.change_state(model.Job.states.RUNNING) return job_state elif job_persisted_state == model.Job.states.DELETED: # Job has been deleted via stop_job and job has not been deleted, # remove from watched_jobs by returning `None` - log.debug("PP Job is DELETED..") + log.debug("Job is DELETED..") if job_state.job_wrapper.cleanup_job in ("always", "onsuccess"): job_state.job_wrapper.cleanup() return None else: - log.debug("k8s job is failed and not deleted, looking at failure") + log.debug(f"Job is failed and not deleted, looking at failure") + log.debug(f"Job state before handle job failure: {job_state.job_wrapper.get_state()}") self._handle_job_failure(job, job_state) # changes for resubmission (removed self.mark_as_failed from handle_job_failure) self.work_queue.put((self.mark_as_failed, job_state)) - # If the job was not resubmitted after being put in the failed queue, - # we mark it as finished as well for stderr / stdout detection. - # Otherwise, the user doesn't see any stdout/stderr in the UI. - if job_state.job_wrapper.get_state() != model.Job.states.RESUBMITTED: - self.mark_as_finished(job_state) return None elif len(jobs.response["items"]) == 0: if job_state.job_wrapper.get_job().state == model.Job.states.DELETED: - # Job has been deleted via stop_job and job has been deleted, - # cleanup and remove from watched_jobs by returning `None` if job_state.job_wrapper.cleanup_job in ("always", "onsuccess"): job_state.job_wrapper.cleanup() return None + if job_state.job_wrapper.get_job().state == model.Job.states.STOPPED and self.__has_guest_ports(job_state.job_wrapper): + # Interactive job has been stopped via stop_job (most likely by the user), + # cleanup and remove from watched_jobs by returning `None`. STOPPED jobs are cleaned up elsewhere. + # Marking as finished makes sure that the interactive job output is available in the UI. + self.mark_as_finished(job_state) + return None # there is no job responding to this job_id, it is either lost or something happened. - log.error("No Jobs are available under expected selector app=%s", job_state.job_id) + log.error(f"No Jobs are available under expected selector app={job_state.job_id} and they are not deleted or stopped either.") self.mark_as_failed(job_state) # job is no longer viable - remove from watched jobs return None @@ -818,21 +838,24 @@ def _handle_unschedulable_job(self, job, job_state): def _handle_job_failure(self, job, job_state): # Figure out why job has failed with open(job_state.error_file, "a") as error_file: - log.debug("Trying with error file in _handle_job_failure") + log.debug("PP Trying with error file in _handle_job_failure") if self.__job_failed_due_to_low_memory(job_state): - log.debug("OOM condition reached") + log.debug("PP OOM reached!") error_file.write("Job killed after running out of memory. Try with more memory.\n") job_state.fail_message = "Tool failed due to insufficient memory. Try with more memory." job_state.runner_state = JobState.runner_states.MEMORY_LIMIT_REACHED elif self.__job_failed_due_to_walltime_limit(job): - log.debug("Walltime condition reached") + log.debug("PP checking for walltime") error_file.write("DeadlineExceeded") job_state.fail_message = "Job was active longer than specified deadline" job_state.runner_state = JobState.runner_states.WALLTIME_REACHED else: - log.debug("Runner cannot detect a specific reason for failure, must be a tool failure.") + log.debug("PP no idea!") error_file.write("Exceeded max number of job retries allowed for job\n") - job_state.fail_message = "More job retries failed than allowed. See standard output and standard error within the info section for details." + job_state.fail_message = "More job retries failed than allowed. See standard output within the info section for details." + # changes for resubmission + # job_state.running = False + # self.mark_as_failed(job_state) try: if self.__has_guest_ports(job_state.job_wrapper): self.__cleanup_k8s_guest_ports(job_state.job_wrapper, job) @@ -845,11 +868,11 @@ def __cleanup_k8s_job(self, job): k8s_cleanup_job = self.runner_params["k8s_cleanup_job"] delete_job(job, k8s_cleanup_job) - def __cleanup_k8s_ingress(self, ingress, job_failed): + def __cleanup_k8s_ingress(self, ingress, job_failed=False): k8s_cleanup_job = self.runner_params["k8s_cleanup_job"] delete_ingress(ingress, k8s_cleanup_job, job_failed) - def __cleanup_k8s_service(self, service, job_failed): + def __cleanup_k8s_service(self, service, job_failed=False): k8s_cleanup_job = self.runner_params["k8s_cleanup_job"] delete_service(service, k8s_cleanup_job, job_failed) @@ -903,32 +926,48 @@ def __cleanup_k8s_guest_ports(self, job_wrapper, k8s_job): k8s_job_prefix = self.__produce_k8s_job_prefix() k8s_job_name = f"{k8s_job_prefix}-{self.__force_label_conformity(job_wrapper.get_id_tag())}" log.debug(f"Deleting service/ingress for job with ID {job_wrapper.get_id_tag()}") - job_failed = k8s_job.obj["status"]["failed"] > 0 if "failed" in k8s_job.obj["status"] else False ingress_to_delete = find_ingress_object_by_name( self._pykube_api, k8s_job_name, self.runner_params["k8s_namespace"] ) if ingress_to_delete and len(ingress_to_delete.response["items"]) > 0: k8s_ingress = Ingress(self._pykube_api, ingress_to_delete.response["items"][0]) - self.__cleanup_k8s_ingress(k8s_ingress, job_failed) + self.__cleanup_k8s_ingress(k8s_ingress) + else: + log.debug(f"No ingress found for job with k8s_job_name {k8s_job_name}") service_to_delete = find_service_object_by_name( self._pykube_api, k8s_job_name, self.runner_params["k8s_namespace"] ) if service_to_delete and len(service_to_delete.response["items"]) > 0: k8s_service = Service(self._pykube_api, service_to_delete.response["items"][0]) - self.__cleanup_k8s_service(k8s_service, job_failed) + self.__cleanup_k8s_service(k8s_service) + else: + log.debug(f"No service found for job with k8s_job_name {k8s_job_name}") + # remove the interactive environment entrypoints + eps = job_wrapper.get_job().interactivetool_entry_points + if eps: + log.debug(f"Removing entry points for job with ID {job_wrapper.get_id_tag()}") + self.app.interactivetool_manager.remove_entry_points(eps) def stop_job(self, job_wrapper): """Attempts to delete a dispatched job to the k8s cluster""" job = job_wrapper.get_job() try: + log.debug(f"Attempting to stop job {job.id} ({job.job_runner_external_id})") job_to_delete = find_job_object_by_name( self._pykube_api, job.get_job_runner_external_id(), self.runner_params["k8s_namespace"] ) if job_to_delete and len(job_to_delete.response["items"]) > 0: k8s_job = Job(self._pykube_api, job_to_delete.response["items"][0]) + log.debug(f"Found job with id {job.get_job_runner_external_id()} to delete") + # For interactive jobs, at this point because the job stopping has been partly handled by the + # interactive tool manager, the job wrapper no longer shows any guest ports. We need another way + # to check if the job is an interactive job. if self.__has_guest_ports(job_wrapper): + log.debug(f"Job {job.id} ({job.job_runner_external_id}) has guest ports, cleaning them up") self.__cleanup_k8s_guest_ports(job_wrapper, k8s_job) self.__cleanup_k8s_job(k8s_job) + else: + log.debug(f"Could not find job with id {job.get_job_runner_external_id()} to delete") # TODO assert whether job parallelism == 0 # assert not job_to_delete.exists(), "Could not delete job,"+job.job_runner_external_id+" it still exists" log.debug(f"({job.id}/{job.job_runner_external_id}) Terminated at user's request") @@ -972,6 +1011,75 @@ def recover(self, job, job_wrapper): ajs.old_state = model.Job.states.QUEUED ajs.running = False self.monitor_queue.put(ajs) + + # added to make sure that stdout and stderr is captured for Kubernetes + def fail_job(self, job_state: "JobState", exception=False, message="Job failed", full_status=None): + log.debug("PP Getting into fail_job in k8s runner") + job = job_state.job_wrapper.get_job() + + #### Get STDOUT and STDERR from the job and tool to be stored in the database #### + #### This is needed because when calling finish_job on a failed job, the check_output method + #### overrides the job error state and tries to figure it out from the job output files + #### breaking OOM resubmissions. + # To ensure that files below are readable, ownership must be reclaimed first + job_state.job_wrapper.reclaim_ownership() + + # wait for the files to appear + which_try = 0 + while which_try < self.app.config.retry_job_output_collection + 1: + try: + with open(job_state.output_file, "rb") as stdout_file, open(job_state.error_file, "rb") as stderr_file: + job_stdout = self._job_io_for_db(stdout_file) + job_stderr = self._job_io_for_db(stderr_file) + break + except Exception as e: + if which_try == self.app.config.retry_job_output_collection: + job_stdout = "" + job_stderr = job_state.runner_states.JOB_OUTPUT_NOT_RETURNED_FROM_CLUSTER + log.error(f"{job.id}/{job.job_runner_external_id} {job_stderr}: {unicodify(e)}") + else: + time.sleep(1) + which_try += 1 + + # get stderr and stdout to database + outputs_directory = os.path.join(job_state.job_wrapper.working_directory, "outputs") + if not os.path.exists(outputs_directory): + outputs_directory = job_state.job_wrapper.working_directory + + tool_stdout_path = os.path.join(outputs_directory, "tool_stdout") + tool_stderr_path = os.path.join(outputs_directory, "tool_stderr") + + # TODO: These might not exist for running jobs at the upgrade to 19.XX, remove that + # assumption in 20.XX. + tool_stderr = None + if os.path.exists(tool_stdout_path): + with open(tool_stdout_path, "rb") as stdout_file: + tool_stdout = self._job_io_for_db(stdout_file) + else: + # Legacy job, were getting a merged output - assume it is mostly tool output. + tool_stdout = job_stdout + job_stdout = None + + tool_stdout = None + if os.path.exists(tool_stderr_path): + with open(tool_stderr_path, "rb") as stdout_file: + tool_stderr = self._job_io_for_db(stdout_file) + else: + # Legacy job, were getting a merged output - assume it is mostly tool output. + tool_stderr = job_stderr + job_stderr = None + + #### END Get STDOUT and STDERR from the job and tool to be stored in the database #### + + # full status empty leaves the UI without stderr/stdout + full_status = { "stderr" : tool_stderr, "stdout" : tool_stdout} + log.debug(f"({job.id}/{job.job_runner_external_id}) tool_stdout: {tool_stdout}") + log.debug(f"({job.id}/{job.job_runner_external_id}) tool_stderr: {tool_stderr}") + log.debug(f"({job.id}/{job.job_runner_external_id}) job_stdout: {job_stdout}") + log.debug(f"({job.id}/{job.job_runner_external_id}) job_stderr: {job_stderr}") + + # run super method + super().fail_job(job_state, exception, message, full_status) def finish_job(self, job_state): From 8100e6966f3709c886c2d51f72af758d83f71027 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Tue, 3 Jan 2023 12:56:44 +0000 Subject: [PATCH 009/651] Move to pykube-ng (cherry picked from commit 652c04f315e45626570da6420606f74541f0117b) (cherry picked from commit 7d4b0b4b4e15654323aa30835ff307b12d4b40d1) --- lib/galaxy/dependencies/conditional-requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/dependencies/conditional-requirements.txt b/lib/galaxy/dependencies/conditional-requirements.txt index 17b16118f5a8..392cbb23f7c2 100644 --- a/lib/galaxy/dependencies/conditional-requirements.txt +++ b/lib/galaxy/dependencies/conditional-requirements.txt @@ -35,7 +35,8 @@ custos-sdk chronos-python==1.2.1 # Kubernetes job runner -pykube==0.15.0 +# pykube==0.15.0 +pykube-ng==22.9.0 # Synnefo / Pithos+ object store client kamaki From 8ba802083dd12647bbbd92ca8b2cc392672fccaf Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Tue, 3 Jan 2023 22:41:12 +0000 Subject: [PATCH 010/651] Change ingress API version to current one (cherry picked from commit f1b92d911827c897cb8e65a060e99b44f9d4ebf5) --- lib/galaxy/jobs/runners/util/pykube_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/jobs/runners/util/pykube_util.py b/lib/galaxy/jobs/runners/util/pykube_util.py index 7c3f32d87b09..28020a8c8dde 100644 --- a/lib/galaxy/jobs/runners/util/pykube_util.py +++ b/lib/galaxy/jobs/runners/util/pykube_util.py @@ -31,7 +31,7 @@ DEFAULT_JOB_API_VERSION = "batch/v1" DEFAULT_SERVICE_API_VERSION = "v1" -DEFAULT_INGRESS_API_VERSION = "extensions/v1beta1" +DEFAULT_INGRESS_API_VERSION = "networking.k8s.io/v1" DEFAULT_NAMESPACE = "default" INSTANCE_ID_INVALID_MESSAGE = ( "Galaxy instance [%s] is either too long " From c75b8123f7223c3ed9c6bf4ec521f14980ae9743 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Thu, 9 Mar 2023 08:58:03 +0000 Subject: [PATCH 011/651] Missing stdout (cherry picked from commit a201abbb08bd855ecf85fe8250384e972077cb9b) --- lib/galaxy/jobs/runners/kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index da212d2a68e6..7c05d094635b 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -1051,7 +1051,8 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", # TODO: These might not exist for running jobs at the upgrade to 19.XX, remove that # assumption in 20.XX. - tool_stderr = None + tool_stderr = "Galaxy issue: Stderr failed to be retrieved from the job working directory." + tool_stdout = "Galaxy issue: Stdout failed to be retrieved from the job working directory." if os.path.exists(tool_stdout_path): with open(tool_stdout_path, "rb") as stdout_file: tool_stdout = self._job_io_for_db(stdout_file) @@ -1060,7 +1061,6 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", tool_stdout = job_stdout job_stdout = None - tool_stdout = None if os.path.exists(tool_stderr_path): with open(tool_stderr_path, "rb") as stdout_file: tool_stderr = self._job_io_for_db(stdout_file) From 63360bd5690bbf28d371e96be533da2a22a63e3a Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Wed, 15 Mar 2023 09:12:10 +0000 Subject: [PATCH 012/651] Apply suggestions from code review Mostly cleanups from Nuwan and Pablo. Co-authored-by: Nuwan Goonasekera <2070605+nuwang@users.noreply.github.com> --- .../dependencies/conditional-requirements.txt | 1 - lib/galaxy/jobs/runners/kubernetes.py | 32 +++++++------------ 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/lib/galaxy/dependencies/conditional-requirements.txt b/lib/galaxy/dependencies/conditional-requirements.txt index 392cbb23f7c2..40861584fc09 100644 --- a/lib/galaxy/dependencies/conditional-requirements.txt +++ b/lib/galaxy/dependencies/conditional-requirements.txt @@ -35,7 +35,6 @@ custos-sdk chronos-python==1.2.1 # Kubernetes job runner -# pykube==0.15.0 pykube-ng==22.9.0 # Synnefo / Pithos+ object store client diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 7c05d094635b..6d209a1b5b4d 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -100,7 +100,7 @@ def __init__(self, app, nworkers, **kwargs): map=str, valid=lambda s: s == "$gid" or isinstance(s, int) or not s or s.isdigit(), default=None ), k8s_cleanup_job=dict(map=str, valid=lambda s: s in {"onsuccess", "always", "never"}, default="always"), - k8s_pod_retries=dict(map=int, valid=lambda x: int(x) >= 0, default=1), # note that if the backOffLimit is lower, this paramer will have not effect. + k8s_pod_retries=dict(map=int, valid=lambda x: int(x) >= 0, default=1), # note that if the backOffLimit is lower, this paramer will have no effect. k8s_job_spec_back_off_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=0), # this means that it will stop retrying after 1 failure. k8s_walltime_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=172800), k8s_unschedulable_walltime_limit=dict(map=int, valid=lambda x: not x or int(x) >= 0, default=None), @@ -220,9 +220,7 @@ def queue_job(self, job_wrapper): def __has_guest_ports(self, job_wrapper): # Check if job has guest ports or interactive tool entry points that would signal that - # this is an interactive tool. - log.debug(f"Checking if job {job_wrapper.get_id_tag()} has guest ports: {job_wrapper.guest_ports}") - log.debug(f"Checking if job {job_wrapper.get_id_tag()} has interactive entry points: {job_wrapper.guest_ports}") + log.debug(f"Checking if job {job_wrapper.get_id_tag()} is an interactive tool. guest ports: {job_wrapper.guest_ports}. interactive entry points: {job_wrapper.interactivetool_entry_points}") return bool(job_wrapper.guest_ports) or bool(job_wrapper.get_job().interactivetool_entry_points) def __configure_port_routing(self, ajs): @@ -245,14 +243,8 @@ def __configure_port_routing(self, ajs): log.debug(f"Kubernetes ingress object: {json.dumps(k8s_ingress_obj, indent=4)}") # We avoid creating service and ingress if they already exist (e.g. when Galaxy is restarted or resubmitting a job) service = Service(self._pykube_api, k8s_service_obj) - # if service.exists(): - # log.debug(f"Service {k8s_job_name} already exists, skipping creation") - # else: service.create() ingress = Ingress(self._pykube_api, k8s_ingress_obj) - # if ingress.exists(): - # log.debug(f"Ingress {k8s_job_name} already exists, skipping creation") - # else: ingress.create() def __get_overridable_params(self, job_wrapper, param_key): @@ -766,7 +758,7 @@ def check_watched_item(self, job_state): elif active > 0 and failed < max_pod_retries + 1: if not job_state.running: if self.__job_pending_due_to_unschedulable_pod(job_state): - log.debug("PP Job pending..") + log.debug(f"Job id: {job_state.job_id} with k8s id: {job.name} pending...") if self.runner_params.get("k8s_unschedulable_walltime_limit"): creation_time_str = job.obj["metadata"].get("creationTimestamp") creation_time = datetime.strptime(creation_time_str, "%Y-%m-%dT%H:%M:%SZ") @@ -778,20 +770,20 @@ def check_watched_item(self, job_state): else: pass else: - log.debug("Job set to running..") + log.debug("Job set to running...") job_state.running = True job_state.job_wrapper.change_state(model.Job.states.RUNNING) return job_state elif job_persisted_state == model.Job.states.DELETED: # Job has been deleted via stop_job and job has not been deleted, # remove from watched_jobs by returning `None` - log.debug("Job is DELETED..") + log.debug(f"Job id: {job_state.job_id} has been already deleted...") if job_state.job_wrapper.cleanup_job in ("always", "onsuccess"): job_state.job_wrapper.cleanup() return None else: log.debug(f"Job is failed and not deleted, looking at failure") - log.debug(f"Job state before handle job failure: {job_state.job_wrapper.get_state()}") + log.debug(f"Job id: {job_state.job_id} failed but has not been deleted yet. Current state: {job_state.job_wrapper.get_state()}") self._handle_job_failure(job, job_state) # changes for resubmission (removed self.mark_as_failed from handle_job_failure) self.work_queue.put((self.mark_as_failed, job_state)) @@ -838,19 +830,19 @@ def _handle_unschedulable_job(self, job, job_state): def _handle_job_failure(self, job, job_state): # Figure out why job has failed with open(job_state.error_file, "a") as error_file: - log.debug("PP Trying with error file in _handle_job_failure") + log.debug("Trying with error file in _handle_job_failure") if self.__job_failed_due_to_low_memory(job_state): - log.debug("PP OOM reached!") + log.debug(f"OOM detected for job: {job_state.job_id}") error_file.write("Job killed after running out of memory. Try with more memory.\n") job_state.fail_message = "Tool failed due to insufficient memory. Try with more memory." job_state.runner_state = JobState.runner_states.MEMORY_LIMIT_REACHED elif self.__job_failed_due_to_walltime_limit(job): - log.debug("PP checking for walltime") + log.debug(f"Walltime limit reached for job: {job_state.job_id}") error_file.write("DeadlineExceeded") job_state.fail_message = "Job was active longer than specified deadline" job_state.runner_state = JobState.runner_states.WALLTIME_REACHED else: - log.debug("PP no idea!") + log.debug(f"Unknown error detected in job: {job_state.job_id}") error_file.write("Exceeded max number of job retries allowed for job\n") job_state.fail_message = "More job retries failed than allowed. See standard output within the info section for details." # changes for resubmission @@ -1051,8 +1043,8 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", # TODO: These might not exist for running jobs at the upgrade to 19.XX, remove that # assumption in 20.XX. - tool_stderr = "Galaxy issue: Stderr failed to be retrieved from the job working directory." - tool_stdout = "Galaxy issue: Stdout failed to be retrieved from the job working directory." + tool_stderr = "Galaxy issue: stderr could not be retrieved from the job working directory." + tool_stdout = "Galaxy issue: stdout could not be retrieved from the job working directory." if os.path.exists(tool_stdout_path): with open(tool_stdout_path, "rb") as stdout_file: tool_stdout = self._job_io_for_db(stdout_file) From 5c3ccb01191e6dd6378593ec36c79c07ff3decaa Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Wed, 15 Mar 2023 09:18:07 +0000 Subject: [PATCH 013/651] Please linter --- lib/galaxy/jobs/runners/kubernetes.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 6d209a1b5b4d..af9d5551ec66 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -782,8 +782,7 @@ def check_watched_item(self, job_state): job_state.job_wrapper.cleanup() return None else: - log.debug(f"Job is failed and not deleted, looking at failure") - log.debug(f"Job id: {job_state.job_id} failed but has not been deleted yet. Current state: {job_state.job_wrapper.get_state()}") + log.debug(f"Job id: {job_state.job_id} failed and it is not a deletion case. Current state: {job_state.job_wrapper.get_state()}") self._handle_job_failure(job, job_state) # changes for resubmission (removed self.mark_as_failed from handle_job_failure) self.work_queue.put((self.mark_as_failed, job_state)) From bc2ac6b15004f8baa1d680027040f2415160d97d Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Wed, 15 Mar 2023 10:03:33 +0000 Subject: [PATCH 014/651] More linter pleasing --- lib/galaxy/jobs/runners/kubernetes.py | 59 +++++++++++++-------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index af9d5551ec66..2f30496432b5 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -3,7 +3,7 @@ """ import logging -import json # for debugging of API objects +import json # for debugging of API objects import math import os import re @@ -100,8 +100,8 @@ def __init__(self, app, nworkers, **kwargs): map=str, valid=lambda s: s == "$gid" or isinstance(s, int) or not s or s.isdigit(), default=None ), k8s_cleanup_job=dict(map=str, valid=lambda s: s in {"onsuccess", "always", "never"}, default="always"), - k8s_pod_retries=dict(map=int, valid=lambda x: int(x) >= 0, default=1), # note that if the backOffLimit is lower, this paramer will have no effect. - k8s_job_spec_back_off_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=0), # this means that it will stop retrying after 1 failure. + k8s_pod_retries=dict(map=int, valid=lambda x: int(x) >= 0, default=1), # note that if the backOffLimit is lower, this paramer will have no effect. + k8s_job_spec_back_off_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=0), # this means that it will stop retrying after 1 failure. k8s_walltime_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=172800), k8s_unschedulable_walltime_limit=dict(map=int, valid=lambda x: not x or int(x) >= 0, default=None), k8s_interactivetools_use_ssl=dict(map=bool, default=False), @@ -468,26 +468,26 @@ def __get_k8s_ingress_spec(self, ajs): }, "spec": { "ingressClassName": "nginx", - "rules":[ { - "host": ep["domain"], - "http": { - "paths": [ { - "backend": { - "service": { - "name": self.__get_k8s_job_name( - self.__produce_k8s_job_prefix(), ajs.job_wrapper - ), - "port": { "number": int(ep["tool_port"])} - } - }, - "path": ep.get("entry_path", "/"), - "pathType": "Prefix" - } - ] - }, + "rules": [ { + "host": ep["domain"], + "http": { + "paths": [ { + "backend": { + "service": { + "name": self.__get_k8s_job_name( + self.__produce_k8s_job_prefix(), ajs.job_wrapper + ), + "port": { "number": int(ep["tool_port"])} + } + }, + "path": ep.get("entry_path", "/"), + "pathType": "Prefix" + } + ] + }, } for ep in entry_points - ] + ] }, } if self.runner_params.get("k8s_interactivetools_use_ssl"): @@ -1007,11 +1007,11 @@ def recover(self, job, job_wrapper): def fail_job(self, job_state: "JobState", exception=False, message="Job failed", full_status=None): log.debug("PP Getting into fail_job in k8s runner") job = job_state.job_wrapper.get_job() - - #### Get STDOUT and STDERR from the job and tool to be stored in the database #### - #### This is needed because when calling finish_job on a failed job, the check_output method - #### overrides the job error state and tries to figure it out from the job output files - #### breaking OOM resubmissions. + + # Get STDOUT and STDERR from the job and tool to be stored in the database # + # This is needed because when calling finish_job on a failed job, the check_output method + # overrides the job error state and tries to figure it out from the job output files + # breaking OOM resubmissions. # To ensure that files below are readable, ownership must be reclaimed first job_state.job_wrapper.reclaim_ownership() @@ -1039,7 +1039,7 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", tool_stdout_path = os.path.join(outputs_directory, "tool_stdout") tool_stderr_path = os.path.join(outputs_directory, "tool_stderr") - + # TODO: These might not exist for running jobs at the upgrade to 19.XX, remove that # assumption in 20.XX. tool_stderr = "Galaxy issue: stderr could not be retrieved from the job working directory." @@ -1060,10 +1060,10 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", tool_stderr = job_stderr job_stderr = None - #### END Get STDOUT and STDERR from the job and tool to be stored in the database #### + # END Get STDOUT and STDERR from the job and tool to be stored in the database # # full status empty leaves the UI without stderr/stdout - full_status = { "stderr" : tool_stderr, "stdout" : tool_stdout} + full_status = {"stderr" : tool_stderr, "stdout" : tool_stdout} log.debug(f"({job.id}/{job.job_runner_external_id}) tool_stdout: {tool_stdout}") log.debug(f"({job.id}/{job.job_runner_external_id}) tool_stderr: {tool_stderr}") log.debug(f"({job.id}/{job.job_runner_external_id}) job_stdout: {job_stdout}") @@ -1071,7 +1071,6 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", # run super method super().fail_job(job_state, exception, message, full_status) - def finish_job(self, job_state): self._handle_metadata_externally(job_state.job_wrapper, resolve_requirements=True) From 80e254a4f2b7df22855a764a349804f55cd89ba7 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Wed, 15 Mar 2023 10:25:18 +0000 Subject: [PATCH 015/651] Black + isort --- lib/galaxy/jobs/runners/kubernetes.py | 76 ++++++++++++++++----------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 2f30496432b5..a1281e7cea54 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -2,8 +2,8 @@ Offload jobs to a Kubernetes cluster. """ -import logging import json # for debugging of API objects +import logging import math import os import re @@ -13,9 +13,6 @@ import yaml from galaxy import model -from galaxy.util import ( - unicodify, -) from galaxy.jobs.runners import ( AsynchronousJobRunner, AsynchronousJobState, @@ -48,6 +45,7 @@ Service, service_object_dict, ) +from galaxy.util import unicodify from galaxy.util.bytesize import ByteSize log = logging.getLogger(__name__) @@ -100,8 +98,12 @@ def __init__(self, app, nworkers, **kwargs): map=str, valid=lambda s: s == "$gid" or isinstance(s, int) or not s or s.isdigit(), default=None ), k8s_cleanup_job=dict(map=str, valid=lambda s: s in {"onsuccess", "always", "never"}, default="always"), - k8s_pod_retries=dict(map=int, valid=lambda x: int(x) >= 0, default=1), # note that if the backOffLimit is lower, this paramer will have no effect. - k8s_job_spec_back_off_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=0), # this means that it will stop retrying after 1 failure. + k8s_pod_retries=dict( + map=int, valid=lambda x: int(x) >= 0, default=1 + ), # note that if the backOffLimit is lower, this paramer will have no effect. + k8s_job_spec_back_off_limit=dict( + map=int, valid=lambda x: int(x) >= 0, default=0 + ), # this means that it will stop retrying after 1 failure. k8s_walltime_limit=dict(map=int, valid=lambda x: int(x) >= 0, default=172800), k8s_unschedulable_walltime_limit=dict(map=int, valid=lambda x: not x or int(x) >= 0, default=None), k8s_interactivetools_use_ssl=dict(map=bool, default=False), @@ -220,7 +222,9 @@ def queue_job(self, job_wrapper): def __has_guest_ports(self, job_wrapper): # Check if job has guest ports or interactive tool entry points that would signal that - log.debug(f"Checking if job {job_wrapper.get_id_tag()} is an interactive tool. guest ports: {job_wrapper.guest_ports}. interactive entry points: {job_wrapper.interactivetool_entry_points}") + log.debug( + f"Checking if job {job_wrapper.get_id_tag()} is an interactive tool. guest ports: {job_wrapper.guest_ports}. interactive entry points: {job_wrapper.interactivetool_entry_points}" + ) return bool(job_wrapper.guest_ports) or bool(job_wrapper.get_job().interactivetool_entry_points) def __configure_port_routing(self, ajs): @@ -468,26 +472,28 @@ def __get_k8s_ingress_spec(self, ajs): }, "spec": { "ingressClassName": "nginx", - "rules": [ { - "host": ep["domain"], - "http": { - "paths": [ { - "backend": { - "service": { - "name": self.__get_k8s_job_name( - self.__produce_k8s_job_prefix(), ajs.job_wrapper - ), - "port": { "number": int(ep["tool_port"])} - } - }, - "path": ep.get("entry_path", "/"), - "pathType": "Prefix" - } - ] + "rules": [ + { + "host": ep["domain"], + "http": { + "paths": [ + { + "backend": { + "service": { + "name": self.__get_k8s_job_name( + self.__produce_k8s_job_prefix(), ajs.job_wrapper + ), + "port": {"number": int(ep["tool_port"])}, + } }, - } - for ep in entry_points - ] + "path": ep.get("entry_path", "/"), + "pathType": "Prefix", + } + ] + }, + } + for ep in entry_points + ], }, } if self.runner_params.get("k8s_interactivetools_use_ssl"): @@ -782,7 +788,9 @@ def check_watched_item(self, job_state): job_state.job_wrapper.cleanup() return None else: - log.debug(f"Job id: {job_state.job_id} failed and it is not a deletion case. Current state: {job_state.job_wrapper.get_state()}") + log.debug( + f"Job id: {job_state.job_id} failed and it is not a deletion case. Current state: {job_state.job_wrapper.get_state()}" + ) self._handle_job_failure(job, job_state) # changes for resubmission (removed self.mark_as_failed from handle_job_failure) self.work_queue.put((self.mark_as_failed, job_state)) @@ -794,14 +802,18 @@ def check_watched_item(self, job_state): if job_state.job_wrapper.cleanup_job in ("always", "onsuccess"): job_state.job_wrapper.cleanup() return None - if job_state.job_wrapper.get_job().state == model.Job.states.STOPPED and self.__has_guest_ports(job_state.job_wrapper): + if job_state.job_wrapper.get_job().state == model.Job.states.STOPPED and self.__has_guest_ports( + job_state.job_wrapper + ): # Interactive job has been stopped via stop_job (most likely by the user), # cleanup and remove from watched_jobs by returning `None`. STOPPED jobs are cleaned up elsewhere. # Marking as finished makes sure that the interactive job output is available in the UI. self.mark_as_finished(job_state) return None # there is no job responding to this job_id, it is either lost or something happened. - log.error(f"No Jobs are available under expected selector app={job_state.job_id} and they are not deleted or stopped either.") + log.error( + f"No Jobs are available under expected selector app={job_state.job_id} and they are not deleted or stopped either." + ) self.mark_as_failed(job_state) # job is no longer viable - remove from watched jobs return None @@ -843,7 +855,9 @@ def _handle_job_failure(self, job, job_state): else: log.debug(f"Unknown error detected in job: {job_state.job_id}") error_file.write("Exceeded max number of job retries allowed for job\n") - job_state.fail_message = "More job retries failed than allowed. See standard output within the info section for details." + job_state.fail_message = ( + "More job retries failed than allowed. See standard output within the info section for details." + ) # changes for resubmission # job_state.running = False # self.mark_as_failed(job_state) @@ -1063,7 +1077,7 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", # END Get STDOUT and STDERR from the job and tool to be stored in the database # # full status empty leaves the UI without stderr/stdout - full_status = {"stderr" : tool_stderr, "stdout" : tool_stdout} + full_status = {"stderr": tool_stderr, "stdout": tool_stdout} log.debug(f"({job.id}/{job.job_runner_external_id}) tool_stdout: {tool_stdout}") log.debug(f"({job.id}/{job.job_runner_external_id}) tool_stderr: {tool_stderr}") log.debug(f"({job.id}/{job.job_runner_external_id}) job_stdout: {job_stdout}") From 5dd8751c0579199caa6ce45176123b439edd01f5 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Tue, 28 Mar 2023 14:15:25 +0100 Subject: [PATCH 016/651] Fix its issue on logging (cherry picked from commit d5e73b8130ea29d3da9c073e1d74d295e2c4c03a) --- lib/galaxy/jobs/runners/kubernetes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index a1281e7cea54..dab97fabacf3 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -223,7 +223,7 @@ def queue_job(self, job_wrapper): def __has_guest_ports(self, job_wrapper): # Check if job has guest ports or interactive tool entry points that would signal that log.debug( - f"Checking if job {job_wrapper.get_id_tag()} is an interactive tool. guest ports: {job_wrapper.guest_ports}. interactive entry points: {job_wrapper.interactivetool_entry_points}" + f"Checking if job {job_wrapper.get_id_tag()} is an interactive tool. guest ports: {job_wrapper.guest_ports}. interactive entry points: {job_wrapper.get_job().interactivetool_entry_points}" ) return bool(job_wrapper.guest_ports) or bool(job_wrapper.get_job().interactivetool_entry_points) From 5ca868703b394f7efbc2e9968cd6a29545d72e2f Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Tue, 28 Mar 2023 14:35:01 +0100 Subject: [PATCH 017/651] trace for larger json dump logs --- lib/galaxy/jobs/runners/kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index dab97fabacf3..7c4a04972142 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -243,8 +243,8 @@ def __configure_port_routing(self, ajs): k8s_ingress_obj = ingress_object_dict(self.runner_params, k8s_job_name, self.__get_k8s_ingress_spec(ajs)) # pretty print the objects for debugging - log.debug(f"Kubernetes service object: {json.dumps(k8s_service_obj, indent=4)}") - log.debug(f"Kubernetes ingress object: {json.dumps(k8s_ingress_obj, indent=4)}") + log.trace(f"Kubernetes service object: {json.dumps(k8s_service_obj, indent=4)}") + log.trace(f"Kubernetes ingress object: {json.dumps(k8s_ingress_obj, indent=4)}") # We avoid creating service and ingress if they already exist (e.g. when Galaxy is restarted or resubmitting a job) service = Service(self._pykube_api, k8s_service_obj) service.create() From a9cc2ff450887dee825fddaec10fda3ce4119f00 Mon Sep 17 00:00:00 2001 From: Pablo Moreno Date: Wed, 29 Mar 2023 21:37:13 +0100 Subject: [PATCH 018/651] Make ingress class configurable for ITs --- lib/galaxy/jobs/runners/kubernetes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 7c4a04972142..3ee12585d071 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -108,6 +108,7 @@ def __init__(self, app, nworkers, **kwargs): k8s_unschedulable_walltime_limit=dict(map=int, valid=lambda x: not x or int(x) >= 0, default=None), k8s_interactivetools_use_ssl=dict(map=bool, default=False), k8s_interactivetools_ingress_annotations=dict(map=str), + k8s_interactivetools_ingress_class=dict(map=str, default=None), ) if "runner_param_specs" not in kwargs: @@ -471,7 +472,6 @@ def __get_k8s_ingress_spec(self, ajs): "annotations": {"app.galaxyproject.org/tool_id": ajs.job_wrapper.tool.id}, }, "spec": { - "ingressClassName": "nginx", "rules": [ { "host": ep["domain"], @@ -496,6 +496,9 @@ def __get_k8s_ingress_spec(self, ajs): ], }, } + default_ingress_class = self.runner_params.get("k8s_interactivetools_ingress_class") + if default_ingress_class: + k8s_spec_template["spec"]["ingressClassName"] = default_ingress_class if self.runner_params.get("k8s_interactivetools_use_ssl"): domains = list({e["domain"] for e in entry_points}) k8s_spec_template["spec"]["tls"] = [ From 6ca1a021f45ee9be07dc63c18124f3ae245a132c Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Tue, 6 Jun 2023 00:51:06 +0530 Subject: [PATCH 019/651] Remove extra comments and minor tweaks to debug logs --- lib/galaxy/jobs/runners/kubernetes.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 3ee12585d071..f02608811cae 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -243,7 +243,6 @@ def __configure_port_routing(self, ajs): k8s_service_obj = service_object_dict(self.runner_params, k8s_job_name, self.__get_k8s_service_spec(ajs)) k8s_ingress_obj = ingress_object_dict(self.runner_params, k8s_job_name, self.__get_k8s_ingress_spec(ajs)) - # pretty print the objects for debugging log.trace(f"Kubernetes service object: {json.dumps(k8s_service_obj, indent=4)}") log.trace(f"Kubernetes ingress object: {json.dumps(k8s_ingress_obj, indent=4)}") # We avoid creating service and ingress if they already exist (e.g. when Galaxy is restarted or resubmitting a job) @@ -549,6 +548,7 @@ def __get_k8s_containers(self, ajs): # command line execution, separated by ;, which is what Galaxy does # to assemble the command. "command": [ajs.job_wrapper.shell], + # Make sure that the exit code is propagated to k8s, so k8s knows why the tool failed (e.g. OOM) "args": ["-c", f"{ajs.job_file}; exit $(cat {ajs.exit_code_file})"], "workingDir": ajs.job_wrapper.working_directory, "volumeMounts": deduplicate_entries(mounts), @@ -762,7 +762,7 @@ def check_watched_item(self, job_state): if succeeded > 0 or job_state == model.Job.states.STOPPED: job_state.running = False self.mark_as_finished(job_state) - log.debug("Job succeeded") + log.debug(f"Job id: {job_state.job_id} with k8s id: {job.name} succeeded") return None elif active > 0 and failed < max_pod_retries + 1: if not job_state.running: @@ -1077,8 +1077,6 @@ def fail_job(self, job_state: "JobState", exception=False, message="Job failed", tool_stderr = job_stderr job_stderr = None - # END Get STDOUT and STDERR from the job and tool to be stored in the database # - # full status empty leaves the UI without stderr/stdout full_status = {"stderr": tool_stderr, "stdout": tool_stdout} log.debug(f"({job.id}/{job.job_runner_external_id}) tool_stdout: {tool_stdout}") From 8276216fa8752718ab26517d8ae0ed02016e8cd3 Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Tue, 6 Jun 2023 12:18:55 +0530 Subject: [PATCH 020/651] Change trace to debug --- lib/galaxy/jobs/runners/kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index f02608811cae..6ef015f0d812 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -243,8 +243,8 @@ def __configure_port_routing(self, ajs): k8s_service_obj = service_object_dict(self.runner_params, k8s_job_name, self.__get_k8s_service_spec(ajs)) k8s_ingress_obj = ingress_object_dict(self.runner_params, k8s_job_name, self.__get_k8s_ingress_spec(ajs)) - log.trace(f"Kubernetes service object: {json.dumps(k8s_service_obj, indent=4)}") - log.trace(f"Kubernetes ingress object: {json.dumps(k8s_ingress_obj, indent=4)}") + log.debug(f"Kubernetes service object: {json.dumps(k8s_service_obj, indent=4)}") + log.debug(f"Kubernetes ingress object: {json.dumps(k8s_ingress_obj, indent=4)}") # We avoid creating service and ingress if they already exist (e.g. when Galaxy is restarted or resubmitting a job) service = Service(self._pykube_api, k8s_service_obj) service.create() From eeb4d25987a002fef154b66c2663751bfffefd0d Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Wed, 21 Jun 2023 22:17:53 +0530 Subject: [PATCH 021/651] Don't mark job as failed if unknown exit code --- lib/galaxy/authnz/custos_authnz.py | 2 +- lib/galaxy/jobs/runners/kubernetes.py | 46 +++++++++++++++++++++++---- 2 files changed, 40 insertions(+), 8 deletions(-) diff --git a/lib/galaxy/authnz/custos_authnz.py b/lib/galaxy/authnz/custos_authnz.py index 4b4d40ec6860..75d3187cc508 100644 --- a/lib/galaxy/authnz/custos_authnz.py +++ b/lib/galaxy/authnz/custos_authnz.py @@ -204,7 +204,7 @@ def callback(self, state_token, authz_code, trans, login_redirect_url): ): user = existing_user else: - message = f"There already exists a user with email {email}. To associate this external login, you must first be logged in as that existing account." + message = f"There already exists a user with email {email}. To associate this external login, you must first be logged in as that existing account." log.info(message) login_redirect_url = ( f"{login_redirect_url}login/start" diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 6ef015f0d812..6f033a1cd57f 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -794,9 +794,14 @@ def check_watched_item(self, job_state): log.debug( f"Job id: {job_state.job_id} failed and it is not a deletion case. Current state: {job_state.job_wrapper.get_state()}" ) - self._handle_job_failure(job, job_state) - # changes for resubmission (removed self.mark_as_failed from handle_job_failure) - self.work_queue.put((self.mark_as_failed, job_state)) + if self._handle_job_failure(job, job_state): + # changes for resubmission (removed self.mark_as_failed from handle_job_failure) + self.work_queue.put((self.mark_as_failed, job_state)) + else: + # Job failure was not due to a k8s issue or something that k8s can handle, so it's a tool error. + job_state.running = False + self.mark_as_finished(job_state) + return None return None @@ -843,6 +848,7 @@ def _handle_unschedulable_job(self, job, job_state): def _handle_job_failure(self, job, job_state): # Figure out why job has failed + mark_failed = True with open(job_state.error_file, "a") as error_file: log.debug("Trying with error file in _handle_job_failure") if self.__job_failed_due_to_low_memory(job_state): @@ -855,11 +861,18 @@ def _handle_job_failure(self, job, job_state): error_file.write("DeadlineExceeded") job_state.fail_message = "Job was active longer than specified deadline" job_state.runner_state = JobState.runner_states.WALLTIME_REACHED + elif self.__job_failed_due_to_unknown_exit_code(job_state): + msg = f"Job: {job_state.job_id} failed due to an unknown exit code from the tool." + log.debug(msg) + job_state.fail_message = msg + job_state.runner_state = JobState.runner_states.TOOL_DETECT_ERROR + mark_failed = False else: - log.debug(f"Unknown error detected in job: {job_state.job_id}") - error_file.write("Exceeded max number of job retries allowed for job\n") + msg = f"An unknown error occurred in this job and the maximum number of retries have been exceeded for job: {job_state.job_id}." + log.debug(msg) + error_file.write(msg) job_state.fail_message = ( - "More job retries failed than allowed. See standard output within the info section for details." + "An unknown error occurered with this job. See standard output within the info section for details." ) # changes for resubmission # job_state.running = False @@ -870,7 +883,7 @@ def _handle_job_failure(self, job, job_state): self.__cleanup_k8s_job(job) except Exception: log.exception("Could not clean up k8s batch job. Ignoring...") - return None + return mark_failed def __cleanup_k8s_job(self, job): k8s_cleanup_job = self.runner_params["k8s_cleanup_job"] @@ -930,6 +943,25 @@ def __job_pending_due_to_unschedulable_pod(self, job_state): pod = Pod(self._pykube_api, pods.response["items"][0]) return is_pod_unschedulable(self._pykube_api, pod, self.runner_params["k8s_namespace"]) + def __job_failed_due_to_unknown_exit_code(self, job_state): + """ + checks whether the pod exited prematurely due to an unknown exit code (i.e. not an exit code like OOM that + we can handle). This would mean that the tool failed, but the job should be considered to have succeeded. + """ + pods = find_pod_object_by_name(self._pykube_api, job_state.job_id, self.runner_params["k8s_namespace"]) + if not pods.response["items"]: + return False + + pod = pods.response["items"][0] + if ( + pod + and "terminated" in pod["status"]["containerStatuses"][0]["state"] + and pod["status"]["containerStatuses"][0]["state"].get("exitCode") + ): + return True + + return False + def __cleanup_k8s_guest_ports(self, job_wrapper, k8s_job): k8s_job_prefix = self.__produce_k8s_job_prefix() k8s_job_name = f"{k8s_job_prefix}-{self.__force_label_conformity(job_wrapper.get_id_tag())}" From 7724435df87af319351de967074e707290e1c1a0 Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Wed, 21 Jun 2023 23:58:29 +0530 Subject: [PATCH 022/651] Get exitCode from correct dict entry --- lib/galaxy/jobs/runners/kubernetes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/jobs/runners/kubernetes.py b/lib/galaxy/jobs/runners/kubernetes.py index 6f033a1cd57f..7c6c6f153f84 100644 --- a/lib/galaxy/jobs/runners/kubernetes.py +++ b/lib/galaxy/jobs/runners/kubernetes.py @@ -956,7 +956,7 @@ def __job_failed_due_to_unknown_exit_code(self, job_state): if ( pod and "terminated" in pod["status"]["containerStatuses"][0]["state"] - and pod["status"]["containerStatuses"][0]["state"].get("exitCode") + and pod["status"]["containerStatuses"][0]["state"]["terminated"].get("exitCode") ): return True From c1b3b275344feaef63be73f0696b9d5dc4455c15 Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Thu, 22 Jun 2023 02:49:37 +0530 Subject: [PATCH 023/651] Bump pykube version --- lib/galaxy/dependencies/conditional-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/dependencies/conditional-requirements.txt b/lib/galaxy/dependencies/conditional-requirements.txt index 40861584fc09..1bea9abfe5a2 100644 --- a/lib/galaxy/dependencies/conditional-requirements.txt +++ b/lib/galaxy/dependencies/conditional-requirements.txt @@ -35,7 +35,7 @@ custos-sdk chronos-python==1.2.1 # Kubernetes job runner -pykube-ng==22.9.0 +pykube-ng==23.6.0 # Synnefo / Pithos+ object store client kamaki From 452a042d672d11a76ab666eca8b52bb064b3f8a7 Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:41:31 +0530 Subject: [PATCH 024/651] Fix conditional requirement for pykube-ng --- lib/galaxy/dependencies/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/dependencies/__init__.py b/lib/galaxy/dependencies/__init__.py index e46c6fee60c9..091fb4da57dc 100644 --- a/lib/galaxy/dependencies/__init__.py +++ b/lib/galaxy/dependencies/__init__.py @@ -203,7 +203,7 @@ def check_total_perspective_vortex(self): def check_pbs_python(self): return "galaxy.jobs.runners.pbs:PBSJobRunner" in self.job_runners - def check_pykube(self): + def check_pykube_ng(self): return "galaxy.jobs.runners.kubernetes:KubernetesJobRunner" in self.job_runners or which("kubectl") def check_chronos_python(self): From 250be5db661ee7f8a6f43ca8086dacbbf6f7e7e1 Mon Sep 17 00:00:00 2001 From: nuwang <2070605+nuwang@users.noreply.github.com> Date: Thu, 22 Jun 2023 17:53:36 +0530 Subject: [PATCH 025/651] Set a pykube version that's available --- lib/galaxy/dependencies/conditional-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/dependencies/conditional-requirements.txt b/lib/galaxy/dependencies/conditional-requirements.txt index 1bea9abfe5a2..4d6eccc54007 100644 --- a/lib/galaxy/dependencies/conditional-requirements.txt +++ b/lib/galaxy/dependencies/conditional-requirements.txt @@ -35,7 +35,7 @@ custos-sdk chronos-python==1.2.1 # Kubernetes job runner -pykube-ng==23.6.0 +pykube-ng==21.3.0 # Synnefo / Pithos+ object store client kamaki From 1b24e52c6e21117116922e57a7a3d1f19934f070 Mon Sep 17 00:00:00 2001 From: Patrick Austin Date: Fri, 4 Aug 2023 17:09:15 +0000 Subject: [PATCH 026/651] Allow unset metadata for athena project --- lib/galaxy/datatypes/larch.py | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/lib/galaxy/datatypes/larch.py b/lib/galaxy/datatypes/larch.py index 28715e8695c7..75118f0f99da 100644 --- a/lib/galaxy/datatypes/larch.py +++ b/lib/galaxy/datatypes/larch.py @@ -82,24 +82,26 @@ def set_meta(self, dataset: "DatasetInstance", overwrite: bool = True, **kwd) -> """ Extract metadata from @args """ + def extract_arg(args: "list[str]", arg_name: str): + try: + index = args.index(f"'{arg_name}'") + setattr(dataset.metadata, arg_name, args[index + 1].replace("'", "")) + except ValueError: + return + headers = get_headers(dataset.file_name, sep=" = ", count=3, comment_designator="#") + args = [] for header in headers: if header[0] == "@args": args = header[1][1:-2].split(",") break - index = args.index("'atsym'") - dataset.metadata.atsym = args[index + 1][1:-1] - index = args.index("'bkg_e0'") - dataset.metadata.bkg_e0 = args[index + 1][1:-1] - index = args.index("'edge'") - dataset.metadata.edge = args[index + 1][1:-1] - index = args.index("'npts'") - dataset.metadata.npts = args[index + 1][1:-1] - index = args.index("'xmax'") - dataset.metadata.xmax = args[index + 1][1:-1] - index = args.index("'xmin'") - dataset.metadata.xmin = args[index + 1][1:-1] + extract_arg(args, "atsym") + extract_arg(args, "bkg_e0") + extract_arg(args, "edge") + extract_arg(args, "npts") + extract_arg(args, "xmax") + extract_arg(args, "xmin") def set_peek(self, dataset: "DatasetInstance", **kwd) -> None: if not dataset.dataset.purged: From 3e1b54b02c5fc3f77613d3136f4692c41e7782cf Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Wed, 4 Oct 2023 12:55:58 +0100 Subject: [PATCH 027/651] Add Check-in as OIDC authentication option --- lib/galaxy/authnz/managers.py | 3 +++ lib/galaxy/authnz/psa_authnz.py | 2 ++ .../config/sample/oidc_backends_config.xml.sample | 14 ++++++++++++++ 3 files changed, 19 insertions(+) diff --git a/lib/galaxy/authnz/managers.py b/lib/galaxy/authnz/managers.py index 07ac51f4f809..a74d71456c39 100644 --- a/lib/galaxy/authnz/managers.py +++ b/lib/galaxy/authnz/managers.py @@ -165,6 +165,9 @@ def _parse_idp_config(self, config_xml): rtv["tenant_id"] = config_xml.find("tenant_id").text if config_xml.find("pkce_support") is not None: rtv["pkce_support"] = asbool(config_xml.find("pkce_support").text) + # this is a EGI Check-in specific config + if config_xml.find("checkin_env") is not None: + rtv["checkin_env"] = config_xml.find("checkin_env").text return rtv diff --git a/lib/galaxy/authnz/psa_authnz.py b/lib/galaxy/authnz/psa_authnz.py index 0dc084727980..501604da2f27 100644 --- a/lib/galaxy/authnz/psa_authnz.py +++ b/lib/galaxy/authnz/psa_authnz.py @@ -42,6 +42,7 @@ "elixir": "social_core.backends.elixir.ElixirOpenIdConnect", "okta": "social_core.backends.okta_openidconnect.OktaOpenIdConnect", "azure": "social_core.backends.azuread_tenant.AzureADV2TenantOAuth2", + "checkin": "social_core.backends.checkin.CheckinOpenIdConnect", } BACKENDS_NAME = { @@ -50,6 +51,7 @@ "elixir": "elixir", "okta": "okta-openidconnect", "azure": "azuread-v2-tenant-oauth2", + "checkin": "checkin", } AUTH_PIPELINE = ( diff --git a/lib/galaxy/config/sample/oidc_backends_config.xml.sample b/lib/galaxy/config/sample/oidc_backends_config.xml.sample index 1c07a7d3e116..1ae67d9a6e9c 100644 --- a/lib/galaxy/config/sample/oidc_backends_config.xml.sample +++ b/lib/galaxy/config/sample/oidc_backends_config.xml.sample @@ -197,4 +197,18 @@ Please mind `http` and `https`. ... + + + + ... + ... + http://localhost:8080/authnz/checkin/callback + https://im.egi.eu/im-dashboard/static/images/egicheckin.png + consent + + + + + From 9715b8448a24265d04d42d13261d16772a985ca3 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Tue, 17 Oct 2023 10:22:49 +0100 Subject: [PATCH 028/651] Use a less generic name --- lib/galaxy/authnz/psa_authnz.py | 4 ++-- lib/galaxy/config/sample/oidc_backends_config.xml.sample | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/authnz/psa_authnz.py b/lib/galaxy/authnz/psa_authnz.py index 501604da2f27..acfef6ccbeb1 100644 --- a/lib/galaxy/authnz/psa_authnz.py +++ b/lib/galaxy/authnz/psa_authnz.py @@ -42,7 +42,7 @@ "elixir": "social_core.backends.elixir.ElixirOpenIdConnect", "okta": "social_core.backends.okta_openidconnect.OktaOpenIdConnect", "azure": "social_core.backends.azuread_tenant.AzureADV2TenantOAuth2", - "checkin": "social_core.backends.checkin.CheckinOpenIdConnect", + "egi_checkin": "social_core.backends.checkin.EGICheckinOpenIdConnect", } BACKENDS_NAME = { @@ -51,7 +51,7 @@ "elixir": "elixir", "okta": "okta-openidconnect", "azure": "azuread-v2-tenant-oauth2", - "checkin": "checkin", + "egi_checkin": "egi-checkin", } AUTH_PIPELINE = ( diff --git a/lib/galaxy/config/sample/oidc_backends_config.xml.sample b/lib/galaxy/config/sample/oidc_backends_config.xml.sample index 1ae67d9a6e9c..5b2c558e75b6 100644 --- a/lib/galaxy/config/sample/oidc_backends_config.xml.sample +++ b/lib/galaxy/config/sample/oidc_backends_config.xml.sample @@ -198,8 +198,8 @@ Please mind `http` and `https`. - - ... ... From 2eb4cc143a716ba56977bbd6bf42f6d08b201be5 Mon Sep 17 00:00:00 2001 From: Enol Fernandez Date: Wed, 18 Oct 2023 10:06:38 +0100 Subject: [PATCH 029/651] Fix module name --- lib/galaxy/authnz/psa_authnz.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/authnz/psa_authnz.py b/lib/galaxy/authnz/psa_authnz.py index acfef6ccbeb1..a1193f02868c 100644 --- a/lib/galaxy/authnz/psa_authnz.py +++ b/lib/galaxy/authnz/psa_authnz.py @@ -42,7 +42,7 @@ "elixir": "social_core.backends.elixir.ElixirOpenIdConnect", "okta": "social_core.backends.okta_openidconnect.OktaOpenIdConnect", "azure": "social_core.backends.azuread_tenant.AzureADV2TenantOAuth2", - "egi_checkin": "social_core.backends.checkin.EGICheckinOpenIdConnect", + "egi_checkin": "social_core.backends.egi_checkin.EGICheckinOpenIdConnect", } BACKENDS_NAME = { From 85f20770c1205e728740d14a3b1539a2f4d3a29d Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Thu, 19 Oct 2023 10:06:14 +0200 Subject: [PATCH 030/651] Add back 1.1.0 version of Filtering1 tool Which is the same as 1.1.1 but hard to explain to users why the run form shows a different version than what is shown in the user interface. --- lib/galaxy/config/sample/tool_conf.xml.sample | 1 + tools/stats/filtering_1_1_0.xml | 103 ++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 tools/stats/filtering_1_1_0.xml diff --git a/lib/galaxy/config/sample/tool_conf.xml.sample b/lib/galaxy/config/sample/tool_conf.xml.sample index f6f060739291..f3818f88f46c 100644 --- a/lib/galaxy/config/sample/tool_conf.xml.sample +++ b/lib/galaxy/config/sample/tool_conf.xml.sample @@ -79,6 +79,7 @@
+ diff --git a/tools/stats/filtering_1_1_0.xml b/tools/stats/filtering_1_1_0.xml new file mode 100644 index 000000000000..1a20cf40c947 --- /dev/null +++ b/tools/stats/filtering_1_1_0.xml @@ -0,0 +1,103 @@ + + data on any column using simple expressions + + operation_0335 + + + python '$__tool_directory__/filtering.py' '$input' '$out_file1' '$inputs' ${input.metadata.columns} "${input.metadata.column_types}" $header_lines + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +.. class:: warningmark + +Double equal signs, ==, must be used as *"equal to"* (e.g., **c1 == 'chr22'**) + +.. class:: infomark + +**TIP:** Attempting to apply a filtering condition may throw exceptions if the data type (e.g., string, integer) in every line of the columns being filtered is not appropriate for the condition (e.g., attempting certain numerical calculations on strings). If an exception is thrown when applying the condition to a line, that line is skipped as invalid for the filter condition. The number of invalid skipped lines is documented in the resulting history item as a "Condition/data issue". + +.. class:: infomark + +**TIP:** If your data is not TAB delimited, use *Text Manipulation->Convert* + +----- + +**Syntax** + +The filter tool allows you to restrict the dataset using simple conditional statements. + +- Columns are referenced with **c** and a **number**. For example, **c1** refers to the first column of a tab-delimited file +- Make sure that multi-character operators contain no white space ( e.g., **<=** is valid while **< =** is not valid ) +- When using 'equal-to' operator **double equal sign '==' must be used** ( e.g., **c1=='chr1'** ) +- Non-numerical values must be included in single or double quotes ( e.g., **c6=='+'** ) +- Filtering condition can include logical operators, but **make sure operators are all lower case** ( e.g., **(c1!='chrX' and c1!='chrY') or not c6=='+'** ) + +----- + +**Example** + +- **c1=='chr1'** selects lines in which the first column is chr1 +- **c3-c2<100*c4** selects lines where subtracting column 3 from column 2 is less than the value of column 4 times 100 +- **len(c2.split(',')) < 4** will select lines where the second column has less than four comma separated elements +- **c2>=1** selects lines in which the value of column 2 is greater than or equal to 1 +- Numbers should not contain commas - **c2<=44,554,350** will not work, but **c2<=44554350** will +- Some words in the data can be used, but must be single or double quoted ( e.g., **c3=='exon'** ) + + + + From baa8ef3811f45542a584f37bc2654381059eec99 Mon Sep 17 00:00:00 2001 From: Ahmed Awan Date: Mon, 16 Oct 2023 11:38:15 -0500 Subject: [PATCH 031/651] visually indicate currently viewed/edited dataset --- .../History/Content/ContentOptions.vue | 184 +++++++++--------- 1 file changed, 96 insertions(+), 88 deletions(-) diff --git a/client/src/components/History/Content/ContentOptions.vue b/client/src/components/History/Content/ContentOptions.vue index 3e1513da6961..7450737f5cd1 100644 --- a/client/src/components/History/Content/ContentOptions.vue +++ b/client/src/components/History/Content/ContentOptions.vue @@ -1,3 +1,90 @@ + + - From b552396749e9c120687540003b535a670706f450 Mon Sep 17 00:00:00 2001 From: francoismg <46478217+francoismg@users.noreply.github.com> Date: Tue, 31 Oct 2023 13:58:39 +0100 Subject: [PATCH 034/651] fix for folder location --- config/plugins/visualizations/fits_image_viewer/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/plugins/visualizations/fits_image_viewer/package.json b/config/plugins/visualizations/fits_image_viewer/package.json index f895a17bf537..efa0dd67be39 100644 --- a/config/plugins/visualizations/fits_image_viewer/package.json +++ b/config/plugins/visualizations/fits_image_viewer/package.json @@ -11,6 +11,6 @@ "parcel-bundler": "^1.4.1" }, "scripts": { - "build": "cp -r node_modules/aladin-lite-galaxy static/dist && parcel build src/script.js -d static" + "build": "cp -r node_modules/aladin-lite-galaxy/. static/dist/aladin-lite-galaxy && parcel build src/script.js -d static" } } From 077393641b2cc4c4b6ba6adda0f48601dedba817 Mon Sep 17 00:00:00 2001 From: francoismg <46478217+francoismg@users.noreply.github.com> Date: Tue, 31 Oct 2023 13:59:48 +0100 Subject: [PATCH 035/651] added cdn + fallback --- .../fits_image_viewer/src/script.js | 36 +++++++++++++++---- 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/config/plugins/visualizations/fits_image_viewer/src/script.js b/config/plugins/visualizations/fits_image_viewer/src/script.js index 1568f1408465..3cebefd5b8e7 100644 --- a/config/plugins/visualizations/fits_image_viewer/src/script.js +++ b/config/plugins/visualizations/fits_image_viewer/src/script.js @@ -1,7 +1,31 @@ -let aladin; +var aladin; -A.init.then(() => { - aladin = A.aladin('#aladin-lite-div', {showCooGridControl: true}); - aladin.displayFITS(file_url) - aladin.showCooGrid(true); -}); \ No newline at end of file +function initializeAladinLite() { + A.init.then(() => { + aladin = A.aladin('#aladin-lite-div', {showCooGridControl: true}); + aladin.displayFITS(fileUrl) + aladin.showCooGrid(true); + }); +} + +function localScriptLoadingError() { + addScriptToHead(appRoot+aladinLiteScriptAlternativeLocation); +} + +function cdnLoadingError() { + addScriptToHead(appRoot+aladinLiteScriptLocation, localScriptLoadingError); +} + +function addScriptToHead(url, onerrorFunction) { + const scriptToAdd = document.createElement("script"); + scriptToAdd.onload = initializeAladinLite; + + if(onerrorFunction) { + scriptToAdd.onerror = onerrorFunction + } + + document.head.appendChild(scriptToAdd); + scriptToAdd.src = url; +} + +addScriptToHead(aladinLiteCDNUrl, cdnLoadingError); From dcbcccc31014f251f5f3f8a3781bde5a0ec4e59a Mon Sep 17 00:00:00 2001 From: francoismg <46478217+francoismg@users.noreply.github.com> Date: Tue, 31 Oct 2023 14:02:47 +0100 Subject: [PATCH 036/651] added cdn and fallback urls + refactor --- .../fits_image_viewer/templates/fits_image_viewer.mako | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/config/plugins/visualizations/fits_image_viewer/templates/fits_image_viewer.mako b/config/plugins/visualizations/fits_image_viewer/templates/fits_image_viewer.mako index 63a8cb17e731..064171af8b45 100644 --- a/config/plugins/visualizations/fits_image_viewer/templates/fits_image_viewer.mako +++ b/config/plugins/visualizations/fits_image_viewer/templates/fits_image_viewer.mako @@ -13,15 +13,19 @@ ${h.stylesheet_link( app_root + 'style.css' )} - ${h.javascript_link( app_root + 'dist/aladin-lite-galaxy/aladin.js' )}
FITS aladin viewer : ${hda.name | h}
+ ${h.javascript_link( app_root + 'script.js' )} - \ No newline at end of file + From cf6c9805037d76be87f7ad33978f7381fc264336 Mon Sep 17 00:00:00 2001 From: Denys SAVCHENKO Date: Thu, 2 Nov 2023 14:04:11 +0100 Subject: [PATCH 037/651] fix no such directory in fits viewer build --- config/plugins/visualizations/fits_image_viewer/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/plugins/visualizations/fits_image_viewer/package.json b/config/plugins/visualizations/fits_image_viewer/package.json index efa0dd67be39..10799d30d02e 100644 --- a/config/plugins/visualizations/fits_image_viewer/package.json +++ b/config/plugins/visualizations/fits_image_viewer/package.json @@ -11,6 +11,6 @@ "parcel-bundler": "^1.4.1" }, "scripts": { - "build": "cp -r node_modules/aladin-lite-galaxy/. static/dist/aladin-lite-galaxy && parcel build src/script.js -d static" + "build": "mkdir -p static/dist; cp -r node_modules/aladin-lite-galaxy static/dist/. && parcel build src/script.js -d static" } } From 15d7da0f7f296b15b7ad2487fa511bf832f4b6b7 Mon Sep 17 00:00:00 2001 From: Ahmed Awan Date: Tue, 7 Nov 2023 16:06:46 -0600 Subject: [PATCH 038/651] fix jest fails caused by `DatasetDetails` id is `undefined` --- .../components/History/Content/ContentItem.test.js | 4 ++++ .../src/components/History/Content/ContentItem.vue | 2 +- .../History/Content/GenericElement.test.js | 6 ++++++ client/src/components/History/HistoryView.test.js | 10 +++++----- .../components/JobParameters/JobParameters.test.ts | 14 +++++++++++--- 5 files changed, 27 insertions(+), 9 deletions(-) diff --git a/client/src/components/History/Content/ContentItem.test.js b/client/src/components/History/Content/ContentItem.test.js index e7c0748fcb48..fba74e873824 100644 --- a/client/src/components/History/Content/ContentItem.test.js +++ b/client/src/components/History/Content/ContentItem.test.js @@ -3,13 +3,16 @@ import { mount } from "@vue/test-utils"; import { updateContentFields } from "components/History/model/queries"; import { PiniaVuePlugin } from "pinia"; import { getLocalVue } from "tests/jest/helpers"; +import VueRouter from "vue-router"; import ContentItem from "./ContentItem"; jest.mock("components/History/model/queries"); const localVue = getLocalVue(); +localVue.use(VueRouter); localVue.use(PiniaVuePlugin); +const router = new VueRouter(); // mock queries updateContentFields.mockImplementation(async () => {}); @@ -48,6 +51,7 @@ describe("ContentItem", () => { }, }, pinia: createTestingPinia(), + router, }); }); diff --git a/client/src/components/History/Content/ContentItem.vue b/client/src/components/History/Content/ContentItem.vue index b8022e68da8d..2fb25d0abd72 100644 --- a/client/src/components/History/Content/ContentItem.vue +++ b/client/src/components/History/Content/ContentItem.vue @@ -89,7 +89,7 @@ { let wrapper; @@ -54,6 +59,7 @@ describe("GenericElement", () => { }, }, localVue, + router, }); }); diff --git a/client/src/components/History/HistoryView.test.js b/client/src/components/History/HistoryView.test.js index ad880b5792ca..7a0ef49a8fe5 100644 --- a/client/src/components/History/HistoryView.test.js +++ b/client/src/components/History/HistoryView.test.js @@ -67,6 +67,7 @@ async function createWrapper(localVue, currentUserId, history) { localVue, stubs: { icon: { template: "
" }, + ContentItem: true, }, provide: { store: { @@ -119,14 +120,13 @@ describe("History center panel View", () => { // parts of the layout that should be similar for all cases expectCorrectLayout(wrapper); - // all history items, make sure all show up with hids and names - const historyItems = wrapper.findAll(".content-item"); + // make sure all history items show up + const historyItems = wrapper.findAll("contentitem-stub"); expect(historyItems.length).toBe(10); for (let i = 0; i < historyItems.length; i++) { const hid = historyItems.length - i; - const itemHeader = historyItems.at(i).find("[data-description='content item header info']"); - const headerText = `${hid}: Dataset ${hid}`; - expect(itemHeader.text()).toBe(headerText); + expect(historyItems.at(i).attributes("id")).toBe(`${hid}`); + expect(historyItems.at(i).attributes("name")).toBe(`Dataset ${hid}`); } }); diff --git a/client/src/components/JobParameters/JobParameters.test.ts b/client/src/components/JobParameters/JobParameters.test.ts index e0f6c04e669b..5a1887356e1a 100644 --- a/client/src/components/JobParameters/JobParameters.test.ts +++ b/client/src/components/JobParameters/JobParameters.test.ts @@ -46,6 +46,7 @@ describe("JobParameters/JobParameters.vue", () => { propsData, stubs: { DatasetProvider: DatasetProvider, + ContentItem: true, }, pinia, }); @@ -54,12 +55,18 @@ describe("JobParameters/JobParameters.vue", () => { const checkTableParameter = ( element: Wrapper, expectedTitle: string, - expectedValue: string, + expectedValue: string | { hid: number; name: string }, link?: string ) => { const tds = element.findAll("td"); expect(tds.at(0).text()).toBe(expectedTitle); - expect(tds.at(1).text()).toContain(expectedValue); + if (typeof expectedValue === "string") { + expect(tds.at(1).text()).toContain(expectedValue); + } else { + const contentItem = tds.at(1).find("contentitem-stub"); + expect(contentItem.attributes("id")).toBe(`${expectedValue.hid}`); + expect(contentItem.attributes("name")).toBe(expectedValue.name); + } if (link) { const a_element = tds.at(1).find("a"); expect(a_element.attributes("href")).toBe(link); @@ -74,7 +81,7 @@ describe("JobParameters/JobParameters.vue", () => { expect(elements.length).toBe(3); checkTableParameter(elements.at(0), "Add this value", "22", undefined); - checkTableParameter(elements.at(1), linkParam.text, `${raw.hid}: ${raw.name}`, undefined); + checkTableParameter(elements.at(1), linkParam.text, { hid: raw.hid, name: raw.name }, undefined); checkTableParameter(elements.at(2), "Iterate?", "NO", undefined); }); @@ -89,6 +96,7 @@ describe("JobParameters/JobParameters.vue", () => { propsData, stubs: { DatasetProvider: DatasetProvider, + ContentItem: true, }, pinia, }); From ec32669cf51bea5882316d84657fc2224a5976c5 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Wed, 8 Nov 2023 10:38:42 +0100 Subject: [PATCH 039/651] Change log level for duplicate data table entries to warning Fixes https://github.com/galaxyproject/galaxy/issues/16987, though there's probably something to be done for loc files being loaded from tools. At the minimum this is a versioning issue, if a new version of featureCounts comes with a new built-in reference then an old version shouldn't use it, and vice-versa. Seems like a hard problem to solve. --- lib/galaxy/tool_util/data/__init__.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/tool_util/data/__init__.py b/lib/galaxy/tool_util/data/__init__.py index 5e2fdfb7908b..927841d9004c 100644 --- a/lib/galaxy/tool_util/data/__init__.py +++ b/lib/galaxy/tool_util/data/__init__.py @@ -250,6 +250,11 @@ def add_entries( self.add_entry( entry, allow_duplicates=allow_duplicates, persist=persist, entry_source=entry_source, **kwd ) + except MessageException as e: + if e.type == "warning": + log.warning(str(e)) + else: + log.error(str(e)) except Exception as e: log.error(str(e)) return self._loaded_content_version @@ -686,7 +691,8 @@ def _add_entry( self.data.append(fields) else: raise MessageException( - f"Attempted to add fields ({fields}) to data table '{self.name}', but this entry already exists and allow_duplicates is False." + f"Attempted to add fields ({fields}) to data table '{self.name}', but this entry already exists and allow_duplicates is False.", + type="warning", ) else: raise MessageException( From c655d361966c9784e403d60e1cd2e9b7d06fa66c Mon Sep 17 00:00:00 2001 From: Dannon Baker Date: Wed, 8 Nov 2023 09:56:46 -0500 Subject: [PATCH 040/651] explicitly specify operation so tool_panels are included in apispec & doc --- lib/galaxy/webapps/galaxy/buildapp.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/webapps/galaxy/buildapp.py b/lib/galaxy/webapps/galaxy/buildapp.py index 6dcd4c291613..5048abcc36c8 100644 --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -367,8 +367,10 @@ def populate_api_routes(webapp, app): # ====== TOOLS API ====== # ======================= - webapp.mapper.connect("/api/tool_panels", action="panel_views", controller="tools") - webapp.mapper.connect("/api/tool_panels/{view}", action="panel_view", controller="tools") + webapp.mapper.connect("/api/tool_panels", action="panel_views", controller="tools", conditions=dict(method=["GET"])) + webapp.mapper.connect( + "/api/tool_panels/{view}", action="panel_view", controller="tools", conditions=dict(method=["GET"]) + ) webapp.mapper.connect("/api/tools/all_requirements", action="all_requirements", controller="tools") webapp.mapper.connect("/api/tools/error_stack", action="error_stack", controller="tools") From 3427f165bbe178de2848d9952ef009eeea13b5ed Mon Sep 17 00:00:00 2001 From: hujambo-dunia Date: Wed, 8 Nov 2023 13:59:23 -0500 Subject: [PATCH 041/651] Upgrade component to remove Bootstrap dependency (passed dev QA: on Chrome, Firefox, Safari) --- client/src/components/Upload/DragAndDropModal.vue | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/src/components/Upload/DragAndDropModal.vue b/client/src/components/Upload/DragAndDropModal.vue index be56d8305bb5..5fb4962e35f9 100644 --- a/client/src/components/Upload/DragAndDropModal.vue +++ b/client/src/components/Upload/DragAndDropModal.vue @@ -29,9 +29,9 @@ function onDrop(event) { diff --git a/client/src/stores/toolStore.ts b/client/src/stores/toolStore.ts index 122b07db1a25..8c564510fb66 100644 --- a/client/src/stores/toolStore.ts +++ b/client/src/stores/toolStore.ts @@ -62,6 +62,15 @@ export interface FilterSettings { help?: string; } +export interface PanelView { + id: string; + model_class: string; + name: string; + description: string; + view_type: string; + searchable: boolean; +} + export const useToolStore = defineStore("toolStore", () => { const currentPanelView: Ref = useUserLocalStorage("tool-store-view", ""); const toolsById = ref>({}); @@ -168,7 +177,7 @@ export const useToolStore = defineStore("toolStore", () => { } } - // Directly related to the ToolPanel + // Used to initialize the ToolPanel with the default panel view for this site. async function initCurrentPanelView(siteDefaultPanelView: string) { if (!loading.value && !isPanelPopulated.value) { loading.value = true; @@ -194,14 +203,22 @@ export const useToolStore = defineStore("toolStore", () => { async function setCurrentPanelView(panelView: string) { if (!loading.value) { - currentPanelView.value = panelView; if (panel.value[panelView]) { + currentPanelView.value = panelView; return; } loading.value = true; - const { data } = await axios.get(`${getAppRoot()}api/tool_panels/${panelView}`); - savePanelView(panelView, data); - loading.value = false; + try { + await axios.get(`${getAppRoot()}api/tool_panels/${panelView}`); + const { data } = await axios.get(`${getAppRoot()}api/tool_panels/${panelView}`); + currentPanelView.value = panelView; + savePanelView(panelView, data); + loading.value = false; + } catch (e) { + const error = e as { response: { data: { err_msg: string } } }; + console.error("Could not change panel view", error.response.data.err_msg ?? error.response); + loading.value = false; + } } } From 6cde33f6e691c2e07e9374d704cf645e21d83e6a Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 13 Nov 2023 15:48:10 +0100 Subject: [PATCH 081/651] Apply Nicola's suggestions --- lib/galaxy/model/__init__.py | 4 ++-- lib/galaxy/workflow/scheduling_manager.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 5c413a4af799..c2e201eb3f52 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -1635,9 +1635,9 @@ def all_entry_points_configured(self): all_configured = ep.configured and all_configured return all_configured - def set_state(self, state): + def set_state(self, state: JobState) -> bool: """ - Save state history. Returns True if stat has changed, else False + Save state history. Returns True if state has changed, else False. """ if self.state == state: # Nothing changed, no action needed diff --git a/lib/galaxy/workflow/scheduling_manager.py b/lib/galaxy/workflow/scheduling_manager.py index b8851def3e7a..31f4bca49ae1 100644 --- a/lib/galaxy/workflow/scheduling_manager.py +++ b/lib/galaxy/workflow/scheduling_manager.py @@ -333,7 +333,7 @@ def __attempt_schedule(self, invocation_id, workflow_scheduler): try: if workflow_invocation.state == workflow_invocation.states.CANCELLING: workflow_invocation.cancel_invocation_steps() - workflow_invocation.state = workflow_invocation.states.CANCELLED + workflow_invocation.mark_cancelled() session.commit() return False From db9d5b5f8bf3aba4b1d8007231185ca7643e728a Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Mon, 13 Nov 2023 15:59:53 +0100 Subject: [PATCH 082/651] Add new finished_states and move deleting to that --- lib/galaxy/model/__init__.py | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index c2e201eb3f52..ac2abf82df32 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -1367,7 +1367,10 @@ class Job(Base, JobLike, UsesCreateAndUpdateTime, Dictifiable, Serializable): states = JobState - terminal_states = [states.OK, states.ERROR, states.DELETED, states.DELETING] + # states that are not expected to change, except through admin action or re-scheduling + terminal_states = [states.OK, states.ERROR, states.DELETED] + # deleting state should not turn back into any of the non-ready states + finished_states = terminal_states + [states.DELETING] #: job states where the job hasn't finished and the model may still change non_ready_states = [ states.NEW, @@ -1392,13 +1395,7 @@ def running(self): @property def finished(self): - states = self.states - return self.state in [ - states.OK, - states.ERROR, - states.DELETING, - states.DELETED, - ] + return self.state in self.finished_states def io_dicts(self, exclude_implicit_outputs=False) -> IoDicts: inp_data: Dict[str, Optional[DatasetInstance]] = {da.name: da.dataset for da in self.input_datasets} @@ -1643,7 +1640,7 @@ def set_state(self, state: JobState) -> bool: # Nothing changed, no action needed return False session = object_session(self) - if session and self.id and state not in Job.terminal_states: + if session and self.id and state not in Job.finished_states: # generate statement that will not revert DELETING or DELETED back to anything non-terminal rval = session.execute( update(Job.table) @@ -8233,7 +8230,7 @@ def cancel_invocation_steps(self): sa_session.query(Job.id) .join(WorkflowInvocationStep) .filter(WorkflowInvocationStep.workflow_invocation_id == self.id) - .filter(~Job.table.c.state.in_(Job.terminal_states)) + .filter(~Job.table.c.state.in_(Job.finished_states)) .with_for_update() .scalar_subquery() ) @@ -8247,7 +8244,7 @@ def cancel_invocation_steps(self): WorkflowInvocationStep, WorkflowInvocationStep.implicit_collection_jobs_id == ImplicitCollectionJobs.id ) .filter(WorkflowInvocationStep.workflow_invocation_id == self.id) - .filter(~Job.table.c.state.in_(Job.terminal_states)) + .filter(~Job.table.c.state.in_(Job.finished_states)) .with_for_update() .subquery() ) From 194f5ad5df3e606a01addf7d8de249f1d51ef2eb Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 14 Nov 2023 09:56:13 +0100 Subject: [PATCH 083/651] Use sqlalchemy 2.0 query syntax --- lib/galaxy/jobs/handler.py | 14 ++++++-------- lib/galaxy/model/__init__.py | 11 ++++++----- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/galaxy/jobs/handler.py b/lib/galaxy/jobs/handler.py index 977293cb2117..359fdeddfc89 100644 --- a/lib/galaxy/jobs/handler.py +++ b/lib/galaxy/jobs/handler.py @@ -125,22 +125,20 @@ def __init__( def setup_query(self): if self.grab_model is model.Job: - grab_condition = self.grab_model.table.c.state == self.grab_model.states.NEW + grab_condition = self.grab_model.state == self.grab_model.states.NEW elif self.grab_model is model.WorkflowInvocation: - grab_condition = self.grab_model.table.c.state.in_( - (self.grab_model.states.NEW, self.grab_model.states.CANCELLING) - ) + grab_condition = self.grab_model.state.in_((self.grab_model.states.NEW, self.grab_model.states.CANCELLING)) else: raise NotImplementedError(f"Grabbing {self.grab_model.__name__} not implemented") subq = ( select(self.grab_model.id) .where( and_( - self.grab_model.table.c.handler.in_(self.self_handler_tags), + self.grab_model.handler.in_(self.self_handler_tags), grab_condition, ) ) - .order_by(self.grab_model.table.c.id) + .order_by(self.grab_model.id) ) if self.max_grab: subq = subq.limit(self.max_grab) @@ -148,11 +146,11 @@ def setup_query(self): subq = subq.with_for_update(skip_locked=True) self._grab_query = ( self.grab_model.table.update() - .where(self.grab_model.table.c.id.in_(subq)) + .where(self.grab_model.id.in_(subq)) .values(handler=self.app.config.server_name) ) if self._supports_returning: - self._grab_query = self._grab_query.returning(self.grab_model.table.c.id) + self._grab_query = self._grab_query.returning(self.grab_model.id) if self.handler_assignment_method == HANDLER_ASSIGNMENT_METHODS.DB_TRANSACTION_ISOLATION: self._grab_conn_opts["isolation_level"] = "SERIALIZABLE" log.info( diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index ac2abf82df32..3c19fe0384a6 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -1644,7 +1644,7 @@ def set_state(self, state: JobState) -> bool: # generate statement that will not revert DELETING or DELETED back to anything non-terminal rval = session.execute( update(Job.table) - .where(Job.table.c.id == self.id, ~Job.table.c.state.in_((Job.states.DELETING, Job.states.DELETED))) + .where(Job.id == self.id, ~Job.state.in_((Job.states.DELETING, Job.states.DELETED))) .values(state=state) ) if rval.rowcount == 1: @@ -8226,25 +8226,26 @@ def cancel(self): def cancel_invocation_steps(self): sa_session = object_session(self) + assert sa_session job_subq = ( - sa_session.query(Job.id) + select(Job.id) .join(WorkflowInvocationStep) .filter(WorkflowInvocationStep.workflow_invocation_id == self.id) - .filter(~Job.table.c.state.in_(Job.finished_states)) + .filter(~Job.state.in_(Job.finished_states)) .with_for_update() .scalar_subquery() ) sa_session.execute(update(Job.table).where(Job.id == job_subq).values({"state": Job.states.DELETING})) job_collection_subq = ( - sa_session.query(Job.id) + select(Job.id) .join(ImplicitCollectionJobsJobAssociation) .join(ImplicitCollectionJobs) .join( WorkflowInvocationStep, WorkflowInvocationStep.implicit_collection_jobs_id == ImplicitCollectionJobs.id ) .filter(WorkflowInvocationStep.workflow_invocation_id == self.id) - .filter(~Job.table.c.state.in_(Job.finished_states)) + .filter(~Job.state.in_(Job.finished_states)) .with_for_update() .subquery() ) From 571c511dcd66a1ac317dd350df170da9a0efdc94 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 14 Nov 2023 10:01:18 +0100 Subject: [PATCH 084/651] Use JobGrabber.get_grabbable_handler_assignment_method static method for consistency --- lib/galaxy/jobs/handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/jobs/handler.py b/lib/galaxy/jobs/handler.py index 359fdeddfc89..967c98a8c093 100644 --- a/lib/galaxy/jobs/handler.py +++ b/lib/galaxy/jobs/handler.py @@ -255,7 +255,7 @@ def __init__(self, app: MinimalManagerApp, dispatcher): name = "JobHandlerQueue.monitor_thread" self._init_monitor_thread(name, target=self.__monitor, config=app.config) self.job_grabber = None - handler_assignment_method = ItemGrabber.get_grabbable_handler_assignment_method( + handler_assignment_method = JobGrabber.get_grabbable_handler_assignment_method( self.app.job_config.handler_assignment_methods ) if handler_assignment_method: From 1b89fa3ab0f0828e31ffea12b7fdb488ba55b832 Mon Sep 17 00:00:00 2001 From: hujambo-dunia Date: Tue, 14 Nov 2023 07:09:15 -0500 Subject: [PATCH 085/651] Back-end fix for Tool Form page repeating elements/fields' MIN/DEFAULT parameter --- lib/galaxy/tools/parameters/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/galaxy/tools/parameters/__init__.py b/lib/galaxy/tools/parameters/__init__.py index 7fce2634dcb3..9b3eb6f43b36 100644 --- a/lib/galaxy/tools/parameters/__init__.py +++ b/lib/galaxy/tools/parameters/__init__.py @@ -526,9 +526,10 @@ def _populate_state_legacy( del group_state[:] while True: rep_prefix = "%s_%d" % (key, rep_index) + repeat_min_default = input.default if input.default > input.min else input.min if ( not any(incoming_key.startswith(rep_prefix) for incoming_key in incoming.keys()) - and rep_index >= input.min + and rep_index >= repeat_min_default ): break if rep_index < input.max: From 2a11420bc6548ba62bd1c7f1fc8cde9a8616be34 Mon Sep 17 00:00:00 2001 From: mvdbeek Date: Tue, 14 Nov 2023 10:58:29 +0100 Subject: [PATCH 086/651] Prevent cancelled invocation resetting back to new or ready --- lib/galaxy/model/__init__.py | 17 +++++++++++++++++ lib/galaxy/workflow/run.py | 4 ++-- lib/galaxy/workflow/scheduling_manager.py | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index 3c19fe0384a6..c437cb1ee65a 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -8216,6 +8216,23 @@ def active(self): states = WorkflowInvocation.states return self.state in [states.NEW, states.READY] + def set_state(self, state: "WorkflowInvocation.states"): + session = object_session(self) + priority_states = (WorkflowInvocation.states.CANCELLING, WorkflowInvocation.states.CANCELLED) + if session and self.id and state not in priority_states: + # generate statement that will not revert CANCELLING or CANCELLED back to anything non-terminal + session.execute( + update(WorkflowInvocation.table) + .where( + WorkflowInvocation.id == self.id, + or_(~WorkflowInvocation.state.in_(priority_states), WorkflowInvocation.state.is_(None)), + ) + .values(state=state) + ) + else: + # Not bound to a session, or setting cancelling/cancelled + self.state = state + def cancel(self): if self.state not in [WorkflowInvocation.states.CANCELLING, WorkflowInvocation.states.CANCELLED]: # No use cancelling workflow again, for all others we may still want to be able to cancel diff --git a/lib/galaxy/workflow/run.py b/lib/galaxy/workflow/run.py index 59d19a5c079a..776b4e525803 100644 --- a/lib/galaxy/workflow/run.py +++ b/lib/galaxy/workflow/run.py @@ -193,7 +193,7 @@ def invoke(self) -> Dict[int, Any]: log.debug( f"Workflow invocation [{workflow_invocation.id}] exceeded maximum number of seconds allowed for scheduling [{maximum_duration}], failing." ) - workflow_invocation.state = model.WorkflowInvocation.states.FAILED + workflow_invocation.set_state(model.WorkflowInvocation.states.FAILED) # All jobs ran successfully, so we can save now self.trans.sa_session.add(workflow_invocation) @@ -264,7 +264,7 @@ def invoke(self) -> Dict[int, Any]: state = model.WorkflowInvocation.states.READY else: state = model.WorkflowInvocation.states.SCHEDULED - workflow_invocation.state = state + workflow_invocation.set_state(state) # All jobs ran successfully, so we can save now self.trans.sa_session.add(workflow_invocation) diff --git a/lib/galaxy/workflow/scheduling_manager.py b/lib/galaxy/workflow/scheduling_manager.py index 31f4bca49ae1..3868e24c13a9 100644 --- a/lib/galaxy/workflow/scheduling_manager.py +++ b/lib/galaxy/workflow/scheduling_manager.py @@ -155,7 +155,7 @@ def shutdown(self): raise exception def queue(self, workflow_invocation, request_params, flush=True): - workflow_invocation.state = model.WorkflowInvocation.states.NEW + workflow_invocation.set_state(model.WorkflowInvocation.states.NEW) workflow_invocation.scheduler = request_params.get("scheduler", None) or self.default_scheduler_id sa_session = self.app.model.context sa_session.add(workflow_invocation) From 01e627fdd5cbc5dda10050c1a59ab08151c5c82e Mon Sep 17 00:00:00 2001 From: John Davis Date: Tue, 14 Nov 2023 09:26:22 -0500 Subject: [PATCH 087/651] Move access permissions checks to service; add typing --- lib/galaxy/managers/jobs.py | 10 ++------ lib/galaxy/webapps/galaxy/services/jobs.py | 27 +++++++++++++++++----- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/lib/galaxy/managers/jobs.py b/lib/galaxy/managers/jobs.py index 3e2513b566f0..fcb09f1b9a84 100644 --- a/lib/galaxy/managers/jobs.py +++ b/lib/galaxy/managers/jobs.py @@ -6,6 +6,7 @@ datetime, ) +import sqlalchemy from boltons.iterutils import remap from pydantic import ( BaseModel, @@ -27,7 +28,6 @@ from galaxy import model from galaxy.exceptions import ( - AdminRequiredException, ItemAccessibilityException, ObjectNotFound, RequestParameterInvalidException, @@ -104,7 +104,7 @@ def __init__(self, app: StructuredApp): self.app = app self.dataset_manager = DatasetManager(app) - def index_query(self, trans, payload: JobIndexQueryPayload): + def index_query(self, trans, payload: JobIndexQueryPayload) -> sqlalchemy.engine.Result: is_admin = trans.user_is_admin user_details = payload.user_details decoded_user_id = payload.user_id @@ -192,12 +192,6 @@ def add_search_criteria(stmt): stmt = stmt.filter(raw_text_column_filter(columns, term)) return stmt - if not is_admin: - if user_details: - raise AdminRequiredException("Only admins can index the jobs with user details enabled") - if decoded_user_id is not None and decoded_user_id != trans.user.id: - raise AdminRequiredException("Only admins can index the jobs of others") - stmt = select(Job) if is_admin: diff --git a/lib/galaxy/webapps/galaxy/services/jobs.py b/lib/galaxy/webapps/galaxy/services/jobs.py index 98ac5bbb8f9e..5a92d629603a 100644 --- a/lib/galaxy/webapps/galaxy/services/jobs.py +++ b/lib/galaxy/webapps/galaxy/services/jobs.py @@ -2,6 +2,7 @@ from typing import ( Any, Dict, + Optional, ) from galaxy import ( @@ -59,15 +60,18 @@ def index( ): security = trans.security is_admin = trans.user_is_admin - if payload.view == JobIndexViewEnum.admin_job_list: + view = payload.view + if view == JobIndexViewEnum.admin_job_list: payload.user_details = True user_details = payload.user_details - if payload.view == JobIndexViewEnum.admin_job_list and not is_admin: - raise exceptions.AdminRequiredException("Only admins can use the admin_job_list view") - query = self.job_manager.index_query(trans, payload) + decoded_user_id = payload.user_id + + if not is_admin: + self._check_nonadmin_access(view, user_details, decoded_user_id, trans.user.id) + + jobs = self.job_manager.index_query(trans, payload) out = [] - view = payload.view - for job in query.yield_per(model.YIELD_PER_ROWS): + for job in jobs.yield_per(model.YIELD_PER_ROWS): job_dict = job.to_dict(view, system_details=is_admin) j = security.encode_all_ids(job_dict, True) if view == JobIndexViewEnum.admin_job_list: @@ -77,3 +81,14 @@ def index( out.append(j) return out + + def _check_nonadmin_access( + self, view: str, user_details: bool, decoded_user_id: Optional[DecodedDatabaseIdField], trans_user_id: int + ): + """Verify admin-only resources are not being accessed.""" + if view == JobIndexViewEnum.admin_job_list: + raise exceptions.AdminRequiredException("Only admins can use the admin_job_list view") + if user_details: + raise exceptions.AdminRequiredException("Only admins can index the jobs with user details enabled") + if decoded_user_id is not None and decoded_user_id != trans_user_id: + raise exceptions.AdminRequiredException("Only admins can index the jobs of others") From 9a9bfab9a1b1be88f9a417696e1ceb15bdec5217 Mon Sep 17 00:00:00 2001 From: hujambo-dunia Date: Tue, 14 Nov 2023 09:51:54 -0500 Subject: [PATCH 088/651] Prettier --- client/src/components/Form/FormRepeat.vue | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/client/src/components/Form/FormRepeat.vue b/client/src/components/Form/FormRepeat.vue index 19a636ad2e83..8f4bb29fa87e 100644 --- a/client/src/components/Form/FormRepeat.vue +++ b/client/src/components/Form/FormRepeat.vue @@ -16,13 +16,13 @@ interface Input { title: string; min: number; default: number; - max: number|string; + max: number | string; help?: string; cache: Array>; } const maxRepeats = computed(() => { - return (typeof props.input.max === "number") ? props.input.cache?.length >= props.input.max : false; + return typeof props.input.max === "number" ? props.input.cache?.length >= props.input.max : false; }); const props = defineProps({ @@ -159,8 +159,6 @@ const { keyObject } = useKeyedObjects(); Insert {{ props.input.title || "Repeat" }} -
- Maximum number of {{ props.input.title || "Repeat" }} fields reached -
+
Maximum number of {{ props.input.title || "Repeat" }} fields reached
From 34ba9e57406b8792363212a7fcd7aea4583b8329 Mon Sep 17 00:00:00 2001 From: hujambo-dunia Date: Tue, 14 Nov 2023 10:04:39 -0500 Subject: [PATCH 089/651] ES lint - missed a double return --- client/src/components/Upload/DragAndDropModal.vue | 1 + 1 file changed, 1 insertion(+) diff --git a/client/src/components/Upload/DragAndDropModal.vue b/client/src/components/Upload/DragAndDropModal.vue index 5c899a152f5c..d09073b04761 100644 --- a/client/src/components/Upload/DragAndDropModal.vue +++ b/client/src/components/Upload/DragAndDropModal.vue @@ -2,6 +2,7 @@ import { useFileDrop } from "composables/fileDrop"; import { useGlobalUploadModal } from "composables/globalUploadModal"; import { computed, ref } from "vue"; + import { useToast } from "@/composables/toast"; const modalContentElement = ref(null); From 332f1aaadc5bfa32d1ee4b6146b37aa995761a82 Mon Sep 17 00:00:00 2001 From: hujambo-dunia Date: Tue, 14 Nov 2023 10:20:48 -0500 Subject: [PATCH 090/651] Variable naming convention --- lib/galaxy/tools/parameters/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/galaxy/tools/parameters/__init__.py b/lib/galaxy/tools/parameters/__init__.py index 9b3eb6f43b36..ef1f5d1a9d8a 100644 --- a/lib/galaxy/tools/parameters/__init__.py +++ b/lib/galaxy/tools/parameters/__init__.py @@ -526,10 +526,10 @@ def _populate_state_legacy( del group_state[:] while True: rep_prefix = "%s_%d" % (key, rep_index) - repeat_min_default = input.default if input.default > input.min else input.min + rep_min_default = input.default if input.default > input.min else input.min if ( not any(incoming_key.startswith(rep_prefix) for incoming_key in incoming.keys()) - and rep_index >= repeat_min_default + and rep_index >= rep_min_default ): break if rep_index < input.max: From 898c5c942d5b7fb8473bd854bc6ad827f928e1d8 Mon Sep 17 00:00:00 2001 From: "Yakubov, Sergey" Date: Tue, 14 Nov 2023 11:57:40 -0500 Subject: [PATCH 091/651] optimize object store pull added sync_cache parameter to the get_filename function that allows skip pulling to cache when not needed --- .../sample/object_store_conf.xml.sample | 16 +++++----- lib/galaxy/datatypes/protocols.py | 2 +- lib/galaxy/job_execution/setup.py | 5 +++- lib/galaxy/managers/datasets.py | 2 +- lib/galaxy/model/__init__.py | 16 +++++----- lib/galaxy/model/none_like.py | 2 +- lib/galaxy/objectstore/azure_blob.py | 11 +++++-- lib/galaxy/objectstore/caching.py | 7 ++++- lib/galaxy/objectstore/cloud.py | 9 ++++-- lib/galaxy/objectstore/irods.py | 14 +++++++-- lib/galaxy/objectstore/s3.py | 14 ++++++--- lib/galaxy/tool_util/data/__init__.py | 2 +- test/integration/objectstore/_base.py | 3 +- ...est_remote_objectstore_cache_operations.py | 29 +++++++++++++++++++ .../dynamic_tool_destination/mockGalaxy.py | 2 +- test/unit/app/tools/test_wrappers.py | 2 +- .../data/datatypes/test_connectivity_table.py | 2 +- test/unit/data/datatypes/test_validation.py | 2 +- test/unit/data/datatypes/util.py | 6 ++-- .../tool_util/data/test_tool_data_bundles.py | 2 +- 20 files changed, 105 insertions(+), 43 deletions(-) diff --git a/lib/galaxy/config/sample/object_store_conf.xml.sample b/lib/galaxy/config/sample/object_store_conf.xml.sample index b99028bc4f7a..e6eeeaeabcb3 100644 --- a/lib/galaxy/config/sample/object_store_conf.xml.sample +++ b/lib/galaxy/config/sample/object_store_conf.xml.sample @@ -135,7 +135,7 @@ - + @@ -150,7 +150,7 @@ - + @@ -166,7 +166,7 @@ - + @@ -181,7 +181,7 @@ - + @@ -196,7 +196,7 @@ - + @@ -211,7 +211,7 @@ - + @@ -226,7 +226,7 @@ - + @@ -319,7 +319,7 @@ - + This data is backed up using iRODs native hierarchal storage management mechanisms. The rules describing how data is stored and backed up in iRODS can be found in our institutional [iRODS documentation](https://irods.org/uploads/2018/Saum-SURFsara-Data_Archiving_in_iRODS-slides.pdf) diff --git a/lib/galaxy/datatypes/protocols.py b/lib/galaxy/datatypes/protocols.py index 93b2875a9795..150866a8adae 100644 --- a/lib/galaxy/datatypes/protocols.py +++ b/lib/galaxy/datatypes/protocols.py @@ -30,7 +30,7 @@ def extra_files_path(self): class HasFileName(Protocol): - def get_file_name(self) -> str: + def get_file_name(self, sync_cache=True) -> str: ... diff --git a/lib/galaxy/job_execution/setup.py b/lib/galaxy/job_execution/setup.py index de321944418e..1cff9c37c532 100644 --- a/lib/galaxy/job_execution/setup.py +++ b/lib/galaxy/job_execution/setup.py @@ -276,7 +276,10 @@ def compute_outputs(self) -> None: da_false_path = dataset_path_rewriter.rewrite_dataset_path(da.dataset, "output") mutable = da.dataset.dataset.external_filename is None dataset_path = DatasetPath( - da.dataset.dataset.id, da.dataset.get_file_name(), false_path=da_false_path, mutable=mutable + da.dataset.dataset.id, + da.dataset.get_file_name(sync_cache=False), + false_path=da_false_path, + mutable=mutable, ) job_outputs.append(JobOutput(da.name, da.dataset, dataset_path)) diff --git a/lib/galaxy/managers/datasets.py b/lib/galaxy/managers/datasets.py index 6734b0d2fd10..c639316e9f5e 100644 --- a/lib/galaxy/managers/datasets.py +++ b/lib/galaxy/managers/datasets.py @@ -273,7 +273,7 @@ def serialize_file_name(self, item, key, user=None, **context): # expensive: allow config option due to cost of operation if is_admin or self.app.config.expose_dataset_path: if not dataset.purged: - return dataset.get_file_name() + return dataset.get_file_name(sync_cache=False) self.skip() def serialize_extra_files_path(self, item, key, user=None, **context): diff --git a/lib/galaxy/model/__init__.py b/lib/galaxy/model/__init__.py index a0199e157afb..a3595acc569f 100644 --- a/lib/galaxy/model/__init__.py +++ b/lib/galaxy/model/__init__.py @@ -2516,8 +2516,8 @@ def __init__(self, dataset=None): self.dataset = dataset self.metadata = dict() - def get_file_name(self): - return self.dataset.get_file_name() + def get_file_name(self, sync_cache=True): + return self.dataset.get_file_name(sync_cache) def __eq__(self, other): return isinstance(other, FakeDatasetAssociation) and self.dataset == other.dataset @@ -3926,14 +3926,14 @@ def ensure_shareable(self): if not self.shareable: raise Exception(CANNOT_SHARE_PRIVATE_DATASET_MESSAGE) - def get_file_name(self): + def get_file_name(self, sync_cache=True): if self.purged: log.warning(f"Attempt to get file name of purged dataset {self.id}") return "" if not self.external_filename: object_store = self._assert_object_store_set() if object_store.exists(self): - file_name = object_store.get_filename(self) + file_name = object_store.get_filename(self, sync_cache=sync_cache) else: file_name = "" if not file_name and self.state not in (self.states.NEW, self.states.QUEUED): @@ -4387,10 +4387,10 @@ def set_dataset_state(self, state): state = property(get_dataset_state, set_dataset_state) - def get_file_name(self) -> str: + def get_file_name(self, sync_cache=True) -> str: if self.dataset.purged: return "" - return self.dataset.get_file_name() + return self.dataset.get_file_name(sync_cache=sync_cache) def set_file_name(self, filename: str): return self.dataset.set_file_name(filename) @@ -9168,7 +9168,7 @@ def update_from_file(self, file_name): alt_name=os.path.basename(self.get_file_name()), ) - def get_file_name(self): + def get_file_name(self, sync_cache=True): # Ensure the directory structure and the metadata file object exist try: da = self.history_dataset or self.library_dataset @@ -9183,7 +9183,7 @@ def get_file_name(self): if not object_store.exists(self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name): object_store.create(self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name) path = object_store.get_filename( - self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name + self, extra_dir="_metadata_files", extra_dir_at_root=True, alt_name=alt_name, sync_cache=sync_cache ) return path except AttributeError: diff --git a/lib/galaxy/model/none_like.py b/lib/galaxy/model/none_like.py index d24a70329cda..7fa7175d7f3f 100644 --- a/lib/galaxy/model/none_like.py +++ b/lib/galaxy/model/none_like.py @@ -38,5 +38,5 @@ def __getattr__(self, name): def missing_meta(self): return False - def get_file_name(self): + def get_file_name(self, _sync_cache=True): return "None" diff --git a/lib/galaxy/objectstore/azure_blob.py b/lib/galaxy/objectstore/azure_blob.py index b72e6b9dbfd6..83c1d700e195 100644 --- a/lib/galaxy/objectstore/azure_blob.py +++ b/lib/galaxy/objectstore/azure_blob.py @@ -109,6 +109,7 @@ def __init__(self, config, config_dict): self.cache_size = cache_dict.get("size") or self.config.object_store_cache_size self.staging_path = cache_dict.get("path") or self.config.object_store_cache_path + self.cache_updated_data = cache_dict.get("cache_updated_data", True) self._initialize() @@ -136,6 +137,7 @@ def to_dict(self): "cache": { "size": self.cache_size, "path": self.staging_path, + "cache_updated_data": self.cache_updated_data, }, } ) @@ -501,12 +503,15 @@ def _get_filename(self, obj, **kwargs): base_dir = kwargs.get("base_dir", None) dir_only = kwargs.get("dir_only", False) obj_dir = kwargs.get("obj_dir", False) + sync_cache = kwargs.get("sync_cache", True) # for JOB_WORK directory if base_dir and dir_only and obj_dir: return os.path.abspath(rel_path) cache_path = self._get_cache_path(rel_path) + if not sync_cache: + return cache_path # S3 does not recognize directories as files so cannot check if those exist. # So, if checking dir only, ensure given dir exists in cache and return # the expected cache path. @@ -515,8 +520,8 @@ def _get_filename(self, obj, **kwargs): # if not os.path.exists(cache_path): # os.makedirs(cache_path) # return cache_path - # Check if the file exists in the cache first - if self._in_cache(rel_path): + # Check if the file exists in the cache first, always pull if file size in cache is zero + if self._in_cache(rel_path) and (dir_only or os.path.getsize(self._get_cache_path(rel_path)) > 0): return cache_path # Check if the file exists in persistent storage and, if it does, pull it into cache elif self._exists(obj, **kwargs): @@ -542,7 +547,7 @@ def _update_from_file(self, obj, file_name=None, create=False, **kwargs): # Copy into cache cache_file = self._get_cache_path(rel_path) try: - if source_file != cache_file: + if source_file != cache_file and self.cache_updated_data: # FIXME? Should this be a `move`? shutil.copy2(source_file, cache_file) self._fix_permissions(cache_file) diff --git a/lib/galaxy/objectstore/caching.py b/lib/galaxy/objectstore/caching.py index 2097da2c680e..2df468681498 100644 --- a/lib/galaxy/objectstore/caching.py +++ b/lib/galaxy/objectstore/caching.py @@ -12,7 +12,10 @@ from typing_extensions import NamedTuple -from galaxy.util import nice_size +from galaxy.util import ( + nice_size, + string_as_bool, +) from galaxy.util.sleeper import Sleeper log = logging.getLogger(__name__) @@ -124,11 +127,13 @@ def parse_caching_config_dict_from_xml(config_xml): cache_size = float(c_xml.get("size", -1)) staging_path = c_xml.get("path", None) monitor = c_xml.get("monitor", "auto") + cache_updated_data = string_as_bool(c_xml.get("cache_updated_data", "True")) cache_dict = { "size": cache_size, "path": staging_path, "monitor": monitor, + "cache_updated_data": cache_updated_data, } else: cache_dict = {} diff --git a/lib/galaxy/objectstore/cloud.py b/lib/galaxy/objectstore/cloud.py index 4a995d833f66..888df3549f4f 100644 --- a/lib/galaxy/objectstore/cloud.py +++ b/lib/galaxy/objectstore/cloud.py @@ -66,6 +66,7 @@ def _config_to_dict(self): "cache": { "size": self.cache_size, "path": self.staging_path, + "cache_updated_data": self.cache_updated_data, }, } @@ -103,6 +104,7 @@ def __init__(self, config, config_dict): self.cache_size = cache_dict.get("size") or self.config.object_store_cache_size self.staging_path = cache_dict.get("path") or self.config.object_store_cache_path + self.cache_updated_data = cache_dict.get("cache_updated_data", True) self._initialize() @@ -621,12 +623,15 @@ def _get_filename(self, obj, **kwargs): dir_only = kwargs.get("dir_only", False) obj_dir = kwargs.get("obj_dir", False) rel_path = self._construct_path(obj, **kwargs) + sync_cache = kwargs.get("sync_cache", True) # for JOB_WORK directory if base_dir and dir_only and obj_dir: return os.path.abspath(rel_path) cache_path = self._get_cache_path(rel_path) + if not sync_cache: + return cache_path # S3 does not recognize directories as files so cannot check if those exist. # So, if checking dir only, ensure given dir exists in cache and return # the expected cache path. @@ -636,7 +641,7 @@ def _get_filename(self, obj, **kwargs): # os.makedirs(cache_path) # return cache_path # Check if the file exists in the cache first - if self._in_cache(rel_path): + if self._in_cache(rel_path) and (dir_only or os.path.getsize(self._get_cache_path(rel_path)) > 0): return cache_path # Check if the file exists in persistent storage and, if it does, pull it into cache elif self._exists(obj, **kwargs): @@ -663,7 +668,7 @@ def _update_from_file(self, obj, file_name=None, create=False, **kwargs): # Copy into cache cache_file = self._get_cache_path(rel_path) try: - if source_file != cache_file: + if source_file != cache_file and self.cache_updated_data: # FIXME? Should this be a `move`? shutil.copy2(source_file, cache_file) self._fix_permissions(cache_file) diff --git a/lib/galaxy/objectstore/irods.py b/lib/galaxy/objectstore/irods.py index 63b81e7a1a9d..aaf672ca0c4d 100644 --- a/lib/galaxy/objectstore/irods.py +++ b/lib/galaxy/objectstore/irods.py @@ -26,6 +26,7 @@ from galaxy.util import ( directory_hash_id, ExecutionTimer, + string_as_bool, umask_fix_perms, unlink, ) @@ -81,6 +82,7 @@ def parse_config_xml(config_xml): _config_xml_error("cache") cache_size = float(c_xml[0].get("size", -1)) staging_path = c_xml[0].get("path", None) + cache_updated_data = string_as_bool(c_xml.get("cache_updated_data", "True")) attrs = ("type", "path") e_xml = config_xml.findall("extra_dir") @@ -109,6 +111,7 @@ def parse_config_xml(config_xml): "cache": { "size": cache_size, "path": staging_path, + "cache_updated_data": cache_updated_data, }, "extra_dirs": extra_dirs, "private": DiskObjectStore.parse_private_from_config_xml(config_xml), @@ -142,6 +145,7 @@ def _config_to_dict(self): "cache": { "size": self.cache_size, "path": self.staging_path, + "cache_updated_data": self.cache_updated_data, }, } @@ -209,6 +213,7 @@ def __init__(self, config, config_dict): self.staging_path = cache_dict.get("path") or self.config.object_store_cache_path if self.staging_path is None: _config_dict_error("cache->path") + self.cache_updated_data = cache_dict.get("cache_updated_data", True) extra_dirs = {e["type"]: e["path"] for e in config_dict.get("extra_dirs", [])} if not extra_dirs: @@ -716,6 +721,7 @@ def _get_filename(self, obj, **kwargs): dir_only = kwargs.get("dir_only", False) obj_dir = kwargs.get("obj_dir", False) rel_path = self._construct_path(obj, **kwargs) + sync_cache = kwargs.get("sync_cache", True) # for JOB_WORK directory if base_dir and dir_only and obj_dir: @@ -723,6 +729,8 @@ def _get_filename(self, obj, **kwargs): return os.path.abspath(rel_path) cache_path = self._get_cache_path(rel_path) + if not sync_cache: + return cache_path # iRODS does not recognize directories as files so cannot check if those exist. # So, if checking dir only, ensure given dir exists in cache and return # the expected cache path. @@ -731,8 +739,8 @@ def _get_filename(self, obj, **kwargs): # if not os.path.exists(cache_path): # os.makedirs(cache_path) # return cache_path - # Check if the file exists in the cache first - if self._in_cache(rel_path): + # Check if the file exists in the cache first, always pull if file size in cache is zero + if self._in_cache(rel_path) and (dir_only or os.path.getsize(self._get_cache_path(rel_path)) > 0): log.debug("irods_pt _get_filename: %s", ipt_timer) return cache_path # Check if the file exists in persistent storage and, if it does, pull it into cache @@ -764,7 +772,7 @@ def _update_from_file(self, obj, file_name=None, create=False, **kwargs): # Copy into cache cache_file = self._get_cache_path(rel_path) try: - if source_file != cache_file: + if source_file != cache_file and self.cache_updated_data: # FIXME? Should this be a `move`? shutil.copy2(source_file, cache_file) self._fix_permissions(cache_file) diff --git a/lib/galaxy/objectstore/s3.py b/lib/galaxy/objectstore/s3.py index e0656864a939..41b4d58e96a9 100644 --- a/lib/galaxy/objectstore/s3.py +++ b/lib/galaxy/objectstore/s3.py @@ -145,6 +145,7 @@ def _config_to_dict(self): "cache": { "size": self.cache_size, "path": self.staging_path, + "cache_updated_data": self.cache_updated_data, }, } @@ -186,6 +187,7 @@ def __init__(self, config, config_dict): self.cache_size = cache_dict.get("size") or self.config.object_store_cache_size self.staging_path = cache_dict.get("path") or self.config.object_store_cache_path + self.cache_updated_data = cache_dict.get("cache_updated_data", True) extra_dirs = {e["type"]: e["path"] for e in config_dict.get("extra_dirs", [])} self.extra_dirs.update(extra_dirs) @@ -634,7 +636,7 @@ def _delete(self, obj, entire_dir=False, **kwargs): def _get_data(self, obj, start=0, count=-1, **kwargs): rel_path = self._construct_path(obj, **kwargs) # Check cache first and get file if not there - if not self._in_cache(rel_path): + if not self._in_cache(rel_path) or os.path.getsize(self._get_cache_path(rel_path)) == 0: self._pull_into_cache(rel_path) # Read the file content from cache data_file = open(self._get_cache_path(rel_path)) @@ -647,6 +649,8 @@ def _get_filename(self, obj, **kwargs): base_dir = kwargs.get("base_dir", None) dir_only = kwargs.get("dir_only", False) obj_dir = kwargs.get("obj_dir", False) + sync_cache = kwargs.get("sync_cache", True) + rel_path = self._construct_path(obj, **kwargs) # for JOB_WORK directory @@ -654,6 +658,8 @@ def _get_filename(self, obj, **kwargs): return os.path.abspath(rel_path) cache_path = self._get_cache_path(rel_path) + if not sync_cache: + return cache_path # S3 does not recognize directories as files so cannot check if those exist. # So, if checking dir only, ensure given dir exists in cache and return # the expected cache path. @@ -662,8 +668,8 @@ def _get_filename(self, obj, **kwargs): # if not os.path.exists(cache_path): # os.makedirs(cache_path) # return cache_path - # Check if the file exists in the cache first - if self._in_cache(rel_path): + # Check if the file exists in the cache first, always pull if file size in cache is zero + if self._in_cache(rel_path) and (dir_only or os.path.getsize(self._get_cache_path(rel_path)) > 0): return cache_path # Check if the file exists in persistent storage and, if it does, pull it into cache elif self._exists(obj, **kwargs): @@ -691,7 +697,7 @@ def _update_from_file(self, obj, file_name=None, create=False, **kwargs): # Copy into cache cache_file = self._get_cache_path(rel_path) try: - if source_file != cache_file: + if source_file != cache_file and self.cache_updated_data: # FIXME? Should this be a `move`? shutil.copy2(source_file, cache_file) self._fix_permissions(cache_file) diff --git a/lib/galaxy/tool_util/data/__init__.py b/lib/galaxy/tool_util/data/__init__.py index 7846188320d1..94e274379391 100644 --- a/lib/galaxy/tool_util/data/__init__.py +++ b/lib/galaxy/tool_util/data/__init__.py @@ -891,7 +891,7 @@ def extra_files_path_exists(self) -> bool: class OutputDataset(HasExtraFiles, Protocol): ext: str - def get_file_name(self) -> str: + def get_file_name(self, _sync_cache=True) -> str: ... diff --git a/test/integration/objectstore/_base.py b/test/integration/objectstore/_base.py index 272aa0fa1348..f85b77cb0bea 100644 --- a/test/integration/objectstore/_base.py +++ b/test/integration/objectstore/_base.py @@ -17,7 +17,7 @@ - + @@ -102,6 +102,7 @@ def handle_galaxy_config_kwds(cls, config): "port": OBJECT_STORE_PORT, "access_key": OBJECT_STORE_ACCESS_KEY, "secret_key": OBJECT_STORE_SECRET_KEY, + "cache_updated_data": False, } ) ) diff --git a/test/integration/objectstore/test_remote_objectstore_cache_operations.py b/test/integration/objectstore/test_remote_objectstore_cache_operations.py index 5ac29c6d9ff5..212ac517f888 100644 --- a/test/integration/objectstore/test_remote_objectstore_cache_operations.py +++ b/test/integration/objectstore/test_remote_objectstore_cache_operations.py @@ -3,6 +3,7 @@ import pytest +from galaxy_test.base.populators import skip_without_tool from ._base import ( BaseSwiftObjectStoreIntegrationTestCase, files_count, @@ -32,6 +33,34 @@ def test_cache_populated(self): self.upload_dataset() assert files_count(self.object_store_cache_path) == 1 + @skip_without_tool("create_2") + def test_cache_populated_after_tool_run(self): + history_id = self.dataset_populator.new_history() + running_response = self.dataset_populator.run_tool_raw( + "create_2", + {"sleep_time": 0}, + history_id, + ) + result = self.dataset_populator.wait_for_tool_run( + run_response=running_response, history_id=history_id, assert_ok=False + ).json() + details = self.dataset_populator.get_job_details(result["jobs"][0]["id"], full=True).json() + assert details["state"] == "ok" + # check files in the cache are empty after job finishes (they are created by Galaxy before the tool executes) + assert files_count(self.object_store_cache_path) == 2 + hda1_details = self.dataset_populator.get_history_dataset_details(history_id, assert_ok=True, hid=1) + hda2_details = self.dataset_populator.get_history_dataset_details(history_id, assert_ok=True, hid=2) + assert os.path.getsize(hda1_details["file_name"]) == 0 + assert os.path.getsize(hda2_details["file_name"]) == 0 + # get dataset content - this should trigger pulling to the cache + content1 = self.dataset_populator.get_history_dataset_content(history_id, dataset_id=hda1_details["dataset_id"]) + content2 = self.dataset_populator.get_history_dataset_content(history_id, dataset_id=hda2_details["dataset_id"]) + assert content1 == "1\n" + assert content2 == "2\n" + # check files in the cache are not empty now + assert os.path.getsize(hda1_details["file_name"]) != 0 + assert os.path.getsize(hda2_details["file_name"]) != 0 + def test_cache_repopulated(self): hda = self.upload_dataset() assert files_count(self.object_store_cache_path) == 1 diff --git a/test/unit/app/jobs/dynamic_tool_destination/mockGalaxy.py b/test/unit/app/jobs/dynamic_tool_destination/mockGalaxy.py index 4a618a34b032..919eecfc642c 100644 --- a/test/unit/app/jobs/dynamic_tool_destination/mockGalaxy.py +++ b/test/unit/app/jobs/dynamic_tool_destination/mockGalaxy.py @@ -43,7 +43,7 @@ def __init__(self, file_name, file_ext, value): def get_metadata(self): return self.metadata - def get_file_name(self, **_kwargs): + def get_file_name(self, _sync_cache=True): return self.file_name_ diff --git a/test/unit/app/tools/test_wrappers.py b/test/unit/app/tools/test_wrappers.py index e9185de69ef9..96c18253e873 100644 --- a/test/unit/app/tools/test_wrappers.py +++ b/test/unit/app/tools/test_wrappers.py @@ -312,7 +312,7 @@ def __init__(self): self.ext = MOCK_DATASET_EXT self.tags = [] - def get_file_name(self): + def get_file_name(self, _sync_cache=True): return MOCK_DATASET_PATH diff --git a/test/unit/data/datatypes/test_connectivity_table.py b/test/unit/data/datatypes/test_connectivity_table.py index 3a3037c27c7a..521ecb59c134 100644 --- a/test/unit/data/datatypes/test_connectivity_table.py +++ b/test/unit/data/datatypes/test_connectivity_table.py @@ -9,7 +9,7 @@ @pytest.fixture def dataset(): class MockDataset: - def get_file_name(self): + def get_file_name(self, _sync_cache=True): return os.path.join(galaxy_directory(), "test-data/1.ct") return MockDataset() diff --git a/test/unit/data/datatypes/test_validation.py b/test/unit/data/datatypes/test_validation.py index dc6ed5ac6623..0b4164dd5e18 100644 --- a/test/unit/data/datatypes/test_validation.py +++ b/test/unit/data/datatypes/test_validation.py @@ -60,5 +60,5 @@ def __init__(self, file_name, datatype): self.file_name_ = file_name self.datatype = datatype - def get_file_name(self): + def get_file_name(self, _sync_cache=True): return self.file_name_ diff --git a/test/unit/data/datatypes/util.py b/test/unit/data/datatypes/util.py index 881f847ab7aa..df12959a6b82 100644 --- a/test/unit/data/datatypes/util.py +++ b/test/unit/data/datatypes/util.py @@ -13,7 +13,7 @@ def __init__(self, file_name): self.purged = False self.file_name_ = file_name - def get_file_name(self): + def get_file_name(self, _sync_cache=True): return self.file_name_ def set_file_name(self, file_name): @@ -23,7 +23,7 @@ def set_file_name(self, file_name): class MockMetadata: file_name_: Optional[str] = None - def get_file_name(self): + def get_file_name(self, _sync_cache=True): return self.file_name_ def set_file_name(self, file_name): @@ -37,7 +37,7 @@ def __init__(self, id): self.dataset = None self.file_name_: Optional[str] = None - def get_file_name(self): + def get_file_name(self, _sync_cache=True): return self.file_name_ def set_file_name(self, file_name): diff --git a/test/unit/tool_util/data/test_tool_data_bundles.py b/test/unit/tool_util/data/test_tool_data_bundles.py index a884bf2486d1..f4926156a324 100644 --- a/test/unit/tool_util/data/test_tool_data_bundles.py +++ b/test/unit/tool_util/data/test_tool_data_bundles.py @@ -76,7 +76,7 @@ class OutputDataset: extra_files_path: str ext: str = "data_manager_json" - def get_file_name(self) -> str: + def get_file_name(self, _sync_cache=True) -> str: return self.file_name_ def extra_files_path_exists(self) -> bool: From f9182d9bd38f1464cad447615ddbaee45353ea47 Mon Sep 17 00:00:00 2001 From: "Yakubov, Sergey" Date: Tue, 14 Nov 2023 13:48:45 -0500 Subject: [PATCH 092/651] fix xml parsing for irods --- lib/galaxy/objectstore/irods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/galaxy/objectstore/irods.py b/lib/galaxy/objectstore/irods.py index aaf672ca0c4d..76f80aeb83b3 100644 --- a/lib/galaxy/objectstore/irods.py +++ b/lib/galaxy/objectstore/irods.py @@ -82,7 +82,7 @@ def parse_config_xml(config_xml): _config_xml_error("cache") cache_size = float(c_xml[0].get("size", -1)) staging_path = c_xml[0].get("path", None) - cache_updated_data = string_as_bool(c_xml.get("cache_updated_data", "True")) + cache_updated_data = string_as_bool(c_xml[0].get("cache_updated_data", "True")) attrs = ("type", "path") e_xml = config_xml.findall("extra_dir") From c7670e82357345ff8831e56e0ce1a370d7b3fbc7 Mon Sep 17 00:00:00 2001 From: Ahmed Awan Date: Tue, 14 Nov 2023 13:20:40 -0600 Subject: [PATCH 093/651] add view icon to `ToolPanel` header, add Favorites button back --- .../components/Panels/Menus/PanelViewMenu.vue | 2 +- .../Panels/Menus/PanelViewMenuItem.vue | 9 +--- client/src/components/Panels/ToolPanel.vue | 42 +++++++++++++++---- client/src/components/Panels/utilities.ts | 10 +++++ client/src/stores/toolStore.ts | 4 +- 5 files changed, 47 insertions(+), 20 deletions(-) diff --git a/client/src/components/Panels/Menus/PanelViewMenu.vue b/client/src/components/Panels/Menus/PanelViewMenu.vue index f1a689406b5b..50af093e2735 100644 --- a/client/src/components/Panels/Menus/PanelViewMenu.vue +++ b/client/src/components/Panels/Menus/PanelViewMenu.vue @@ -9,7 +9,7 @@ toggle-class="text-decoration-none" role="menu" aria-label="View all tool panel configurations" - class="tool-panel-dropdown" + class="tool-panel-dropdown w-100" size="sm"> diff --git a/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.vue b/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.vue index de9b6a1c8791..ecbf47311545 100644 --- a/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.vue +++ b/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.vue @@ -1,26 +1,15 @@ From 58655dc79a8374cdd03e8533bc378b27993bea85 Mon Sep 17 00:00:00 2001 From: Laila Los <44241786+ElectronicBlueberry@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:31:06 +0100 Subject: [PATCH 111/651] change vue.set for set --- client/src/stores/broadcastsStore.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/client/src/stores/broadcastsStore.ts b/client/src/stores/broadcastsStore.ts index 7a3cb2c56d6c..e4b986325e9d 100644 --- a/client/src/stores/broadcastsStore.ts +++ b/client/src/stores/broadcastsStore.ts @@ -1,5 +1,5 @@ import { defineStore } from "pinia"; -import Vue, { computed, ref } from "vue"; +import { computed, ref, set } from "vue"; import { fetchAllBroadcasts } from "@/api/notifications.broadcast"; import type { components } from "@/api/schema"; @@ -37,7 +37,7 @@ export const useBroadcastsStore = defineStore("broadcastsStore", () => { } function dismissBroadcast(broadcast: BroadcastNotification) { - Vue.set(dismissedBroadcasts.value, broadcast.id, { expiration_time: broadcast.expiration_time }); + set(dismissedBroadcasts.value, broadcast.id, { expiration_time: broadcast.expiration_time }); } function hasExpired(expirationTimeStr?: string) { From e8aefae09c0757f9612f9ae9b25439143bc74c8d Mon Sep 17 00:00:00 2001 From: Laila Los <44241786+ElectronicBlueberry@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:45:18 +0100 Subject: [PATCH 112/651] fix test selectors --- .../Broadcasts/BroadcastContainer.vue | 4 ++-- .../Broadcasts/BroadcastsOverlay.test.ts | 16 +++++++++------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue b/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue index a9cac078eafc..31cbf9d09e0b 100644 --- a/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue +++ b/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue @@ -152,7 +152,7 @@ function dismiss() { - + @@ -227,7 +227,7 @@ $margin: 1rem; } } - .cross { + .dismiss-button { font-size: 1.5rem; color: $border-color; diff --git a/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.test.ts b/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.test.ts index 210707179ffe..34d9f5213bee 100644 --- a/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.test.ts +++ b/client/src/components/Notifications/Broadcasts/BroadcastsOverlay.test.ts @@ -58,6 +58,8 @@ async function mountBroadcastsOverlayWith(broadcasts: BroadcastNotification[] = return wrapper; } +const messageCssSelector = ".broadcast-container .message"; + describe("BroadcastsOverlay.vue", () => { it("should not render anything when there is no broadcast", async () => { const wrapper = await mountBroadcastsOverlayWith(); @@ -68,19 +70,19 @@ describe("BroadcastsOverlay.vue", () => { it("should render only one broadcast at a time", async () => { const wrapper = await mountBroadcastsOverlayWith(FAKE_BROADCASTS); - expect(wrapper.findAll(".broadcast-message")).toHaveLength(1); - expect(wrapper.find(".broadcast-message").text()).toContain("Test message 1"); + expect(wrapper.findAll(messageCssSelector)).toHaveLength(1); + expect(wrapper.find(messageCssSelector).text()).toContain("Test message 1"); }); it("should render the next broadcast when the current one is dismissed", async () => { const wrapper = await mountBroadcastsOverlayWith(FAKE_BROADCASTS); - expect(wrapper.findAll(".broadcast-message")).toHaveLength(1); - expect(wrapper.find(".broadcast-message").text()).toContain("Test message 1"); + expect(wrapper.findAll(messageCssSelector)).toHaveLength(1); + expect(wrapper.find(messageCssSelector).text()).toContain("Test message 1"); - const dismissButton = wrapper.find("#dismiss-button"); + const dismissButton = wrapper.find(".dismiss-button"); await dismissButton.trigger("click"); - expect(wrapper.findAll(".broadcast-message")).toHaveLength(1); - expect(wrapper.find(".broadcast-message").text()).toContain("Test message 2"); + expect(wrapper.findAll(messageCssSelector)).toHaveLength(1); + expect(wrapper.find(messageCssSelector).text()).toContain("Test message 2"); }); }); From e60bf593046c5525fdabdfc089eed8e3ceff6f8c Mon Sep 17 00:00:00 2001 From: Laila Los <44241786+ElectronicBlueberry@users.noreply.github.com> Date: Wed, 15 Nov 2023 14:56:04 +0100 Subject: [PATCH 113/651] check currentPage bounds when broadcasts update --- .../Broadcasts/BroadcastContainer.vue | 27 +++++++++++++------ 1 file changed, 19 insertions(+), 8 deletions(-) diff --git a/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue b/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue index 31cbf9d09e0b..6c0bcc2fa80e 100644 --- a/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue +++ b/client/src/components/Notifications/Broadcasts/BroadcastContainer.vue @@ -10,7 +10,7 @@ import { } from "@fortawesome/free-solid-svg-icons"; import { FontAwesomeIcon } from "@fortawesome/vue-fontawesome"; import { BButton } from "bootstrap-vue"; -import { computed, ref } from "vue"; +import { computed, ref, watch } from "vue"; import { type components } from "@/api/schema"; import { useMarkdown } from "@/composables/markdown"; @@ -49,21 +49,25 @@ const multiple = computed(() => { }); const page = ref(0); + const currentPage = computed({ get: () => { return page.value; }, set: (newPage) => { - if (newPage < 0) { - page.value = sortedBroadcasts.value.length - 1; - } else if (newPage >= sortedBroadcasts.value.length) { - page.value = 0; - } else if (multiple.value) { - page.value = newPage; - } + page.value = newPage; + checkPageInBounds(); }, }); +function checkPageInBounds() { + if (page.value < 0) { + page.value = sortedBroadcasts.value.length - 1; + } else if (page.value >= sortedBroadcasts.value.length) { + page.value = 0; + } +} + const displayedBroadcast = computed( () => ensureDefined(sortedBroadcasts.value[currentPage.value]) as BroadcastNotification ); @@ -83,6 +87,13 @@ const sortedBroadcasts = computed(() => { } }); +watch( + () => sortedBroadcasts.value, + () => { + checkPageInBounds(); + } +); + function actionLinkBind(link: string) { if (link.startsWith("/")) { return { From 768d80497c269374b3551a6dc3661c3784e709f2 Mon Sep 17 00:00:00 2001 From: guerler Date: Mon, 16 Oct 2023 09:30:15 +0300 Subject: [PATCH 114/651] Replace visualization grid --- client/src/components/Grid/GridList.vue | 84 +++++++++++++++++++++++++ client/src/entry/analysis/router.js | 11 ++++ 2 files changed, 95 insertions(+) create mode 100644 client/src/components/Grid/GridList.vue diff --git a/client/src/components/Grid/GridList.vue b/client/src/components/Grid/GridList.vue new file mode 100644 index 000000000000..08d36b206cc7 --- /dev/null +++ b/client/src/components/Grid/GridList.vue @@ -0,0 +1,84 @@ + + + diff --git a/client/src/entry/analysis/router.js b/client/src/entry/analysis/router.js index c995cf368c40..fad078e787ca 100644 --- a/client/src/entry/analysis/router.js +++ b/client/src/entry/analysis/router.js @@ -8,6 +8,7 @@ import DatasetDetails from "components/DatasetInformation/DatasetDetails"; import DatasetError from "components/DatasetInformation/DatasetError"; import FormGeneric from "components/Form/FormGeneric"; import GridHistory from "components/Grid/GridHistory"; +import GridList from "components/Grid/GridList"; import GridShared from "components/Grid/GridShared"; import HistoryExportTasks from "components/History/Export/HistoryExport"; import HistoryPublished from "components/History/HistoryPublished"; @@ -453,6 +454,16 @@ export function getRouter(Galaxy) { modelClass: "Visualization", }), }, + { + path: "visualizations/list", + component: GridList, + props: (route) => ({ + url: "/api/visualizations", + item: "visualization", + plural: "Visualizations", + title: "Visualizations", + }), + }, { path: "visualizations/:actionId", component: GridShared, From 8d74a1fa32dbb13fc95843e22de75c5e9de1d44a Mon Sep 17 00:00:00 2001 From: guerler Date: Mon, 16 Oct 2023 10:20:24 +0300 Subject: [PATCH 115/651] Move visualizations api to fast api --- lib/galaxy/managers/visualizations.py | 22 +++++-- lib/galaxy/webapps/base/controller.py | 42 ------------ .../webapps/galaxy/api/visualizations.py | 53 +++++++-------- .../webapps/galaxy/services/visualizations.py | 65 +++++++++++++++++++ 4 files changed, 110 insertions(+), 72 deletions(-) diff --git a/lib/galaxy/managers/visualizations.py b/lib/galaxy/managers/visualizations.py index 59657a902853..748864fb1f17 100644 --- a/lib/galaxy/managers/visualizations.py +++ b/lib/galaxy/managers/visualizations.py @@ -6,8 +6,12 @@ """ import logging +from typing import ( + Dict, +) + from galaxy import model -from galaxy.managers import sharable +from galaxy.managers import (base, sharable) from galaxy.structured_app import MinimalManagerApp log = logging.getLogger(__name__) @@ -47,12 +51,22 @@ def __init__(self, app: MinimalManagerApp): self.visualization_manager = self.manager self.default_view = "summary" - self.add_view("summary", []) - self.add_view("detailed", []) + self.add_view("summary", [ + "id", + "title", + "type", + "dbkey", + ]) + self.add_view("detailed", [ + "create_time", + "update_time", + ], include_keys_from="summary",) def add_serializers(self): super().add_serializers() - self.serializers.update({}) + serializers: Dict[str, base.Serializer] = { + } + self.serializers.update(serializers) class VisualizationDeserializer(sharable.SharableModelDeserializer): diff --git a/lib/galaxy/webapps/base/controller.py b/lib/galaxy/webapps/base/controller.py index f3fc7603931a..dfbc90492520 100644 --- a/lib/galaxy/webapps/base/controller.py +++ b/lib/galaxy/webapps/base/controller.py @@ -631,48 +631,6 @@ def get_visualization(self, trans, id, check_ownership=True, check_accessible=Fa else: return self.security_check(trans, visualization, check_ownership, check_accessible) - def get_visualizations_by_user(self, trans, user): - """Return query results of visualizations filtered by a user.""" - stmt = select(model.Visualization).filter(model.Visualization.user == user).order_by(model.Visualization.title) - return trans.sa_session.scalars(stmt).all() - - def get_visualizations_shared_with_user(self, trans, user): - """Return query results for visualizations shared with the given user.""" - # The second `where` clause removes duplicates when a user shares with themselves. - stmt = ( - select(model.Visualization) - .join(model.VisualizationUserShareAssociation) - .where(model.VisualizationUserShareAssociation.user_id == user.id) - .where(model.Visualization.user_id != user.id) - .order_by(model.Visualization.title) - ) - return trans.sa_session.scalars(stmt).all() - - def get_published_visualizations(self, trans, exclude_user=None): - """ - Return query results for published visualizations optionally excluding the user in `exclude_user`. - """ - stmt = select(model.Visualization).filter(model.Visualization.published == true()) - if exclude_user: - stmt = stmt.filter(model.Visualization.user != exclude_user) - stmt = stmt.order_by(model.Visualization.title) - return trans.sa_session.scalars(stmt).all() - - # TODO: move into model (to_dict) - def get_visualization_summary_dict(self, visualization): - """ - Return a set of summary attributes for a visualization in dictionary form. - NOTE: that encoding ids isn't done here should happen at the caller level. - """ - # TODO: deleted - # TODO: importable - return { - "id": visualization.id, - "title": visualization.title, - "type": visualization.type, - "dbkey": visualization.dbkey, - } - def get_visualization_dict(self, visualization): """ Return a set of detailed attributes for a visualization in dictionary form. diff --git a/lib/galaxy/webapps/galaxy/api/visualizations.py b/lib/galaxy/webapps/galaxy/api/visualizations.py index 28db3c02dae8..91bc6057d1a4 100644 --- a/lib/galaxy/webapps/galaxy/api/visualizations.py +++ b/lib/galaxy/webapps/galaxy/api/visualizations.py @@ -7,8 +7,14 @@ import json import logging +from typing import ( + Any, + List, +) + from fastapi import ( Body, + Depends, Path, Response, status, @@ -22,6 +28,10 @@ from galaxy.managers.context import ProvidesUserContext from galaxy.model.base import transaction from galaxy.model.item_attrs import UsesAnnotations +from galaxy.schema import ( + FilterQueryParams, + SerializationParams, +) from galaxy.schema.fields import DecodedDatabaseIdField from galaxy.schema.schema import ( SetSlugPayload, @@ -39,6 +49,10 @@ DependsOnTrans, Router, ) +from galaxy.webapps.galaxy.api.common import ( + get_filter_query_params, + query_serialization_params, +) from galaxy.webapps.galaxy.services.visualizations import VisualizationsService log = logging.getLogger(__name__) @@ -54,6 +68,19 @@ class FastAPIVisualizations: service: VisualizationsService = depends(VisualizationsService) + @router.get( + "/api/visualizations", + summary="Returns visualizations for the current user.", + ) + def index( + self, + trans: ProvidesUserContext = DependsOnTrans, + serialization_params: SerializationParams = Depends(query_serialization_params), + ) -> List[Any]: + return self.service.index( + trans, serialization_params + ) + @router.get( "/api/visualizations/{id}/sharing", summary="Get the current sharing status of the given Page.", @@ -150,32 +177,6 @@ class VisualizationsController(BaseGalaxyAPIController, UsesVisualizationMixin, service: VisualizationsService = depends(VisualizationsService) - @expose_api - def index(self, trans: GalaxyWebTransaction, **kwargs): - """ - GET /api/visualizations: - """ - rval = [] - user = trans.user - - # TODO: search for: title, made by user, creation time range, type (vis name), dbkey, etc. - # TODO: limit, offset, order_by - # TODO: deleted - - # this is the default search - user's vis, vis shared with user, published vis - visualizations = self.get_visualizations_by_user(trans, user) - visualizations += self.get_visualizations_shared_with_user(trans, user) - visualizations += self.get_published_visualizations(trans, exclude_user=user) - # TODO: the admin case - everything - - for visualization in visualizations: - item = self.get_visualization_summary_dict(visualization) - item = trans.security.encode_dict_ids(item) - item["url"] = web.url_for("visualization", id=item["id"]) - rval.append(item) - - return rval - @expose_api def show(self, trans: GalaxyWebTransaction, id: str, **kwargs): """ diff --git a/lib/galaxy/webapps/galaxy/services/visualizations.py b/lib/galaxy/webapps/galaxy/services/visualizations.py index 557d4cd65082..26128aec9d68 100644 --- a/lib/galaxy/webapps/galaxy/services/visualizations.py +++ b/lib/galaxy/webapps/galaxy/services/visualizations.py @@ -1,11 +1,29 @@ import logging +from typing import ( + Any, + List, +) + +from sqlalchemy import ( + select, + true, +) +from galaxy.model import (Visualization, VisualizationUserShareAssociation) + +from galaxy.managers.context import ProvidesHistoryContext from galaxy.managers.notification import NotificationManager from galaxy.managers.visualizations import ( VisualizationManager, VisualizationSerializer, ) from galaxy.security.idencoding import IdEncodingHelper + +from galaxy.schema import ( + FilterQueryParams, + SerializationParams, +) + from galaxy.webapps.galaxy.services.base import ServiceBase from galaxy.webapps.galaxy.services.sharable import ShareableService @@ -32,3 +50,50 @@ def __init__( self.shareable_service = ShareableService(self.manager, self.serializer, notification_manager) # TODO: add the rest of the API actions here and call them directly from the API controller + + def index( + self, + trans: ProvidesHistoryContext, + serialization_params: SerializationParams, + ) -> List[Any]: + """ + Search visualizations using a query system and returns a list + containing visualization information. + """ + user = self.get_authenticated_user(trans) + visualizations = self.get_visualizations_by_user(trans, user) + visualizations += self.get_visualizations_shared_with_user(trans, user) + visualizations += self.get_published_visualizations(trans, exclude_user=user) + return [ + self.serializer.serialize_to_view( + content, user=user, trans=trans, **serialization_params.dict() + ) + for content in visualizations + ] + + def get_visualizations_by_user(self, trans, user): + """Return query results of visualizations filtered by a user.""" + stmt = select(Visualization).filter(Visualization.user == user).order_by(Visualization.title) + return trans.sa_session.scalars(stmt).all() + + def get_visualizations_shared_with_user(self, trans, user): + """Return query results for visualizations shared with the given user.""" + # The second `where` clause removes duplicates when a user shares with themselves. + stmt = ( + select(Visualization) + .join(VisualizationUserShareAssociation) + .where(VisualizationUserShareAssociation.user_id == user.id) + .where(Visualization.user_id != user.id) + .order_by(Visualization.title) + ) + return trans.sa_session.scalars(stmt).all() + + def get_published_visualizations(self, trans, exclude_user=None): + """ + Return query results for published visualizations optionally excluding the user in `exclude_user`. + """ + stmt = select(Visualization).filter(Visualization.published == true()) + if exclude_user: + stmt = stmt.filter(Visualization.user != exclude_user) + stmt = stmt.order_by(Visualization.title) + return trans.sa_session.scalars(stmt).all() \ No newline at end of file From 5b4387cafba0bc29cda0751085c0896bdffcb5e3 Mon Sep 17 00:00:00 2001 From: guerler Date: Mon, 16 Oct 2023 16:14:11 +0300 Subject: [PATCH 116/651] Add operations, date column and sharing details --- client/src/components/Grid/GridList.vue | 100 ++++++++++++------ client/src/entry/analysis/router.js | 3 +- lib/galaxy/managers/visualizations.py | 4 +- .../webapps/galaxy/api/visualizations.py | 3 +- .../webapps/galaxy/services/visualizations.py | 15 ++- 5 files changed, 86 insertions(+), 39 deletions(-) diff --git a/client/src/components/Grid/GridList.vue b/client/src/components/Grid/GridList.vue index 08d36b206cc7..fe1af77b10c2 100644 --- a/client/src/components/Grid/GridList.vue +++ b/client/src/components/Grid/GridList.vue @@ -1,46 +1,62 @@