From 4316f1dd708cef6cf9810b7f0992127fe231eeb1 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 15:52:21 +0800 Subject: [PATCH 01/80] modernize python-package, remove lint --- .github/workflows/python-package.yml | 30 +++++++++++----------------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 262df29..ccb5e89 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -1,7 +1,8 @@ -# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# This workflow will build msyd as a python package, and call the CLI interface to check the install worked +# adapted from # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Python package +name: Build python package on: push: @@ -16,29 +17,22 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} + cache: pip - name: Install dependencies run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest + python -m pip install --upgrade pip setuptools if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - # flake8 . --filename *.py,*.pyx --ignore E225,E226 --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . 
--filename *.py,*.pyx --ignore E225,E226 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - name: Test with pytest run: | - python setup.py install - pasy -h - pasy call -h - pasy view -h - #pytest + pip install . + msyd -h + msyd call -h + msyd view -h From bc7deb061dc276d904d58af9fa0ba0e8c23a7bd8 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:03:39 +0800 Subject: [PATCH 02/80] add lint workflow --- .github/workflows/lint.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/lint.yml diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..4c27960 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,30 @@ +# This workflow will lint python and cython files using pylint and cython, respectively +# adapted from https://github.com/actions/starter-workflows/blob/main/ci/pylint.yml +# Linting can be configured in pyproject.toml + +name: Lint using pylint and cython-lint + +on: + push: + branches: ["main", "dev"] + pull_request: + branches: ["main", "dev"] + +jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: "Set up Python 3.12" + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + - name: Install linters + - run: | + python -m pip install --upgrade pip + pip install pylint cython-lint + - name: Run lint + - run: | + pylint $(git ls-files '*.py') + cython-lint $(git ls-files '*.pyx') From 060e8632b0b72b9654085d61685e0f3988ca09b5 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:10:07 +0800 Subject: [PATCH 03/80] add conda for installing syri dependency --- .github/workflows/python-package.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 
ccb5e89..1f2a254 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -30,6 +30,9 @@ jobs: run: | python -m pip install --upgrade pip setuptools if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + # apparently the runners all have conda installed + # use that to install syri, as it isn't on pypi + $CONDA/bin/conda install syri >= 1.6.5 - name: Test with pytest run: | pip install . From 1d257951074bef236048a2319db87e71e56b8c15 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:10:40 +0800 Subject: [PATCH 04/80] fix indents in lint.yml --- .github/workflows/lint.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 4c27960..a7c3ff5 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -22,9 +22,9 @@ jobs: cache: pip - name: Install linters - run: | - python -m pip install --upgrade pip - pip install pylint cython-lint + python -m pip install --upgrade pip + pip install pylint cython-lint - name: Run lint - run: | - pylint $(git ls-files '*.py') - cython-lint $(git ls-files '*.pyx') + pylint $(git ls-files '*.py') + cython-lint $(git ls-files '*.pyx') From a6fd16aca7321a120afcf12b59893c1379f3d248 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:22:48 +0800 Subject: [PATCH 05/80] misc fixes --- .github/workflows/lint.yml | 5 +++-- .github/workflows/python-package.yml | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index a7c3ff5..3d60a18 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,7 +2,7 @@ # adapted from https://github.com/actions/starter-workflows/blob/main/ci/pylint.yml # Linting can be configured in pyproject.toml -name: Lint using pylint and 
cython-lint +name: "Lint using pylint and cython-lint" on: push: @@ -14,7 +14,8 @@ jobs: build: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 + - name: Checkout code + uses: actions/checkout@v4 - name: "Set up Python 3.12" uses: actions/setup-python@v5 with: diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 1f2a254..61320b0 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -29,10 +29,10 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi # apparently the runners all have conda installed # use that to install syri, as it isn't on pypi $CONDA/bin/conda install syri >= 1.6.5 + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest run: | pip install . From c30875f2149e80e6f28028645094362ca27519c9 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:24:31 +0800 Subject: [PATCH 06/80] fix indent --- .github/workflows/lint.yml | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 3d60a18..06650ae 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -14,18 +14,18 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: "Set up Python 3.12" - uses: actions/setup-python@v5 - with: - python-version: "3.12" - cache: pip - - name: Install linters - - run: | - python -m pip install --upgrade pip - pip install pylint cython-lint - - name: Run lint - - run: | - pylint $(git ls-files '*.py') - cython-lint $(git ls-files '*.pyx') + - name: Checkout code + uses: actions/checkout@v4 + - name: "Set up Python 3.12" + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip 
+ - name: Install linters + - run: | + python -m pip install --upgrade pip + pip install pylint cython-lint + - name: Run lint + - run: | + pylint $(git ls-files '*.py') + cython-lint $(git ls-files '*.pyx') From bfaa34989766e25f8057da189061802978aa92b4 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:27:27 +0800 Subject: [PATCH 07/80] fix indent properly --- .github/workflows/lint.yml | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 06650ae..e276db1 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -13,19 +13,19 @@ on: jobs: build: runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 - - name: "Set up Python 3.12" - uses: actions/setup-python@v5 - with: - python-version: "3.12" - cache: pip - - name: Install linters - - run: | - python -m pip install --upgrade pip - pip install pylint cython-lint - - name: Run lint - - run: | - pylint $(git ls-files '*.py') - cython-lint $(git ls-files '*.pyx') + steps: + - name: Checkout code + uses: actions/checkout@v4 + - name: "Set up Python 3.12" + uses: actions/setup-python@v5 + with: + python-version: "3.12" + cache: pip + - name: Install linters + - run: | + python -m pip install --upgrade pip + pip install pylint cython-lint + - name: Run lint + - run: | + pylint $(git ls-files '*.py') + cython-lint $(git ls-files '*.pyx') From 9097c42fc93dbce90c356a9970d4ce2f90e329c8 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:28:41 +0800 Subject: [PATCH 08/80] fix yaml --- .github/workflows/lint.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e276db1..b94511b 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ 
-22,10 +22,10 @@ jobs: python-version: "3.12" cache: pip - name: Install linters - - run: | + run: | python -m pip install --upgrade pip pip install pylint cython-lint - name: Run lint - - run: | + run: | pylint $(git ls-files '*.py') cython-lint $(git ls-files '*.pyx') From bc923c0cd6073814b5a2478f534f64e7255c6afe Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:31:17 +0800 Subject: [PATCH 09/80] don't lint test files --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index b94511b..af33b94 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -27,5 +27,5 @@ jobs: pip install pylint cython-lint - name: Run lint run: | - pylint $(git ls-files '*.py') + pylint $(git ls-files 'msyd/*.py') cython-lint $(git ls-files '*.pyx') From d2ae46ffdf6fffe5dff16c44fa1139613f822287 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 6 Dec 2024 16:32:17 +0800 Subject: [PATCH 10/80] use better name --- .github/workflows/lint.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index af33b94..c8ac7ff 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -11,7 +11,7 @@ on: branches: ["main", "dev"] jobs: - build: + lint: runs-on: ubuntu-latest steps: - name: Checkout code From 673fe7b017b6efd1449bde6993586e9c05ba5e6d Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:22:28 +0800 Subject: [PATCH 11/80] configure cython lint --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 076b2ab..ade40f7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,3 +33,6 @@ msyd = "msyd:main" [tool.setuptools.dynamic] version = {attr = 
"msyd.__version__"} +[tool.cython-lint] +max-line-length = 200 +ignore = ['E266', 'E265'] # don't lint comment style From 6504153a329b87d53e65ca3829d96ff49871f73e Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 11:52:16 +0800 Subject: [PATCH 12/80] make cigar.pyx pass lint --- msyd/pyxfiles/cigar.pyx | 55 +++++++++++++++-------------------------- 1 file changed, 20 insertions(+), 35 deletions(-) diff --git a/msyd/pyxfiles/cigar.pyx b/msyd/pyxfiles/cigar.pyx index 4c53b26..985b6f0 100644 --- a/msyd/pyxfiles/cigar.pyx +++ b/msyd/pyxfiles/cigar.pyx @@ -2,13 +2,8 @@ # -*- coding: utf-8 -*- # distutils: language = c++ # cython: language_level = 3 -import itertools -import logging import re -from cpython cimport array -import array - from libcpp.vector cimport vector from libcpp.unordered_set cimport unordered_set @@ -25,7 +20,7 @@ cdef: qryfwd = set(['M', 'I', 'S', '=', 'X']) cig_types = set(['M', '=', 'X', 'S', 'H', 'D', 'I', 'N']) cig_aln_types = set(['M', 'X', '=']) - cig_clips = set(['S', 'H', 'P', 'N']) # N is not clipping, but is ignored anyway. Really, it shouldn't even occur in alignments like these + cig_clips = set(['S', 'H', 'P', 'N']) # N is not clipping, but is ignored anyway. 
Really, it shouldn't even occur in alignments like these unordered_set[char] c_reffwd = unordered_set[char]([ord('M'), ord('D'), ord('N'), ord('='), ord('X')]) unordered_set[char] c_reffwd_noclip = unordered_set[char]([ord('M'), ord('D'), ord('='), ord('X')]) @@ -33,7 +28,8 @@ cdef: unordered_set[char] c_qryfwd_noclip = unordered_set[char]([ord('M'), ord('I'), ord('='), ord('X')]) unordered_set[char] c_cig_types = unordered_set[char]([ord('M'), ord('='), ord('X'), ord('S'), ord('H'), ord('D'), ord('I'), ord('N')]) unordered_set[char] c_cig_aln_types = unordered_set[char]([ord('M'), ord('X'), ord('=')]) - unordered_set[char] c_cig_clips = unordered_set[char]([ord('S'), ord('H'), ord('P'), ord('N')]) # N is not clipping, but is ignored anyway. Really, it shouldnord('t even occur in alignments like these + # N is not clipping, but is ignored anyway. shouldn't here anyway + unordered_set[char] c_cig_clips = unordered_set[char]([ord('S'), ord('H'), ord('P'), ord('N')]) bam_code_map = [ord('M'), ord('I'), ord('D'), ord('N'), ord('S'), ord('H'), ord('P'), ord('='), ord('X')] @@ -60,14 +56,14 @@ cdef vector[Cigt] cigt_from_string(str cg): logger.error("Tried to construct a Cigar object with invalid type") raise ValueError("Not a CIGAR type!") tups.push_back(Cigt(int(match[0]), ord(match[1]))) - + # free up unnecessarily reserved memory in case we were too pessimistic tups.shrink_to_fit() return tups # maybe implement cigar_from_full_string? -cpdef cigar_from_bam(bam): +cpdef cigar_from_bam(bam): """ Takes a List of Cigar tuples with BAM codes as input, returns as a Cigar struct. 
""" @@ -79,24 +75,16 @@ cpdef cigar_from_bam(bam): for tup in bam: assert(0 < tup[1] and tup[1] < 9) tups.push_back(Cigt(tup[0], bam_code_map[tup[1]])) - + return Cigar.__new__(Cigar, tups=tups) # small struct to contain the length and type of a cigar tuple cdef packed struct Cigt: -#cdef struct Cigt: # slower unsigned int n char t - -# got it working to not work with two arrays -# pretty sure this is faster, might try exact benchmark though - - cdef class Cigar: - #cdef array.array lens # array storing the lengths of each cigar tuple - #cdef array.array types # array storing the type of each cigar tuple cdef vector[Cigt] tups def __cinit__(self, tups=None): @@ -117,11 +105,11 @@ cdef class Cigar: cdef get_len_of_type(self, unordered_set[char] typeset): cdef unsigned int buf = 0 for tup in self.tups: - if typeset.count(tup.t): # contains method still not supported until C++20 + if typeset.count(tup.t): # contains method still not supported until C++20 buf += tup.n return buf - def get_identity(self):#, bint ref=True): + def get_identity(self): """ Returns the fraction of covered bases (of the reference/query) that are an exact match ('='). """ @@ -198,8 +186,7 @@ cdef class Cigar: """ cdef size_t start = 0 for tup in self.tups: - if not c_cig_clips.count(tup.t): # wtf is up here? why is this converted to a dict, but this is not done in get_len_of_type? - # must be something with the context? 
no idea + if not c_cig_clips.count(tup.t): break start += 1 @@ -239,14 +226,14 @@ cdef class Cigar: edrop, tmp = tmp.get_removed(e, ref=ref) return (sdrop, edrop, tmp) - # TODO maybe benchmark bints vs separate char's or argstruct or separate methods - cpdef get_removed(self, unsigned int n, bint ref=True, bint start=True, bint only_pos=False): #nogil + # TODO make nogil + cpdef get_removed(self, unsigned int n, bint ref=True, bint start=True, bint only_pos=False): """ If ref=True, removes from the 'start'/end of the QUERY strand until 'n' bases from the REFERENCE strand have been removed, if ref=False vice versa. :return: The number of bases deleted in the query/ref and a CIGAR with these bases removed. """ - if n == 0: # shortcut for a common path + if n == 0: # shortcut for a common path if only_pos: return 0 else: @@ -255,14 +242,14 @@ cdef class Cigar: if self.tups.empty(): logger.error("Trying to remove from an empty Cigar!") raise ValueError("empty Cigar!") - + cdef: - unsigned int ind = 0 # position currently being evaluated for skipping - unsigned int skip = 0 # bases skipped in the other sequence + unsigned int ind = 0 # position currently being evaluated for skipping + unsigned int skip = 0 # bases skipped in the other sequence # two sets containing the CIGAR codes incrementing one or the other strand - unordered_set[char] fwd = c_reffwd if ref else c_qryfwd + unordered_set[char] fwd = c_reffwd if ref else c_qryfwd unordered_set[char] altfwd = c_qryfwd if ref else c_reffwd - int rem = n # tally how much is still left to remove + int rem = n # tally how much is still left to remove Cigt cur = self.tups[ind] if start else self.tups[self.tups.size()-1] # loop and remove regions as long as the skip is more than one region @@ -280,20 +267,19 @@ cdef class Cigar: logger.error(f"tried to remove more than CIGAR length Params: n: {n}, start: {start}, ref: {ref}, Cigar len on ref/alt: {self.get_len(ref=ref)}, terminated at index {ind}") raise ValueError("tried to 
remove more than CIGAR length") - if altfwd.count(cur.t): # remove overadded value + # remove overadded value + if altfwd.count(cur.t): skip += rem if only_pos: return skip - # TODO try changing backing vector and only storing index - + cdef vector[Cigt] newtups = vector[Cigt]() newtups.reserve(self.tups.size() - ind + 1) if start: # if there is a remainder, add it to the front if rem < 0: newtups.push_back(Cigt(-rem, cur.t)) - # TODO check if this is cythonized efficiently for i in range(ind, self.tups.size()): newtups.push_back(self.tups[i]) else: @@ -306,7 +292,6 @@ cdef class Cigar: return (skip, Cigar(newtups)) - #TODO rewrite with copying for cython # def clean(self): # """ From f9df7ce3968efe9fd6f44cc81ea57f3ab35017a4 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 13:20:05 +0800 Subject: [PATCH 13/80] adjust linting --- .github/workflows/lint.yml | 2 +- pyproject.toml | 9 ++++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index c8ac7ff..9b695e5 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -28,4 +28,4 @@ jobs: - name: Run lint run: | pylint $(git ls-files 'msyd/*.py') - cython-lint $(git ls-files '*.pyx') + cython-lint --no-pycodestyle $(git ls-files '*.pyx') diff --git a/pyproject.toml b/pyproject.toml index ade40f7..ae5f563 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,4 +35,11 @@ version = {attr = "msyd.__version__"} [tool.cython-lint] max-line-length = 200 -ignore = ['E266', 'E265'] # don't lint comment style +#ignore = ['E266', 'E265'] # don't lint comment style + +[tool.pylint.'MESSAGES CONTROL'] +disable=['no-name-in-module'] + +[tool.pylint] +errors-only = 'True' +ignore = 'annotate_sv.py,io.py' From 539d2d516b91556af0bd2812a552efe10ea8603d Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 13:23:41 
+0800 Subject: [PATCH 14/80] comment out invalid line --- msyd/annotate_sv.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msyd/annotate_sv.py b/msyd/annotate_sv.py index cf75f10..6fa42dc 100644 --- a/msyd/annotate_sv.py +++ b/msyd/annotate_sv.py @@ -100,4 +100,4 @@ def concatsyriout(syrifins, qrynames): return # END -CP116280.1,784681,893258,OX291513.1,791184,899980,INV,IP-Evs-12 +#CP116280.1,784681,893258,OX291513.1,791184,899980,INV,IP-Evs-12 From cf4c8415b73a0c287c9a0975ca06afac5d8b1a04 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:06:41 +0800 Subject: [PATCH 15/80] make lint happy, clean up code --- msyd/pyxfiles/coords.pyx | 7 +++--- msyd/pyxfiles/imputation.pyx | 18 +++++++------- msyd/pyxfiles/intersection.pyx | 11 ++++----- msyd/pyxfiles/io.pyx | 44 ++++++++++++++++------------------ msyd/pyxfiles/multisyn.pyx | 2 -- msyd/pyxfiles/realignment.pyx | 27 ++++++++++----------- msyd/pyxfiles/varcmp.pyx | 2 +- msyd/pyxfiles/vcf.pyx | 33 +++++-------------------- pyproject.toml | 1 + 9 files changed, 57 insertions(+), 88 deletions(-) diff --git a/msyd/pyxfiles/coords.pyx b/msyd/pyxfiles/coords.pyx index 444a77a..8bcb093 100644 --- a/msyd/pyxfiles/coords.pyx +++ b/msyd/pyxfiles/coords.pyx @@ -4,8 +4,6 @@ # cython: language_level = 3 import functools -#import cython -import logging import msyd.util as util @@ -14,10 +12,11 @@ logger = util.CustomFormatter.getlogger(__name__) # these classes form a part of the general SV format # A position is specified by the organism, chromosome, haplotype and base position # A range takes a start and an end position. If the end < start, the range is defined as inverted -#TODO use proper cython types, e.g. char for haplo + # decorator to auto-implement __gt__ etc. from __lt__ and __eq__ -@functools.total_ordering # not sure how performant, TO/DO replace later? +# not sure how performant, TO/DO replace later? 
+@functools.total_ordering class Position: def __init__(self, org:str, chr:int, haplo:str, pos: int): self.org = org diff --git a/msyd/pyxfiles/imputation.pyx b/msyd/pyxfiles/imputation.pyx index 8c2e504..124cfb8 100644 --- a/msyd/pyxfiles/imputation.pyx +++ b/msyd/pyxfiles/imputation.pyx @@ -4,7 +4,7 @@ # cython: language_level = 3 ## The code in this file is currently not exposed on the CLI, and is largely untested. -## I left it as part of the msyd package, and it may be used via the CLI +## I left it here in case it may be useful in the future or to someone else. import msyd.cigar from msyd.cigar import Cigar#, cig_clips, cig_aln_types @@ -18,15 +18,13 @@ cdef cig_clips = set(['S', 'H', 'P', 'N']) # N is not clipping, but is ignored a logger = util.CustomFormatter.getlogger(__name__) -""" -TODOs -– write unit tests -– write frontend: - • maybe have a convenience function imputing everything given two bam files? - • maybe have a convenience function automatically detecting core/cross synteny from such imputed bams? - • maybe have a fucntion that automatically imputes along all multisyntenic regions? - => convenience functions may be moved to util -""" +# TODOs +# – write unit tests +# – write frontend: +# • maybe have a convenience function imputing everything given two bam files? +# • maybe have a convenience function automatically detecting core/cross synteny from such imputed bams? +# • maybe have a fucntion that automatically imputes along all multisyntenic regions? +# => convenience functions may be moved to util def impute_strings(strl: str, strr: str): """Convenience function performing the imputation on just two CIGAR strings. 
diff --git a/msyd/pyxfiles/intersection.pyx b/msyd/pyxfiles/intersection.pyx index 02ed44e..03478e2 100644 --- a/msyd/pyxfiles/intersection.pyx +++ b/msyd/pyxfiles/intersection.pyx @@ -5,18 +5,15 @@ import pandas as pd #import numpy as np -import copy import functools from collections import deque import multiprocessing -from cython.parallel import prange +#from cython.parallel import prange import msyd.io as io import msyd.util as util import msyd.cigar -from msyd.cigar import Cigar -from msyd.coords import Range, Position from msyd.multisyn import Multisyn cdef int MIN_SYN_THRESH = 30 @@ -166,9 +163,9 @@ def find_overlaps(left, right, only_core=False): ret = pd.DataFrame(data=list(ret))#sorted(list(ret))) # sorting shouldn't be necessary - total_len_left = sum(map(lambda x: len(x.ref), map(lambda x: x[1][0], left.iterrows()))) - total_len_right = sum(map(lambda x: len(x.ref), map(lambda x: x[1][0], right.iterrows()))) - total_len_ret = sum(map(lambda x: len(x.ref), map(lambda x: x[1][0], ret.iterrows()))) + #total_len_left = sum(map(lambda x: len(x.ref), map(lambda x: x[1][0], left.iterrows()))) + #total_len_right = sum(map(lambda x: len(x.ref), map(lambda x: x[1][0], right.iterrows()))) + #total_len_ret = sum(map(lambda x: len(x.ref), map(lambda x: x[1][0], ret.iterrows()))) #logger.debug(f"left orgs: {util.get_orgs_from_df(left)}, right orgs: {util.get_orgs_from_df(right)}, ret orgs: {util.get_orgs_from_df(ret)}") #logger.debug(f"left len: {total_len_left}, right len: {total_len_right}, ret len: {total_len_ret}") diff --git a/msyd/pyxfiles/io.pyx b/msyd/pyxfiles/io.pyx index 477d1f6..7e10598 100644 --- a/msyd/pyxfiles/io.pyx +++ b/msyd/pyxfiles/io.pyx @@ -5,9 +5,9 @@ import numpy as np import pandas as pd from scipy.stats import * +import pysam -from multiprocessing import Pool -from functools import partial +#from multiprocessing import Pool from collections import deque, defaultdict, OrderedDict from gzip import open as gzopen from gzip import BadGzipFile @@ 
-16,13 +16,11 @@ from collections import deque import sys import os import logging -import pysam -import re -from gc import collect +#import re cimport numpy as np -from msyd.coords import Range, read_psf_range +from msyd.coords import Range from msyd.multisyn import Multisyn from msyd.vars import SNV import msyd.util as util @@ -134,34 +132,34 @@ def samtocoords(f): bf = '{:012b}'.format(int(l[1])) - rs = int(l[3]) - re = rs - 1 + sum([i[0] for i in cgt if i[1] in ['X', '=', 'D']]) + rstart = int(l[3]) + rend = rstart - 1 + sum([i[0] for i in cgt if i[1] in ['X', '=', 'D']]) if bf[7] == '0': # forward alignment if cgt[0][1] == '=': - qs = 1 + qstart = 1 elif cgt[0][1] in ['S', 'H']: - qs = cgt[0][0] + 1 + qstart = cgt[0][0] + 1 else: print('ERROR: CIGAR string starting with non-matching base') - qe = qs - 1 + sum([i[0] for i in cgt if i[1] in ['X', '=', 'I']]) + qend = qstart - 1 + sum([i[0] for i in cgt if i[1] in ['X', '=', 'I']]) elif bf[7] == '1': # inverted alignment if cgt[-1][1] == '=': - qs = 1 + qstart = 1 elif cgt[-1][1] in ['S', 'H']: - qs = cgt[-1][0] + 1 + qstart = cgt[-1][0] + 1 else: print('ERROR: CIGAR string starting with non-matching base') - qe = qs - 1 + sum([i[0] for i in cgt if i[1] in ['X', '=', 'I']]) - qs, qe = qe, qs + qend = qstart - 1 + sum([i[0] for i in cgt if i[1] in ['X', '=', 'I']]) + qstart, qend = qend, qstart al.append([ - rs, - re, - qs, - qe, - abs(re-rs) + 1, - abs(qs-qe) + 1, + rstart, + rend, + qstart, + qend, + abs(rend-rstart) + 1, + abs(qstart-qend) + 1, format((sum([i[0] for i in cgt if i[1] == '=']) / sum( [i[0] for i in cgt if i[1] in ['=', 'X', 'I', 'D']])) * 100, '.2f'), 1, @@ -445,14 +443,14 @@ cpdef extract_syri_snvs(fin): if l[10] == 'SNP': #TODO maybe store annotation information from fields 8-10 snv = SNV(Position('a', 'x', l[0], int(l[1])), Position('b', 'x', l[5], int(l[6])), l[4], l[5]) - syri_regs.append(SNV) + syri_regs.append(snv) df = pd.DataFrame(list(syri_regs))#[[0, 1, 3, 4, 5, 6, 8, 9, 10]] #TODO 
maybe do chromosome mapping? return df cpdef extract_syri_regions_from_file(fin, ref='a', anns=['SYN'], reforg='ref', qryorg='qry'): - raw, chr_mapping = readsyriout(fin) #TODO? handle chr_mapping + raw, _chr_mapping = readsyriout(fin) #TODO? handle chr_mapping return extract_syri_regions(raw, ref=ref, anns=anns, reforg=reforg, qryorg=qryorg) diff --git a/msyd/pyxfiles/multisyn.pyx b/msyd/pyxfiles/multisyn.pyx index 64445a0..aba0aae 100644 --- a/msyd/pyxfiles/multisyn.pyx +++ b/msyd/pyxfiles/multisyn.pyx @@ -6,8 +6,6 @@ import copy import functools import traceback -#import cython -import logging from msyd.cigar import Cigar import msyd.util as util diff --git a/msyd/pyxfiles/realignment.pyx b/msyd/pyxfiles/realignment.pyx index 8513a66..f154286 100644 --- a/msyd/pyxfiles/realignment.pyx +++ b/msyd/pyxfiles/realignment.pyx @@ -3,14 +3,11 @@ # -*- coding: utf-8 -*- # distutils: language = c++ # cython: language_level = 3 -import sys import pandas as pd -import numpy as np import mappy as mp import pysam import logging -import os from collections import deque, defaultdict from functools import partial from multiprocessing import Pool @@ -21,14 +18,14 @@ from intervaltree import IntervalTree, Interval logging.getLogger('syri').setLevel(logging.WARNING) logging.getLogger('getCTX').setLevel(logging.WARNING) -from syri.synsearchFunctions import syri, mergeOutputFiles, outSyn, apply_TS, alignmentBlock, getSynPath -from syri.tdfunc import getCTX -from syri.writeout import getsrtable +#from syri.synsearchFunctions import syri, mergeOutputFiles, outSyn, apply_TS, alignmentBlock, getSynPath +#from syri.tdfunc import getCTX +#from syri.writeout import getsrtable +from syri.synsearchFunctions import apply_TS, alignmentBlock, getSynPath import msyd.util as util import msyd.cigar as cigar import msyd.intersection as intersection -import msyd.io as io from msyd.multisyn import Multisyn from msyd.coords import Range @@ -342,8 +339,9 @@ cpdef get_at_pos(alns, rchrom, rstart, rend, 
qchrom, qstart, qend): #logger.debug(f"Removing {rstart - aln.astart}, {aln.aend - rend} from aln with len {cg.get_len()}") #print(cg.to_string()) - srem, erem, cg = cg.trim(max(0, rstart - aln.astart), max(0, aln.aend - rend)) + _, _, cg = cg.trim(max(0, rstart - aln.astart), max(0, aln.aend - rend)) + #srem, erem, cg = cg.trim(max(0, rstart - aln.astart), max(0, aln.aend - rend)) # check that the positions after removing match #if srem != qstart - aln.bstart: # logger.error(f"Mismatch during alignment trimming, start does not map on query! Should have removed {qstart - aln.bstart}, actually removed {srem}. CIGAR: {cg.to_string()}") @@ -421,7 +419,7 @@ cpdef get_nonsyn_alns(alnsdf, reftree, qrytree): #logger.debug(f"Found: {ret}") if len(ret) == 0 or all([r is None for r in ret]): - logger.warning(f"No alignments found in this region! This could be a repetitive region, or the alignments could be truncated!") + logger.warning("No alignments found in this region! This could be a repetitive region, or the alignments could be truncated!") return None return syrify(pd.concat(ret)) @@ -697,7 +695,7 @@ cdef process_gaps(df, qrynames, fastas, mp_preset='asm20', ncores=1, pairwise=No for org in seqdict: if org in lendict: # eliminating sequences is always okay #logger.info(f"Re-constructing {org} sequence. 
New len {util.siprefix(len(seqdict[org]))}, old {util.siprefix(lendict[org])}") - assert(len(seqdict[org]) <= lendict[org] + _NULL_CNT, "sequence length extended during update") + assert len(seqdict[org]) <= lendict[org] + _NULL_CNT, "sequence length extended during update" # incorporate into output DF, sorted alphabetically by ref name @@ -719,11 +717,12 @@ cdef process_gaps(df, qrynames, fastas, mp_preset='asm20', ncores=1, pairwise=No cdef syri_get_syntenic(reforg, alns): # Synteny call parameters - BRT = 20 - TUC = 1000 - TUP = 0.5 + # all except T are unused + #BRT = 20 + #TUC = 1000 + #TUP = 0.5 + #invgl = 1000000 T = 50 - invgl = 1000000 syns = {} diff --git a/msyd/pyxfiles/varcmp.pyx b/msyd/pyxfiles/varcmp.pyx index 588e879..2b3f9c1 100644 --- a/msyd/pyxfiles/varcmp.pyx +++ b/msyd/pyxfiles/varcmp.pyx @@ -9,7 +9,7 @@ from collections import defaultdict from msyd.coords import Range from msyd.multisyn import Multisyn -from msyd.vars import SNV +#from msyd.vars import SNV import msyd.util as util logger = util.CustomFormatter.getlogger(__name__) diff --git a/msyd/pyxfiles/vcf.pyx b/msyd/pyxfiles/vcf.pyx index 5eb43d9..af21e77 100644 --- a/msyd/pyxfiles/vcf.pyx +++ b/msyd/pyxfiles/vcf.pyx @@ -3,19 +3,14 @@ # cython: language_level = 3 -import sys import os -import logging import re from copy import copy -import pandas as pd - import pysam -from msyd.vars import SNV +#from msyd.vars import SNV import msyd.util as util -import msyd.cigar as cigar import msyd.io as io logger = util.CustomFormatter.getlogger(__name__) @@ -316,7 +311,7 @@ cdef add_syn_ann(syn, ovcf, ref=None, no=None, add_cigar=False, add_identity=Tru ovcf.write(rec) -cdef str merge_vcfs(lf: Union[str, os.PathLike], rf:Union[str, os.PathLike], of:Union[str, os.PathLike], condense_errors=True): +cdef str merge_vcfs(lf: Union[str, os.PathLike], rf:Union[str, os.PathLike], of:Union[str, os.PathLike]): logger.info(f"Merging {lf} and {rf} into {of}") # TODO reimplement this with common framework with 
merge psfs # do all this in memory to be faster @@ -324,10 +319,6 @@ cdef str merge_vcfs(lf: Union[str, os.PathLike], rf:Union[str, os.PathLike], of: rvcf = pysam.VariantFile(rf, 'r') ovcf = pysam.VariantFile(of, 'w') - condense_errors = condense_errors # this might be necessary to access parameters from within inner fns? - conflictinginfo = False - conflictingid = False - # Prepare the header if str(lvcf.header) != str(ovcf.header): logger.info(f"Headers not matching in {lf} and {rf}! Combining.") @@ -370,7 +361,7 @@ cdef str merge_vcfs(lf: Union[str, os.PathLike], rf:Union[str, os.PathLike], of: lann = next(lvcf) rann = next(rvcf) except StopIteration: - logger.error(f"Empty VCF encountered. Outputting empty VCF!") + logger.error("Empty VCF encountered. Outputting empty VCF!") return of try: @@ -453,12 +444,6 @@ cdef str merge_vcfs(lf: Union[str, os.PathLike], rf:Union[str, os.PathLike], of: except StopIteration: pass - if condense_errors: # not working currently - if conflictinginfo: - logger.warning(f"There was conflicting information stored in INFO! {rf} values were overwritten!") - if conflictingid: - logger.warning(f"There were VCF records at the same position with different IDs! {lf} IDs were used") - return of # to enable reduction operation cdef copy_record(rec: VariantRecord, ovcf:VariantFile, int pid=0): @@ -481,7 +466,7 @@ cdef copy_record(rec: VariantRecord, ovcf:VariantFile, int pid=0): new_rec.info['PID'] = pid ovcf.write(new_rec) -cdef merge_vcf_records(lrec: VariantRecord, rrec:VariantRecord, ovcf:VariantFile, condense_errors=True): +cdef merge_vcf_records(lrec: VariantRecord, rrec:VariantRecord, ovcf:VariantFile): """ Merge two vcf records from different files, append to ovcf. """ @@ -518,10 +503,7 @@ cdef merge_vcf_records(lrec: VariantRecord, rrec:VariantRecord, ovcf:VariantFile rec.alleles = alleles if lrec.id != rrec.id: - if condense_errors: - conflictingid = True - else: - logger.warning(f"id not matching in {lrec.id} and {rrec.id}! 
Choosing {lrec.id}") + logger.warning(f"id not matching in {lrec.id} and {rrec.id}! Choosing {lrec.id}") rec.id = lrec.id @@ -563,10 +545,7 @@ cdef merge_vcf_records(lrec: VariantRecord, rrec:VariantRecord, ovcf:VariantFile for key in rrec.info: if key in lrec.info and lrec.info[key] != rrec.info[key]: - if condense_errors: - conflictinginfo = True - else: - logger.warning(f"Conflicting info stored for {key} in {rec.id}: {lrec.info[key]} != {rrec.info[key]}! Choosing {lrec.info[key]}") + logger.warning(f"Conflicting info stored for {key} in {rec.id}: {lrec.info[key]} != {rrec.info[key]}! Choosing {lrec.info[key]}") #continue else: rec.info[key] = rrec.info[key] diff --git a/pyproject.toml b/pyproject.toml index ae5f563..65a335b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ version = {attr = "msyd.__version__"} [tool.cython-lint] max-line-length = 200 #ignore = ['E266', 'E265'] # don't lint comment style +no-pycodestyle = 'True' [tool.pylint.'MESSAGES CONTROL'] disable=['no-name-in-module'] From 4fb73697cb3cdd2a34d043e041d42ede44926aac Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:17:35 +0800 Subject: [PATCH 16/80] fix issue showed by lint --- msyd/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msyd/main.py b/msyd/main.py index af15133..8499f2b 100644 --- a/msyd/main.py +++ b/msyd/main.py @@ -306,7 +306,7 @@ def view(args): if args.intersect: logger.info(f"Writing intersection to {args.outfile.name} as VCF") - vcf.extract_syntenic_from_vcf(df, args.intersect.name, args.outfile.name, ref=args.ref.name if args.ref else None, impute_ref=args.impute) + vcf.extract_syntenic_from_vcf(pd.concat(syndict.values()), args.intersect.name, args.outfile.name, ref=args.ref.name if args.ref else None, impute_ref=args.impute) return # has been saved already # save From 5cdf607a39592be988504d9c42414af62da6ec1d Mon Sep 17 00:00:00 2001 From: Leon Rauschning 
<99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:19:38 +0800 Subject: [PATCH 17/80] fix missing format string --- msyd/pyxfiles/realignment.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msyd/pyxfiles/realignment.pyx b/msyd/pyxfiles/realignment.pyx index f154286..45d0afc 100644 --- a/msyd/pyxfiles/realignment.pyx +++ b/msyd/pyxfiles/realignment.pyx @@ -144,7 +144,7 @@ cpdef subtract_mts(mappingtrees, merasyns): #assert rng.end <= curint.end - curint.begin + curint.data, "Synteny in a spacer offset detected! An alignment went into the separator. Most likely, something went wrong during alignment." if rng.end > curint.end - curint.begin + curint.data: logger.debug(f"{rng.end}, {curint.end - curint.begin + curint.data}") - logger.warning("Synteny in a spacer detected! An alignment went into the separator. Most likely, something went wrong during the alignment call ({rng.end} vs {curint.end - curint.begin + curint.data}).") + logger.warning(f"Synteny in a spacer detected! An alignment went into the separator. Most likely, something went wrong during the alignment call ({rng.end} vs {curint.end - curint.begin + curint.data}).") # there was no interval overlapping this merasyn anyway, we don't need to subtract anything From ba583d53a865e79aae98bd4caa551e115236db82 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:40:23 +0800 Subject: [PATCH 18/80] format code --- msyd/main.py | 283 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 200 insertions(+), 83 deletions(-) diff --git a/msyd/main.py b/msyd/main.py index 8499f2b..c1e63b1 100644 --- a/msyd/main.py +++ b/msyd/main.py @@ -28,19 +28,19 @@ def main(): from msyd import __version__ as msydv parser = argparse.ArgumentParser(description=""" - msyd is a tool for identifying and processing multisynteny. 
- msyd consists of a Python library and a CLI interface.\n - The CLI interface consists of multiple subcommands, described briefly below.\n - For more information, see the documentation and subparser help messages accessed by calling msyd [subparser] -h. - """) + msyd is a tool for identifying and processing multisynteny. + msyd consists of a Python library and a CLI interface.\n + The CLI interface consists of multiple subcommands, described briefly below.\n + For more information, see the documentation and subparser help messages accessed by calling msyd [subcommand] -h. + """) parser.set_defaults(func=None, cores=1) parser.add_argument('--version', action='version', version=msydv) subparsers = parser.add_subparsers()#description="See also msyd [subparser] -h:") # title/description? # ordering parser order_parser = subparsers.add_parser("order", - help="Determine a suitable ordering for plotting from a multisynteny callset.", - description=""" + help="Determine a suitable ordering for plotting from a multisynteny callset.", + description=""" Determine the optimal ordering of the supplied genomes for plotting using a clustering-based algorithm. The ordering is determined such that adjacent organisms share as many basepairs of multisynteny as possible. """) @@ -76,89 +76,186 @@ def main(): Output can be saved either in Population Synteny File Format (.psf) or VCF. VCF output does not preserve alignment information and cannot be used for some of the further processing!\n """) call_parser.set_defaults(func=call) - call_parser.add_argument("-i", dest='infile', required=True, type=argparse.FileType('r'), help="The .tsv file to read SyRI output, alignment and VCF files in from. 
For more details, see the Readme.") - call_parser.add_argument("-o", dest='psf', required=True, type=argparse.FileType('wt'), help="Where to save the output PSF file (see format.md)") - call_parser.add_argument("-m", "--merge-vcf", dest='vcf', type=argparse.FileType('wt'), help="Merge the VCFs specified in the input table, store the merged VCF at the path specified. Does not currently work with --realign, as non-ref haplotypes do not have coordinates on the reference that VCF records can be fetched from.") - call_parser.add_argument("-a", "--all", dest='all', action='store_true', default=False, help="Merge all VCF records instead of only records annotated in multisyntenic regions.") - call_parser.add_argument("-x", "--complex", dest='no_complex', action='store_const', const=False, default=True, help="Do not filter the input VCFs to only contain SNPs and INDELs") - call_parser.add_argument("-r", "--reference", dest='ref', type=argparse.FileType('r'), help="Reference to use for the VCF output") - call_parser.add_argument("--incremental", dest='incremental', type=argparse.FileType('r'), help="A PSF file containing a previous multisynteny callset to combine with the calls derived from the input TSV. Should contain CIGAR strings.") - call_parser.add_argument("-c", dest="cores", help="Number of cores to use for parallel computation. Multisyn cannot make effective use of more cores than the number of input organisms divided by two. Defaults to 1.", type=int, default=1) - call_parser.add_argument("--core", dest='core', action='store_true', default=False, help="Call only core synteny. Improves runtime significantly, particularly on larger datasets.") - call_parser.add_argument("--syn", "-s", dest='SYNAL', action='store_const', const=False, default=True, help="Use SYN instead of SYNAL SyRI annotations. Fast, but error-prone and inaccurate. 
Not recommended.") - call_parser.add_argument("--no-cigars", dest='cigars', action='store_const', const=False, default=True, help="Don't store CIGAR strings in the saved .psf file. Has no effect when --syn is specified.") - call_parser.add_argument("--realign", "-ali", dest='realign', action='store_true', default=False, help="After calling core and reference cross synteny, realign missing regions to identify non-reference synteny.") - call_parser.add_argument("--pairwise", dest='pairwise', required=False, type=argparse.FileType('r'), help="Path to a TSV containing paths to full pairwise alignments that msyd will read in from disk during realignment if this parameter is passed. Otherwise, individual regions will be realigned on the fly with minimap2/mappy. This is useful if you already have pairwise alignments, or want to use a different aligner.") - call_parser.add_argument("-p", "--print", dest='print', action='store_true', default=False, help="print a subset of the output to stdout, for debugging.") - call_parser.add_argument("--impute", dest='impute', action='store_true', default=False, help="When processing small variants in a VCF, interpret the lack of a variant as identical to the reference genotype for that haplotype.") - call_parser.add_argument("--workdir", "-w", dest='tmp', required=False, type=str, help="Path to a working directory to be used for storing temporary files. If the path does not exist, it will be created!") - call_parser.add_argument("--min-realign", dest="min_realign", help="Minimum region size to realign, in bp. Default 150 bp.", type=int, default=-1) - call_parser.add_argument("--min-syn-id", dest="min_syn_id", help="Percent identity required for a region to be called as syntenic during the realignment step. Default 80.", type=int, default=80) - call_parser.add_argument("--max-realign", dest="max_realign", help="Maximum number of realignment steps to perform. 
Default 0 (unlimited).", type=int, default=-1) - call_parser.add_argument("--minimap-preset", dest="mp_preset", help="minimap2 alignment preset to use. Default 'asm20'.", type=str, default="asm20") + call_parser.add_argument("-i", dest='infile', + required=True, type=argparse.FileType('r'), + help="The .tsv file to read SyRI output, alignment and VCF files in from. For more details, see the Readme.") + call_parser.add_argument("-o", dest='psf', + required=True, type=argparse.FileType('wt'), + help="Where to save the output PSF file (see format.md)") + call_parser.add_argument("-m", "--merge-vcf", + dest='vcf', type=argparse.FileType('wt'), + help="Merge the VCFs specified in the input table, store the merged VCF at the path specified. Does not currently work with --realign, as non-ref haplotypes do not have coordinates on the reference that VCF records can be fetched from.") + call_parser.add_argument("-a", "--all", + dest='all', action='store_true', default=False, + help="Merge all VCF records instead of only records annotated in multisyntenic regions.") + call_parser.add_argument("-x", "--complex", + dest='no_complex', action='store_const', const=False, default=True, + help="Do not filter the input VCFs to only contain SNPs and INDELs") + call_parser.add_argument("-r", "--reference", + dest='ref', type=argparse.FileType('r'), + help="Reference to use for the VCF output") + call_parser.add_argument("--incremental", dest='incremental', + type=argparse.FileType('r'), + help="A PSF file containing a previous multisynteny callset to combine with the calls derived from the input TSV. Should contain CIGAR strings.") + call_parser.add_argument("-c", dest="cores", + type=int, default=1, + help="Number of cores to use for parallel computation. Multisyn cannot make effective use of more cores than the number of input organisms divided by two. 
Defaults to 1.") + call_parser.add_argument("--core", dest='core', + action='store_true', default=False, + help="Call only core synteny. Improves runtime significantly, particularly on larger datasets.") + call_parser.add_argument("--syn", "-s", + dest='SYNAL', action='store_const', + const=False, default=True, + help="Use SYN instead of SYNAL SyRI annotations. Fast, but error-prone and inaccurate. Not recommended.") + call_parser.add_argument("--no-cigars", dest='cigars', + action='store_const', const=False, default=True, + help="Don't store CIGAR strings in the saved .psf file. Has no effect when --syn is specified.") + call_parser.add_argument("--realign", "-ali", + dest='realign', action='store_true', default=False, + help="After calling core and reference cross synteny, realign missing regions to identify non-reference synteny.") + call_parser.add_argument("--pairwise", dest='pairwise', + required=False, type=argparse.FileType('r'), + help="Path to a TSV containing paths to full pairwise alignments that msyd will read in from disk during realignment if this parameter is passed. Otherwise, individual regions will be realigned on the fly with minimap2/mappy. This is useful if you already have pairwise alignments, or want to use a different aligner.") + call_parser.add_argument("-p", "--print", dest='print', + action='store_true', default=False, + help="print a subset of the output to stdout, for debugging.") + call_parser.add_argument("--impute", dest='impute', + action='store_true', default=False, + help="When processing small variants in a VCF, interpret the lack of a variant as identical to the reference genotype for that haplotype.") + call_parser.add_argument("--workdir", "-w", dest='tmp', + required=False, type=str, + help="Path to a working directory to be used for storing temporary files. 
If the path does not exist, it will be created!") + call_parser.add_argument("--min-realign", dest="min_realign", + type=int, default=-1, + help="Minimum region size to realign, in bp. Default 150 bp.") + call_parser.add_argument("--min-syn-id", dest="min_syn_id", + type=int, default=80, + help="Percent identity required for a region to be called as syntenic during the realignment step. Default 80.") + call_parser.add_argument("--max-realign", dest="max_realign", + type=int, default=-1, + help="Maximum number of realignment steps to perform. Default 0 (unlimited).") + call_parser.add_argument("--minimap-preset", dest="mp_preset", + type=str, default="asm20", + help="minimap2 alignment preset to use. Default 'asm20'.") # view subparser view_parser = subparsers.add_parser("view", - help="Filter, convert or analyze existing PSF Files", - description=""" - Used for filtering VCF files to only contain calls in multisyntenic regions for now. - Additional functionality will be implemented later. - """) + help="Filter, convert or analyze existing PSF Files", + description=""" + Used for filtering VCF files to only contain calls in multisyntenic regions for now. + Additional functionality will be implemented later. + """) view_parser.set_defaults(func=view) - view_parser.add_argument("-i", dest='infile', required=True, type=argparse.FileType('r'), help="PSF file to read multisynteny information from.") - view_parser.add_argument("-o", dest='outfile', required=True, type=argparse.FileType('wt'), help="Where to store the output. File format is determined automatically from the extension, but can be overridden by supplying any of the --o flags.") - view_parser.add_argument("-e", dest='expr', action='store', type=str, help="Expression to use for filtering the multisyntenic regions. 
This is done before --intersect is evaluated if also supplied") - view_parser.add_argument("-p", dest='print', action='store_const', const=10, help="Print the first 10 regions after filtering, mainly for debugging") - view_parser.add_argument("-r", "--reference", dest='ref', type=argparse.FileType('r'), help="If saving to VCF, the reference to use can be specified with this flag") - view_parser.add_argument("--intersect", dest='intersect', type=argparse.FileType('r'), help="VCF File to intersect with the PSF file given with -i. Will only keep annotations within multisyntenic regions") - view_parser.add_argument("--impute", dest='impute', action='store_true', default=False, help="When processing small variants in a VCF, interpret the lack of a variant as identical to the reference genotype for that haplotype.") - - view_parser.add_argument("--opsf", dest='filetype', action='store_const', const='psf', help="store output in PSF format") - view_parser.add_argument("--opsf-nocg", dest='filetype', action='store_const', const='psf-nocg', help="store output in PSF format, discarding cigar strings") - view_parser.add_argument("--ovcf", dest='filetype', action='store_const', const='vcf', help="store output in VCF format, discarding cigar strings") + view_parser.add_argument("-i", dest='infile', + required=True, type=argparse.FileType('r'), + help="PSF file to read multisynteny information from.") + view_parser.add_argument("-o", dest='outfile', + required=True, type=argparse.FileType('wt'), + help="Where to store the output. File format is determined automatically from the extension, but can be overridden by supplying any of the --o flags.") + view_parser.add_argument("-e", dest='expr', + action='store', type=str, + help="Expression to use for filtering the multisyntenic regions. 
This is done before --intersect is evaluated if also supplied") + view_parser.add_argument("-p", dest='print', + action='store_const', const=10, + help="Print the first 10 regions after filtering, mainly for debugging") + view_parser.add_argument("-r", "--reference", + dest='ref', type=argparse.FileType('r'), + help="If saving to VCF, the reference to use can be specified with this flag") + view_parser.add_argument("--intersect", dest='intersect', + type=argparse.FileType('r'), + help="VCF File to intersect with the PSF file given with -i. Will only keep annotations within multisyntenic regions") + view_parser.add_argument("--impute", dest='impute', + action='store_true', default=False, + help="When processing small variants in a VCF, interpret the lack of a variant as identical to the reference genotype for that haplotype.") + + view_parser.add_argument("--opsf", dest='filetype', + action='store_const', const='psf', + help="store output in PSF format") + view_parser.add_argument("--opsf-nocg", dest='filetype', + action='store_const', const='psf-nocg', + help="store output in PSF format, discarding cigar strings") + view_parser.add_argument("--ovcf", dest='filetype', + action='store_const', const='vcf', + help="store output in VCF format, discarding cigar strings") merge_parser = subparsers.add_parser("merge", - help="Merge different VCFs", - description=""" + help="Merge different VCFs", + description=""" Exposes the optional VCF merging functionality in msyd call directly. 
Mainly for testing and debugging purposes """) merge_parser.set_defaults(func=merge) - merge_parser.add_argument("-v", dest='vcfs', nargs='+', required=True, type=argparse.FileType('r'), help="The VCF files to merge.") - merge_parser.add_argument("-o", dest='outfile', required=True, type=argparse.FileType('wt'), help="Where to store the merged VCF.") + merge_parser.add_argument("-v", dest='vcfs', + nargs='+', required=True, type=argparse.FileType('r'), + help="The VCF files to merge.") + merge_parser.add_argument("-o", dest='outfile', + required=True, type=argparse.FileType('wt'), + help="Where to store the merged VCF.") realign_parser = subparsers.add_parser("realign", - help="Iteratively realign a set of genomes based on a PSF file", - description=""" + help="Iteratively realign a set of genomes based on a PSF file", + description=""" Exposes the realignment functionality in msyd call directly. Useful for realigning only a specific region by prefiltering the PSF. """) realign_parser.set_defaults(func=realign) - realign_parser.add_argument("-i", dest='infile', required=True, type=argparse.FileType('r'), help="PSF file to read multisynteny information from.") - realign_parser.add_argument("-o", dest='outfile', required=True, type=argparse.FileType('wt'), help="Where to save the output PSF file (see format.md)") - realign_parser.add_argument("-t", dest='tsvfile', required=True, type=argparse.FileType('r'), help="TSV containing the sample names and path to genome fastas.") - realign_parser.add_argument("-p", "--pairwise", dest='pairwise', required=False, type=argparse.FileType('r'), help="Path to a TSV containing paths to full pairwise alignments that msyd will read in from disk if this parameter is passed. Otherwise, individual regions will be realigned on the fly with minimap2/mappy. 
This is useful if you already have pairwise alignments, or want to use a different aligner.") - realign_parser.add_argument("--workdir", "-w", dest='tmp', required=False, type=str, help="Path to a working directory to be used for storing temporary files. If the path does not exist, it will be created!") - realign_parser.add_argument("--no-cigars", dest='cigars', action='store_const', const=False, default=True, help="Don't store CIGAR strings in the saved .psf file. Has no effect when --syn is specified.") - realign_parser.add_argument("--min-realign", dest="min_realign", help="Minimum region size to realign, in bp. Default 100 bp.", type=int, default=-1) - realign_parser.add_argument("--min-syn-id", dest="min_syn_id", help="Percent Identity required for a region to be called as syntenic during the realignment step. Default 80.", type=int, default=80) - realign_parser.add_argument("--max-realign", dest="max_realign", help="Maximum number of realignment steps to perform. Default 0 (unlimited).", type=int, default=-1) - realign_parser.add_argument("--minimap-preset", dest="mp_preset", help="minimap2 alignment preset to use. Default 'asm20'.", type=str, default="asm20") + realign_parser.add_argument("-i", dest='infile', + required=True, type=argparse.FileType('r'), + help="PSF file to read multisynteny information from.") + realign_parser.add_argument("-o", dest='outfile', + required=True, type=argparse.FileType('wt'), + help="Where to save the output PSF file (see format.md)") + realign_parser.add_argument("-t", dest='tsvfile', + required=True, type=argparse.FileType('r'), + help="TSV containing the sample names and path to genome fastas.") + realign_parser.add_argument("-p", "--pairwise", + dest='pairwise', required=False, type=argparse.FileType('r'), + help="Path to a TSV containing paths to full pairwise alignments that msyd will read in from disk if this parameter is passed. Otherwise, individual regions will be realigned on the fly with minimap2/mappy. 
This is useful if you already have pairwise alignments, or want to use a different aligner.") + realign_parser.add_argument("--workdir", "-w", + dest='tmp', required=False, + type=str, help="Path to a working directory to be used for storing temporary files. If the path does not exist, it will be created!") + realign_parser.add_argument("--no-cigars", dest='cigars', + action='store_const', const=False, default=True, + help="Don't store CIGAR strings in the saved .psf file. Has no effect when --syn is specified.") + realign_parser.add_argument("--min-realign", dest="min_realign", + type=int, default=-1, + help="Minimum region size to realign, in bp. Default 100 bp.") + realign_parser.add_argument("--min-syn-id", dest="min_syn_id", + type=int, default=80, + help="Percent Identity required for a region to be called as syntenic during the realignment step. Default 80.") + realign_parser.add_argument("--max-realign", dest="max_realign", + type=int, default=-1, + help="Maximum number of realignment steps to perform. Default 0 (unlimited).") + realign_parser.add_argument("--minimap-preset", dest="mp_preset", + type=str, default="asm20", + help="minimap2 alignment preset to use. Default 'asm20'.") stats_parser = subparsers.add_parser("stats", - help="Compute some statistics on a PSF file", - description=""" + help="Compute some statistics on a PSF file", + description=""" Computes some basic statistics on a PSF file. Useful as input for plotting or to get a feel for the dataset. """) stats_parser.set_defaults(func=stats) - stats_parser.add_argument("-i", dest='infile', required=True, type=argparse.FileType('r'), help="PSF file to read multisynteny information from.") - stats_parser.add_argument("-o", dest='outfile', default='-', type=argparse.FileType('wt'), help="Where to send the statistics to. Default stdout.") - stats_parser.add_argument("--separator", "-s", dest="sep", help="Separator to use for printing the stats. 
Default is tab (for TSV), set to ',' for CSV.", type=str, default="\t") - stats_parser.add_argument("-p", "--prefix", dest='siprefix', action='store_true', default=False, help="Whether to attach SI prefixes to the output for human readability. If not supplied, print exact numbers.") - stats_parser.add_argument("-a", "--aggregate", dest='agg', action='store_true', default=False, help="If passed, will report summary statistics for all haplotypes instead of by organism.") - stats_parser.add_argument("--no-header", dest='header', action='store_false', default=True, help="If passed, msyd will not print a header for the CSV.") + stats_parser.add_argument("-i", dest='infile', + required=True, type=argparse.FileType('r'), + help="PSF file to read multisynteny information from.") + stats_parser.add_argument("-o", dest='outfile', + default='-', type=argparse.FileType('wt'), + help="Where to send the statistics to. Default stdout.") + stats_parser.add_argument("--separator", "-s", + type=str, default="\t", + dest="sep", help="Separator to use for printing the stats. Default is tab (for TSV), set to ',' for CSV.") + stats_parser.add_argument("-p", "--prefix", + dest='siprefix', action='store_true', default=False, + help="Whether to attach SI prefixes to the output for human readability. 
If not supplied, print exact numbers.") + stats_parser.add_argument("-a", "--aggregate", + dest='agg', action='store_true', default=False, + help="If passed, will report summary statistics for all haplotypes instead of by organism.") + stats_parser.add_argument("--no-header", dest='header', + action='store_false', default=True, + help="If passed, msyd will not print a header for the CSV.") #stats_parser.add_argument("-r", "--reference", dest='agg', action='store_true', default=False, help="If passed, will report summary statistics") #fact_parser = subparsers.add_parser("fact", @@ -197,20 +294,26 @@ def call(args): qrynames, syns, alns, vcfs, fastas = util.parse_input_tsv(args.infile) # find reference synteny #syndicts = intersection.find_multisyn(qrynames, syns, alns, only_core=args.core, SYNAL=args.SYNAL, base=args.incremental) - syndict = intersection.prepare_input(qrynames, syns, alns, cores=args.cores, SYNAL=args.SYNAL, base=args.incremental) + syndict = intersection.prepare_input(qrynames, syns, alns, + cores=args.cores, + SYNAL=args.SYNAL, + base=args.incremental) logger.info("Read input files") syndict = intersection.process_syndicts(syndict, cores=args.cores) logger.info("Intersected synteny") if args.realign: - # read in full pairwise alns if supplied - if args.pairwise: - alndict = io.read_alnsfile(args.pairwise) - #TODO directly do in call to realignment - # use reference synteny as base to identify all haplotypes - syndict = realignment.realign(syndict, qrynames, fastas, MIN_REALIGN_LEN=args.min_realign, MIN_SYN_ID=args.min_syn_id, MAX_REALIGN=args.max_realign, mp_preset=args.mp_preset, ncores=args.cores, pairwise=alndict if args.pairwise else None) + syndict = realignment.realign(syndict, qrynames, fastas, + MIN_REALIGN_LEN=args.min_realign, + MIN_SYN_ID=args.min_syn_id, + MAX_REALIGN=args.max_realign, + mp_preset=args.mp_preset, + ncores=args.cores, + # read in full pairwise alns if supplied + pairwise=io.read_alnsfile(args.pairwise) \ + if 
args.pairwise else None) # garb = realign(df, qrynames, fastas, MIN_REALIGN_LEN=args.min_realign, MAX_REALIGN=args.max_realign, mp_preset=args.mp_preset, ncores=args.cores, cwd=args.tmp) # realign(syns, qrynames, fastas, MIN_REALIGN_LEN=None, MAX_REALIGN=None, mp_preset='asm5'): @@ -244,8 +347,11 @@ def call(args): if not args.all: logger.info("Pre-filtering VCFs to multisyntenic regions") - vcfs = vcf.filter_vcfs(df, vcfs, ref, no_complex=args.no_complex, add_syn_anns=False, impute_ref=args.impute) - + vcfs = vcf.filter_vcfs(df, vcfs, ref, + no_complex=args.no_complex, + add_syn_anns=False, + impute_ref=args.impute) + logger.info(f"Filtered files: {vcfs}") @@ -255,7 +361,10 @@ def call(args): if args.impute: logger.info(f"Imputing reference genotypes in syntenic regions, saving to {args.vcf.name}") - vcf.extract_syntenic_from_vcf(df, tmpfile, args.vcf.name, no_complex=args.no_complex, add_syn_anns=True, impute_ref=args.impute) + vcf.extract_syntenic_from_vcf(df, tmpfile, args.vcf.name, + no_complex=args.no_complex, + add_syn_anns=True, + impute_ref=args.impute) else: logger.info(f"Adding multisynteny annotations, saving to {args.vcf.name}") vcf.add_syn_anns_to_vcf(df, tmpfile, args.vcf.name, ref=ref) @@ -282,7 +391,8 @@ def order(args): logger = util.CustomFormatter.getlogger("order") syndict = io.read_psf(args.infile) - print(ordering.order_hierarchical(pd.concat(syndict.values()), orgs=None, score_fn=ordering.syn_score)) + print(ordering.order_hierarchical(pd.concat(syndict.values()), + orgs=None, score_fn=ordering.syn_score)) logger.info("Finished running msyd order") def view(args): @@ -306,7 +416,11 @@ def view(args): if args.intersect: logger.info(f"Writing intersection to {args.outfile.name} as VCF") - vcf.extract_syntenic_from_vcf(pd.concat(syndict.values()), args.intersect.name, args.outfile.name, ref=args.ref.name if args.ref else None, impute_ref=args.impute) + vcf.extract_syntenic_from_vcf(pd.concat(syndict.values()), + args.intersect.name, + 
args.outfile.name, + ref=args.ref.name if args.ref else None, + impute_ref=args.impute) return # has been saved already # save @@ -330,15 +444,18 @@ def realign(args): import msyd.util as util logger = util.CustomFormatter.getlogger("realign") - # read in full pairwise alns if supplied - if args.pairwise: - alndict = io.read_alnsfile(args.pairwise) logger.info(f"realigning from {args.infile.name}, taking genome files from {args.tsvfile.name}") qrynames, syris, alns, vcfs, fastas = util.parse_input_tsv(args.tsvfile) syndict = io.read_psf(args.infile) logger.info("Read input file") - resyns = realignment.realign(syndict, qrynames, fastas, MIN_REALIGN_LEN=args.min_realign, MIN_SYN_ID=args.min_syn_id, MAX_REALIGN=args.max_realign, pairwise=alndict if args.pairwise else None) + resyns = realignment.realign(syndict, qrynames, fastas, + MIN_REALIGN_LEN=args.min_realign, + MIN_SYN_ID=args.min_syn_id, + MAX_REALIGN=args.max_realign, + # read in full pairwise alns if supplied + pairwise= io.read_alnsfile(args.pairwise) \ + if args.pairwise else None) print(util.get_stats(resyns)) logger.info(f"Saving to {args.outfile.name} in PSF format.") From dca89636b002da2429d8a8d1964e7ee6f31a4f5e Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 14:46:30 +0800 Subject: [PATCH 19/80] disable debug printing --- msyd/pyxfiles/realignment.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/msyd/pyxfiles/realignment.pyx b/msyd/pyxfiles/realignment.pyx index 45d0afc..a1e06bf 100644 --- a/msyd/pyxfiles/realignment.pyx +++ b/msyd/pyxfiles/realignment.pyx @@ -448,7 +448,7 @@ cpdef realign(syndict, qrynames, fastas, MIN_REALIGN_LEN=None, MIN_SYN_ID=None, cores = min(len(syndict), ncores) with Pool(cores) as pool: - print([(chrom, syndict[chrom], qrynames, fastas, mp_preset, int(ncores/len(syndict))) for chrom in syndict]) + #print([(chrom, syndict[chrom], qrynames, fastas, mp_preset, int(ncores/len(syndict))) for 
chrom in syndict]) return dict(pool.map(_workaround, [(chrom, pd.DataFrame(syndict[chrom]), qrynames, fastas, mp_preset, max(1, int(ncores/len(syndict)))) for chrom in syndict])) cpdef _workaround(args): # args: (chrom, syndf, qrynames, fastas, mp_preset, ncores) From cfe770a2c1ea6fbed52a969593b6faf3d6317fcc Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:43:36 +0800 Subject: [PATCH 20/80] disable error from lint not seeing cython imports --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 65a335b..eac5e8b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ max-line-length = 200 no-pycodestyle = 'True' [tool.pylint.'MESSAGES CONTROL'] -disable=['no-name-in-module'] +disable=['no-name-in-module', 'import-error'] [tool.pylint] errors-only = 'True' From 0da3d76d5cd1abeddc458f195f17f7488b2c9efb Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:47:32 +0800 Subject: [PATCH 21/80] remove unnecessary variable --- msyd/pyxfiles/vcf.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/msyd/pyxfiles/vcf.pyx b/msyd/pyxfiles/vcf.pyx index af21e77..85ea4ad 100644 --- a/msyd/pyxfiles/vcf.pyx +++ b/msyd/pyxfiles/vcf.pyx @@ -220,8 +220,6 @@ cpdef add_syn_anns_to_vcf(syns, vcfin: Union[str, os.PathLike], vcfout: Union[st oldvcf = pysam.VariantFile(vcfin, 'r') newvcf = pysam.VariantFile(vcfout, 'w') int syncounter = 1 - orgs = sorted(util.get_orgs_from_df(syns)) - #int orgsc = len(orgs) # copy header, deduplicate along the way headerset = set() From 4583c0f853361cc2ff2b99d8e3d4e31e40de4996 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:50:12 +0800 Subject: [PATCH 22/80] make lint stop complaining about default arg --- msyd/pyxfiles/io.pyx | 8 ++++++-- 1 file changed, 6 insertions(+), 2 
deletions(-) diff --git a/msyd/pyxfiles/io.pyx b/msyd/pyxfiles/io.pyx index 7e10598..b121aad 100644 --- a/msyd/pyxfiles/io.pyx +++ b/msyd/pyxfiles/io.pyx @@ -449,12 +449,16 @@ cpdef extract_syri_snvs(fin): #TODO maybe do chromosome mapping? return df -cpdef extract_syri_regions_from_file(fin, ref='a', anns=['SYN'], reforg='ref', qryorg='qry'): +# cython-lint flags the default arg list as dangerous +# but in this case it's fine since its static +cpdef extract_syri_regions_from_file(fin, ref='a', anns=['SYN'], reforg='ref', qryorg='qry'): # no-cython-lint raw, _chr_mapping = readsyriout(fin) #TODO? handle chr_mapping return extract_syri_regions(raw, ref=ref, anns=anns, reforg=reforg, qryorg=qryorg) -cpdef extract_syri_regions(rawsyriout, ref='a', anns=['SYN'], reforg='ref', qryorg='qry'): +# cython-lint flags the default arg list as dangerous +# but in this case it's fine since its static +cpdef extract_syri_regions(rawsyriout, ref='a', anns=['SYN'], reforg='ref', qryorg='qry'): # no-cython-lint """ Given a syri output file, extract all regions matching a given annotation. Returns the output as a dict containing one Dataframe per chromosome. 
From 097a01ee9f1894465a0cb6a056def34880da2e1d Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 16:58:16 +0800 Subject: [PATCH 23/80] specify channel when downloading syri --- .github/workflows/python-package.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 61320b0..0107fb6 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -31,7 +31,7 @@ jobs: python -m pip install --upgrade pip setuptools # apparently the runners all have conda installed # use that to install syri, as it isn't on pypi - $CONDA/bin/conda install syri >= 1.6.5 + $CONDA/bin/conda install bioconda::syri>=1.6.5 if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Test with pytest run: | From 345e28c25927c820794e77e548b7cd9457cbd3bf Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 17:09:00 +0800 Subject: [PATCH 24/80] fix conda spec --- .github/workflows/python-package.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0107fb6..b056299 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -12,7 +12,6 @@ on: jobs: build: - runs-on: ubuntu-latest strategy: fail-fast: false @@ -31,11 +30,12 @@ jobs: python -m pip install --upgrade pip setuptools # apparently the runners all have conda installed # use that to install syri, as it isn't on pypi - $CONDA/bin/conda install bioconda::syri>=1.6.5 + $CONDA/bin/conda install bioconda::syri if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Test with pytest + - name: Build package + run: pip install . + - name: Test package is installed successfully run: | - pip install . 
msyd -h msyd call -h msyd view -h From 47512783d093f18be5ae04548a411cc33d2df40e Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 17:09:32 +0800 Subject: [PATCH 25/80] rename build job --- .github/workflows/{python-package.yml => build.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{python-package.yml => build.yml} (100%) diff --git a/.github/workflows/python-package.yml b/.github/workflows/build.yml similarity index 100% rename from .github/workflows/python-package.yml rename to .github/workflows/build.yml From 2a58f5564697b88364fc67ac548e3cfb65190228 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 17:43:16 +0800 Subject: [PATCH 26/80] add condaforge for dependencies --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b056299..c3d8dae 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,7 @@ jobs: python -m pip install --upgrade pip setuptools # apparently the runners all have conda installed # use that to install syri, as it isn't on pypi - $CONDA/bin/conda install bioconda::syri + $CONDA/bin/conda install -c conda-forge -c bioconda "syri>=1.6.5" if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . 
From fc71acd539d0b3ba2c4303ed26c6e961d9d4c161 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 18:47:25 +0800 Subject: [PATCH 27/80] try ignoring pin --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c3d8dae..d0a25a3 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,7 @@ jobs: python -m pip install --upgrade pip setuptools # apparently the runners all have conda installed # use that to install syri, as it isn't on pypi - $CONDA/bin/conda install -c conda-forge -c bioconda "syri>=1.6.5" + $CONDA/bin/conda install --no-pin -c conda-forge -c bioconda "syri>=1.6.5" if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . From 2173157c5dc746806f2cb7624969afe28332df38 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 18:50:20 +0800 Subject: [PATCH 28/80] Revert "try ignoring pin" This reverts commit fc71acd539d0b3ba2c4303ed26c6e961d9d4c161. --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d0a25a3..c3d8dae 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,7 @@ jobs: python -m pip install --upgrade pip setuptools # apparently the runners all have conda installed # use that to install syri, as it isn't on pypi - $CONDA/bin/conda install --no-pin -c conda-forge -c bioconda "syri>=1.6.5" + $CONDA/bin/conda install -c conda-forge -c bioconda "syri>=1.6.5" if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . 
From 731ee3a291952050f3d5d70d09e854c85169c95c Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 18:50:43 +0800 Subject: [PATCH 29/80] add linting badge to readme --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index fd5ae5d..f3e36e7 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,6 @@ # msyd +![lint](https://github.com/schneebergerlab/msyd/actions/workflows/lint.yml/badge.svg) + msyd is still under active development, so expect some bugs and changes! If in doubt about the behaviour of msyd or how it might change, feel free to reach out by opening an issue! From fb5b731ee15648046472a92c6a849fd8e349ccd4 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Mon, 9 Dec 2024 18:50:53 +0800 Subject: [PATCH 30/80] touch up example workflow --- example/example_workflow.sh | 50 ++++++++++++++++++++++--------------- 1 file changed, 30 insertions(+), 20 deletions(-) diff --git a/example/example_workflow.sh b/example/example_workflow.sh index ba8f670..aad1f44 100644 --- a/example/example_workflow.sh +++ b/example/example_workflow.sh @@ -1,6 +1,10 @@ #!/bin/sh -## Download some genomes +# This file serves as an example workflow illustrating how and when to use msyd. +# It is a part of the msyd CI, and should pass so long as your system + + +## Download three publicly available, high quality A. 
thaliana genomes # download the Col-CC assembly curl -OJX GET "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GCA_028009825.1/download?include_annotation_type=GENOME_FASTA&filename=GCA_028009825.1.zip" -H "Accept: application/zip" @@ -15,14 +19,15 @@ curl -OJX GET "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/GC unzip "./*.zip" mv ncbi_dataset/data/*/*.fna ./ -## Prepare them for running msyd +### Prepare them for running msyd -# rename them to shorter names +## rename them to shorter names mv GCA_001651475.1_Ler_Assembly_genomic.fna ler.fna mv GCA_028009825.1_Col-CC_genomic.fna col.fna mv GCA_902460295.1_Arabidopsis_thaliana_Sha_genomic.fna sha.fna mv GCA_024498555.1_ASM2449855v1_genomic.fna swe.fna +## filter out small scaffolds grep -n -P ">" ./*.fna # col and swe do not require truncating, # for ler the small scaffolds start at line 1442097 @@ -33,20 +38,22 @@ head -n 1480076 sha.fna > sha.filtered.fna mv sha.filtered.fna sha.fna -## Generate inputs for msyd +### Generate inputs for msyd -# generate alignments to col-CC +## generate alignments to col-CC mv col.fna ref.fna -minimap2 -cx asm5 --eqx ref.fna ler.fna > ler.paf -minimap2 -cx asm5 --eqx ref.fna sha.fna > sha.paf -minimap2 -cx asm5 --eqx ref.fna swe.fna > swe.paf +minimap2 -cx asm10 --eqx ref.fna ler.fna > ler.paf +minimap2 -cx asm10 --eqx ref.fna sha.fna > sha.paf +minimap2 -cx asm10 --eqx ref.fna swe.fna > swe.paf -# run syri on the alignments +## run syri on the alignments +# make sure to pass --cigar and specify appropriate prefixes, so the msyd output is more easily interpretable syri --nc 5 -F P --cigar --prefix ler -c ler.paf -r ref.fna -q ler.fna --lf ler.syri.log --samplename ler syri --nc 5 -F P --cigar --prefix sha -c sha.paf -r ref.fna -q sha.fna --lf sha.syri.log --samplename sha syri --nc 5 -F P --cigar --prefix swe -c swe.paf -r ref.fna -q swe.fna --lf swe.syri.log --samplename swe ## construct genomes.tsv file +# as msyd needs many input files, the 
paths are stored in a samplesheet echo "#name\taln\tsyri\tvcf\tseq" > genomes.tsv for f in *syri.out do @@ -54,24 +61,27 @@ do echo "$bs\t$bs.paf\t${bs}syri.out\t${bs}syri.vcf\t${bs}.fna" >> genomes.tsv done -# run msyd to call pansynteny -msyd call -i genomes.tsv -o athalianas.pff -m athalianas.vcf -r ref.fna +### run msyd to call multisynteny +msyd call -c 5 -i genomes.tsv -o athalianas.psf -m athalianas.vcf -r ref.fna + +### work with the output -## work with the output +## export multisynteny on Chr3 for use in visualization/other software -# CP116282 is the id corresponding to chromosome 3 in Col-CC -msyd view -e "on CP116283.1" -i athalianas.pff -o athalianas-chr3.pff +# CP116283 is the id corresponding to chromosome 3 in Col-CC +# filter for multisynteny on this chromosome +msyd view -e "on CP116283.1" -i athalianas.psf -o athalianas-chr3.psf -# convert to VCF for use in visualization/other software -msyd view -i athalianas-chr3.pff -o athalianas-chr3-syn.vcf +# export to VCF; this could also be done in the command above +msyd view -i athalianas-chr3.psf -o athalianas-chr3-syn.vcf -## download 1001 genome project VCF, filter for vars in pansyntenic regions +## download 1001 genome project VCF, filter for small variants structurally conserved regions curl https://ftp.ebi.ac.uk/ensemblgenomes/pub/release-56/plants/variation/vcf/arabidopsis_thaliana/arabidopsis_thaliana.vcf.gz -o ensembl_athaliana.vcf.gz gunzip ensembl_athaliana.vcf.gz # change from ids to chr numbers, to match vcf nomenclature -sed -e s/CP116280.1/1/ -e s/CP116281.1/2/ -e s/CP116282.1/3/ -e s/CP116283.1/4/ -e s/CP116284.1/5/ athalianas.pff > athalianas-chrnames.pff +sed -e s/CP116280\.1/Chr1/ -e s/CP116281\.1/Chr2/ -e s/CP116282\.1/Chr3/ -e s/CP116283\.1/Chr4/ -e s/CP116284\.1/Chr5/ athalianas.psf > athalianas-chrnames.psf -# filter for variants in pansyntenic regions! 
-msyd view -i athalianas-chrnames.pff -e "deg >= 3" -o pansynt-vars.vcf --intersect ensembl_athaliana.vcf +# filter for variants in coresyntenic regions! +msyd view -i athalianas-chrnames.psf -e "deg >= 3" -o coresynt-snvs.vcf --intersect ensembl_athaliana.vcf From 4d47f48e9f4f15149bc04269a1b58c08c0b5c5a2 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:31:11 +0800 Subject: [PATCH 31/80] sort requirements alphabetically --- pyproject.toml | 8 ++++---- requirements.txt | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index eac5e8b..3568654 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -15,13 +15,13 @@ requires-python = ">=3.8" keywords = ["Structural Variation", "SV", "Pangenomics", "Bioinformatics"] license = {text = "MIT"} dependencies = [ - "numpy", - "scipy", "cython", + "intervaltree", + "mappy", + "numpy", "pandas", "pysam >= 0.21", - "mappy", - "intervaltree", + "scipy", "syri >= 1.6.5", ] dynamic = ["version"] diff --git a/requirements.txt b/requirements.txt index 544fb2a..92edacd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -Cython -pandas +cython +intervaltree +mappy numpy +pandas pysam >= 0.21 # for the ordering functionality scipy # for the realignment functionality -mappy -intervaltree syri >= 1.6.5 From 6e49c1b00e6e89eb782e7d43d0bd3b601b22e926 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:31:37 +0800 Subject: [PATCH 32/80] try manually installing syri dependencies from pypi --- .github/workflows/build.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c3d8dae..b556b4b 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -30,7 +30,9 @@ jobs: python -m pip install --upgrade pip setuptools # apparently the runners 
all have conda installed # use that to install syri, as it isn't on pypi - $CONDA/bin/conda install -c conda-forge -c bioconda "syri>=1.6.5" + # manually install syris dependencies, conda does not install + pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp + $CONDA/bin/conda install --freeze-installed --no-deps -c conda-forge -c bioconda "syri>=1.6.5" if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . From 846f68460dd64776a2a3d7101600e473bf70bc1c Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:38:47 +0800 Subject: [PATCH 33/80] switch to pip install from git --- .github/workflows/build.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b556b4b..8f2133f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -28,11 +28,11 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip setuptools - # apparently the runners all have conda installed - # use that to install syri, as it isn't on pypi - # manually install syris dependencies, conda does not install - pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp - $CONDA/bin/conda install --freeze-installed --no-deps -c conda-forge -c bioconda "syri>=1.6.5" + # manually install syris dependencies + # seems to be no longer necessary with github install + #pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp + # manually use pip to install syri from github, as it isn't on pypi + pip install git+https://github.com/schneebergerlab/syri.git#egg=syri if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . 
From a9af9dc83af576b4daf8b5d4a3e2dc3c5d63d5d4 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:43:13 +0800 Subject: [PATCH 34/80] spoof python version requirement --- .github/workflows/build.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 8f2133f..721458f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -16,7 +16,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 @@ -32,7 +32,8 @@ jobs: # seems to be no longer necessary with github install #pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp # manually use pip to install syri from github, as it isn't on pypi - pip install git+https://github.com/schneebergerlab/syri.git#egg=syri + # spoof python version to get around bounds check + pip install git+https://github.com/schneebergerlab/syri.git#egg=syri --python-version '3.10' #--no-warn-conflicts if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . 
From b906632773fab7ae4020f09212702c0f697edbd1 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:44:03 +0800 Subject: [PATCH 35/80] fix accidental comment --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 721458f..7b0ca65 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: #pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp # manually use pip to install syri from github, as it isn't on pypi # spoof python version to get around bounds check - pip install git+https://github.com/schneebergerlab/syri.git#egg=syri --python-version '3.10' #--no-warn-conflicts + pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' #--no-warn-conflicts if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . 
From c7df56aa712643044d2633d6d6e150bae75f3711 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 11:46:13 +0800 Subject: [PATCH 36/80] reenable manual dependency download --- .github/workflows/build.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 7b0ca65..b91e0c2 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -29,11 +29,11 @@ jobs: run: | python -m pip install --upgrade pip setuptools # manually install syris dependencies - # seems to be no longer necessary with github install - #pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp + # the python version spoofing requires the --no-deps flag, so this is necessary + pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp # manually use pip to install syri from github, as it isn't on pypi # spoof python version to get around bounds check - pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' #--no-warn-conflicts + pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . 
From 8846d90b23b95250bfd4fe45e09b5125948422e2 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 12:05:55 +0800 Subject: [PATCH 37/80] force install to right dir --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index b91e0c2..c00dc58 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -33,7 +33,7 @@ jobs: pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp # manually use pip to install syri from github, as it isn't on pypi # spoof python version to get around bounds check - pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts + pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . From ac2f4d3f1bc53d1773de3e07102a3ee9be81cd03 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 14:26:33 +0800 Subject: [PATCH 38/80] add badges --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index f3e36e7..a2f6f01 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # msyd ![lint](https://github.com/schneebergerlab/msyd/actions/workflows/lint.yml/badge.svg) +![build](https://github.com/schneebergerlab/msyd/actions/workflows/build.yml/badge.svg) +[![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint) msyd is still under active development, so expect some bugs and changes! 
From 897a8a3e799041267a33e4d17d943265b6aedd01 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:11:49 +0800 Subject: [PATCH 39/80] add x permission to example workflow --- example/example_workflow.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 example/example_workflow.sh diff --git a/example/example_workflow.sh b/example/example_workflow.sh old mode 100644 new mode 100755 From a1a6d193a5c440d081e1b3e61e51946fd97e5768 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:12:25 +0800 Subject: [PATCH 40/80] add test workflow --- .github/workflows/build.yml | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index c00dc58..dcefeea 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -2,13 +2,13 @@ # adapted from # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Build python package +name: Build and test msyd on: push: - branches: [ "main" ] + branches: [ "main", "dev" ] pull_request: - branches: [ "main" ] + branches: [ "main", "dev" ] jobs: build: @@ -37,8 +37,18 @@ jobs: if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Build package run: pip install . 
- - name: Test package is installed successfully + test: + runs-on: ubuntu-latest + needs: build + steps: + - name: Test installation run: | msyd -h msyd call -h msyd view -h + - name: Test example_run.sh + run: | + # alias to a call launching the syri entrypoint from python + # necessary, as the hacky git install does not install the CLI entrypoints + alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' + ./example From 047748dd7ece7244e29bd7c587f8d32180fe1f0e Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:13:37 +0800 Subject: [PATCH 41/80] empty commit to trigger new CI From 5f1f8fb4fe416ec414439020e54953a64664c118 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 15:13:57 +0800 Subject: [PATCH 42/80] whoops, enable test --- .github/workflows/build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index dcefeea..234c78c 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -51,4 +51,4 @@ jobs: # alias to a call launching the syri entrypoint from python # necessary, as the hacky git install does not install the CLI entrypoints alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' - ./example + ./example/example_workflow.sh From c0d1852fa17d489d47e184ea4e76a6f85b0a4de2 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:44:01 +0800 Subject: [PATCH 43/80] refactor build into reusable workflow --- .github/workflows/build.yml | 61 +++++++++++------------------- .github/workflows/test_build.yml | 39 +++++++++++++++++++ .github/workflows/test_example.yml | 29 ++++++++++++++ 3 files changed, 90 insertions(+), 39 deletions(-) create mode 100644 .github/workflows/test_build.yml create mode 100644 
.github/workflows/test_example.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 234c78c..78845ed 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,54 +1,37 @@ -# This workflow will build msyd as a python package, and call the CLI interface to check the install worked -# adapted from -# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python +# adapted from https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python -name: Build and test msyd +name: Build msyd +description: "Checks out, installs dependencies and builds the msyd package. Formulated as a reusable workflow to reduce code duplication in testing." on: - push: - branches: [ "main", "dev" ] - pull_request: - branches: [ "main", "dev" ] + workflow_call: + inputs: + python-version: + description: 'Python version to use' + required: true + default: '3.12' -jobs: - build: - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] - - steps: - - uses: actions/checkout@v4 - - name: Set up Python ${{ matrix.python-version }} +run: + using: composite + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: ${{ inputs.python-version }} cache: pip - - name: Install dependencies + - name: Update pip + run: python -m pip install --upgrade pip setuptools + - name: Install SyRI manually run: | - python -m pip install --upgrade pip setuptools # manually install syris dependencies # the python version spoofing requires the --no-deps flag, so this is necessary pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp # manually use pip to install syri from github, as it isn't on pypi # spoof python version to get 
around bounds check pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Build package + - name: Install other dependencies + run: pip install -r requirements.txt + - name: Build msyd run: pip install . - test: - runs-on: ubuntu-latest - needs: build - steps: - - name: Test installation - run: | - msyd -h - msyd call -h - msyd view -h - - name: Test example_run.sh - run: | - # alias to a call launching the syri entrypoint from python - # necessary, as the hacky git install does not install the CLI entrypoints - alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' - ./example/example_workflow.sh diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml new file mode 100644 index 0000000..56b4f3b --- /dev/null +++ b/.github/workflows/test_build.yml @@ -0,0 +1,39 @@ +# This workflow will use the build action to build the msyd python package, and call the CLI interface to check the install worked +name: Test build + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main", "dev" ] + +# Cancel if a newer run is started +# taken from https://github.com/nf-core/modules/blob/master/.github/workflows/nf-test.yml +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + + +jobs: + test: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] + steps: + - name: Build msyd + uses: ./.github/workflows/build.yml + with: + python-version: ${{ matrix.python-version }} + - name: Test installation + run: | + msyd --version + msyd -h + msyd call -h + msyd view -h + - name: Test example_run.sh + run: | + # alias to a call launching the syri entrypoint from python + # necessary, as the hacky git install does 
not install the CLI entrypoints + alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' + ./example/example_workflow.sh diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml new file mode 100644 index 0000000..a4e9f0a --- /dev/null +++ b/.github/workflows/test_example.yml @@ -0,0 +1,29 @@ +name: Test example.sh + +on: + push: + branches: [ "main", "dev" ] + pull_request: + branches: [ "main", "dev" ] + +# Cancel if a newer run is started +# taken from https://github.com/nf-core/modules/blob/master/.github/workflows/nf-test.yml +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + + +jobs: + test: + runs-on: ubuntu-latest + steps: + - name: Build msyd + uses: ./.github/workflows/build.yml + with: + python-version: 3.12 + - name: Test example_run.sh + run: | + # alias to a call launching the syri entrypoint from python + # necessary, as the hacky git install does not install the CLI entrypoints + alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' + ./example/example_workflow.sh From 4772498cef5332f9f185485e5caf30178c734458 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:45:11 +0800 Subject: [PATCH 44/80] whoops, fix residual action stuff --- .github/workflows/build.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 78845ed..6c33bc4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -11,8 +11,7 @@ on: required: true default: '3.12' -run: - using: composite +jobs: steps: - name: Checkout repo uses: actions/checkout@v4 From bbaa52e627c06fbd09bb33fe98705b5c86a13e91 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:50:16 +0800 Subject: [PATCH 45/80] fix indents 
--- .github/workflows/build.yml | 4 +++- .github/workflows/test_build.yml | 32 +++++++++++++++--------------- .github/workflows/test_example.yml | 20 +++++++++---------- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6c33bc4..38f76e6 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -12,7 +12,9 @@ on: default: '3.12' jobs: - steps: + build: + runs-on: ubuntu-latest + steps: - name: Checkout repo uses: actions/checkout@v4 - name: Set up Python ${{ inputs.python-version }} diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml index 56b4f3b..d232a76 100644 --- a/.github/workflows/test_build.yml +++ b/.github/workflows/test_build.yml @@ -21,19 +21,19 @@ jobs: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - - name: Build msyd - uses: ./.github/workflows/build.yml - with: - python-version: ${{ matrix.python-version }} - - name: Test installation - run: | - msyd --version - msyd -h - msyd call -h - msyd view -h - - name: Test example_run.sh - run: | - # alias to a call launching the syri entrypoint from python - # necessary, as the hacky git install does not install the CLI entrypoints - alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' - ./example/example_workflow.sh + - name: Build msyd + uses: ./.github/workflows/build.yml + with: + python-version: ${{ matrix.python-version }} + - name: Test installation + run: | + msyd --version + msyd -h + msyd call -h + msyd view -h + - name: Test example_run.sh + run: | + # alias to a call launching the syri entrypoint from python + # necessary, as the hacky git install does not install the CLI entrypoints + alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' + ./example/example_workflow.sh diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index a4e9f0a..2da216b 100644 --- 
a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -17,13 +17,13 @@ jobs: test: runs-on: ubuntu-latest steps: - - name: Build msyd - uses: ./.github/workflows/build.yml - with: - python-version: 3.12 - - name: Test example_run.sh - run: | - # alias to a call launching the syri entrypoint from python - # necessary, as the hacky git install does not install the CLI entrypoints - alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' - ./example/example_workflow.sh + - name: Build msyd + uses: ./.github/workflows/build.yml + with: + python-version: "3.12" + - name: Test example_run.sh + run: | + # alias to a call launching the syri entrypoint from python + # necessary, as the hacky git install does not install the CLI entrypoints + alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' + ./example/example_workflow.sh From 6524f88975a63e9835e3b891b77909549f3a6ef9 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 16:52:08 +0800 Subject: [PATCH 46/80] checkout in test workshops --- .github/workflows/build.yml | 4 ++-- .github/workflows/test_build.yml | 2 ++ .github/workflows/test_example.yml | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 38f76e6..4a6eb2f 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -15,8 +15,8 @@ jobs: build: runs-on: ubuntu-latest steps: - - name: Checkout repo - uses: actions/checkout@v4 +# - name: Checkout repo +# uses: actions/checkout@v4 - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 with: diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml index d232a76..57dcf7f 100644 --- a/.github/workflows/test_build.yml +++ b/.github/workflows/test_build.yml @@ -21,6 +21,8 @@ jobs: matrix: python-version: ["3.8", "3.9", "3.10", "3.11", 
"3.12", "3.13"] steps: + - name: Checkout repo + uses: actions/checkout@v4 - name: Build msyd uses: ./.github/workflows/build.yml with: diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 2da216b..1ee0ca5 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -17,6 +17,8 @@ jobs: test: runs-on: ubuntu-latest steps: + - name: Checkout repo + uses: actions/checkout@v4 - name: Build msyd uses: ./.github/workflows/build.yml with: From 558917606a03ad9b4820c02fbb594f6180f69990 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:05:24 +0800 Subject: [PATCH 47/80] refactor into a composite action again --- .github/workflows/build.yml | 63 ++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 4a6eb2f..1439763 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -1,38 +1,35 @@ # adapted from https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python name: Build msyd -description: "Checks out, installs dependencies and builds the msyd package. Formulated as a reusable workflow to reduce code duplication in testing." +description: "Checks out, installs dependencies and builds the msyd package. Formulated as a composite action to reduce code duplication in testing. Composite workflows cannot perserve state." 
-on: - workflow_call: - inputs: - python-version: - description: 'Python version to use' - required: true - default: '3.12' +inputs: + python-version: + description: 'Python version to use' + required: true + default: '3.12' -jobs: - build: - runs-on: ubuntu-latest - steps: -# - name: Checkout repo -# uses: actions/checkout@v4 - - name: Set up Python ${{ inputs.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python-version }} - cache: pip - - name: Update pip - run: python -m pip install --upgrade pip setuptools - - name: Install SyRI manually - run: | - # manually install syris dependencies - # the python version spoofing requires the --no-deps flag, so this is necessary - pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp - # manually use pip to install syri from github, as it isn't on pypi - # spoof python version to get around bounds check - pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) - - name: Install other dependencies - run: pip install -r requirements.txt - - name: Build msyd - run: pip install . 
+run: + using: composite + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ inputs.python-version }} + cache: pip + - name: Update pip + run: python -m pip install --upgrade pip setuptools + - name: Install SyRI manually + run: | + # manually install syris dependencies + # the python version spoofing requires the --no-deps flag, so this is necessary + pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp + # manually use pip to install syri from github, as it isn't on pypi + # spoof python version to get around bounds check + pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) + - name: Install other dependencies + run: pip install -r requirements.txt + - name: Build msyd + run: pip install . From 6a5094561f07ae06571fa072fde6bbffd6b3bb85 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:08:29 +0800 Subject: [PATCH 48/80] configure tests to use action --- .github/workflows/test_build.yml | 2 +- .github/workflows/test_example.yml | 4 +--- .github/workflows/build.yml => action.yml | 0 3 files changed, 2 insertions(+), 4 deletions(-) rename .github/workflows/build.yml => action.yml (100%) diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml index 57dcf7f..1b99eed 100644 --- a/.github/workflows/test_build.yml +++ b/.github/workflows/test_build.yml @@ -24,7 +24,7 @@ jobs: - name: Checkout repo uses: actions/checkout@v4 - name: Build msyd - uses: ./.github/workflows/build.yml + uses: schneebergerlab/msyd@main with: python-version: ${{ matrix.python-version }} - name: Test installation diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 1ee0ca5..3b20569 100644 --- 
a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -17,10 +17,8 @@ jobs: test: runs-on: ubuntu-latest steps: - - name: Checkout repo - uses: actions/checkout@v4 - name: Build msyd - uses: ./.github/workflows/build.yml + uses: schneebergerlab/msyd@main with: python-version: "3.12" - name: Test example_run.sh diff --git a/.github/workflows/build.yml b/action.yml similarity index 100% rename from .github/workflows/build.yml rename to action.yml From de6843e5cb0642e2beb3b8fec9ba1d84fab937fd Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:11:09 +0800 Subject: [PATCH 49/80] fix typo --- action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/action.yml b/action.yml index 1439763..61c546e 100644 --- a/action.yml +++ b/action.yml @@ -9,7 +9,7 @@ inputs: required: true default: '3.12' -run: +runs: using: composite steps: - name: Checkout repo From 79bf62948615a394ce92ad6951937756b61b41ca Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:13:06 +0800 Subject: [PATCH 50/80] add shell parameter --- action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/action.yml b/action.yml index 61c546e..405e062 100644 --- a/action.yml +++ b/action.yml @@ -21,6 +21,7 @@ runs: cache: pip - name: Update pip run: python -m pip install --upgrade pip setuptools + shell: bash - name: Install SyRI manually run: | # manually install syris dependencies @@ -29,7 +30,10 @@ runs: # manually use pip to install syri from github, as it isn't on pypi # spoof python version to get around bounds check pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) + shell: bash - name: Install other dependencies run: pip install -r requirements.txt + shell: bash - name: Build msyd run: pip install . 
+ shell: bash From 3b692f4ae0de8b9149ede21e358597e7a7ccc70b Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:27:17 +0800 Subject: [PATCH 51/80] whoops, remove test_example.sh from build process --- .github/workflows/test_build.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml index 1b99eed..92e6794 100644 --- a/.github/workflows/test_build.yml +++ b/.github/workflows/test_build.yml @@ -19,6 +19,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: + fail-fast: false python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout repo @@ -33,9 +34,3 @@ jobs: msyd -h msyd call -h msyd view -h - - name: Test example_run.sh - run: | - # alias to a call launching the syri entrypoint from python - # necessary, as the hacky git install does not install the CLI entrypoints - alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' - ./example/example_workflow.sh From 94f9e7f89bc1e7415cdd427785cd699b42d6921e Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:30:08 +0800 Subject: [PATCH 52/80] add conda dep install to integrated test --- .github/workflows/test_example.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 3b20569..9f02a71 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -21,6 +21,9 @@ jobs: uses: schneebergerlab/msyd@main with: python-version: "3.12" + - name: Install other tools using conda + run: | + $CONDA/bin/conda install -c conda-forge -c bioconda "minimap2" - name: Test example_run.sh run: | # alias to a call launching the syri entrypoint from python From c4bd95c082fbe1f7f9108143692fcff5d047ad4a Mon Sep 17 00:00:00 2001 From: Leon Rauschning 
<99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:37:42 +0800 Subject: [PATCH 53/80] fix --- .github/workflows/test_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml index 92e6794..d0f5a0b 100644 --- a/.github/workflows/test_build.yml +++ b/.github/workflows/test_build.yml @@ -18,8 +18,8 @@ jobs: test: runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - fail-fast: false python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - name: Checkout repo From 2d27b204ff72c0ff73cb2e313e94f9fa4f1c66ff Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Tue, 10 Dec 2024 17:51:33 +0800 Subject: [PATCH 54/80] adjust badge to new organization --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a2f6f01..0623148 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # msyd ![lint](https://github.com/schneebergerlab/msyd/actions/workflows/lint.yml/badge.svg) -![build](https://github.com/schneebergerlab/msyd/actions/workflows/build.yml/badge.svg) +![build](https://github.com/schneebergerlab/msyd/actions/workflows/test_build.yml/badge.svg) [![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint) From 3082802a77743445fd05614159fc46ad92e077c3 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:04:30 +0800 Subject: [PATCH 55/80] refactor example_worklfow test run into separate script to preserve alias --- .github/workflows/run_test_example.sh | 8 ++++++++ .github/workflows/test_example.yml | 6 +----- 2 files changed, 9 insertions(+), 5 deletions(-) create mode 100755 .github/workflows/run_test_example.sh diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh new file mode 100755 
index 0000000..4f5678f --- /dev/null +++ b/.github/workflows/run_test_example.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +# alias to a call launching the syri entrypoint from python +# necessary, as the hacky git install does not install the CLI entrypoints +alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' + +# run using source to preserve alias +source ../../example/example_workflow.sh diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 9f02a71..4ab148c 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -25,8 +25,4 @@ jobs: run: | $CONDA/bin/conda install -c conda-forge -c bioconda "minimap2" - name: Test example_run.sh - run: | - # alias to a call launching the syri entrypoint from python - # necessary, as the hacky git install does not install the CLI entrypoints - alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' - ./example/example_workflow.sh + run: ./.github/workflows/run_test_example.sh From 0736cb4730c26892b3193f57e6a85c57f4622bf7 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:18:57 +0800 Subject: [PATCH 56/80] change path to script from git root --- .github/workflows/run_test_example.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 4f5678f..89c00f6 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -5,4 +5,4 @@ alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' # run using source to preserve alias -source ../../example/example_workflow.sh +source ./example/example_workflow.sh From 93bdfb1f166cd8483c85fe1f6e07887a1818592e Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 14:32:16 +0800 Subject: [PATCH 57/80] 
try out test dependency installation --- .github/workflows/run_test_example.sh | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 89c00f6..13a4a5e 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -3,6 +3,9 @@ # alias to a call launching the syri entrypoint from python # necessary, as the hacky git install does not install the CLI entrypoints alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' - +python <(echo "import syri.scripts.syri;syri.scripts.syri.main()") --version +syri --version +minimap2 --version +minimap2.py --version # run using source to preserve alias source ./example/example_workflow.sh From d1d6e0ce97c8832ff7a470028e761645e763eb26 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:01:07 +0800 Subject: [PATCH 58/80] try specifying bash shell --- .github/workflows/run_test_example.sh | 5 +++-- .github/workflows/test_example.yml | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 13a4a5e..4fce6a2 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -2,10 +2,11 @@ # alias to a call launching the syri entrypoint from python # necessary, as the hacky git install does not install the CLI entrypoints -alias syri='python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")' +alias syri="python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" +alias minimap2="python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" python <(echo "import syri.scripts.syri;syri.scripts.syri.main()") --version syri --version minimap2 --version -minimap2.py --version +minimap2.py # run using source to preserve alias source ./example/example_workflow.sh diff --git 
a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 4ab148c..27a068d 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -26,3 +26,4 @@ jobs: $CONDA/bin/conda install -c conda-forge -c bioconda "minimap2" - name: Test example_run.sh run: ./.github/workflows/run_test_example.sh + shell: bash From c2338b861417d67c0a75165e7d06bb00c89742da Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:18:59 +0800 Subject: [PATCH 59/80] try aliasing with files in PATH --- .github/workflows/run_test_example.sh | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 4fce6a2..6863521 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -1,12 +1,15 @@ #!/bin/bash -# alias to a call launching the syri entrypoint from python + +# hacky way to hopefully alias the calls +# normal alias does not seem to work in GitHub CI # necessary, as the hacky git install does not install the CLI entrypoints -alias syri="python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" -alias minimap2="python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" -python <(echo "import syri.scripts.syri;syri.scripts.syri.main()") --version +echo "#!/bin/bash python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" > syri +chmod +x ./syri +echo "minimap2.py" > ./minimap2 +chmod +x ./minimap2 +PATH=$PATH:./ syri --version -minimap2 --version -minimap2.py +minimap2 # run using source to preserve alias source ./example/example_workflow.sh From 582d111902bb9ba176f5a97190e1774e83f51067 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 15:30:25 +0800 Subject: [PATCH 60/80] switch to modifying code at runtime --- 
.github/workflows/run_test_example.sh | 22 +++++++++++++--------- .github/workflows/test_example.yml | 2 +- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 6863521..fc4db71 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -4,12 +4,16 @@ # hacky way to hopefully alias the calls # normal alias does not seem to work in GitHub CI # necessary, as the hacky git install does not install the CLI entrypoints -echo "#!/bin/bash python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" > syri -chmod +x ./syri -echo "minimap2.py" > ./minimap2 -chmod +x ./minimap2 -PATH=$PATH:./ -syri --version -minimap2 -# run using source to preserve alias -source ./example/example_workflow.sh +#echo "python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" > syri +#chmod +x ./syri +#echo "minimap2.py" > ./minimap2 +#chmod +x ./minimap2 +#PATH=$PATH:./ +#syri --version +#minimap2 +## run using source to preserve alias +#source ./example/example_workflow.sh + +$CONDA/bin/conda install -c conda-forge -c bioconda "bioconda::minimap2" +$(tail -n -1 ./example/example_workflow.sh |\ + sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' ) diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 27a068d..b42fcca 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -23,7 +23,7 @@ jobs: python-version: "3.12" - name: Install other tools using conda run: | - $CONDA/bin/conda install -c conda-forge -c bioconda "minimap2" + $CONDA/bin/conda install -c conda-forge -c bioconda "bioconda::minimap2" - name: Test example_run.sh run: ./.github/workflows/run_test_example.sh shell: bash From 99628b84c25478c64f3269dceb00fb0988ff3267 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 
2024 15:56:51 +0800 Subject: [PATCH 61/80] change to proper tail command --- .github/workflows/run_test_example.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index fc4db71..bcb18f0 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -4,7 +4,8 @@ # hacky way to hopefully alias the calls # normal alias does not seem to work in GitHub CI # necessary, as the hacky git install does not install the CLI entrypoints -#echo "python <(echo 'import syri.scripts.syri;syri.scripts.syri.main()')" > syri +#echo "#!/bin/python" > syri +#echo "import syri.scripts.syri;syri.scripts.syri.main()" >> syri #chmod +x ./syri #echo "minimap2.py" > ./minimap2 #chmod +x ./minimap2 @@ -14,6 +15,5 @@ ## run using source to preserve alias #source ./example/example_workflow.sh -$CONDA/bin/conda install -c conda-forge -c bioconda "bioconda::minimap2" -$(tail -n -1 ./example/example_workflow.sh |\ +$(tail -n +2 ./example/example_workflow.sh |\ sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' ) From 03c6094bc93e37048e574fcf71db4d5cc71292f2 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 16:09:11 +0800 Subject: [PATCH 62/80] eliminate second cript call --- .github/workflows/test_example.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index b42fcca..18516d9 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -25,5 +25,7 @@ jobs: run: | $CONDA/bin/conda install -c conda-forge -c bioconda "bioconda::minimap2" - name: Test example_run.sh - run: ./.github/workflows/run_test_example.sh + run: | + minimap2 -h + $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import 
syri.scripts.syri;syri.scripts.syri.main()")/' ) shell: bash From ce43346b0366c1ac880e4117f51b739152738747 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 16:23:39 +0800 Subject: [PATCH 63/80] install minimap2 binary directly --- .github/workflows/test_example.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 18516d9..4cc9f36 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -21,11 +21,12 @@ jobs: uses: schneebergerlab/msyd@main with: python-version: "3.12" - - name: Install other tools using conda + - name: Install minimap2 manually # conda doesn't seem to work run: | - $CONDA/bin/conda install -c conda-forge -c bioconda "bioconda::minimap2" + curl -L https://github.com/lh3/minimap2/releases/download/v2.28/minimap2-2.28_x64-linux.tar.bz2 | tar -jxvf - + mv minimap2-2.28_x64-linux\/minimap2 ./ + ./minimap2 -h # test it worked & is callable - name: Test example_run.sh run: | - minimap2 -h - $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' ) + $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' -e 's/^minimap2/.\/minimap2/' ) shell: bash From 03af7db791b6245c7118a1eeaddb3af831986da2 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 16:48:47 +0800 Subject: [PATCH 64/80] try it in script again, not sure why this fails --- .github/workflows/run_test_example.sh | 3 +-- .github/workflows/test_example.yml | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index bcb18f0..fc54f52 100755 --- a/.github/workflows/run_test_example.sh +++ 
b/.github/workflows/run_test_example.sh @@ -15,5 +15,4 @@ ## run using source to preserve alias #source ./example/example_workflow.sh -$(tail -n +2 ./example/example_workflow.sh |\ - sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' ) +$(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' -e 's/^minimap2/.\/minimap2/' ) diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index 4cc9f36..bf3910c 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -27,6 +27,5 @@ jobs: mv minimap2-2.28_x64-linux\/minimap2 ./ ./minimap2 -h # test it worked & is callable - name: Test example_run.sh - run: | - $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' -e 's/^minimap2/.\/minimap2/' ) + run: ./run_test_example.sh shell: bash From 45c2a3e782d4c2c53aff856ff4536d2a835dd99b Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 17:13:51 +0800 Subject: [PATCH 65/80] correct path --- .github/workflows/test_example.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index bf3910c..de8fa18 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -27,5 +27,5 @@ jobs: mv minimap2-2.28_x64-linux\/minimap2 ./ ./minimap2 -h # test it worked & is callable - name: Test example_run.sh - run: ./run_test_example.sh + run: ./.github/workflows/run_test_example.sh shell: bash From 0acfba4d6e0cff0f2738906486150c5db3b2c1f4 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 23:26:55 +0800 Subject: [PATCH 66/80] add environment.yml for tests --- environment.yml | 9 +++++++++ 1 file changed, 9 
insertions(+) create mode 100644 environment.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..9a1066d --- /dev/null +++ b/environment.yml @@ -0,0 +1,9 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::syri=1.7.0 + - bioconda::minimap2=2.1.1 + - bioconda::mappy=2.28 + - conda-forge::cython=3.0.11 + - conda-forge::intervaltree=3.1.0 From c310df8f21533b1f397576e88bb1c976dffc02c8 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 12 Dec 2024 23:28:48 +0800 Subject: [PATCH 67/80] switch tests to build in conda env --- .github/workflows/test_build.yml | 28 ++++++++++++++-- .github/workflows/test_example.yml | 14 ++++---- action.yml | 53 ++++++++++++++++-------------- 3 files changed, 61 insertions(+), 34 deletions(-) diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml index d0f5a0b..1a5a049 100644 --- a/.github/workflows/test_build.yml +++ b/.github/workflows/test_build.yml @@ -24,10 +24,32 @@ jobs: steps: - name: Checkout repo uses: actions/checkout@v4 - - name: Build msyd - uses: schneebergerlab/msyd@main + # use old install manually + # since switching to conda no longer supports different python version + # - name: Build msyd + # uses: schneebergerlab/msyd@main + # # with: + # # python-version: ${{ matrix.python-version }} + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 with: - python-version: ${{ matrix.python-version }} + python-version: ${{ inputs.python-version }} + cache: pip + - name: Update pip + run: python -m pip install --upgrade pip setuptools + shell: bash + - name: Install SyRI manually + run: | + # manually install syris dependencies + # the python version spoofing requires the --no-deps flag, so this is necessary + pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp + # manually use pip to install syri from github, as it isn't on pypi + # 
spoof python version to get around bounds check + pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) + shell: bash + - name: Install other dependencies + run: pip install -r requirements.txt + shell: bash - name: Test installation run: | msyd --version diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index de8fa18..f88ec50 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -19,13 +19,13 @@ jobs: steps: - name: Build msyd uses: schneebergerlab/msyd@main - with: - python-version: "3.12" - - name: Install minimap2 manually # conda doesn't seem to work - run: | - curl -L https://github.com/lh3/minimap2/releases/download/v2.28/minimap2-2.28_x64-linux.tar.bz2 | tar -jxvf - - mv minimap2-2.28_x64-linux\/minimap2 ./ - ./minimap2 -h # test it worked & is callable + # with: + # python-version: "3.12" + # - name: Install minimap2 manually # conda doesn't seem to work + # run: | + # curl -L https://github.com/lh3/minimap2/releases/download/v2.28/minimap2-2.28_x64-linux.tar.bz2 | tar -jxvf - + # mv minimap2-2.28_x64-linux\/minimap2 ./ + # ./minimap2 -h # test it worked & is callable - name: Test example_run.sh run: ./.github/workflows/run_test_example.sh shell: bash diff --git a/action.yml b/action.yml index 405e062..9d78cfa 100644 --- a/action.yml +++ b/action.yml @@ -3,37 +3,42 @@ name: Build msyd description: "Checks out, installs dependencies and builds the msyd package. Formulated as a composite action to reduce code duplication in testing. Composite workflows cannot perserve state." 
-inputs: - python-version: - description: 'Python version to use' - required: true - default: '3.12' +#inputs: +# python-version: +# description: 'Python version to use' +# required: true +# default: '3.12' runs: using: composite steps: - name: Checkout repo uses: actions/checkout@v4 - - name: Set up Python ${{ inputs.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ inputs.python-version }} - cache: pip - - name: Update pip - run: python -m pip install --upgrade pip setuptools - shell: bash - - name: Install SyRI manually + # - name: Set up Python ${{ inputs.python-version }} + # uses: actions/setup-python@v5 + # with: + # python-version: ${{ inputs.python-version }} + # cache: pip + # - name: Update pip + # run: python -m pip install --upgrade pip setuptools + # shell: bash + # - name: Install SyRI manually + # run: | + # # manually install syris dependencies + # # the python version spoofing requires the --no-deps flag, so this is necessary + # pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp + # # manually use pip to install syri from github, as it isn't on pypi + # # spoof python version to get around bounds check + # pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) + # shell: bash + # - name: Install other dependencies + # run: pip install -r requirements.txt + # shell: bash + - name: Setup conda env run: | - # manually install syris dependencies - # the python version spoofing requires the --no-deps flag, so this is necessary - pip install Cython numpy pandas scipy psutil igraph longestrunsubsequence pysam pulp - # manually use pip to install syri from github, as it isn't on pypi - # spoof python version to get around bounds check - pip install 'git+https://github.com/schneebergerlab/syri.git' --python-version '3.10' --no-deps --no-warn-conflicts --target $(python -m site --user-site) - 
shell: bash - - name: Install other dependencies - run: pip install -r requirements.txt - shell: bash + $CONDA/bin/conda env create -n msyd --file ./environment.yml + $CONDA/bin/conda activate msyd + # python -m pip install mappy - name: Build msyd run: pip install . shell: bash From fe3ed64a7a389fef4ff3565c79df2168e5fbc893 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:52:41 +0800 Subject: [PATCH 68/80] whoops, add build step again --- .github/workflows/test_build.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test_build.yml b/.github/workflows/test_build.yml index 1a5a049..85873e1 100644 --- a/.github/workflows/test_build.yml +++ b/.github/workflows/test_build.yml @@ -50,6 +50,9 @@ jobs: - name: Install other dependencies run: pip install -r requirements.txt shell: bash + - name: Build msyd + run: pip install . + shell: bash - name: Test installation run: | msyd --version From ab97a6ce675628edd52bea1b762b7fdafc791f06 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 13 Dec 2024 10:53:31 +0800 Subject: [PATCH 69/80] add shell variable --- action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/action.yml b/action.yml index 9d78cfa..3db5248 100644 --- a/action.yml +++ b/action.yml @@ -38,6 +38,7 @@ runs: run: | $CONDA/bin/conda env create -n msyd --file ./environment.yml $CONDA/bin/conda activate msyd + shell: bash # python -m pip install mappy - name: Build msyd run: pip install . 
From 50884a0ea496f43b36f14154e1cd9aa41290e951 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:20:57 +0800 Subject: [PATCH 70/80] add conda init before conda activate --- action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/action.yml b/action.yml index 3db5248..7b85aba 100644 --- a/action.yml +++ b/action.yml @@ -36,6 +36,7 @@ runs: # shell: bash - name: Setup conda env run: | + $CONDA/bin/conda init $CONDA/bin/conda env create -n msyd --file ./environment.yml $CONDA/bin/conda activate msyd shell: bash From 17a0296ca9bb2b5baef7a1d889da7bdeef93c4a4 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:48:49 +0800 Subject: [PATCH 71/80] Revert "add conda init before conda activate" This reverts commit 50884a0ea496f43b36f14154e1cd9aa41290e951. --- action.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/action.yml b/action.yml index 7b85aba..3db5248 100644 --- a/action.yml +++ b/action.yml @@ -36,7 +36,6 @@ runs: # shell: bash - name: Setup conda env run: | - $CONDA/bin/conda init $CONDA/bin/conda env create -n msyd --file ./environment.yml $CONDA/bin/conda activate msyd shell: bash From e65b46df24cadf772a1656951f69b9de9c7b056d Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:49:52 +0800 Subject: [PATCH 72/80] activate in call function, maybe the shell is reinitialized? 
--- .github/workflows/run_test_example.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index fc54f52..6332190 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -15,4 +15,5 @@ ## run using source to preserve alias #source ./example/example_workflow.sh +$CONDA/bin/conda activate msyd $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' -e 's/^minimap2/.\/minimap2/' ) From 92820c1b793667458da1c1822878d2c8dd18f6a2 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:55:31 +0800 Subject: [PATCH 73/80] add conda init before activate --- .github/workflows/run_test_example.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 6332190..0b0412b 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -15,5 +15,6 @@ ## run using source to preserve alias #source ./example/example_workflow.sh +$CONDA/bin/conda init $CONDA/bin/conda activate msyd $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' -e 's/^minimap2/.\/minimap2/' ) From 15a0440f7643984df41d139ae84c5c8f1ffe227d Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 15:55:43 +0800 Subject: [PATCH 74/80] Reapply "add conda init before conda activate" This reverts commit 17a0296ca9bb2b5baef7a1d889da7bdeef93c4a4. 
--- action.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/action.yml b/action.yml index 3db5248..7b85aba 100644 --- a/action.yml +++ b/action.yml @@ -36,6 +36,7 @@ runs: # shell: bash - name: Setup conda env run: | + $CONDA/bin/conda init $CONDA/bin/conda env create -n msyd --file ./environment.yml $CONDA/bin/conda activate msyd shell: bash From fdaa4f1f7bf1c7e1af4326f7ecc8a073ba4cfe65 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:05:40 +0800 Subject: [PATCH 75/80] source .bashrc --- .github/workflows/run_test_example.sh | 1 - action.yml | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 0b0412b..6332190 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -15,6 +15,5 @@ ## run using source to preserve alias #source ./example/example_workflow.sh -$CONDA/bin/conda init $CONDA/bin/conda activate msyd $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' -e 's/^minimap2/.\/minimap2/' ) diff --git a/action.yml b/action.yml index 7b85aba..d8c163e 100644 --- a/action.yml +++ b/action.yml @@ -34,9 +34,10 @@ runs: # - name: Install other dependencies # run: pip install -r requirements.txt # shell: bash - - name: Setup conda env + - name: Setup conda env, install msyd run: | $CONDA/bin/conda init + source ~/.bashrc $CONDA/bin/conda env create -n msyd --file ./environment.yml $CONDA/bin/conda activate msyd shell: bash From 04a8ce25fa59fd0b461088c2314068297a977773 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:09:11 +0800 Subject: [PATCH 76/80] try updating base env instead --- .github/workflows/run_test_example.sh | 2 +- action.yml | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff 
--git a/.github/workflows/run_test_example.sh b/.github/workflows/run_test_example.sh index 6332190..f10db33 100755 --- a/.github/workflows/run_test_example.sh +++ b/.github/workflows/run_test_example.sh @@ -15,5 +15,5 @@ ## run using source to preserve alias #source ./example/example_workflow.sh -$CONDA/bin/conda activate msyd +#$CONDA/bin/conda activate msyd $(tail -n +2 ./example/example_workflow.sh | sed -e 's/^syri/python <(echo "import syri.scripts.syri;syri.scripts.syri.main()")/' -e 's/^minimap2/.\/minimap2/' ) diff --git a/action.yml b/action.yml index d8c163e..ea98436 100644 --- a/action.yml +++ b/action.yml @@ -36,10 +36,11 @@ runs: # shell: bash - name: Setup conda env, install msyd run: | - $CONDA/bin/conda init - source ~/.bashrc - $CONDA/bin/conda env create -n msyd --file ./environment.yml - $CONDA/bin/conda activate msyd + #$CONDA/bin/conda init + #source ~/.bashrc + #$CONDA/bin/conda env create -n msyd --file ./environment.yml + #$CONDA/bin/conda activate msyd + conda env update -n base --file ./environment.yml shell: bash # python -m pip install mappy - name: Build msyd From 533329589d7b9bcfa5ba9f603deea60904f91780 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:21:15 +0800 Subject: [PATCH 77/80] try setting python version manually --- action.yml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/action.yml b/action.yml index ea98436..19c14f7 100644 --- a/action.yml +++ b/action.yml @@ -14,11 +14,11 @@ runs: steps: - name: Checkout repo uses: actions/checkout@v4 - # - name: Set up Python ${{ inputs.python-version }} - # uses: actions/setup-python@v5 - # with: - # python-version: ${{ inputs.python-version }} - # cache: pip + - name: Set up Python ${{ inputs.python-version }} + uses: actions/setup-python@v5 + with: + python-version: 3.10 + cache: pip # - name: Update pip # run: python -m pip install --upgrade pip setuptools # shell: bash From 
98571046dea848ad43eca900bceb952e21829d4a Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:23:19 +0800 Subject: [PATCH 78/80] whoops, not a decimal number --- action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/action.yml b/action.yml index 19c14f7..77466eb 100644 --- a/action.yml +++ b/action.yml @@ -17,7 +17,7 @@ runs: - name: Set up Python ${{ inputs.python-version }} uses: actions/setup-python@v5 with: - python-version: 3.10 + python-version: '3.10' cache: pip # - name: Update pip # run: python -m pip install --upgrade pip setuptools From 33141efa606244195cdcb6e9908a4338fa1bad11 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:26:05 +0800 Subject: [PATCH 79/80] disable test_example_workflow for now, conda weirdness isn't ending --- .github/workflows/test_example.yml | 10 ++++++---- action.yml | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/workflows/test_example.yml b/.github/workflows/test_example.yml index f88ec50..6319adc 100644 --- a/.github/workflows/test_example.yml +++ b/.github/workflows/test_example.yml @@ -1,10 +1,12 @@ name: Test example.sh on: - push: - branches: [ "main", "dev" ] - pull_request: - branches: [ "main", "dev" ] + # do not run, as the conda package is currently broken, + # making getting the right environment not possible. 
+ # push: + # branches: [ "main", "dev" ] + # pull_request: + # branches: [ "main", "dev" ] # Cancel if a newer run is started # taken from https://github.com/nf-core/modules/blob/master/.github/workflows/nf-test.yml diff --git a/action.yml b/action.yml index 77466eb..bfe4b63 100644 --- a/action.yml +++ b/action.yml @@ -1,7 +1,7 @@ # adapted from https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python name: Build msyd -description: "Checks out, installs dependencies and builds the msyd package. Formulated as a composite action to reduce code duplication in testing. Composite workflows cannot perserve state." +description: "Checks out, installs dependencies and builds the msyd package. Formulated as a composite action to reduce code duplication in testing. Composite workflows cannot preserve state. Currently broken b/c of a version bound in the SyRI conda package." #inputs: # python-version: From d51d93e6b00082d3e97c53825e59013cab79645d Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:26:45 +0800 Subject: [PATCH 80/80] reorder badges --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 0623148..1131d41 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # msyd +[![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint) ![lint](https://github.com/schneebergerlab/msyd/actions/workflows/lint.yml/badge.svg) ![build](https://github.com/schneebergerlab/msyd/actions/workflows/test_build.yml/badge.svg) -[![linting: pylint](https://img.shields.io/badge/linting-pylint-yellowgreen)](https://github.com/pylint-dev/pylint) msyd is still under active development, so expect some bugs and changes!