Merge pull request astheeggeggs#36 from jeromekelleher/simplify-setup
Simplify CI setup
astheeggeggs authored Apr 17, 2024
2 parents ab8d4bb + 31553d1 commit efe28ac
Showing 16 changed files with 65 additions and 296 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,11 @@
name: Lint

on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: psf/black@stable
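The new Lint workflow simply checks out the repository and runs psf/black@stable over it. To reproduce the same check locally before pushing, one option (illustrative only, not part of this PR; it assumes black is installed in the current environment) is:

# Local equivalent of the "Lint" job above (illustrative, not part of this PR).
# `--check` reports files black would reformat without modifying them.
import subprocess

subprocess.run(["python", "-m", "black", "--check", "."], check=True)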

92 changes: 0 additions & 92 deletions .github/workflows/ci.yml

This file was deleted.

25 changes: 25 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,25 @@
name: Tests

on: [push, pull_request]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      - name: Install dependencies
        run: python -m pip install -r requirements.txt
      - name: Run tests
        run: python -m pytest -vs
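The Tests workflow installs the pinned requirements and runs pytest verbosely on Python 3.9, 3.10, and 3.11. A minimal sketch of reproducing the same two steps locally (illustrative only; it assumes pip and the repository's requirements.txt are available to the current interpreter):

# Mirror the CI steps on the local machine (illustrative, not part of this PR).
import subprocess

subprocess.run(["python", "-m", "pip", "install", "-r", "requirements.txt"], check=True)
subprocess.run(["python", "-m", "pytest", "-vs"], check=True)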
8 changes: 3 additions & 5 deletions lshmm/api.py
@@ -1,4 +1,5 @@
"""External API definitions."""

import warnings

import numpy as np
@@ -194,8 +195,8 @@ def set_emission_probabilities(
# DEV: there's a wrinkle here.
e = np.zeros((m, 8))
e[:, EQUAL_BOTH_HOM] = (1 - p_mutation) ** 2
e[:, UNEQUAL_BOTH_HOM] = p_mutation ** 2
e[:, BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation ** 2
e[:, UNEQUAL_BOTH_HOM] = p_mutation**2
e[:, BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation**2
e[:, REF_HOM_OBS_HET] = 2 * p_mutation * (1 - p_mutation)
e[:, REF_HET_OBS_HOM] = p_mutation * (1 - p_mutation)
e[:, MISSING_INDEX] = 1
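For context, the reformatted lines above fill the per-site emission matrix of the diploid model from a single mutation probability: the constant names suggest matching homozygotes get (1 - p)², mismatching homozygotes p², double heterozygotes (1 - p)² + p², and missing observations are given probability 1 so they do not affect the likelihood. A self-contained sketch of the same calculation follows; the column-index constants other than MISSING_INDEX are illustrative placeholders, since their real values are defined elsewhere in lshmm and are not shown in this diff.

import numpy as np

# Placeholder column indices (only MISSING_INDEX = 3 is visible in this diff).
EQUAL_BOTH_HOM, UNEQUAL_BOTH_HOM, BOTH_HET = 0, 1, 2
MISSING_INDEX, REF_HOM_OBS_HET, REF_HET_OBS_HOM = 3, 4, 5

def diploid_emission_matrix(m, p_mutation):
    # One row per site, eight emission classes per row.
    e = np.zeros((m, 8))
    e[:, EQUAL_BOTH_HOM] = (1 - p_mutation) ** 2
    e[:, UNEQUAL_BOTH_HOM] = p_mutation**2
    e[:, BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation**2
    e[:, REF_HOM_OBS_HET] = 2 * p_mutation * (1 - p_mutation)
    e[:, REF_HET_OBS_HOM] = p_mutation * (1 - p_mutation)
    e[:, MISSING_INDEX] = 1  # missing sites are uninformative
    return e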
@@ -204,7 +205,6 @@


def viterbi_hap(n, m, reference_panel, query, emissions, p_recombination):

V, P, log_likelihood = forwards_viterbi_hap_lower_mem_rescaling(
n, m, reference_panel, query, emissions, p_recombination
)
@@ -214,7 +214,6 @@ def viterbi_hap(n, m, reference_panel, query, emissions, p_recombination):


def viterbi_dip(n, m, reference_panel, query, emissions, p_recombination):

V, P, log_likelihood = forwards_viterbi_dip_low_mem(
n, m, reference_panel, query, emissions, p_recombination
)
@@ -374,7 +373,6 @@ def path_ll(
p_mutation=None,
scale_mutation_based_on_n_alleles=True,
):

n, m, ploidy = checks(
reference_panel,
query,
16 changes: 5 additions & 11 deletions lshmm/forward_backward/fb_diploid.py
@@ -1,4 +1,5 @@
"""Collection of functions to run forwards and backwards algorithms on haploid genotype data, where the data is structured as variants x samples."""

import numpy as np

from lshmm import jit
@@ -12,6 +13,7 @@
MISSING = -1
MISSING_INDEX = 3


# https://github.com/numba/numba/issues/1269
@jit.numba_njit
def np_apply_along_axis(func1d, axis, arr):
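The linked numba issue concerns np.apply_along_axis not being supported inside njit-compiled code; the helper's body is collapsed in this diff. The widely used 2D workaround from that issue looks roughly like the sketch below (written against plain numba rather than the repository's jit wrapper, and not necessarily identical to lshmm's implementation):

import numpy as np
from numba import njit

@njit
def np_apply_along_axis(func1d, axis, arr):
    # Restricted replacement for np.apply_along_axis on 2D arrays.
    # func1d is expected to be an njit-compiled reducer (e.g. a wrapped np.max).
    assert arr.ndim == 2
    assert axis in (0, 1)
    if axis == 0:
        result = np.empty(arr.shape[1])
        for i in range(result.shape[0]):
            result[i] = func1d(arr[:, i])
    else:
        result = np.empty(arr.shape[0])
        for i in range(result.shape[0]):
            result[i] = func1d(arr[i, :])
    return result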
@@ -51,7 +53,7 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True):
"""Matrix based diploid LS forward algorithm using numpy vectorisation."""
# Initialise the forward tensor
F = np.zeros((m, n, n))
F[0, :, :] = 1 / (n ** 2)
F[0, :, :] = 1 / (n**2)
c = np.ones(m)
r_n = r / n
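The reformatted initialisation F[0, :, :] = 1 / (n**2) encodes a uniform prior over the n² ordered pairs of reference haplotypes at the first site, before emissions are applied; the per-site scaling factors c and the scaled recombination rate r_n = r / n are set up alongside it. A quick illustrative check of the uniform prior (toy values, not lshmm's API):

import numpy as np

n = 5
F0 = np.full((n, n), 1 / n**2)  # uniform prior over ordered haplotype pairs
assert np.isclose(F0.sum(), 1.0)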

@@ -143,7 +145,6 @@ def backwards_ls_dip(n, m, G, s, e, c, r):

# Backwards
for l in range(m - 2, -1, -1):

if s[0, l + 1] == MISSING:
index = MISSING_INDEX * np.ones(
(n, n), dtype=np.int64
@@ -181,7 +182,7 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r):
r_n = r / n
for j1 in range(n):
for j2 in range(n):
F[0, j1, j2] = 1 / (n ** 2)
F[0, j1, j2] = 1 / (n**2)
if s[0, 0] == MISSING:
index_tmp = MISSING_INDEX
else:
@@ -193,7 +194,6 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r):
F[0, j1, j2] *= e[0, index_tmp]

for l in range(1, m):

# Determine the various components
F_no_change = np.zeros((n, n))
F_j1_change = np.zeros(n)
@@ -266,7 +266,6 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r):
r_n = r / n

for l in range(m - 2, -1, -1):

# Determine the various components
B_no_change = np.zeros((n, n))
B_j1_change = np.zeros(n)
@@ -341,7 +340,7 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
F = np.zeros((m, n, n))
for j1 in range(n):
for j2 in range(n):
F[0, j1, j2] = 1 / (n ** 2)
F[0, j1, j2] = 1 / (n**2)
if s[0, 0] == MISSING:
index_tmp = MISSING_INDEX
else:
@@ -355,12 +354,10 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
c = np.ones(m)

if norm:

c[0] = np.sum(F[0, :, :])
F[0, :, :] *= 1 / c[0]

for l in range(1, m):

# Determine the various components
F_no_change = np.zeros((n, n))
F_j_change = np.zeros(n)
@@ -406,9 +403,7 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
ll = np.sum(np.log10(c))

else:

for l in range(1, m):

# Determine the various components
F_no_change = np.zeros((n, n))
F_j1_change = np.zeros(n)
@@ -466,7 +461,6 @@ def backward_ls_dip_loop(n, m, G, s, e, c, r):
r_n = r / n

for l in range(m - 2, -1, -1):

# Determine the various components
B_no_change = np.zeros((n, n))
B_j1_change = np.zeros(n)
3 changes: 1 addition & 2 deletions lshmm/forward_backward/fb_haploid.py
@@ -1,4 +1,5 @@
"""Collection of functions to run forwards and backwards algorithms on haploid genotype data, where the data is structured as variants x samples."""

import numpy as np

from lshmm import jit
@@ -14,7 +15,6 @@ def forwards_ls_hap(n, m, H, s, e, r, norm=True):
r_n = r / n

if norm:

c = np.zeros(m)
for i in range(n):
F[0, i] = (
@@ -40,7 +40,6 @@
ll = np.sum(np.log10(c))

else:

c = np.ones(m)

for i in range(n):
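The fb_haploid.py hunks show the normalised branch of the haploid forward algorithm: the forward vector at each site is rescaled, the scaling factors are stored in c, and the total log-likelihood is recovered at the end as ll = np.sum(np.log10(c)). This is the standard rescaling trick for keeping forward probabilities within floating-point range; a tiny illustrative sketch (toy numbers, not lshmm's API):

import numpy as np

# If the forward vector is divided by c[l] at every site l, the product of the
# c values equals the data likelihood, so the log10-likelihood is simply:
c = np.array([0.9, 0.05, 0.2])        # toy per-site scaling factors
log_likelihood = np.sum(np.log10(c))  # mirrors ll = np.sum(np.log10(c)) above
print(log_likelihood)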
19 changes: 9 additions & 10 deletions lshmm/vit_diploid.py
@@ -1,11 +1,13 @@
"""Collection of functions to run Viterbi algorithms on dipoid genotype data, where the data is structured as variants x samples."""

import numpy as np

from . import jit

MISSING = -1
MISSING_INDEX = 3


# https://github.com/numba/numba/issues/1269
@jit.numba_njit
def np_apply_along_axis(func1d, axis, arr):
@@ -60,7 +62,7 @@ def forwards_viterbi_dip_naive(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V[0, j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V[0, j1, j2] = 1 / (n**2) * e[0, index_tmp]

for l in range(1, m):
if s[0, l] == MISSING:
@@ -117,7 +119,7 @@ def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V_previous[j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Take a look at the haploid Viterbi implementation in Jerome's code and see if we can pinch some ideas.
# Diploid Viterbi, with smaller memory footprint.
@@ -175,7 +177,7 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V_previous[j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Diploid Viterbi, with smaller memory footprint, rescaling, and using the structure of the HMM.
for l in range(1, m):
@@ -203,7 +205,6 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r):

for j1 in range(n):
for j2 in range(n):

V_single_switch = max(V_rowcol_max[j1], V_rowcol_max[j2])
P_single_switch = np.argmax(
np.array([V_rowcol_max[j1], V_rowcol_max[j2]])
@@ -269,7 +270,7 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V_previous[j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Diploid Viterbi, with smaller memory footprint, rescaling, and using the structure of the HMM.
for l in range(1, m):
Expand Down Expand Up @@ -300,7 +301,6 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r):

for j1 in range(n):
for j2 in range(n):

V_single_switch = max(V_rowcol_max[j1], V_rowcol_max[j2])
V[j1, j2] = V_previous[j1, j2] * no_switch # No switch in either

@@ -356,7 +356,7 @@ def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V[0, j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V[0, j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Jumped the gun - vectorising.
for l in range(1, m):
@@ -406,7 +406,7 @@ def forwards_viterbi_dip_naive_full_vec(n, m, G, s, e, r):
+ 2 * (G[0, :, :] == 1).astype(np.int64)
+ np.int64(s[0, 0] == 1)
)
V[0, :, :] = 1 / (n ** 2) * e[0, index]
V[0, :, :] = 1 / (n**2) * e[0, index]
r_n = r / n

for l in range(1, m):
@@ -511,12 +511,11 @@ def path_ll_dip(n, m, G, phased_path, s, e, r):
+ 2 * np.int64(G[0, phased_path[0][0], phased_path[1][0]] == 1)
+ np.int64(s[0, 0] == 1)
)
log_prob_path = np.log10(1 / (n ** 2) * e[0, index])
log_prob_path = np.log10(1 / (n**2) * e[0, index])
old_phase = np.array([phased_path[0][0], phased_path[1][0]])
r_n = r / n

for l in range(1, m):

if s[0, l] == MISSING:
index = MISSING_INDEX
else:
1 change: 1 addition & 0 deletions lshmm/vit_haploid.py
@@ -1,4 +1,5 @@
"""Collection of functions to run Viterbi algorithms on haploid genotype data, where the data is structured as variants x samples."""

import numpy as np

from . import jit
