Merge pull request astheeggeggs#36 from jeromekelleher/simplify-setup
Simplify CI setup
astheeggeggs authored Apr 17, 2024
2 parents ab8d4bb + 31553d1 commit efe28ac
Showing 16 changed files with 65 additions and 296 deletions.
11 changes: 11 additions & 0 deletions .github/workflows/black.yml
@@ -0,0 +1,11 @@
name: Lint

on: [push, pull_request]

jobs:
  lint:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: psf/black@stable
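The new Lint workflow simply checks out the repository and runs psf/black@stable over it. To reproduce the same check locally before pushing, one option (illustrative only, not part of this PR; it assumes black is installed in the current environment) is:

# Local equivalent of the "Lint" job above (illustrative, not part of this PR).
# `--check` reports files black would reformat without modifying them.
import subprocess

subprocess.run(["python", "-m", "black", "--check", "."], check=True)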

92 changes: 0 additions & 92 deletions .github/workflows/ci.yml

This file was deleted.

25 changes: 25 additions & 0 deletions .github/workflows/tests.yml
@@ -0,0 +1,25 @@
name: Tests

on: [push, pull_request]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.9", "3.10", "3.11"]

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
          cache: 'pip'
      - name: Display Python version
        run: python -c "import sys; print(sys.version)"
      - name: Install dependencies
        run: python -m pip install -r requirements.txt
      - name: Run tests
        run: python -m pytest -vs
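The Tests workflow installs the pinned requirements and runs pytest verbosely on Python 3.9, 3.10, and 3.11. A minimal sketch of reproducing the same two steps locally (illustrative only; it assumes pip and the repository's requirements.txt are available to the current interpreter):

# Mirror the CI steps on the local machine (illustrative, not part of this PR).
import subprocess

subprocess.run(["python", "-m", "pip", "install", "-r", "requirements.txt"], check=True)
subprocess.run(["python", "-m", "pytest", "-vs"], check=True)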
8 changes: 3 additions & 5 deletions lshmm/api.py
@@ -1,4 +1,5 @@
"""External API definitions."""

import warnings

import numpy as np
@@ -194,8 +195,8 @@ def set_emission_probabilities(
# DEV: there's a wrinkle here.
e = np.zeros((m, 8))
e[:, EQUAL_BOTH_HOM] = (1 - p_mutation) ** 2
e[:, UNEQUAL_BOTH_HOM] = p_mutation ** 2
e[:, BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation ** 2
e[:, UNEQUAL_BOTH_HOM] = p_mutation**2
e[:, BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation**2
e[:, REF_HOM_OBS_HET] = 2 * p_mutation * (1 - p_mutation)
e[:, REF_HET_OBS_HOM] = p_mutation * (1 - p_mutation)
e[:, MISSING_INDEX] = 1
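For context, the reformatted lines above fill the per-site emission matrix of the diploid model from a single mutation probability: the constant names suggest matching homozygotes get (1 - p)², mismatching homozygotes p², double heterozygotes (1 - p)² + p², and missing observations are given probability 1 so they do not affect the likelihood. A self-contained sketch of the same calculation follows; the column-index constants other than MISSING_INDEX are illustrative placeholders, since their real values are defined elsewhere in lshmm and are not shown in this diff.

import numpy as np

# Placeholder column indices (only MISSING_INDEX = 3 is visible in this diff).
EQUAL_BOTH_HOM, UNEQUAL_BOTH_HOM, BOTH_HET = 0, 1, 2
MISSING_INDEX, REF_HOM_OBS_HET, REF_HET_OBS_HOM = 3, 4, 5

def diploid_emission_matrix(m, p_mutation):
    # One row per site, eight emission classes per row.
    e = np.zeros((m, 8))
    e[:, EQUAL_BOTH_HOM] = (1 - p_mutation) ** 2
    e[:, UNEQUAL_BOTH_HOM] = p_mutation**2
    e[:, BOTH_HET] = (1 - p_mutation) ** 2 + p_mutation**2
    e[:, REF_HOM_OBS_HET] = 2 * p_mutation * (1 - p_mutation)
    e[:, REF_HET_OBS_HOM] = p_mutation * (1 - p_mutation)
    e[:, MISSING_INDEX] = 1  # missing sites are uninformative
    return e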
@@ -204,7 +205,6 @@


def viterbi_hap(n, m, reference_panel, query, emissions, p_recombination):

V, P, log_likelihood = forwards_viterbi_hap_lower_mem_rescaling(
n, m, reference_panel, query, emissions, p_recombination
)
@@ -214,7 +214,6 @@ def viterbi_hap(n, m, reference_panel, query, emissions, p_recombination):


def viterbi_dip(n, m, reference_panel, query, emissions, p_recombination):

V, P, log_likelihood = forwards_viterbi_dip_low_mem(
n, m, reference_panel, query, emissions, p_recombination
)
@@ -374,7 +373,6 @@ def path_ll(
p_mutation=None,
scale_mutation_based_on_n_alleles=True,
):

n, m, ploidy = checks(
reference_panel,
query,
16 changes: 5 additions & 11 deletions lshmm/forward_backward/fb_diploid.py
@@ -1,4 +1,5 @@
"""Collection of functions to run forwards and backwards algorithms on haploid genotype data, where the data is structured as variants x samples."""

import numpy as np

from lshmm import jit
@@ -12,6 +13,7 @@
MISSING = -1
MISSING_INDEX = 3


# https://github.com/numba/numba/issues/1269
@jit.numba_njit
def np_apply_along_axis(func1d, axis, arr):
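The linked numba issue concerns np.apply_along_axis not being supported inside njit-compiled code; the helper's body is collapsed in this diff. The widely used 2D workaround from that issue looks roughly like the sketch below (written against plain numba rather than the repository's jit wrapper, and not necessarily identical to lshmm's implementation):

import numpy as np
from numba import njit

@njit
def np_apply_along_axis(func1d, axis, arr):
    # Restricted replacement for np.apply_along_axis on 2D arrays.
    # func1d is expected to be an njit-compiled reducer (e.g. a wrapped np.max).
    assert arr.ndim == 2
    assert axis in (0, 1)
    if axis == 0:
        result = np.empty(arr.shape[1])
        for i in range(result.shape[0]):
            result[i] = func1d(arr[:, i])
    else:
        result = np.empty(arr.shape[0])
        for i in range(result.shape[0]):
            result[i] = func1d(arr[i, :])
    return result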
@@ -51,7 +53,7 @@ def forwards_ls_dip(n, m, G, s, e, r, norm=True):
"""Matrix based diploid LS forward algorithm using numpy vectorisation."""
# Initialise the forward tensor
F = np.zeros((m, n, n))
F[0, :, :] = 1 / (n ** 2)
F[0, :, :] = 1 / (n**2)
c = np.ones(m)
r_n = r / n
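The reformatted initialisation F[0, :, :] = 1 / (n**2) encodes a uniform prior over the n² ordered pairs of reference haplotypes at the first site, before emissions are applied; the per-site scaling factors c and the scaled recombination rate r_n = r / n are set up alongside it. A quick illustrative check of the uniform prior (toy values, not lshmm's API):

import numpy as np

n = 5
F0 = np.full((n, n), 1 / n**2)  # uniform prior over ordered haplotype pairs
assert np.isclose(F0.sum(), 1.0)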

@@ -143,7 +145,6 @@ def backwards_ls_dip(n, m, G, s, e, c, r):

# Backwards
for l in range(m - 2, -1, -1):

if s[0, l + 1] == MISSING:
index = MISSING_INDEX * np.ones(
(n, n), dtype=np.int64
@@ -181,7 +182,7 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r):
r_n = r / n
for j1 in range(n):
for j2 in range(n):
F[0, j1, j2] = 1 / (n ** 2)
F[0, j1, j2] = 1 / (n**2)
if s[0, 0] == MISSING:
index_tmp = MISSING_INDEX
else:
@@ -193,7 +194,6 @@ def forward_ls_dip_starting_point(n, m, G, s, e, r):
F[0, j1, j2] *= e[0, index_tmp]

for l in range(1, m):

# Determine the various components
F_no_change = np.zeros((n, n))
F_j1_change = np.zeros(n)
@@ -266,7 +266,6 @@ def backward_ls_dip_starting_point(n, m, G, s, e, r):
r_n = r / n

for l in range(m - 2, -1, -1):

# Determine the various components
B_no_change = np.zeros((n, n))
B_j1_change = np.zeros(n)
@@ -341,7 +340,7 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
F = np.zeros((m, n, n))
for j1 in range(n):
for j2 in range(n):
F[0, j1, j2] = 1 / (n ** 2)
F[0, j1, j2] = 1 / (n**2)
if s[0, 0] == MISSING:
index_tmp = MISSING_INDEX
else:
@@ -355,12 +354,10 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
c = np.ones(m)

if norm:

c[0] = np.sum(F[0, :, :])
F[0, :, :] *= 1 / c[0]

for l in range(1, m):

# Determine the various components
F_no_change = np.zeros((n, n))
F_j_change = np.zeros(n)
@@ -406,9 +403,7 @@ def forward_ls_dip_loop(n, m, G, s, e, r, norm=True):
ll = np.sum(np.log10(c))

else:

for l in range(1, m):

# Determine the various components
F_no_change = np.zeros((n, n))
F_j1_change = np.zeros(n)
@@ -466,7 +461,6 @@ def backward_ls_dip_loop(n, m, G, s, e, c, r):
r_n = r / n

for l in range(m - 2, -1, -1):

# Determine the various components
B_no_change = np.zeros((n, n))
B_j1_change = np.zeros(n)
3 changes: 1 addition & 2 deletions lshmm/forward_backward/fb_haploid.py
@@ -1,4 +1,5 @@
"""Collection of functions to run forwards and backwards algorithms on haploid genotype data, where the data is structured as variants x samples."""

import numpy as np

from lshmm import jit
@@ -14,7 +15,6 @@ def forwards_ls_hap(n, m, H, s, e, r, norm=True):
r_n = r / n

if norm:

c = np.zeros(m)
for i in range(n):
F[0, i] = (
@@ -40,7 +40,6 @@
ll = np.sum(np.log10(c))

else:

c = np.ones(m)

for i in range(n):
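The fb_haploid.py hunks show the normalised branch of the haploid forward algorithm: the forward vector at each site is rescaled, the scaling factors are stored in c, and the total log-likelihood is recovered at the end as ll = np.sum(np.log10(c)). This is the standard rescaling trick for keeping forward probabilities within floating-point range; a tiny illustrative sketch (toy numbers, not lshmm's API):

import numpy as np

# If the forward vector is divided by c[l] at every site l, the product of the
# c values equals the data likelihood, so the log10-likelihood is simply:
c = np.array([0.9, 0.05, 0.2])        # toy per-site scaling factors
log_likelihood = np.sum(np.log10(c))  # mirrors ll = np.sum(np.log10(c)) above
print(log_likelihood)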
19 changes: 9 additions & 10 deletions lshmm/vit_diploid.py
@@ -1,11 +1,13 @@
"""Collection of functions to run Viterbi algorithms on dipoid genotype data, where the data is structured as variants x samples."""

import numpy as np

from . import jit

MISSING = -1
MISSING_INDEX = 3


# https://github.com/numba/numba/issues/1269
@jit.numba_njit
def np_apply_along_axis(func1d, axis, arr):
@@ -60,7 +62,7 @@ def forwards_viterbi_dip_naive(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V[0, j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V[0, j1, j2] = 1 / (n**2) * e[0, index_tmp]

for l in range(1, m):
if s[0, l] == MISSING:
@@ -117,7 +119,7 @@ def forwards_viterbi_dip_naive_low_mem(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V_previous[j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Take a look at the haploid Viterbi implementation in Jerome's code and see if we can pinch some ideas.
# Diploid Viterbi, with smaller memory footprint.
@@ -175,7 +177,7 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V_previous[j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Diploid Viterbi, with smaller memory footprint, rescaling, and using the structure of the HMM.
for l in range(1, m):
@@ -203,7 +205,6 @@ def forwards_viterbi_dip_low_mem(n, m, G, s, e, r):

for j1 in range(n):
for j2 in range(n):

V_single_switch = max(V_rowcol_max[j1], V_rowcol_max[j2])
P_single_switch = np.argmax(
np.array([V_rowcol_max[j1], V_rowcol_max[j2]])
@@ -269,7 +270,7 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V_previous[j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V_previous[j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Diploid Viterbi, with smaller memory footprint, rescaling, and using the structure of the HMM.
for l in range(1, m):
Expand Down Expand Up @@ -300,7 +301,6 @@ def forwards_viterbi_dip_low_mem_no_pointer(n, m, G, s, e, r):

for j1 in range(n):
for j2 in range(n):

V_single_switch = max(V_rowcol_max[j1], V_rowcol_max[j2])
V[j1, j2] = V_previous[j1, j2] * no_switch # No switch in either

@@ -356,7 +356,7 @@ def forwards_viterbi_dip_naive_vec(n, m, G, s, e, r):
+ 2 * np.int64((G[0, j1, j2] == 1))
+ np.int64(s[0, 0] == 1)
)
V[0, j1, j2] = 1 / (n ** 2) * e[0, index_tmp]
V[0, j1, j2] = 1 / (n**2) * e[0, index_tmp]

# Jumped the gun - vectorising.
for l in range(1, m):
@@ -406,7 +406,7 @@ def forwards_viterbi_dip_naive_full_vec(n, m, G, s, e, r):
+ 2 * (G[0, :, :] == 1).astype(np.int64)
+ np.int64(s[0, 0] == 1)
)
V[0, :, :] = 1 / (n ** 2) * e[0, index]
V[0, :, :] = 1 / (n**2) * e[0, index]
r_n = r / n

for l in range(1, m):
@@ -511,12 +511,11 @@ def path_ll_dip(n, m, G, phased_path, s, e, r):
+ 2 * np.int64(G[0, phased_path[0][0], phased_path[1][0]] == 1)
+ np.int64(s[0, 0] == 1)
)
log_prob_path = np.log10(1 / (n ** 2) * e[0, index])
log_prob_path = np.log10(1 / (n**2) * e[0, index])
old_phase = np.array([phased_path[0][0], phased_path[1][0]])
r_n = r / n

for l in range(1, m):

if s[0, l] == MISSING:
index = MISSING_INDEX
else:
1 change: 1 addition & 0 deletions lshmm/vit_haploid.py
@@ -1,4 +1,5 @@
"""Collection of functions to run Viterbi algorithms on haploid genotype data, where the data is structured as variants x samples."""

import numpy as np

from . import jit
