Skip to content

Commit

Permalink
Remove references to renamed window function.
Browse files Browse the repository at this point in the history
  • Loading branch information
tomwhite authored and mergify[bot] committed Jul 5, 2021
1 parent d9538b0 commit a7cb326
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 42 deletions.
1 change: 0 additions & 1 deletion docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ Utilities
infer_sample_ploidy
infer_variant_ploidy
simulate_genotype_call_dataset
window
window_by_position
window_by_variant

Expand Down
4 changes: 2 additions & 2 deletions docs/getting_started.rst
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ example shows how to give cohorts names.
.. ipython:: python
:okwarning:
ds = sg.window(ds, size=20)
ds = sg.window_by_variant(ds, size=20)
ds = sg.Fst(ds)
cohort_names = ["Africa", "Asia", "Europe"]
Expand Down Expand Up @@ -320,7 +320,7 @@ Xarray and Pandas operations in a single pipeline:
# Apply filter to include variants present across > 80% of samples
.pipe(lambda ds: ds.sel(variants=ds.variant_call_rate > .8))
# Create windows of size 20 variants
.pipe(lambda ds: sg.window(ds, size=20))
.pipe(lambda ds: sg.window_by_variant(ds, size=20))
# Assign a "cohort" variable that splits samples into two groups
.assign(sample_cohort=np.repeat([0, 1], ds.dims['samples'] // 2))
# Compute Fst between the groups
Expand Down
3 changes: 1 addition & 2 deletions sgkit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
from .stats.preprocessing import filter_partial_calls
from .stats.regenie import regenie
from .testing import simulate_genotype_call_dataset
from .window import window, window_by_position, window_by_variant
from .window import window_by_position, window_by_variant

try:
__version__ = get_distribution(__name__).version
Expand Down Expand Up @@ -85,7 +85,6 @@
"variables",
"observed_heterozygosity",
"pca",
"window",
"window_by_position",
"window_by_variant",
"load_dataset",
Expand Down
6 changes: 3 additions & 3 deletions sgkit/stats/ld.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def ld_matrix(
Parameters
----------
ds
Dataset containing genotype dosages. Must already be windowed with :func:`window`.
Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
dosage
Name of genetic dosage variable.
Defined by :data:`sgkit.variables.dosage_spec`.
Expand Down Expand Up @@ -410,7 +410,7 @@ def ld_prune(
Parameters
----------
ds
Dataset containing genotype dosages. Must already be windowed with :func:`window`.
Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
dosage
Name of genetic dosage variable.
Defined by :data:`sgkit.variables.dosage_spec`.
Expand Down Expand Up @@ -445,7 +445,7 @@ def ld_prune(
>>> ds["dosage"] = ds["call_genotype"].sum(dim="ploidy")
>>> # Divide into windows of size five (variants)
>>> ds = sg.window(ds, size=5)
>>> ds = sg.window_by_variant(ds, size=5)
>>> pruned_ds = sg.ld_prune(ds)
>>> pruned_ds.dims["variants"]
Expand Down
28 changes: 14 additions & 14 deletions sgkit/stats/popgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def diversity(
"""Compute diversity from cohort allele counts.
By default, values of this statistic are calculated per variant.
To compute values in windows, call :func:`window` before calling
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
this function.
Parameters
Expand Down Expand Up @@ -83,7 +83,7 @@ def diversity(
[0.5 , 0.5 ]])
>>> # Divide into windows of size three (variants)
>>> ds = sg.window(ds, size=3)
>>> ds = sg.window_by_variant(ds, size=3)
>>> sg.diversity(ds)["stat_diversity"].values # doctest: +NORMALIZE_WHITESPACE
array([[1.83333333, 1.83333333],
[1. , 1. ]])
Expand Down Expand Up @@ -188,7 +188,7 @@ def divergence(
is the diversity for cohort i.
By default, values of this statistic are calculated per variant.
To compute values in windows, call :func:`window` before calling
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
this function.
Parameters
Expand Down Expand Up @@ -247,7 +247,7 @@ def divergence(
[0.625 , 0.5 ]]])
>>> # Divide into windows of size three (variants)
>>> ds = sg.window(ds, size=3)
>>> ds = sg.window_by_variant(ds, size=3)
>>> sg.divergence(ds)["stat_divergence"].values # doctest: +NORMALIZE_WHITESPACE
array([[[1.83333333, 1.5 ],
[1.5 , 1.83333333]],
Expand Down Expand Up @@ -373,7 +373,7 @@ def Fst(
"""Compute Fst between pairs of cohorts.
By default, values of this statistic are calculated per variant.
To compute values in windows, call :func:`window` before calling
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
this function.
Parameters
Expand Down Expand Up @@ -439,7 +439,7 @@ def Fst(
[ 0.2 , nan]]])
>>> # Divide into windows of size three (variants)
>>> ds = sg.window(ds, size=3)
>>> ds = sg.window_by_variant(ds, size=3)
>>> sg.Fst(ds)["stat_Fst"].values # doctest: +NORMALIZE_WHITESPACE
array([[[ nan, -0.22222222],
[-0.22222222, nan]],
Expand Down Expand Up @@ -480,7 +480,7 @@ def Tajimas_D(
"""Compute Tajima's D for a genotype call dataset.
By default, values of this statistic are calculated per variant.
To compute values in windows, call :func:`window` before calling
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
this function.
Parameters
Expand Down Expand Up @@ -533,7 +533,7 @@ def Tajimas_D(
[0.88883234, 0.88883234]])
>>> # Divide into windows of size three (variants)
>>> ds = sg.window(ds, size=3)
>>> ds = sg.window_by_variant(ds, size=3)
>>> sg.Tajimas_D(ds)["stat_Tajimas_D"].values # doctest: +NORMALIZE_WHITESPACE
array([[2.40517586, 2.40517586],
[1.10393559, 1.10393559]])
Expand Down Expand Up @@ -671,7 +671,7 @@ def pbs(
"""Compute the population branching statistic (PBS) between cohort triples.
By default, values of this statistic are calculated per variant.
To compute values in windows, call :func:`window` before calling
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
this function.
Parameters
Expand Down Expand Up @@ -721,7 +721,7 @@ def pbs(
>>> ds = ds.assign_coords({"cohorts_0": cohort_names, "cohorts_1": cohort_names, "cohorts_2": cohort_names})
>>> # Divide into two windows of size three (variants)
>>> ds = sg.window(ds, size=3)
>>> ds = sg.window_by_variant(ds, size=3)
>>> sg.pbs(ds)["stat_pbs"].sel(cohorts_0="co_0", cohorts_1="co_1", cohorts_2="co_2").values # doctest: +NORMALIZE_WHITESPACE
array([ 0. , -0.160898])
"""
Expand Down Expand Up @@ -806,7 +806,7 @@ def Garud_H(
of soft sweeps, as defined in Garud et al. (2015).
By default, values of this statistic are calculated across all variants.
To compute values in windows, call :func:`window` before calling
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
this function.
Parameters
Expand Down Expand Up @@ -868,7 +868,7 @@ def Garud_H(
>>> ds["sample_cohort"] = xr.DataArray(sample_cohort, dims="samples")
>>> # Divide into windows of size three (variants)
>>> ds = sg.window(ds, size=3, step=3)
>>> ds = sg.window_by_variant(ds, size=3, step=3)
>>> gh = sg.Garud_H(ds)
>>> gh["stat_Garud_h1"].values # doctest: +NORMALIZE_WHITESPACE
Expand Down Expand Up @@ -999,7 +999,7 @@ def observed_heterozygosity(
mean.
By default, values of this statistic are calculated per variant.
To compute values in windows, call :func:`window` before calling
To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
this function.
Parameters
Expand Down Expand Up @@ -1045,7 +1045,7 @@ def observed_heterozygosity(
[0.5, 0.5]])
>>> # Divide into windows of size three (variants)
>>> ds = sg.window(ds, size=3)
>>> ds = sg.window_by_variant(ds, size=3)
>>> sg.observed_heterozygosity(ds)["stat_observed_heterozygosity"].values # doctest: +NORMALIZE_WHITESPACE
array([[1.5, 2.5],
[1. , 1. ]])
Expand Down
8 changes: 4 additions & 4 deletions sgkit/tests/test_ld.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from hypothesis import strategies as st
from hypothesis.extra.numpy import arrays

from sgkit import variables, window
from sgkit import variables, window_by_variant
from sgkit.stats.ld import (
ld_matrix,
ld_prune,
Expand Down Expand Up @@ -66,7 +66,7 @@ def ldm_df(
) -> DataFrame:
ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
ds["dosage"] = (["variants", "samples"], x)
ds = window(ds, size=size, step=step)
ds = window_by_variant(ds, size=size, step=step)
df = ld_matrix(ds, threshold=threshold).compute()
if not diag:
df = df.pipe(lambda df: df[df["i"] != df["j"]])
Expand Down Expand Up @@ -156,7 +156,7 @@ def test_vs_skallel(args):

ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
ds["dosage"] = (["variants", "samples"], da.asarray(x).rechunk({0: chunks}))
ds = window(ds, size=size, step=step)
ds = window_by_variant(ds, size=size, step=step)

ldm = ld_matrix(ds, threshold=threshold)
has_duplicates = ldm.compute().duplicated(subset=["i", "j"]).any()
Expand All @@ -183,7 +183,7 @@ def test_scores():

ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
ds["dosage"] = (["variants", "samples"], x)
ds = window(ds, size=10)
ds = window_by_variant(ds, size=10)

ldm = ld_matrix(ds, threshold=0.2)
idx_drop_ds = maximal_independent_set(ldm)
Expand Down
26 changes: 13 additions & 13 deletions sgkit/tests/test_popgen.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
simulate_genotype_call_dataset,
variables,
)
from sgkit.window import window
from sgkit.window import window_by_variant

from .test_aggregation import get_dataset

Expand Down Expand Up @@ -135,7 +135,7 @@ def test_diversity__windowed(sample_size):
ts = simulate_ts(sample_size, length=200)
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, cohort_key_names=["cohorts"]) # type: ignore[no-untyped-call]
ds = window(ds, size=25)
ds = window_by_variant(ds, size=25)
ds = diversity(ds)
div = ds["stat_diversity"].sel(cohorts="co_0").compute()

Expand Down Expand Up @@ -195,7 +195,7 @@ def test_divergence__windowed(sample_size, n_cohorts, chunks):
ts = simulate_ts(sample_size, length=200)
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
ds = window(ds, size=25)
ds = window_by_variant(ds, size=25)
ds = divergence(ds)
div = ds["stat_divergence"].values
# test off-diagonal entries, by replacing diagonal with NaNs
Expand All @@ -222,7 +222,7 @@ def test_divergence__windowed_scikit_allel_comparison(sample_size, n_cohorts, ch
ts = simulate_ts(sample_size, length=200)
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
ds = window(ds, size=25)
ds = window_by_variant(ds, size=25)
ds = divergence(ds)
div = ds["stat_divergence"].values
# test off-diagonal entries, by replacing diagonal with NaNs
Expand Down Expand Up @@ -261,7 +261,7 @@ def test_Fst__Hudson(sample_size):
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
n_variants = ds.dims["variants"]
ds = window(ds, size=n_variants) # single window
ds = window_by_variant(ds, size=n_variants) # single window
ds = Fst(ds, estimator="Hudson")
fst = ds.stat_Fst.sel(cohorts_0="co_0", cohorts_1="co_1").values

Expand All @@ -283,7 +283,7 @@ def test_Fst__Nei(sample_size, n_cohorts):
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
n_variants = ds.dims["variants"]
ds = window(ds, size=n_variants) # single window
ds = window_by_variant(ds, size=n_variants) # single window
ds = Fst(ds, estimator="Nei")
fst = ds.stat_Fst.values

Expand Down Expand Up @@ -312,7 +312,7 @@ def test_Fst__windowed(sample_size, n_cohorts, chunks):
ts = simulate_ts(sample_size, length=200)
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
ds = window(ds, size=25)
ds = window_by_variant(ds, size=25)
fst_ds = Fst(ds, estimator="Nei")
fst = fst_ds["stat_Fst"].values

Expand Down Expand Up @@ -354,7 +354,7 @@ def test_Tajimas_D(sample_size):
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, cohort_key_names=None) # type: ignore[no-untyped-call]
n_variants = ds.dims["variants"]
ds = window(ds, size=n_variants) # single window
ds = window_by_variant(ds, size=n_variants) # single window
ds = Tajimas_D(ds)
d = ds.stat_Tajimas_D.compute()
ts_d = ts.Tajimas_D()
Expand Down Expand Up @@ -382,7 +382,7 @@ def test_pbs(sample_size, n_cohorts):
ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]) # type: ignore[no-untyped-call]
n_variants = ds.dims["variants"]
ds = window(ds, size=n_variants) # single window
ds = window_by_variant(ds, size=n_variants) # single window

ds = pbs(ds)

Expand Down Expand Up @@ -416,7 +416,7 @@ def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
ts = simulate_ts(sample_size, length=200)
ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]) # type: ignore[no-untyped-call]
ds = window(ds, size=25)
ds = window_by_variant(ds, size=25)

ds = pbs(ds, cohorts=cohorts)

Expand Down Expand Up @@ -466,7 +466,7 @@ def test_Garud_h(
cohort_names = [f"co_{i}" for i in range(n_cohorts)]
coords = {k: cohort_names for k in ["cohorts"]}
ds = ds.assign_coords(coords) # type: ignore[no-untyped-call]
ds = window(ds, size=3)
ds = window_by_variant(ds, size=3)

gh = Garud_H(ds, cohorts=cohorts)
h1 = gh.stat_Garud_h1.values
Expand Down Expand Up @@ -635,7 +635,7 @@ def test_observed_heterozygosity__windowed(chunks, cohorts, expectation):
["samples"],
da.asarray(cohorts).rechunk(chunks[1]),
)
ds = window(ds, size=2)
ds = window_by_variant(ds, size=2)
ho = observed_heterozygosity(ds)["stat_observed_heterozygosity"]
np.testing.assert_almost_equal(
ho,
Expand All @@ -662,7 +662,7 @@ def test_observed_heterozygosity__scikit_allel_comparison(
["samples"],
np.zeros(n_sample, int),
)
ds = window(ds, size=window_size)
ds = window_by_variant(ds, size=window_size)
ho_sg = observed_heterozygosity(ds)["stat_observed_heterozygosity"].values
if n_sample % window_size:
# scikit-allel will drop the ragged end
Expand Down
3 changes: 0 additions & 3 deletions sgkit/window.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,9 +91,6 @@ def window_by_variant(
return _window_per_contig(ds, variant_contig, merge, _get_windows, size, step)


window = window_by_variant


def window_by_position(
ds: Dataset,
*,
Expand Down

0 comments on commit a7cb326

Please sign in to comment.