Remove references to renamed window function.

sgkit-dev · Jul 5, 2021 · commit a7cb326
1 parent d9538b0

Showing 8 changed files with 37 additions and 42 deletions.
diff --git a/docs/api.rst b/docs/api.rst
@@ -107,7 +107,6 @@ Utilities
  infer_sample_ploidy
  infer_variant_ploidy
  simulate_genotype_call_dataset
- window
  window_by_position
  window_by_variant
 

diff --git a/docs/getting_started.rst b/docs/getting_started.rst
@@ -279,7 +279,7 @@ example shows how to give cohorts names.
 .. ipython:: python
  :okwarning:
 
- ds = sg.window(ds, size=20)
+ ds = sg.window_by_variant(ds, size=20)
  ds = sg.Fst(ds)
 
  cohort_names = ["Africa", "Asia", "Europe"]
@@ -320,7 +320,7 @@ Xarray and Pandas operations in a single pipeline:
  # Apply filter to include variants present across > 80% of samples
  .pipe(lambda ds: ds.sel(variants=ds.variant_call_rate > .8))
  # Create windows of size 20 variants
- .pipe(lambda ds: sg.window(ds, size=20))
+ .pipe(lambda ds: sg.window_by_variant(ds, size=20))
  # Assign a "cohort" variable that splits samples into two groups
  .assign(sample_cohort=np.repeat([0, 1], ds.dims['samples'] // 2))
  # Compute Fst between the groups

diff --git a/sgkit/__init__.py b/sgkit/__init__.py
@@ -41,7 +41,7 @@
 from .stats.preprocessing import filter_partial_calls
 from .stats.regenie import regenie
 from .testing import simulate_genotype_call_dataset
-from .window import window, window_by_position, window_by_variant
+from .window import window_by_position, window_by_variant
 
 try:
  __version__ = get_distribution(__name__).version
@@ -85,7 +85,6 @@
  "variables",
  "observed_heterozygosity",
  "pca",
- "window",
  "window_by_position",
  "window_by_variant",
  "load_dataset",

diff --git a/sgkit/stats/ld.py b/sgkit/stats/ld.py
@@ -88,7 +88,7 @@ def ld_matrix(
  Parameters
  ----------
  ds
- Dataset containing genotype dosages. Must already be windowed with :func:`window`.
+ Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
  dosage
  Name of genetic dosage variable.
  Defined by :data:`sgkit.variables.dosage_spec`.
@@ -410,7 +410,7 @@ def ld_prune(
  Parameters
  ----------
  ds
- Dataset containing genotype dosages. Must already be windowed with :func:`window`.
+ Dataset containing genotype dosages. Must already be windowed with :func:`window_by_position` or :func:`window_by_variant`.
  dosage
  Name of genetic dosage variable.
  Defined by :data:`sgkit.variables.dosage_spec`.
@@ -445,7 +445,7 @@ def ld_prune(
  >>> ds["dosage"] = ds["call_genotype"].sum(dim="ploidy")
 
  >>> # Divide into windows of size five (variants)
- >>> ds = sg.window(ds, size=5)
+ >>> ds = sg.window_by_variant(ds, size=5)
 
  >>> pruned_ds = sg.ld_prune(ds)
  >>> pruned_ds.dims["variants"]

diff --git a/sgkit/stats/popgen.py b/sgkit/stats/popgen.py
@@ -35,7 +35,7 @@ def diversity(
  """Compute diversity from cohort allele counts.
 
  By default, values of this statistic are calculated per variant.
- To compute values in windows, call :func:`window` before calling
+ To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
  this function.
 
  Parameters
@@ -83,7 +83,7 @@ def diversity(
  [0.5 , 0.5 ]])
 
  >>> # Divide into windows of size three (variants)
- >>> ds = sg.window(ds, size=3)
+ >>> ds = sg.window_by_variant(ds, size=3)
  >>> sg.diversity(ds)["stat_diversity"].values # doctest: +NORMALIZE_WHITESPACE
  array([[1.83333333, 1.83333333],
  [1. , 1. ]])
@@ -188,7 +188,7 @@ def divergence(
  is the diversity for cohort i.
 
  By default, values of this statistic are calculated per variant.
- To compute values in windows, call :func:`window` before calling
+ To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
  this function.
 
  Parameters
@@ -247,7 +247,7 @@ def divergence(
  [0.625 , 0.5 ]]])
 
  >>> # Divide into windows of size three (variants)
- >>> ds = sg.window(ds, size=3)
+ >>> ds = sg.window_by_variant(ds, size=3)
  >>> sg.divergence(ds)["stat_divergence"].values # doctest: +NORMALIZE_WHITESPACE
  array([[[1.83333333, 1.5 ],
  [1.5 , 1.83333333]],
@@ -373,7 +373,7 @@ def Fst(
  """Compute Fst between pairs of cohorts.
 
  By default, values of this statistic are calculated per variant.
- To compute values in windows, call :func:`window` before calling
+ To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
  this function.
 
  Parameters
@@ -439,7 +439,7 @@ def Fst(
  [ 0.2 , nan]]])
 
  >>> # Divide into windows of size three (variants)
- >>> ds = sg.window(ds, size=3)
+ >>> ds = sg.window_by_variant(ds, size=3)
  >>> sg.Fst(ds)["stat_Fst"].values # doctest: +NORMALIZE_WHITESPACE
  array([[[ nan, -0.22222222],
  [-0.22222222, nan]],
@@ -480,7 +480,7 @@ def Tajimas_D(
  """Compute Tajima's D for a genotype call dataset.
 
  By default, values of this statistic are calculated per variant.
- To compute values in windows, call :func:`window` before calling
+ To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
  this function.
 
  Parameters
@@ -533,7 +533,7 @@ def Tajimas_D(
  [0.88883234, 0.88883234]])
 
  >>> # Divide into windows of size three (variants)
- >>> ds = sg.window(ds, size=3)
+ >>> ds = sg.window_by_variant(ds, size=3)
  >>> sg.Tajimas_D(ds)["stat_Tajimas_D"].values # doctest: +NORMALIZE_WHITESPACE
  array([[2.40517586, 2.40517586],
  [1.10393559, 1.10393559]])
@@ -671,7 +671,7 @@ def pbs(
  """Compute the population branching statistic (PBS) between cohort triples.
 
  By default, values of this statistic are calculated per variant.
- To compute values in windows, call :func:`window` before calling
+ To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
  this function.
 
  Parameters
@@ -721,7 +721,7 @@ def pbs(
  >>> ds = ds.assign_coords({"cohorts_0": cohort_names, "cohorts_1": cohort_names, "cohorts_2": cohort_names})
 
  >>> # Divide into two windows of size three (variants)
- >>> ds = sg.window(ds, size=3)
+ >>> ds = sg.window_by_variant(ds, size=3)
  >>> sg.pbs(ds)["stat_pbs"].sel(cohorts_0="co_0", cohorts_1="co_1", cohorts_2="co_2").values # doctest: +NORMALIZE_WHITESPACE
  array([ 0. , -0.160898])
  """
@@ -806,7 +806,7 @@ def Garud_H(
  of soft sweeps, as defined in Garud et al. (2015).
 
  By default, values of this statistic are calculated across all variants.
- To compute values in windows, call :func:`window` before calling
+ To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
  this function.
 
  Parameters
@@ -868,7 +868,7 @@ def Garud_H(
  >>> ds["sample_cohort"] = xr.DataArray(sample_cohort, dims="samples")
 
  >>> # Divide into windows of size three (variants)
- >>> ds = sg.window(ds, size=3, step=3)
+ >>> ds = sg.window_by_variant(ds, size=3, step=3)
 
  >>> gh = sg.Garud_H(ds)
  >>> gh["stat_Garud_h1"].values # doctest: +NORMALIZE_WHITESPACE
@@ -999,7 +999,7 @@ def observed_heterozygosity(
  mean.
 
  By default, values of this statistic are calculated per variant.
- To compute values in windows, call :func:`window` before calling
+ To compute values in windows, call :func:`window_by_position` or :func:`window_by_variant` before calling
  this function.
 
  Parameters
@@ -1045,7 +1045,7 @@ def observed_heterozygosity(
  [0.5, 0.5]])
 
  >>> # Divide into windows of size three (variants)
- >>> ds = sg.window(ds, size=3)
+ >>> ds = sg.window_by_variant(ds, size=3)
  >>> sg.observed_heterozygosity(ds)["stat_observed_heterozygosity"].values # doctest: +NORMALIZE_WHITESPACE
  array([[1.5, 2.5],
  [1. , 1. ]])

diff --git a/sgkit/tests/test_ld.py b/sgkit/tests/test_ld.py
@@ -10,7 +10,7 @@
 from hypothesis import strategies as st
 from hypothesis.extra.numpy import arrays
 
-from sgkit import variables, window
+from sgkit import variables, window_by_variant
 from sgkit.stats.ld import (
  ld_matrix,
  ld_prune,
@@ -66,7 +66,7 @@ def ldm_df(
 ) -> DataFrame:
  ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
  ds["dosage"] = (["variants", "samples"], x)
- ds = window(ds, size=size, step=step)
+ ds = window_by_variant(ds, size=size, step=step)
  df = ld_matrix(ds, threshold=threshold).compute()
  if not diag:
  df = df.pipe(lambda df: df[df["i"] != df["j"]])
@@ -156,7 +156,7 @@ def test_vs_skallel(args):
 
  ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
  ds["dosage"] = (["variants", "samples"], da.asarray(x).rechunk({0: chunks}))
- ds = window(ds, size=size, step=step)
+ ds = window_by_variant(ds, size=size, step=step)
 
  ldm = ld_matrix(ds, threshold=threshold)
  has_duplicates = ldm.compute().duplicated(subset=["i", "j"]).any()
@@ -183,7 +183,7 @@ def test_scores():
 
  ds = simulate_genotype_call_dataset(n_variant=x.shape[0], n_sample=x.shape[1])
  ds["dosage"] = (["variants", "samples"], x)
- ds = window(ds, size=10)
+ ds = window_by_variant(ds, size=10)
 
  ldm = ld_matrix(ds, threshold=0.2)
  idx_drop_ds = maximal_independent_set(ldm)

diff --git a/sgkit/tests/test_popgen.py b/sgkit/tests/test_popgen.py
@@ -23,7 +23,7 @@
  simulate_genotype_call_dataset,
  variables,
 )
-from sgkit.window import window
+from sgkit.window import window_by_variant
 
 from .test_aggregation import get_dataset
 
@@ -135,7 +135,7 @@ def test_diversity__windowed(sample_size):
  ts = simulate_ts(sample_size, length=200)
  ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, cohort_key_names=["cohorts"]) # type: ignore[no-untyped-call]
- ds = window(ds, size=25)
+ ds = window_by_variant(ds, size=25)
  ds = diversity(ds)
  div = ds["stat_diversity"].sel(cohorts="co_0").compute()
 
@@ -195,7 +195,7 @@ def test_divergence__windowed(sample_size, n_cohorts, chunks):
  ts = simulate_ts(sample_size, length=200)
  ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
- ds = window(ds, size=25)
+ ds = window_by_variant(ds, size=25)
  ds = divergence(ds)
  div = ds["stat_divergence"].values
  # test off-diagonal entries, by replacing diagonal with NaNs
@@ -222,7 +222,7 @@ def test_divergence__windowed_scikit_allel_comparison(sample_size, n_cohorts, ch
  ts = simulate_ts(sample_size, length=200)
  ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
- ds = window(ds, size=25)
+ ds = window_by_variant(ds, size=25)
  ds = divergence(ds)
  div = ds["stat_divergence"].values
  # test off-diagonal entries, by replacing diagonal with NaNs
@@ -261,7 +261,7 @@ def test_Fst__Hudson(sample_size):
  ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
  n_variants = ds.dims["variants"]
- ds = window(ds, size=n_variants) # single window
+ ds = window_by_variant(ds, size=n_variants) # single window
  ds = Fst(ds, estimator="Hudson")
  fst = ds.stat_Fst.sel(cohorts_0="co_0", cohorts_1="co_1").values
 
@@ -283,7 +283,7 @@ def test_Fst__Nei(sample_size, n_cohorts):
  ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
  n_variants = ds.dims["variants"]
- ds = window(ds, size=n_variants) # single window
+ ds = window_by_variant(ds, size=n_variants) # single window
  ds = Fst(ds, estimator="Nei")
  fst = ds.stat_Fst.values
 
@@ -312,7 +312,7 @@ def test_Fst__windowed(sample_size, n_cohorts, chunks):
  ts = simulate_ts(sample_size, length=200)
  ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, n_cohorts) # type: ignore[no-untyped-call]
- ds = window(ds, size=25)
+ ds = window_by_variant(ds, size=25)
  fst_ds = Fst(ds, estimator="Nei")
  fst = fst_ds["stat_Fst"].values
 
@@ -354,7 +354,7 @@ def test_Tajimas_D(sample_size):
  ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, cohort_key_names=None) # type: ignore[no-untyped-call]
  n_variants = ds.dims["variants"]
- ds = window(ds, size=n_variants) # single window
+ ds = window_by_variant(ds, size=n_variants) # single window
  ds = Tajimas_D(ds)
  d = ds.stat_Tajimas_D.compute()
  ts_d = ts.Tajimas_D()
@@ -382,7 +382,7 @@ def test_pbs(sample_size, n_cohorts):
  ds = ts_to_dataset(ts) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]) # type: ignore[no-untyped-call]
  n_variants = ds.dims["variants"]
- ds = window(ds, size=n_variants) # single window
+ ds = window_by_variant(ds, size=n_variants) # single window
 
  ds = pbs(ds)
 
@@ -416,7 +416,7 @@ def test_pbs__windowed(sample_size, n_cohorts, cohorts, cohort_indexes, chunks):
  ts = simulate_ts(sample_size, length=200)
  ds = ts_to_dataset(ts, chunks) # type: ignore[no-untyped-call]
  ds, subsets = add_cohorts(ds, ts, n_cohorts, cohort_key_names=["cohorts_0", "cohorts_1", "cohorts_2"]) # type: ignore[no-untyped-call]
- ds = window(ds, size=25)
+ ds = window_by_variant(ds, size=25)
 
  ds = pbs(ds, cohorts=cohorts)
 
@@ -466,7 +466,7 @@ def test_Garud_h(
  cohort_names = [f"co_{i}" for i in range(n_cohorts)]
  coords = {k: cohort_names for k in ["cohorts"]}
  ds = ds.assign_coords(coords) # type: ignore[no-untyped-call]
- ds = window(ds, size=3)
+ ds = window_by_variant(ds, size=3)
 
  gh = Garud_H(ds, cohorts=cohorts)
  h1 = gh.stat_Garud_h1.values
@@ -635,7 +635,7 @@ def test_observed_heterozygosity__windowed(chunks, cohorts, expectation):
  ["samples"],
  da.asarray(cohorts).rechunk(chunks[1]),
  )
- ds = window(ds, size=2)
+ ds = window_by_variant(ds, size=2)
  ho = observed_heterozygosity(ds)["stat_observed_heterozygosity"]
  np.testing.assert_almost_equal(
  ho,
@@ -662,7 +662,7 @@ def test_observed_heterozygosity__scikit_allel_comparison(
  ["samples"],
  np.zeros(n_sample, int),
  )
- ds = window(ds, size=window_size)
+ ds = window_by_variant(ds, size=window_size)
  ho_sg = observed_heterozygosity(ds)["stat_observed_heterozygosity"].values
  if n_sample % window_size:
  # scikit-allel will drop the ragged end

diff --git a/sgkit/window.py b/sgkit/window.py
@@ -91,9 +91,6 @@ def window_by_variant(
  return _window_per_contig(ds, variant_contig, merge, _get_windows, size, step)
 
 
-window = window_by_variant
-
-
 def window_by_position(
  ds: Dataset,
  *,