Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize downsample-tips, add CLI #197

Merged
merged 28 commits into from
Jan 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
bf78c15
Optimize prune_extinct_lineages for contiguous ids
mmore500 Jan 4, 2025
9b2b7c8
Fixup joinem cli help message details, and test
mmore500 Jan 4, 2025
3f3b238
Fix tidy
mmore500 Jan 4, 2025
d436dac
fixup! Fixup joinem cli help message details, and test
mmore500 Jan 4, 2025
a7d8caf
Fix typo
mmore500 Jan 4, 2025
a027804
Fix fstring
mmore500 Jan 4, 2025
90785f2
Fix lazyframe handling in polars coercion
mmore500 Jan 4, 2025
a4cfd1c
Draft downsample tips CLI
mmore500 Jan 4, 2025
eca6bcb
Update docstring
mmore500 Jan 4, 2025
39dd681
fixup! Fix lazyframe handling in polars coercion
mmore500 Jan 4, 2025
1a037f9
fixup! fixup! Fixup joinem cli help message details, and test
mmore500 Jan 4, 2025
78ae9db
Update CLI command listing
mmore500 Jan 4, 2025
a8a76c3
Fix downstream overridden_arguments API use
mmore500 Jan 4, 2025
976c8cf
Fix typo
mmore500 Jan 4, 2025
d6bb8c6
Re-enable commented out test
mmore500 Jan 4, 2025
0d55881
Enable jit for prune extinct lineages
mmore500 Jan 4, 2025
64d19a0
Optimize leaf identification
mmore500 Jan 4, 2025
0862347
Fix type hint
mmore500 Jan 4, 2025
60ba403
Optimize alifestd_find_leaf_ids
mmore500 Jan 4, 2025
8e8cc51
Fix tidy
mmore500 Jan 4, 2025
1e7296d
Further optimize find leaf ids
mmore500 Jan 4, 2025
bd0497f
Fix typo, clarify flow
mmore500 Jan 4, 2025
681b71b
Fix bad default arg
mmore500 Jan 4, 2025
f9a7a05
Add another optimized impl for prune extinct lineages asexual
mmore500 Jan 5, 2025
b8021e1
Bugfix: ensure tips sampled without replacement
mmore500 Jan 5, 2025
dc25072
Fix array/list compat in tests
mmore500 Jan 5, 2025
b542226
Fix copy/paste race condition
mmore500 Jan 5, 2025
bf8c592
fixup! Fix array/list compat in tests
mmore500 Jan 5, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions hstrat/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,15 @@
print(f"hstrat v{get_hstrat_version()}")
print()
print("Available commands (stabilized API):")
print("$ python3 -m hstrat.dataframe.surface_build_tree")
print("$ python3 -m hstrat.dataframe.surface_unpack_reconstruct")
print("$ python3 -m hstrat.dataframe.surface_postprocess_trie")
print()
print("Available commands (experimental API):")
print("$ python3 -m hstrat._auxiliary_lib._alifestd_as_newick_asexual")
print(
"$ python3 -m hstrat._auxiliary_lib._alifestd_downsample_tips_asexual"
)
print(
"$ python3 -m hstrat._auxiliary_lib._alifestd_try_add_ancestor_list_col"
)
Expand Down
93 changes: 88 additions & 5 deletions hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,24 @@
import random
import argparse
import functools
import logging
import sys

Check warning on line 4 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L1-L4

Added lines #L1 - L4 were not covered by tests
import typing

from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli
import numpy as np

Check warning on line 8 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L7-L8

Added lines #L7 - L8 were not covered by tests
import pandas as pd

from ._alifestd_find_leaf_ids import alifestd_find_leaf_ids
from ._alifestd_has_contiguous_ids import alifestd_has_contiguous_ids

Check warning on line 12 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L12

Added line #L12 was not covered by tests
from ._alifestd_prune_extinct_lineages_asexual import (
alifestd_prune_extinct_lineages_asexual,
)
from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col
from ._configure_prod_logging import configure_prod_logging
from ._delegate_polars_implementation import delegate_polars_implementation
from ._format_cli_description import format_cli_description
from ._get_hstrat_version import get_hstrat_version
from ._log_context_duration import log_context_duration

Check warning on line 21 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L17-L21

Added lines #L17 - L21 were not covered by tests
from ._with_rng_state_context import with_rng_state_context


Expand All @@ -17,8 +28,13 @@
) -> pd.DataFrame:
"""Implementation detail for alifestd_downsample_tips_asexual."""
tips = alifestd_find_leaf_ids(phylogeny_df)
kept = random.sample(tips, min(n_downsample, len(tips)))
phylogeny_df["extant"] = phylogeny_df["id"].isin(kept)
kept = np.random.choice(tips, min(n_downsample, len(tips)), replace=False)
if alifestd_has_contiguous_ids(phylogeny_df):
extant = np.zeros(len(phylogeny_df), dtype=bool)
extant[kept] = True
phylogeny_df["extant"] = extant

Check warning on line 35 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L31-L35

Added lines #L31 - L35 were not covered by tests
else:
phylogeny_df["extant"] = phylogeny_df["id"].isin(kept)

Check warning on line 37 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L37

Added line #L37 was not covered by tests

return alifestd_prune_extinct_lineages_asexual(
phylogeny_df, mutate=True
Expand All @@ -31,8 +47,9 @@
mutate: bool = False,
seed: typing.Optional[int] = None,
) -> pd.DataFrame:
"""Subsample phylogeny containing `num_tips` tips. If `num_tips` is greater
than the number of tips in the phylogeny, the whole phylogeny is returned.
"""Create a subsample phylogeny containing `num_tips` tips. If `num_tips`
is greater than the number of tips in the phylogeny, the whole phylogeny is
returned.

Only supports asexual phylogenies.
"""
Expand All @@ -56,3 +73,69 @@
)

return impl(phylogeny_df, n_downsample)


_raw_description = """Create a subsample phylogeny containing `num_tips` tips.

Check warning on line 78 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L78

Added line #L78 was not covered by tests

If `num_tips` is greater than the number of tips in the phylogeny, the whole phylogeny is returned.

Data is assumed to be in alife standard format.
Only supports asexual phylogenies.

Additional Notes
================
- Requires 'ancestor_id' column to be present in input DataFrame.
Otherwise, no action is taken.

- Use `--eager-read` if modifying data file inplace.

- This CLI entrypoint is experimental and may be subject to change.
"""


def _create_parser() -> argparse.ArgumentParser:
    """Create parser for CLI entrypoint."""
    # The bare parser is built with its own help disabled and handed to
    # joinem's `_add_parser_base`; tool-specific options are registered on
    # whatever parser that call returns.
    cli_parser = _add_parser_base(
        parser=argparse.ArgumentParser(
            add_help=False,
            description=format_cli_description(_raw_description),
            formatter_class=argparse.RawTextHelpFormatter,
        ),
        dfcli_module="hstrat._auxiliary_lib._alifestd_downsample_tips_asexual",
        dfcli_version=get_hstrat_version(),
    )

    # (flags, keyword arguments) for each option specific to this tool.
    option_specs = (
        (
            ("-n",),
            {
                "default": sys.maxsize,
                "type": int,
                "help": "Number of tips to subsample.",
            },
        ),
        (
            ("--seed",),
            {
                "default": None,
                "dest": "seed",
                "help": "Integer seed for deterministic behavior.",
                "type": int,
            },
        ),
    )
    for flags, spec in option_specs:
        cli_parser.add_argument(*flags, **spec)

    return cli_parser

Check warning on line 120 in hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py#L120

Added line #L120 was not covered by tests


if __name__ == "__main__":
    configure_prod_logging()

    parser = _create_parser()
    # parse_known_args returns (namespace, leftover argv); the leftover
    # arguments are not used here.
    args, __ = parser.parse_known_args()

    with log_context_duration(
        "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", logging.info
    ):
        # Wrap the downsampling routine, with the CLI-supplied tip count and
        # seed bound in, before handing it to the dataframe CLI runner.
        wrap_for_polars = delegate_polars_implementation()
        downsample_op = functools.partial(
            alifestd_downsample_tips_asexual,
            n_downsample=args.n,
            seed=args.seed,
        )
        dataframe_op = wrap_for_polars(downsample_op)

        _run_dataframe_cli(
            base_parser=parser,
            output_dataframe_op=dataframe_op,
            overridden_arguments="ignore",  # seed is overridden
        )
15 changes: 6 additions & 9 deletions hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import typing

import numpy as np
import ordered_set as ods
import pandas as pd
Expand All @@ -9,7 +7,7 @@
from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col


def alifestd_find_leaf_ids(phylogeny_df: pd.DataFrame) -> typing.List[int]:
def alifestd_find_leaf_ids(phylogeny_df: pd.DataFrame) -> np.ndarray:

Check warning on line 10 in hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py#L10

Added line #L10 was not covered by tests
"""What ids are not listed in any `ancestor_list`?

Input dataframe is not mutated by this operation.
Expand All @@ -20,15 +18,14 @@
if "ancestor_id" in phylogeny_df:

# root is self ref, but must exclude to handle only-root phylo
internal_node_idxs = phylogeny_df.loc[
phylogeny_df["ancestor_id"] != phylogeny_df["id"],
"ancestor_id",
].to_numpy()
internal_node_idxs = phylogeny_df["ancestor_id"].to_numpy()[

Check warning on line 21 in hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py#L21

Added line #L21 was not covered by tests
phylogeny_df["ancestor_id"] != phylogeny_df["id"]
]

leaf_pos_filter = np.ones(len(phylogeny_df), dtype=np.bool_)
leaf_pos_filter[internal_node_idxs] = False

return phylogeny_df.loc[leaf_pos_filter, "id"].to_list()
return np.flatnonzero(leaf_pos_filter)

Check warning on line 28 in hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py#L28

Added line #L28 was not covered by tests

all_ids = ods.OrderedSet(phylogeny_df["id"])
internal_ids = (
Expand All @@ -50,4 +47,4 @@
]
)
)
return list(all_ids - internal_ids)
return np.fromiter(all_ids - internal_ids, dtype=int)

Check warning on line 50 in hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py#L50

Added line #L50 was not covered by tests
101 changes: 85 additions & 16 deletions hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,70 @@
import numpy as np
import pandas as pd

from ._alifestd_has_contiguous_ids import alifestd_has_contiguous_ids
from ._alifestd_is_topologically_sorted import alifestd_is_topologically_sorted

Check warning on line 7 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L6-L7

Added lines #L6 - L7 were not covered by tests
from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col
from ._alifestd_unfurl_lineage_asexual import alifestd_unfurl_lineage_asexual
from ._jit import jit
from ._unfurl_lineage_with_contiguous_ids import (

Check warning on line 11 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L10-L11

Added lines #L10 - L11 were not covered by tests
unfurl_lineage_with_contiguous_ids,
)


def _create_has_extant_descendant_noncontiguous(
    phylogeny_df: pd.DataFrame,
    extant_mask: np.ndarray,
) -> pd.Series:
    """Implementation detail for alifestd_prune_extinct_lineages_asexual.

    Flag the rows reached by walking the lineage of each extant row.

    Parameters
    ----------
    phylogeny_df : pd.DataFrame
        Phylogeny to scan. Mutated in place: a "has_extant_descendant" column
        is created (or overwritten). Rows are addressed with
        `.loc[lineage_id, ...]`, so the index must be keyed by the ids that
        the lineage walk yields.
    extant_mask : np.ndarray
        Boolean selector handed to `.loc` to pick out the extant rows.

    Returns
    -------
    pd.Series
        The "has_extant_descendant" column of `phylogeny_df`.
    """

    phylogeny_df["has_extant_descendant"] = False
    for extant_id in phylogeny_df.loc[extant_mask, "id"]:
        # NOTE(review): presumably yields ids from `extant_id` back toward
        # the root --- confirm against alifestd_unfurl_lineage_asexual.
        for lineage_id in alifestd_unfurl_lineage_asexual(
            phylogeny_df,
            int(extant_id),
            mutate=True,
        ):
            # stop at the first id that an earlier walk already flagged
            if phylogeny_df.loc[lineage_id, "has_extant_descendant"]:
                break

            phylogeny_df.loc[lineage_id, "has_extant_descendant"] = True

    return phylogeny_df["has_extant_descendant"]

Check warning on line 34 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L34

Added line #L34 was not covered by tests


@jit(nopython=True)
def _create_has_extant_descendant_contiguous(
    ancestor_ids: np.ndarray,
    extant_mask: np.ndarray,
) -> np.ndarray:
    """Implementation detail for alifestd_prune_extinct_lineages_asexual.

    Array-based counterpart of the non-contiguous implementation: ids are
    used directly as positions into the flag array.

    Parameters
    ----------
    ancestor_ids : np.ndarray
        Ancestor id for each row; passed to
        `unfurl_lineage_with_contiguous_ids` to walk each lineage.
    extant_mask : np.ndarray
        Mask whose nonzero positions are the extant ids.

    Returns
    -------
    np.ndarray
        Newly allocated array shaped and typed like `extant_mask`, set True
        at every position visited by a lineage walk.
    """

    has_extant_descendant = np.zeros_like(extant_mask)
    for extant_id in np.flatnonzero(extant_mask):
        # NOTE(review): presumably yields ids from `extant_id` back toward
        # the root --- confirm against unfurl_lineage_with_contiguous_ids.
        for lineage_id in unfurl_lineage_with_contiguous_ids(
            ancestor_ids,
            int(extant_id),
        ):
            # stop at the first id that an earlier walk already flagged
            if has_extant_descendant[lineage_id]:
                break

            has_extant_descendant[lineage_id] = True

    return has_extant_descendant

Check warning on line 55 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L55

Added line #L55 was not covered by tests


@jit(nopython=True)
def _create_has_extant_descendant_contiguous_sorted(
    ancestor_ids: np.ndarray,
    extant_mask: np.ndarray,
) -> np.ndarray:
    """Implementation detail for alifestd_prune_extinct_lineages_asexual.

    Single reverse sweep over the ids: starting from a copy of
    `extant_mask`, each row's flag is OR-ed into the entry of its ancestor,
    visiting ids from last to first. `extant_mask` itself is left untouched.

    The caller selects this routine only when ids are contiguous and the
    phylogeny is topologically sorted.
    """

    reaches_extant = extant_mask.copy()
    num_rows = len(ancestor_ids)
    for offset in range(num_rows):
        # count down from the last id to id 0
        child_id = num_rows - 1 - offset
        parent_id = ancestor_ids[child_id]
        reaches_extant[parent_id] = (
            reaches_extant[parent_id] | reaches_extant[child_id]
        )

    return reaches_extant

Check warning on line 69 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L69

Added line #L69 was not covered by tests


def alifestd_prune_extinct_lineages_asexual(
Expand Down Expand Up @@ -45,7 +107,10 @@
phylogeny_df = phylogeny_df.copy()

phylogeny_df = alifestd_try_add_ancestor_id_col(phylogeny_df, mutate=True)
phylogeny_df.set_index("id", drop=False, inplace=True)
if alifestd_has_contiguous_ids(phylogeny_df):
phylogeny_df.reset_index(drop=True, inplace=True)

Check warning on line 111 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L110-L111

Added lines #L110 - L111 were not covered by tests
else:
phylogeny_df.index = phylogeny_df["id"]

Check warning on line 113 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L113

Added line #L113 was not covered by tests

extant_mask = None
if "extant" in phylogeny_df:
Expand All @@ -58,22 +123,26 @@
else:
raise ValueError('Need "extant" or "destruction_time" column.')

phylogeny_df["has_extant_descendant"] = False

for extant_id in phylogeny_df.loc[extant_mask, "id"]:
for lineage_id in alifestd_unfurl_lineage_asexual(
if not alifestd_has_contiguous_ids(phylogeny_df):
has_extant_descendant = _create_has_extant_descendant_noncontiguous(

Check warning on line 127 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L126-L127

Added lines #L126 - L127 were not covered by tests
phylogeny_df,
int(extant_id),
mutate=True,
):
if phylogeny_df.loc[lineage_id, "has_extant_descendant"]:
break

phylogeny_df.loc[lineage_id, "has_extant_descendant"] = True
extant_mask,
)
elif not alifestd_is_topologically_sorted(phylogeny_df):
has_extant_descendant = _create_has_extant_descendant_contiguous(

Check warning on line 132 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L131-L132

Added lines #L131 - L132 were not covered by tests
phylogeny_df["ancestor_id"].to_numpy(dtype=np.uint64),
extant_mask.to_numpy(dtype=bool),
)
else:
has_extant_descendant = (

Check warning on line 137 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L137

Added line #L137 was not covered by tests
_create_has_extant_descendant_contiguous_sorted(
phylogeny_df["ancestor_id"].to_numpy(dtype=np.uint64),
extant_mask.to_numpy(dtype=bool),
)
)

drop_filter = ~phylogeny_df["has_extant_descendant"]
phylogeny_df = phylogeny_df[has_extant_descendant].reset_index(drop=True)

Check warning on line 144 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L144

Added line #L144 was not covered by tests
phylogeny_df.drop(
phylogeny_df.index[drop_filter], inplace=True, axis="rows"
columns="has_extant_descendant", errors="ignore", inplace=True
)
phylogeny_df.drop("has_extant_descendant", inplace=True, axis="columns")
return phylogeny_df.reset_index(drop=True)
return phylogeny_df

Check warning on line 148 in hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py#L148

Added line #L148 was not covered by tests
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ def alifestd_try_add_ancestor_list_col(
def _create_parser() -> argparse.ArgumentParser:
"""Create parser for CLI entrypoint."""
parser = argparse.ArgumentParser(
add_help=False,
description=format_cli_description(_raw_description),
formatter_class=argparse.RawTextHelpFormatter,
)
Expand Down
4 changes: 4 additions & 0 deletions hstrat/_auxiliary_lib/_coerce_to_pandas.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import typing

import pandas as pd
import polars as pl

_supported_iterables = tuple, set, list, frozenset
_supported_mappings = dict
Expand All @@ -10,6 +11,9 @@
"""
If a Polars type is detected, coerce it to corresponding Pandas type.
"""
if isinstance(obj, pl.LazyFrame):
obj = obj.collect()

Check warning on line 15 in hstrat/_auxiliary_lib/_coerce_to_pandas.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_coerce_to_pandas.py#L15

Added line #L15 was not covered by tests

if hasattr(obj, "__dataframe__"):
return pd.api.interchange.from_dataframe(obj, allow_copy=True)
elif hasattr(obj, "to_pandas"):
Expand Down
10 changes: 6 additions & 4 deletions hstrat/_auxiliary_lib/_delegate_polars_implementation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
from ._coerce_to_polars import coerce_to_polars
from ._warn_once import warn_once

DataFrame_T = typing.TypeVar("DataFrame_T", pd.DataFrame, pl.DataFrame)
DataFrame_T = typing.TypeVar(
"DataFrame_T", pd.DataFrame, pl.DataFrame, pl.LazyFrame
)
Series_T = typing.TypeVar("Series_T", pd.Series, pl.Series)


Expand All @@ -25,7 +27,7 @@
"""
if isinstance(arg, (pd.DataFrame, pd.Series)):
return True
elif isinstance(arg, (pl.DataFrame, pl.Series, str)):
elif isinstance(arg, (pl.DataFrame, pl.LazyFrame, pl.Series, str)):

Check warning on line 30 in hstrat/_auxiliary_lib/_delegate_polars_implementation.py

View check run for this annotation

Codecov / codecov/patch

hstrat/_auxiliary_lib/_delegate_polars_implementation.py#L30

Added line #L30 was not covered by tests
return False
elif recurse and isinstance(arg, _supported_mappings):
return any(_detect_pandas(v, recurse) for v in arg.values())
Expand All @@ -46,7 +48,7 @@
If `recurse` is True, then this function will recursively check for Polars
members in mappings and iterables.
"""
if isinstance(arg, (pl.DataFrame, pl.Series)):
if isinstance(arg, (pl.DataFrame, pl.LazyFrame, pl.Series)):
return True
elif isinstance(arg, (pd.DataFrame, pd.Series, str)):
return False
Expand Down Expand Up @@ -95,7 +97,7 @@
any_pandas = any(map(detect_pandas_, (*args, *kwargs.values())))
any_polars = any(map(detect_polars_, (*args, *kwargs.values())))
logging.info("begin delgate_polars_implementation")
logging.info("- detected {any_pandas=} {any_polars=}")
logging.info(f"- detected {any_pandas=} {any_polars=}")

if any_pandas and any_polars:
raise TypeError("mixing pandas and polars types is disallowed")
Expand Down
3 changes: 2 additions & 1 deletion hstrat/dataframe/surface_build_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@

def _create_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
add_help=False,
description=format_cli_description(raw_message),
formatter_class=argparse.RawTextHelpFormatter,
)
Expand Down Expand Up @@ -132,7 +133,7 @@ def _create_parser() -> argparse.ArgumentParser:
args, __ = parser.parse_known_args()

logging.info(
f"instantiating trie postprocess functor: "
"instantiating trie postprocess functor: "
f"`{args.trie_postprocessor}`",
)
trie_postprocessor = eval(args.trie_postprocessor, {"hstrat": hstrat})
Expand Down
Loading
Loading