From bf78c15d167733d88c9bdbe169075926b8fdbefe Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 07:02:06 -0500 Subject: [PATCH 01/28] Optimize prune_extinct_lineages for contiguous ids --- ...alifestd_prune_extinct_lineages_asexual.py | 73 +++++++++++++++---- 1 file changed, 58 insertions(+), 15 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py index d9d97d01..20cefede 100644 --- a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py @@ -3,8 +3,53 @@ import numpy as np import pandas as pd +from ._alifestd_has_contiguous_ids import alifestd_has_contiguous_ids from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col from ._alifestd_unfurl_lineage_asexual import alifestd_unfurl_lineage_asexual +from ._unfurl_lineage_with_contiguous_ids import ( + unfurl_lineage_with_contiguous_ids, +) + + +def _create_has_estant_descendant_noncontiguous( + phylogeny_df: pd.DataFrame, + extant_mask: np.ndarray, +) -> np.ndarray: + """Implementation detail for alifestd_prune_extinct_lineages_asexual.""" + + phylogeny_df["has_extant_descendant"] = False + for extant_id in phylogeny_df.loc[extant_mask, "id"]: + for lineage_id in alifestd_unfurl_lineage_asexual( + phylogeny_df, + int(extant_id), + mutate=True, + ): + if phylogeny_df.loc[lineage_id, "has_extant_descendant"]: + break + + phylogeny_df.loc[lineage_id, "has_extant_descendant"] = True + + return phylogeny_df["has_extant_descendant"] + + +def _create_has_extant_descendant_contiguous( + ancestor_ids: np.ndarray, + extant_mask: np.ndarray, +) -> np.ndarray: + """Implementation detail for alifestd_prune_extinct_lineages_asexual.""" + + has_extant_descendant = np.zeros_like(ancestor_ids, dtype=bool) + for extant_id in np.flatnonzero(extant_mask): + for lineage_id in unfurl_lineage_with_contiguous_ids( + ancestor_ids, + int(extant_id), + ): + if has_extant_descendant[lineage_id]: + break + + has_extant_descendant[lineage_id] = True + + return has_extant_descendant def alifestd_prune_extinct_lineages_asexual( @@ -58,22 +103,20 @@ def alifestd_prune_extinct_lineages_asexual( else: raise ValueError('Need "extant" or "destruction_time" column.') - phylogeny_df["has_extant_descendant"] = False - - for extant_id in phylogeny_df.loc[extant_mask, "id"]: - for lineage_id in alifestd_unfurl_lineage_asexual( + has_extant_descendant = ( + _create_has_extant_descendant_contiguous( + phylogeny_df["ancestor_id"].to_numpy(dtype=np.uint64), + extant_mask, + ) + if alifestd_has_contiguous_ids(phylogeny_df) + else _create_has_estant_descendant_noncontiguous( phylogeny_df, - int(extant_id), - mutate=True, - ): - if phylogeny_df.loc[lineage_id, "has_extant_descendant"]: - break - - phylogeny_df.loc[lineage_id, "has_extant_descendant"] = True + extant_mask, + ) + ) - drop_filter = ~phylogeny_df["has_extant_descendant"] + phylogeny_df = phylogeny_df[has_extant_descendant].reset_index(drop=True) phylogeny_df.drop( - phylogeny_df.index[drop_filter], inplace=True, axis="rows" + columns="has_extant_descendant", errors="ignore", inplace=True ) - phylogeny_df.drop("has_extant_descendant", inplace=True, axis="columns") - return phylogeny_df.reset_index(drop=True) + return phylogeny_df From 9b2b7c8ebeebcf41b3941a2a2f0aa60ec254cf7f Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 07:54:38 -0500 Subject: [PATCH 02/28] Fixup joinem cli help message details, and test --- .../_alifestd_try_add_ancestor_list_col.py | 9 +++++++-- hstrat/dataframe/surface_build_tree.py | 9 +++++++-- hstrat/dataframe/surface_postprocess_trie.py | 9 +++++++-- hstrat/dataframe/surface_unpack_reconstruct.py | 9 +++++++-- .../test_alifestd_as_newick_asexual_cli.py | 12 ++++++++++++ .../test_dataframe/test_surface_build_tree_cli.py | 12 ++++++++++++ .../test_surface_postprocess_trie_cli.py | 12 ++++++++++++ .../test_surface_unpack_reconstruct_cli.py | 12 ++++++++++++ 8 files changed, 76 insertions(+), 8 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py b/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py index fbd79e29..72d150b8 100644 --- a/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py +++ b/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py @@ -2,7 +2,11 @@ import logging import warnings -from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli +from joinem._dataframe_cli import ( + _add_parser_base, + _add_parser_core, + _run_dataframe_cli, +) import pandas as pd import polars as pl @@ -77,11 +81,12 @@ def _create_parser() -> argparse.ArgumentParser: description=format_cli_description(_raw_description), formatter_class=argparse.RawTextHelpFormatter, ) - _add_parser_base( + parser = _add_parser_base( parser=parser, dfcli_module="hstrat._auxiliary_lib._alifestd_try_add_ancestor_list_col", dfcli_version=get_hstrat_version(), ) + parser = _add_parser_core(parser=parser) return parser diff --git a/hstrat/dataframe/surface_build_tree.py b/hstrat/dataframe/surface_build_tree.py index 0adfc2a3..ef740b46 100644 --- a/hstrat/dataframe/surface_build_tree.py +++ b/hstrat/dataframe/surface_build_tree.py @@ -2,7 +2,11 @@ import functools import logging -from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli +from joinem._dataframe_cli import ( + _add_parser_base, + _add_parser_core, + _run_dataframe_cli, +) from .. import hstrat from .._auxiliary_lib import ( @@ -98,11 +102,12 @@ def _create_parser() -> argparse.ArgumentParser: description=format_cli_description(raw_message), formatter_class=argparse.RawTextHelpFormatter, ) - _add_parser_base( + parser = _add_parser_base( parser=parser, dfcli_module="hstrat.dataframe.surface_build_tree", dfcli_version=get_hstrat_version(), ) + parser = _add_parser_core(parser=parser) parser.add_argument( "--exploded-slice-size", type=int, diff --git a/hstrat/dataframe/surface_postprocess_trie.py b/hstrat/dataframe/surface_postprocess_trie.py index 0ee04038..1f57efe4 100644 --- a/hstrat/dataframe/surface_postprocess_trie.py +++ b/hstrat/dataframe/surface_postprocess_trie.py @@ -2,7 +2,11 @@ import functools import logging -from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli +from joinem._dataframe_cli import ( + _add_parser_base, + _add_parser_core, + _run_dataframe_cli, +) from .. import hstrat from .._auxiliary_lib import ( @@ -106,11 +110,12 @@ def _create_parser() -> argparse.ArgumentParser: description=format_cli_description(raw_message), formatter_class=argparse.RawTextHelpFormatter, ) - _add_parser_base( + parser = _add_parser_base( parser=parser, dfcli_module="hstrat.dataframe.surface_postprocess_trie", dfcli_version=get_hstrat_version(), ) + parser = _add_parser_core(parser=parser) parser.add_argument( "--trie-postprocessor", type=str, diff --git a/hstrat/dataframe/surface_unpack_reconstruct.py b/hstrat/dataframe/surface_unpack_reconstruct.py index 40a53d63..c35facd9 100644 --- a/hstrat/dataframe/surface_unpack_reconstruct.py +++ b/hstrat/dataframe/surface_unpack_reconstruct.py @@ -2,7 +2,11 @@ import functools import logging -from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli +from joinem._dataframe_cli import ( + _add_parser_base, + _add_parser_core, + _run_dataframe_cli, +) from .._auxiliary_lib import ( configure_prod_logging, @@ -132,11 +136,12 @@ def _create_parser() -> argparse.ArgumentParser: description=format_cli_description(raw_message), formatter_class=argparse.RawTextHelpFormatter, ) - _add_parser_base( + parser = _add_parser_base( parser=parser, dfcli_module="hstrat.dataframe.surface_unpack_reconstruct", dfcli_version=get_hstrat_version(), ) + parser = _add_parser_core(parser=parser) parser.add_argument( "--exploded-slice-size", type=int, diff --git a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_as_newick_asexual_cli.py b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_as_newick_asexual_cli.py index b8bda2b1..2c1467ab 100644 --- a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_as_newick_asexual_cli.py +++ b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_as_newick_asexual_cli.py @@ -7,6 +7,18 @@ assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") +def test_alifestd_as_newick_asexual_cli_help(): + subprocess.run( + [ + "python3", + "-m", + "hstrat._auxiliary_lib._alifestd_as_newick_asexual", + "--help", + ], + check=True, + ) + + def test_alifestd_as_newick_asexual_cli_version(): subprocess.run( [ diff --git a/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py b/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py index 236149e7..f8bff325 100644 --- a/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py +++ b/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py @@ -5,6 +5,18 @@ assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") +def test_surface_build_tree_cli_help(): + subprocess.run( + [ + "python3", + "-m", + "hstrat.dataframe.surface_build_tree", + "--help", + ], + check=True, + ) + + def test_surface_build_tree_cli_version(): subprocess.run( [ diff --git a/tests/test_hstrat/test_dataframe/test_surface_postprocess_trie_cli.py b/tests/test_hstrat/test_dataframe/test_surface_postprocess_trie_cli.py index f60227b3..38f56977 100644 --- a/tests/test_hstrat/test_dataframe/test_surface_postprocess_trie_cli.py +++ b/tests/test_hstrat/test_dataframe/test_surface_postprocess_trie_cli.py @@ -5,6 +5,18 @@ assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") +def test_surface_postprocess_trie_cli_help(): + subprocess.run( + [ + "python3", + "-m", + "hstrat.dataframe.surface_postprocess_trie", + "--help", + ], + check=True, + ) + + def test_surface_postprocess_trie_cli_version(): subprocess.run( [ diff --git a/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py b/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py index 13951026..f7c676a0 100644 --- a/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py +++ b/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py @@ -5,6 +5,18 @@ assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") +def test_surface_unpack_reconstruct_cli_helper(): + subprocess.run( + [ + "python3", + "-m", + "hstrat.dataframe.surface_unpack_reconstruct", + "--helper", + ], + check=True, + ) + + def test_surface_unpack_reconstruct_cli_version(): subprocess.run( [ From 3f3b23867c166e8c95beed5c55d8171d10c748b2 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 07:55:25 -0500 Subject: [PATCH 03/28] Fix tidy --- hstrat/dataframe/surface_build_tree.py | 2 +- hstrat/dataframe/surface_postprocess_trie.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hstrat/dataframe/surface_build_tree.py b/hstrat/dataframe/surface_build_tree.py index ef740b46..db4973a0 100644 --- a/hstrat/dataframe/surface_build_tree.py +++ b/hstrat/dataframe/surface_build_tree.py @@ -137,7 +137,7 @@ def _create_parser() -> argparse.ArgumentParser: args, __ = parser.parse_known_args() logging.info( - f"instantiating trie postprocess functor: " + "instantiating trie postprocess functor: " f"`{args.trie_postprocessor}`", ) trie_postprocessor = eval(args.trie_postprocessor, {"hstrat": hstrat}) diff --git a/hstrat/dataframe/surface_postprocess_trie.py b/hstrat/dataframe/surface_postprocess_trie.py index 1f57efe4..deb6ca68 100644 --- a/hstrat/dataframe/surface_postprocess_trie.py +++ b/hstrat/dataframe/surface_postprocess_trie.py @@ -139,7 +139,7 @@ def _create_parser() -> argparse.ArgumentParser: args, __ = parser.parse_known_args() logging.info( - f"instantiating trie postprocess functor: " + "instantiating trie postprocess functor: " f"`{args.trie_postprocessor}`", ) trie_postprocessor = eval(args.trie_postprocessor, {"hstrat": hstrat}) From d436dac8c3640d815837448b8384a5cb6e1485be Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 09:33:24 -0500 Subject: [PATCH 04/28] fixup! Fixup joinem cli help message details, and test --- .../_alifestd_try_add_ancestor_list_col.py | 10 +++------- hstrat/dataframe/surface_build_tree.py | 10 +++------- hstrat/dataframe/surface_postprocess_trie.py | 10 +++------- hstrat/dataframe/surface_unpack_reconstruct.py | 10 +++------- 4 files changed, 12 insertions(+), 28 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py b/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py index 72d150b8..1c92dbf8 100644 --- a/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py +++ b/hstrat/_auxiliary_lib/_alifestd_try_add_ancestor_list_col.py @@ -2,11 +2,7 @@ import logging import warnings -from joinem._dataframe_cli import ( - _add_parser_base, - _add_parser_core, - _run_dataframe_cli, -) +from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli import pandas as pd import polars as pl @@ -78,15 +74,15 @@ def alifestd_try_add_ancestor_list_col( def _create_parser() -> argparse.ArgumentParser: """Create parser for CLI entrypoint.""" parser = argparse.ArgumentParser( + add_help=False, description=format_cli_description(_raw_description), formatter_class=argparse.RawTextHelpFormatter, ) - parser = _add_parser_base( + _add_parser_base( parser=parser, dfcli_module="hstrat._auxiliary_lib._alifestd_try_add_ancestor_list_col", dfcli_version=get_hstrat_version(), ) - parser = _add_parser_core(parser=parser) return parser diff --git a/hstrat/dataframe/surface_build_tree.py b/hstrat/dataframe/surface_build_tree.py index db4973a0..bee9a865 100644 --- a/hstrat/dataframe/surface_build_tree.py +++ b/hstrat/dataframe/surface_build_tree.py @@ -2,11 +2,7 @@ import functools import logging -from joinem._dataframe_cli import ( - _add_parser_base, - _add_parser_core, - _run_dataframe_cli, -) +from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli from .. import hstrat from .._auxiliary_lib import ( @@ -99,15 +95,15 @@ def _create_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( + add_help=False, description=format_cli_description(raw_message), formatter_class=argparse.RawTextHelpFormatter, ) - parser = _add_parser_base( + _add_parser_base( parser=parser, dfcli_module="hstrat.dataframe.surface_build_tree", dfcli_version=get_hstrat_version(), ) - parser = _add_parser_core(parser=parser) parser.add_argument( "--exploded-slice-size", type=int, diff --git a/hstrat/dataframe/surface_postprocess_trie.py b/hstrat/dataframe/surface_postprocess_trie.py index deb6ca68..d73d80ec 100644 --- a/hstrat/dataframe/surface_postprocess_trie.py +++ b/hstrat/dataframe/surface_postprocess_trie.py @@ -2,11 +2,7 @@ import functools import logging -from joinem._dataframe_cli import ( - _add_parser_base, - _add_parser_core, - _run_dataframe_cli, -) +from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli from .. import hstrat from .._auxiliary_lib import ( @@ -107,15 +103,15 @@ def _create_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( + add_help=False, description=format_cli_description(raw_message), formatter_class=argparse.RawTextHelpFormatter, ) - parser = _add_parser_base( + _add_parser_base( parser=parser, dfcli_module="hstrat.dataframe.surface_postprocess_trie", dfcli_version=get_hstrat_version(), ) - parser = _add_parser_core(parser=parser) parser.add_argument( "--trie-postprocessor", type=str, diff --git a/hstrat/dataframe/surface_unpack_reconstruct.py b/hstrat/dataframe/surface_unpack_reconstruct.py index c35facd9..2c632f8c 100644 --- a/hstrat/dataframe/surface_unpack_reconstruct.py +++ b/hstrat/dataframe/surface_unpack_reconstruct.py @@ -2,11 +2,7 @@ import functools import logging -from joinem._dataframe_cli import ( - _add_parser_base, - _add_parser_core, - _run_dataframe_cli, -) +from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli from .._auxiliary_lib import ( configure_prod_logging, @@ -133,15 +129,15 @@ def _create_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser( + add_help=False, description=format_cli_description(raw_message), formatter_class=argparse.RawTextHelpFormatter, ) - parser = _add_parser_base( + _add_parser_base( parser=parser, dfcli_module="hstrat.dataframe.surface_unpack_reconstruct", dfcli_version=get_hstrat_version(), ) - parser = _add_parser_core(parser=parser) parser.add_argument( "--exploded-slice-size", type=int, From a7d8cafc31c4221d85f00a1f93abed3c47919e4b Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 09:39:05 -0500 Subject: [PATCH 05/28] Fix typo --- .../test_dataframe/test_surface_unpack_reconstruct_cli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py b/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py index f7c676a0..8e61ef3b 100644 --- a/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py +++ b/tests/test_hstrat/test_dataframe/test_surface_unpack_reconstruct_cli.py @@ -5,13 +5,13 @@ assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") -def test_surface_unpack_reconstruct_cli_helper(): +def test_surface_unpack_reconstruct_cli_help(): subprocess.run( [ "python3", "-m", "hstrat.dataframe.surface_unpack_reconstruct", - "--helper", + "--help", ], check=True, ) From a0278046239fdba3dc87c465e10e169430309780 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:11:59 -0500 Subject: [PATCH 06/28] Fix fstring --- hstrat/_auxiliary_lib/_delegate_polars_implementation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hstrat/_auxiliary_lib/_delegate_polars_implementation.py b/hstrat/_auxiliary_lib/_delegate_polars_implementation.py index 6d66e24a..b5087220 100644 --- a/hstrat/_auxiliary_lib/_delegate_polars_implementation.py +++ b/hstrat/_auxiliary_lib/_delegate_polars_implementation.py @@ -95,7 +95,7 @@ def delegating_function(*args, **kwargs) -> typing.Any: any_pandas = any(map(detect_pandas_, (*args, *kwargs.values()))) any_polars = any(map(detect_polars_, (*args, *kwargs.values()))) logging.info("begin delgate_polars_implementation") - logging.info("- detected {any_pandas=} {any_polars=}") + logging.info(f"- detected {any_pandas=} {any_polars=}") if any_pandas and any_polars: raise TypeError("mixing pandas and polars types is disallowed") From 90785f2beff7f2d9a226b175d9daeb5f1f44387e Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:18:42 -0500 Subject: [PATCH 07/28] Fix lazyframe handling in polars coercion --- hstrat/_auxiliary_lib/_coerce_to_pandas.py | 4 ++++ .../_delegate_polars_implementation.py | 8 +++++--- .../test_delegate_polars_implementation.py | 15 +++++++++++++-- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/hstrat/_auxiliary_lib/_coerce_to_pandas.py b/hstrat/_auxiliary_lib/_coerce_to_pandas.py index e656844a..c2702623 100644 --- a/hstrat/_auxiliary_lib/_coerce_to_pandas.py +++ b/hstrat/_auxiliary_lib/_coerce_to_pandas.py @@ -1,6 +1,7 @@ import typing import pandas as pd +import polars as pl _supported_iterables = tuple, set, list, frozenset _supported_mappings = dict @@ -10,6 +11,9 @@ def coerce_to_pandas(obj: typing.Any, *, recurse: bool = False) -> typing.Any: """ If a Polars type is detected, coerce it to corresponding Pandas type. """ + if isinstance(obj, pl.LazyFrame): + obj = obj.collect() + if hasattr(obj, "__dataframe__"): return pd.api.interchange.from_dataframe(obj, allow_copy=True) elif hasattr(obj, "to_pandas"): diff --git a/hstrat/_auxiliary_lib/_delegate_polars_implementation.py b/hstrat/_auxiliary_lib/_delegate_polars_implementation.py index b5087220..80e67743 100644 --- a/hstrat/_auxiliary_lib/_delegate_polars_implementation.py +++ b/hstrat/_auxiliary_lib/_delegate_polars_implementation.py @@ -13,7 +13,9 @@ from ._coerce_to_polars import coerce_to_polars from ._warn_once import warn_once -DataFrame_T = typing.TypeVar("DataFrame_T", pd.DataFrame, pl.DataFrame) +DataFrame_T = typing.TypeVar( + "DataFrame_T", pd.DataFrame, pl.DataFrame, pl.LazyFrame +) Series_T = typing.TypeVar("Series_T", pd.Series, pl.Series) @@ -25,7 +27,7 @@ def _detect_pandas(arg: typing.Any, recurse: bool) -> bool: """ if isinstance(arg, (pd.DataFrame, pd.Series)): return True - elif isinstance(arg, (pl.DataFrame, pl.Series, str)): + elif isinstance(arg, (pl.DataFrame, pl.LazyFrame, pl.Series, str)): return False elif recurse and isinstance(arg, _supported_mappings): return any(_detect_pandas(v, recurse) for v in arg.values()) @@ -46,7 +48,7 @@ def _detect_polars(arg: typing.Any, recurse: bool) -> bool: If `recurse` is True, then this function will recursively check for Polars members in mappings and iterables. """ - if isinstance(arg, (pl.DataFrame, pl.Series)): + if isinstance(arg, (pl.DataFrame, pl.LazyFrame, pl.Series)): return True elif isinstance(arg, (pd.DataFrame, pd.Series, str)): return False diff --git a/tests/test_hstrat/test_auxiliary_lib/test_delegate_polars_implementation.py b/tests/test_hstrat/test_auxiliary_lib/test_delegate_polars_implementation.py index 0705c9da..a491371e 100644 --- a/tests/test_hstrat/test_auxiliary_lib/test_delegate_polars_implementation.py +++ b/tests/test_hstrat/test_auxiliary_lib/test_delegate_polars_implementation.py @@ -38,6 +38,15 @@ def dummy_func( coerce_to_polars( pd.read_csv(f"{assets_path}/nk_tournamentselection.csv") ), + coerce_to_polars( + pd.read_csv(f"{assets_path}/nk_ecoeaselection.csv") + ).lazy(), + coerce_to_polars( + pd.read_csv(f"{assets_path}/nk_lexicaseselection.csv") + ).lazy(), + coerce_to_polars( + pd.read_csv(f"{assets_path}/nk_tournamentselection.csv") + ).lazy(), ], ) @pytest.mark.parametrize( @@ -72,8 +81,10 @@ def test_coercion_and_error( dummy_func({"df": df}, [series], 1234) else: new_df, new_series, _ = dummy_func({"df": df}, [series], "asdf") - assert type(new_df) == type(df) - assert type(new_series) == type(series) + assert isinstance(new_df, type(df)) or isinstance( + new_df, type(df.collect()) + ) + assert isinstance(new_series, type(series)) SignalException = type("", (Exception,), {}) From a4cfd1c3d2541e440aa3fe61c4b03419645d1b36 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:19:08 -0500 Subject: [PATCH 08/28] Draft downsample tips CLI --- .../_alifestd_downsample_tips_asexual.py | 74 +++++++++++++++++++ ...st_alifestd_downsample_tips_asexual_cli.py | 67 +++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py diff --git a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py index 9aca774f..3a339703 100644 --- a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py @@ -1,6 +1,10 @@ +import argparse +import functools +import logging import random import typing +from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli import pandas as pd from ._alifestd_find_leaf_ids import alifestd_find_leaf_ids @@ -8,6 +12,11 @@ alifestd_prune_extinct_lineages_asexual, ) from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col +from ._configure_prod_logging import configure_prod_logging +from ._delegate_polars_implementation import delegate_polars_implementation +from ._format_cli_description import format_cli_description +from ._get_hstrat_version import get_hstrat_version +from ._log_context_duration import log_context_duration from ._with_rng_state_context import with_rng_state_context @@ -56,3 +65,68 @@ def alifestd_downsample_tips_asexual( ) return impl(phylogeny_df, n_downsample) + + +_raw_description = """Create a subsample phylogeny containing `num_tips` tips. + +If `num_tips` is greater than the number of tips in the phylogeny, the whole phylogeny is returned. + +Data is assumed to be in alife standard format. +Only supports asexual phylogenies. + +Additional Notes +================ +- Requires 'ancestor_id' column to be present in input DataFrame. +Otherwise, no action is taken. + +- Use `--eager-read` if modifying data file inplace. + +- This CLI entrypoint is experimental and may be subject to change. +""" + + +def _create_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + add_help=False, + description=format_cli_description(_raw_description), + formatter_class=argparse.RawTextHelpFormatter, + ) + parser = _add_parser_base( + parser=parser, + dfcli_module="hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", + dfcli_version=get_hstrat_version(), + ) + parser.add_argument( + "-n", + type=int, + help="Number of tips to subsample.", + ) + parser.add_argument( + "--seed", + default=None, + dest="seed", + help="Integer seed for deterministic behavior.", + type=int, + ) + return parser + + +if __name__ == "__main__": + configure_prod_logging() + + parser = _create_parser() + args, __ = parser.parse_known_args() + with log_context_duration( + "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", logging.info + ): + _run_dataframe_cli( + allow_overridden_arguments=True, # seed is overridden + base_parser=parser, + output_dataframe_op=delegate_polars_implementation()( + functools.partial( + alifestd_downsample_tips_asexual, + n_downsample=args.n, + seed=args.seed, + ), + ), + ) diff --git a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py new file mode 100644 index 00000000..546b3d2f --- /dev/null +++ b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py @@ -0,0 +1,67 @@ +import os +import pathlib +import subprocess + +assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") + + +# def test_alifestd_downsample_tips_asexual_cli_help(): +# subprocess.run( +# [ +# "python3", +# "-m", +# "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", +# "--help", +# ], +# check=True, +# ) + + +def test_alifestd_downsample_tips_asexual_cli_version(): + subprocess.run( + [ + "python3", + "-m", + "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", + "--version", + ], + check=True, + ) + + +def test_alifestd_downsample_tips_asexual_cli_csv(): + output_file = "/tmp/hstrat_alifestd_downsample_tips_asexual.pqt" + pathlib.Path(output_file).unlink(missing_ok=True) + subprocess.run( + [ + "python3", + "-m", + "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", + "-n", + "1", + output_file, + ], + check=True, + input=f"{assets}/example-standard-toy-asexual-phylogeny.csv".encode(), + ) + assert os.path.exists(output_file) + + +def test_alifestd_downsample_tips_asexual_cli_parquet(): + output_file = "/tmp/hstrat_alifestd_downsample_tips_asexual.pqt" + pathlib.Path(output_file).unlink(missing_ok=True) + subprocess.run( + [ + "python3", + "-m", + "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", + "-n", + "1", + "--seed", + "50_000_000", + output_file, + ], + check=True, + input=f"{assets}/example-standard-toy-asexual-phylogeny.csv".encode(), + ) + assert os.path.exists(output_file) From eca6bcb8714d7e8970bd0d44920c3cde8e1bb8a8 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:19:39 -0500 Subject: [PATCH 09/28] Update docstring --- hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py index 3a339703..473e9a5a 100644 --- a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py @@ -40,8 +40,9 @@ def alifestd_downsample_tips_asexual( mutate: bool = False, seed: typing.Optional[int] = None, ) -> pd.DataFrame: - """Subsample phylogeny containing `num_tips` tips. If `num_tips` is greater - than the number of tips in the phylogeny, the whole phylogeny is returned. + """Create a subsample phylogeny containing `num_tips` tips. If `num_tips` + is greater than the number of tips in the phylogeny, the whole phylogeny is + returned. Only supports asexual phylogenies. """ From 39dd68175999170826847bc4c7bf50c144444829 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:20:37 -0500 Subject: [PATCH 10/28] fixup! Fix lazyframe handling in polars coercion --- .../test_coerce_to_pandas.py | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tests/test_hstrat/test_auxiliary_lib/test_coerce_to_pandas.py diff --git a/tests/test_hstrat/test_auxiliary_lib/test_coerce_to_pandas.py b/tests/test_hstrat/test_auxiliary_lib/test_coerce_to_pandas.py new file mode 100644 index 00000000..cf3efa74 --- /dev/null +++ b/tests/test_hstrat/test_auxiliary_lib/test_coerce_to_pandas.py @@ -0,0 +1,37 @@ +import pandas as pd +import polars as pl + +from hstrat._auxiliary_lib import coerce_to_pandas + + +def test_coerce_to_pandas_polars_lazyframe(): + lf = pl.LazyFrame({"a": [1, 2, 3]}) + result = coerce_to_pandas(lf) + + assert isinstance(result, pd.DataFrame) + pd.testing.assert_frame_equal(result, pd.DataFrame({"a": [1, 2, 3]})) + + +def test_coerce_to_pandas_polars_dataframe(): + df = pl.DataFrame({"x": [10, 20], "y": [30, 40]}) + result = coerce_to_pandas(df) + assert isinstance(result, pd.DataFrame) + pd.testing.assert_frame_equal( + result, pd.DataFrame({"x": [10, 20], "y": [30, 40]}) + ) + + +def test_coerce_to_pandas_recurse_iterable(): + data = (pl.DataFrame({"a": [1, 2]}), 42, pl.DataFrame({"b": [3, 4]})) + result = coerce_to_pandas(data, recurse=True) + assert isinstance(result, tuple) + assert len(result) == 3 + assert isinstance(result[0], pd.DataFrame) + assert result[1] == 42 + assert isinstance(result[2], pd.DataFrame) + + +def test_coerce_to_pandas_no_coercion_needed(): + data = [1, 2, 3, "no-polars-here"] + result = coerce_to_pandas(data) + assert result == data From 1a037f95d1f3442a8e7453e01cd1e2a737159493 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:24:28 -0500 Subject: [PATCH 11/28] fixup! fixup! Fixup joinem cli help message details, and test --- pyproject.toml | 2 +- requirements-dev/py310/requirements-all.txt | 2 +- requirements-dev/py310/requirements-docs.txt | 2 +- requirements-dev/py310/requirements-jit.txt | 2 +- requirements-dev/py310/requirements-minimal.txt | 2 +- requirements-dev/py310/requirements-release.txt | 2 +- requirements-dev/py310/requirements-testing.txt | 2 +- requirements-dev/py311/requirements-all.txt | 2 +- requirements-dev/py311/requirements-docs.txt | 2 +- requirements-dev/py311/requirements-jit.txt | 2 +- requirements-dev/py311/requirements-minimal.txt | 2 +- requirements-dev/py311/requirements-release.txt | 2 +- requirements-dev/py311/requirements-testing.txt | 2 +- requirements-dev/py312/requirements-all.txt | 2 +- requirements-dev/py312/requirements-docs.txt | 2 +- requirements-dev/py312/requirements-jit.txt | 2 +- requirements-dev/py312/requirements-minimal.txt | 2 +- requirements-dev/py312/requirements-release.txt | 2 +- requirements-dev/py312/requirements-testing.txt | 2 +- 19 files changed, 19 insertions(+), 19 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 905462c1..9e50556c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "downstream>=1.5.1", "iterpop>=0.3.4", "interval_search>=0.3.1", - "joinem>=0.9.1", + "joinem>=0.9.2", "keyname>=0.4.1", "lazy_loader>=0.4", "lru-dict>=1.1.7", diff --git a/requirements-dev/py310/requirements-all.txt b/requirements-dev/py310/requirements-all.txt index 46cce7a1..c4f818c5 100644 --- a/requirements-dev/py310/requirements-all.txt +++ b/requirements-dev/py310/requirements-all.txt @@ -131,7 +131,7 @@ jinja2==3.1.4 # nbconvert # nbsphinx # sphinx -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py310/requirements-docs.txt b/requirements-dev/py310/requirements-docs.txt index 202aeb49..f36e4860 100644 --- a/requirements-dev/py310/requirements-docs.txt +++ b/requirements-dev/py310/requirements-docs.txt @@ -102,7 +102,7 @@ jinja2==3.1.4 # nbconvert # nbsphinx # sphinx -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py310/requirements-jit.txt b/requirements-dev/py310/requirements-jit.txt index 55ffd8dd..95bdf1b1 100644 --- a/requirements-dev/py310/requirements-jit.txt +++ b/requirements-dev/py310/requirements-jit.txt @@ -52,7 +52,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py310/requirements-minimal.txt b/requirements-dev/py310/requirements-minimal.txt index 3f15e0d6..4d174746 100644 --- a/requirements-dev/py310/requirements-minimal.txt +++ b/requirements-dev/py310/requirements-minimal.txt @@ -48,7 +48,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py310/requirements-release.txt b/requirements-dev/py310/requirements-release.txt index 14cc613d..5b883f86 100644 --- a/requirements-dev/py310/requirements-release.txt +++ b/requirements-dev/py310/requirements-release.txt @@ -61,7 +61,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py310/requirements-testing.txt b/requirements-dev/py310/requirements-testing.txt index a522e317..0d1e7eab 100644 --- a/requirements-dev/py310/requirements-testing.txt +++ b/requirements-dev/py310/requirements-testing.txt @@ -71,7 +71,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py311/requirements-all.txt b/requirements-dev/py311/requirements-all.txt index 3097e0eb..cc7be2bc 100644 --- a/requirements-dev/py311/requirements-all.txt +++ b/requirements-dev/py311/requirements-all.txt @@ -129,7 +129,7 @@ jinja2==3.1.4 # nbconvert # nbsphinx # sphinx -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py311/requirements-docs.txt b/requirements-dev/py311/requirements-docs.txt index ce902807..04262e3a 100644 --- a/requirements-dev/py311/requirements-docs.txt +++ b/requirements-dev/py311/requirements-docs.txt @@ -102,7 +102,7 @@ jinja2==3.1.4 # nbconvert # nbsphinx # sphinx -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py311/requirements-jit.txt b/requirements-dev/py311/requirements-jit.txt index f8488bbe..38cb5163 100644 --- a/requirements-dev/py311/requirements-jit.txt +++ b/requirements-dev/py311/requirements-jit.txt @@ -52,7 +52,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py311/requirements-minimal.txt b/requirements-dev/py311/requirements-minimal.txt index d883f78c..f075a35b 100644 --- a/requirements-dev/py311/requirements-minimal.txt +++ b/requirements-dev/py311/requirements-minimal.txt @@ -48,7 +48,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py311/requirements-release.txt b/requirements-dev/py311/requirements-release.txt index 8aa8636e..8d7df8ee 100644 --- a/requirements-dev/py311/requirements-release.txt +++ b/requirements-dev/py311/requirements-release.txt @@ -61,7 +61,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py311/requirements-testing.txt b/requirements-dev/py311/requirements-testing.txt index 45b27978..a4f32d5f 100644 --- a/requirements-dev/py311/requirements-testing.txt +++ b/requirements-dev/py311/requirements-testing.txt @@ -69,7 +69,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py312/requirements-all.txt b/requirements-dev/py312/requirements-all.txt index 3097e0eb..cc7be2bc 100644 --- a/requirements-dev/py312/requirements-all.txt +++ b/requirements-dev/py312/requirements-all.txt @@ -129,7 +129,7 @@ jinja2==3.1.4 # nbconvert # nbsphinx # sphinx -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py312/requirements-docs.txt b/requirements-dev/py312/requirements-docs.txt index ce902807..04262e3a 100644 --- a/requirements-dev/py312/requirements-docs.txt +++ b/requirements-dev/py312/requirements-docs.txt @@ -102,7 +102,7 @@ jinja2==3.1.4 # nbconvert # nbsphinx # sphinx -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py312/requirements-jit.txt b/requirements-dev/py312/requirements-jit.txt index 2ca2e7e5..ada8d2cb 100644 --- a/requirements-dev/py312/requirements-jit.txt +++ b/requirements-dev/py312/requirements-jit.txt @@ -52,7 +52,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py312/requirements-minimal.txt b/requirements-dev/py312/requirements-minimal.txt index d883f78c..f075a35b 100644 --- a/requirements-dev/py312/requirements-minimal.txt +++ b/requirements-dev/py312/requirements-minimal.txt @@ -48,7 +48,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py312/requirements-release.txt b/requirements-dev/py312/requirements-release.txt index 8aa8636e..8d7df8ee 100644 --- a/requirements-dev/py312/requirements-release.txt +++ b/requirements-dev/py312/requirements-release.txt @@ -61,7 +61,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream diff --git a/requirements-dev/py312/requirements-testing.txt b/requirements-dev/py312/requirements-testing.txt index 45b27978..a4f32d5f 100644 --- a/requirements-dev/py312/requirements-testing.txt +++ b/requirements-dev/py312/requirements-testing.txt @@ -69,7 +69,7 @@ iterpop==0.4.1 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -joinem==0.9.1 +joinem==0.9.2 # via # hstrat (../../pyproject.toml) # downstream From 78ae9db2e35f9a418983e60091db8a8a593f03c1 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:34:36 -0500 Subject: [PATCH 12/28] Update CLI command listing --- hstrat/__main__.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hstrat/__main__.py b/hstrat/__main__.py index e84d9d0b..802b468a 100644 --- a/hstrat/__main__.py +++ b/hstrat/__main__.py @@ -4,11 +4,15 @@ print(f"hstrat v{get_hstrat_version()}") print() print("Available commands (stabilized API):") + print("$ python3 -m hstrat.dataframe.surface_build_tree") print("$ python3 -m hstrat.dataframe.surface_unpack_reconstruct") print("$ python3 -m hstrat.dataframe.surface_postprocess_trie") print() print("Available commands (experimental API):") print("$ python3 -m hstrat._auxiliary_lib._alifestd_as_newick_asexual") + print( + "$ python3 -m hstrat._auxiliary_lib._alifestd_downsample_tips_asexual" + ) print( "$ python3 -m hstrat._auxiliary_lib._alifestd_try_add_ancestor_list_col" ) From a8a76c390f72d43223b422322e49cd2083473de2 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:43:23 -0500 Subject: [PATCH 13/28] Fix downstream overriden_arguments API use --- hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py index 473e9a5a..431deafc 100644 --- a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py @@ -121,7 +121,6 @@ def _create_parser() -> argparse.ArgumentParser: "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", logging.info ): _run_dataframe_cli( - allow_overridden_arguments=True, # seed is overridden base_parser=parser, output_dataframe_op=delegate_polars_implementation()( functools.partial( @@ -130,4 +129,5 @@ def _create_parser() -> argparse.ArgumentParser: seed=args.seed, ), ), + overriden_arguments="ignore", # seed is overridden ) From 976c8cfbc8ec0eb7193bc60e13c32f81ce53ce7c Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 10:54:10 -0500 Subject: [PATCH 14/28] Fix typo --- hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py index 431deafc..6688ddb8 100644 --- a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py @@ -129,5 +129,5 @@ def _create_parser() -> argparse.ArgumentParser: seed=args.seed, ), ), - overriden_arguments="ignore", # seed is overridden + overridden_arguments="ignore", # seed is overridden ) From d6bb8c6465e15396218aef8b54e2ea546a2f70c7 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 11:01:48 -0500 Subject: [PATCH 15/28] Re-enable commented out test --- ...st_alifestd_downsample_tips_asexual_cli.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py index 546b3d2f..d9acfaa8 100644 --- a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py +++ b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_downsample_tips_asexual_cli.py @@ -5,16 +5,16 @@ assets = os.path.join(os.path.dirname(os.path.abspath(__file__)), "assets") -# def test_alifestd_downsample_tips_asexual_cli_help(): -# subprocess.run( -# [ -# "python3", -# "-m", -# "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", -# "--help", -# ], -# check=True, -# ) +def test_alifestd_downsample_tips_asexual_cli_help(): + subprocess.run( + [ + "python3", + "-m", + "hstrat._auxiliary_lib._alifestd_downsample_tips_asexual", + "--help", + ], + check=True, + ) def test_alifestd_downsample_tips_asexual_cli_version(): From 0d55881d26c028cfc2e4baff64156c4b394bd137 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 11:08:05 -0500 Subject: [PATCH 16/28] Enable jit for prune extinct lineages --- .../_alifestd_prune_extinct_lineages_asexual.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py index 20cefede..b16d8755 100644 --- a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py @@ -6,6 +6,7 @@ from ._alifestd_has_contiguous_ids import alifestd_has_contiguous_ids from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col from ._alifestd_unfurl_lineage_asexual import alifestd_unfurl_lineage_asexual +from ._jit import jit from ._unfurl_lineage_with_contiguous_ids import ( unfurl_lineage_with_contiguous_ids, ) @@ -32,13 +33,14 @@ def _create_has_estant_descendant_noncontiguous( return phylogeny_df["has_extant_descendant"] +@jit(nopython=True) def _create_has_extant_descendant_contiguous( ancestor_ids: np.ndarray, extant_mask: np.ndarray, ) -> np.ndarray: """Implementation detail for alifestd_prune_extinct_lineages_asexual.""" - has_extant_descendant = np.zeros_like(ancestor_ids, dtype=bool) + has_extant_descendant = np.zeros_like(extant_mask) for extant_id in np.flatnonzero(extant_mask): for lineage_id in unfurl_lineage_with_contiguous_ids( ancestor_ids, @@ -106,7 +108,7 @@ def alifestd_prune_extinct_lineages_asexual( has_extant_descendant = ( _create_has_extant_descendant_contiguous( phylogeny_df["ancestor_id"].to_numpy(dtype=np.uint64), - extant_mask, + extant_mask.to_numpy(dtype=bool), ) if alifestd_has_contiguous_ids(phylogeny_df) else _create_has_estant_descendant_noncontiguous( From 64d19a077902162895c649b0edb35f9625a9f017 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 11:29:58 -0500 Subject: [PATCH 17/28] Optimize leaf identification --- hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py | 4 ++-- .../test_alifestd_find_leaf_ids.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py index a06b8b78..f0590e21 100644 --- a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py +++ b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py @@ -28,7 +28,7 @@ def alifestd_find_leaf_ids(phylogeny_df: pd.DataFrame) -> typing.List[int]: leaf_pos_filter = np.ones(len(phylogeny_df), dtype=np.bool_) leaf_pos_filter[internal_node_idxs] = False - return phylogeny_df.loc[leaf_pos_filter, "id"].to_list() + return np.flatnonzero(leaf_pos_filter) all_ids = ods.OrderedSet(phylogeny_df["id"]) internal_ids = ( @@ -50,4 +50,4 @@ def alifestd_find_leaf_ids(phylogeny_df: pd.DataFrame) -> typing.List[int]: ] ) ) - return list(all_ids - internal_ids) + return np.fromiter(all_ids - internal_ids, dtype=int) diff --git a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py index a3f6929d..62dac28b 100644 --- a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py +++ b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py @@ -100,7 +100,7 @@ def test_alifestd_find_leaf_ids_singleton(phylogeny_df, apply): phylogeny_df = apply(phylogeny_df) phylogeny_df.sort_values("id", ascending=True, inplace=True) - assert alifestd_find_leaf_ids(phylogeny_df.iloc[0:1, :]) == [ + assert alifestd_find_leaf_ids(phylogeny_df.iloc[0:1, :]).tolist() == [ phylogeny_df.iloc[0].at["id"] ] @@ -120,14 +120,14 @@ def test_alifestd_find_leaf_ids_tworoots(): phylo2.iloc[0:1, :], ] ) - ) == [phylo1.iloc[0].at["id"]] + [phylo2.iloc[0].at["id"]] + ).tolist() == [phylo1.iloc[0].at["id"]] + [phylo2.iloc[0].at["id"]] def test_alifestd_find_leaf_ids_empty2(): phylo1 = pd.read_csv(f"{assets_path}/nk_ecoeaselection.csv") - assert alifestd_find_leaf_ids(phylo1[-1:0]) == [] + assert alifestd_find_leaf_ids(phylo1[-1:0]).tolist() == [] phylo1["ancestor_id"] = 0 - assert alifestd_find_leaf_ids(phylo1[-1:0]) == [] + assert alifestd_find_leaf_ids(phylo1[-1:0]).tolist() == [] def _test_alifestd_find_leaf_ids_impl(phylogeny_df): @@ -141,10 +141,10 @@ def _test_alifestd_find_leaf_ids_impl(phylogeny_df): ] leaf_ids.sort(key=phylogeny_df_.index.get_loc) - assert leaf_ids == alifestd_find_leaf_ids(phylogeny_df) + assert leaf_ids == alifestd_find_leaf_ids(phylogeny_df).tolist() else: # sexual phylogenies - leaf_ids = alifestd_find_leaf_ids(phylogeny_df) + leaf_ids = alifestd_find_leaf_ids(phylogeny_df).tolist() assert sorted(leaf_ids, key=phylogeny_df_.index.get_loc) == leaf_ids all_ids = set(phylogeny_df["id"]) From 08623479f04f53793f2176fbd097c7d53ec18c60 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 11:30:53 -0500 Subject: [PATCH 18/28] Fix type hint --- hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py index f0590e21..819a5a68 100644 --- a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py +++ b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py @@ -9,7 +9,7 @@ from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col -def alifestd_find_leaf_ids(phylogeny_df: pd.DataFrame) -> typing.List[int]: +def alifestd_find_leaf_ids(phylogeny_df: pd.DataFrame) -> np.ndarray: """What ids are not listed in any `ancestor_list`? Input dataframe is not mutated by this operation. From 60ba403c0a01354445adaef905a0ff58e28a1205 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 11:56:16 -0500 Subject: [PATCH 19/28] Optimize alifestd_find_leaf_ids --- .../_alifestd_downsample_tips_asexual.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py index 6688ddb8..325a20aa 100644 --- a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py @@ -1,13 +1,14 @@ import argparse import functools import logging -import random import typing from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli +import numpy as np import pandas as pd from ._alifestd_find_leaf_ids import alifestd_find_leaf_ids +from ._alifestd_has_contiguous_ids import alifestd_has_contiguous_ids from ._alifestd_prune_extinct_lineages_asexual import ( alifestd_prune_extinct_lineages_asexual, ) @@ -26,8 +27,13 @@ def _alifestd_downsample_tips_asexual_impl( ) -> pd.DataFrame: """Implementation detail for alifestd_downsample_tips_asexual.""" tips = alifestd_find_leaf_ids(phylogeny_df) - kept = random.sample(tips, min(n_downsample, len(tips))) - phylogeny_df["extant"] = phylogeny_df["id"].isin(kept) + kept = np.random.choice(tips, min(n_downsample, len(tips))) + if alifestd_has_contiguous_ids(phylogeny_df): + extant = np.zeros(len(phylogeny_df), dtype=bool) + extant[kept] = True + phylogeny_df["extant"] = extant + else: + phylogeny_df["extant"] = phylogeny_df["id"].isin(kept) return alifestd_prune_extinct_lineages_asexual( phylogeny_df, mutate=True From 8e8cc51919207d7f87ea88226bb6afbcb5bc347e Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 11:56:24 -0500 Subject: [PATCH 20/28] Fix tidy --- hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py index 819a5a68..813ebd0c 100644 --- a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py +++ b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py @@ -1,5 +1,3 @@ -import typing - import numpy as np import ordered_set as ods import pandas as pd From 1e7296de34c39f83142b00de276c06995363a07c Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 12:06:44 -0500 Subject: [PATCH 21/28] Further optimize find leaf ids --- hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py index 813ebd0c..1cf55312 100644 --- a/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py +++ b/hstrat/_auxiliary_lib/_alifestd_find_leaf_ids.py @@ -18,10 +18,9 @@ def alifestd_find_leaf_ids(phylogeny_df: pd.DataFrame) -> np.ndarray: if "ancestor_id" in phylogeny_df: # root is self ref, but must exclude to handle only-root phylo - internal_node_idxs = phylogeny_df.loc[ - phylogeny_df["ancestor_id"] != phylogeny_df["id"], - "ancestor_id", - ].to_numpy() + internal_node_idxs = phylogeny_df["ancestor_id"].to_numpy()[ + phylogeny_df["ancestor_id"] != phylogeny_df["id"] + ] leaf_pos_filter = np.ones(len(phylogeny_df), dtype=np.bool_) leaf_pos_filter[internal_node_idxs] = False From bd0497fb3b042dee40e210bfe63847c41d71768e Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 12:15:07 -0500 Subject: [PATCH 22/28] Fix typo, clarify flow --- .../_alifestd_prune_extinct_lineages_asexual.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py index b16d8755..bd8f70ad 100644 --- a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py @@ -12,7 +12,7 @@ ) -def _create_has_estant_descendant_noncontiguous( +def _create_has_extant_descendant_noncontiguous( phylogeny_df: pd.DataFrame, extant_mask: np.ndarray, ) -> np.ndarray: @@ -105,17 +105,16 @@ def alifestd_prune_extinct_lineages_asexual( else: raise ValueError('Need "extant" or "destruction_time" column.') - has_extant_descendant = ( - _create_has_extant_descendant_contiguous( + if alifestd_has_contiguous_ids(phylogeny_df): + has_extant_descendant = _create_has_extant_descendant_contiguous( phylogeny_df["ancestor_id"].to_numpy(dtype=np.uint64), extant_mask.to_numpy(dtype=bool), ) - if alifestd_has_contiguous_ids(phylogeny_df) - else _create_has_estant_descendant_noncontiguous( + else: + has_extant_descendant = _create_has_extant_descendant_noncontiguous( phylogeny_df, extant_mask, ) - ) phylogeny_df = phylogeny_df[has_extant_descendant].reset_index(drop=True) phylogeny_df.drop( From 681b71b28f4e9ccd1fdd34edfd94823470d93a7e Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 12:50:41 -0500 Subject: [PATCH 23/28] Fix bad default arg --- hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py index 325a20aa..d6843482 100644 --- a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py @@ -1,6 +1,7 @@ import argparse import functools import logging +import sys import typing from joinem._dataframe_cli import _add_parser_base, _run_dataframe_cli @@ -105,6 +106,7 @@ def _create_parser() -> argparse.ArgumentParser: ) parser.add_argument( "-n", + default=sys.maxsize, type=int, help="Number of tips to subsample.", ) From f9a7a05a3252e33203f4d3be45bbabc8dec4dea3 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 19:19:40 -0500 Subject: [PATCH 24/28] Add another optimized impl for prune extinct lineages asexual --- ...alifestd_prune_extinct_lineages_asexual.py | 35 ++++++++++++++++--- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py index bd8f70ad..d666a00e 100644 --- a/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_prune_extinct_lineages_asexual.py @@ -4,6 +4,7 @@ import pandas as pd from ._alifestd_has_contiguous_ids import alifestd_has_contiguous_ids +from ._alifestd_is_topologically_sorted import alifestd_is_topologically_sorted from ._alifestd_try_add_ancestor_id_col import alifestd_try_add_ancestor_id_col from ._alifestd_unfurl_lineage_asexual import alifestd_unfurl_lineage_asexual from ._jit import jit @@ -54,6 +55,20 @@ def _create_has_extant_descendant_contiguous( return has_extant_descendant +@jit(nopython=True) +def _create_has_extant_descendant_contiguous_sorted( + ancestor_ids: np.ndarray, + extant_mask: np.ndarray, +) -> np.ndarray: + """Implementation detail for alifestd_prune_extinct_lineages_asexual.""" + + has_extant_descendant = extant_mask.copy() + for id_ in range(len(ancestor_ids) - 1, -1, -1): + has_extant_descendant[ancestor_ids[id_]] |= has_extant_descendant[id_] + + return has_extant_descendant + + def alifestd_prune_extinct_lineages_asexual( phylogeny_df: pd.DataFrame, mutate: bool = False, @@ -92,7 +107,10 @@ def alifestd_prune_extinct_lineages_asexual( phylogeny_df = phylogeny_df.copy() phylogeny_df = alifestd_try_add_ancestor_id_col(phylogeny_df, mutate=True) - phylogeny_df.set_index("id", drop=False, inplace=True) + if alifestd_has_contiguous_ids(phylogeny_df): + phylogeny_df.reset_index(drop=True, inplace=True) + else: + phylogeny_df.index = phylogeny_df["id"] extant_mask = None if "extant" in phylogeny_df: @@ -105,15 +123,22 @@ def alifestd_prune_extinct_lineages_asexual( else: raise ValueError('Need "extant" or "destruction_time" column.') - if alifestd_has_contiguous_ids(phylogeny_df): + if not alifestd_has_contiguous_ids(phylogeny_df): + has_extant_descendant = _create_has_extant_descendant_noncontiguous( + phylogeny_df, + extant_mask, + ) + elif not alifestd_is_topologically_sorted(phylogeny_df): has_extant_descendant = _create_has_extant_descendant_contiguous( phylogeny_df["ancestor_id"].to_numpy(dtype=np.uint64), extant_mask.to_numpy(dtype=bool), ) else: - has_extant_descendant = _create_has_extant_descendant_noncontiguous( - phylogeny_df, - extant_mask, + has_extant_descendant = ( + _create_has_extant_descendant_contiguous_sorted( + phylogeny_df["ancestor_id"].to_numpy(dtype=np.uint64), + extant_mask.to_numpy(dtype=bool), + ) ) phylogeny_df = phylogeny_df[has_extant_descendant].reset_index(drop=True) From b8021e115ba9811482a5cf0fbdc6ce92572cbcd8 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 19:50:49 -0500 Subject: [PATCH 25/28] Bugfix: ensure tips sampled without replacement --- hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py index d6843482..51a102d7 100644 --- a/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py +++ b/hstrat/_auxiliary_lib/_alifestd_downsample_tips_asexual.py @@ -28,7 +28,7 @@ def _alifestd_downsample_tips_asexual_impl( ) -> pd.DataFrame: """Implementation detail for alifestd_downsample_tips_asexual.""" tips = alifestd_find_leaf_ids(phylogeny_df) - kept = np.random.choice(tips, min(n_downsample, len(tips))) + kept = np.random.choice(tips, min(n_downsample, len(tips)), replace=False) if alifestd_has_contiguous_ids(phylogeny_df): extant = np.zeros(len(phylogeny_df), dtype=bool) extant[kept] = True From dc25072bc4382c3741debc00d2f72923419f923b Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 19:51:21 -0500 Subject: [PATCH 26/28] Fix array/list compat in tests --- .../test_auxiliary_lib/test_alifestd_find_leaf_ids.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py index 62dac28b..b4d226a3 100644 --- a/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py +++ b/tests/test_hstrat/test_auxiliary_lib/test_alifestd_find_leaf_ids.py @@ -56,7 +56,7 @@ def test_alifestd_find_leaf_ids_empty(phylogeny_df, apply): phylogeny_df = phylogeny_df.copy() phylogeny_df = apply(phylogeny_df) - assert alifestd_find_leaf_ids(phylogeny_df.iloc[-1:0, :]) == [] + assert alifestd_find_leaf_ids(phylogeny_df.iloc[-1:0, :]).tolist() == [] @pytest.mark.parametrize( From b542226ea5cce39ad9f87922614de76a59682d3c Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 19:59:09 -0500 Subject: [PATCH 27/28] Fix copy/paste race condition --- tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py b/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py index f8bff325..624da689 100644 --- a/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py +++ b/tests/test_hstrat/test_dataframe/test_surface_build_tree_cli.py @@ -30,7 +30,7 @@ def test_surface_build_tree_cli_version(): def test_surface_build_tree_cli_csv(): - output_file = "/tmp/hstrat_surface_build_tree.pqt" + output_file = "/tmp/hstrat_surface_build_tree.csv" pathlib.Path(output_file).unlink(missing_ok=True) subprocess.run( [ From bf8c59284acee872847b2e370cdf9323ed774bf3 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Sat, 4 Jan 2025 20:40:29 -0500 Subject: [PATCH 28/28] fixup! Fix array/list compat in tests --- .../test_test_drive/test_descend_template_phylogeny_alifestd.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_hstrat/test_test_drive/test_descend_template_phylogeny_alifestd.py b/tests/test_hstrat/test_test_drive/test_descend_template_phylogeny_alifestd.py index 6f4c54d4..38762b11 100644 --- a/tests/test_hstrat/test_test_drive/test_descend_template_phylogeny_alifestd.py +++ b/tests/test_hstrat/test_test_drive/test_descend_template_phylogeny_alifestd.py @@ -107,7 +107,7 @@ def test_descend_template_phylogeny( ) assert [n.id for n in sorted_leaf_nodes] == alifestd_find_leaf_ids( phylogeny_df - ) + ).tolist() for extant_ids, sorted_extant_nodes in ( (None, sorted_leaf_nodes), (map(lambda node: node.id, sampled_tree_nodes), sampled_tree_nodes),