diff --git a/hstrat/dataframe/_surface_unpack_reconstruct.py b/hstrat/dataframe/_surface_unpack_reconstruct.py index 34c93d22..05583300 100644 --- a/hstrat/dataframe/_surface_unpack_reconstruct.py +++ b/hstrat/dataframe/_surface_unpack_reconstruct.py @@ -122,7 +122,10 @@ def surface_unpack_reconstruct(df: pl.DataFrame) -> pl.DataFrame: tqdm.tqdm, ) - logging.info("finalizing tree...") + bitwidth = _get_sole_bitwidth(long_df) + del long_df + + logging.info("finalizing phylogeny dataframe...") phylo_df = pl.from_dict( records, # type: ignore @@ -134,8 +137,12 @@ def surface_unpack_reconstruct(df: pl.DataFrame) -> pl.DataFrame: "rank": pl.UInt64, }, ) + del records + phylo_df = phylo_df.with_columns( + pl.lit(bitwidth).alias("differentia_bitwidth").cast(pl.UInt32), + ) - logging.info("joining frames...") + logging.info("joining user-defined columns...") df = df.select( pl.exclude("^dstream_.*$", "^downstream_.*$"), pl.col("dstream_data_id").cast(pl.UInt64), @@ -148,12 +155,6 @@ def surface_unpack_reconstruct(df: pl.DataFrame) -> pl.DataFrame: else: logging.info(" - no columns to join, skipping") - logging.info("adding differentia_bitwidth column...") - bitwidth = _get_sole_bitwidth(long_df) - phylo_df = phylo_df.with_columns( - pl.lit(bitwidth).alias("differentia_bitwidth").cast(pl.UInt32), - ) - logging.info("surface_unpack_reconstruct complete") render_polars_snapshot(phylo_df, "reconstruction", logging.info)