From 8f1cb3567267196a66607ffc3bb08b988511c515 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Thu, 2 Jan 2025 10:41:12 -0500 Subject: [PATCH 1/5] Bump requirement pins --- pyproject.toml | 2 +- requirements-dev/py310/requirements-all.txt | 4 +++- requirements-dev/py310/requirements-docs.txt | 4 +++- requirements-dev/py310/requirements-jit.txt | 4 +++- requirements-dev/py310/requirements-minimal.txt | 4 +++- requirements-dev/py310/requirements-release.txt | 4 +++- requirements-dev/py310/requirements-testing.txt | 4 +++- requirements-dev/py311/requirements-all.txt | 4 +++- requirements-dev/py311/requirements-docs.txt | 4 +++- requirements-dev/py311/requirements-jit.txt | 4 +++- requirements-dev/py311/requirements-minimal.txt | 4 +++- requirements-dev/py311/requirements-release.txt | 4 +++- requirements-dev/py311/requirements-testing.txt | 4 +++- requirements-dev/py312/requirements-all.txt | 4 +++- requirements-dev/py312/requirements-docs.txt | 4 +++- requirements-dev/py312/requirements-jit.txt | 6 ++++-- requirements-dev/py312/requirements-minimal.txt | 4 +++- requirements-dev/py312/requirements-release.txt | 4 +++- requirements-dev/py312/requirements-testing.txt | 4 +++- 19 files changed, 56 insertions(+), 20 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fb0fc361..b064ebfc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,7 +21,7 @@ dependencies = [ "bitstring>=3.1.9", "dendropy>=4.5.2", "Deprecated>=1.2.13", - "downstream>=1.4.0", + "downstream>=1.5.1", "iterpop>=0.3.4", "interval_search>=0.3.1", "joinem>=0.9.1", diff --git a/requirements-dev/py310/requirements-all.txt b/requirements-dev/py310/requirements-all.txt index 42895084..46cce7a1 100644 --- a/requirements-dev/py310/requirements-all.txt +++ b/requirements-dev/py310/requirements-all.txt @@ -79,7 +79,7 @@ docutils==0.21.2 # readme-renderer # sphinx # sphinx-rtd-theme -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) entrypoints==0.3 # via flake8 @@ 
-304,6 +304,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) prompt-toolkit==3.0.48 diff --git a/requirements-dev/py310/requirements-docs.txt b/requirements-dev/py310/requirements-docs.txt index 86de3692..202aeb49 100644 --- a/requirements-dev/py310/requirements-docs.txt +++ b/requirements-dev/py310/requirements-docs.txt @@ -67,7 +67,7 @@ docutils==0.21.2 # nbsphinx # sphinx # sphinx-rtd-theme -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -239,6 +239,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) prompt-toolkit==3.0.48 diff --git a/requirements-dev/py310/requirements-jit.txt b/requirements-dev/py310/requirements-jit.txt index 91641e22..55ffd8dd 100644 --- a/requirements-dev/py310/requirements-jit.txt +++ b/requirements-dev/py310/requirements-jit.txt @@ -36,7 +36,7 @@ deprecated==1.2.15 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -145,6 +145,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py310/requirements-minimal.txt b/requirements-dev/py310/requirements-minimal.txt index 043ac66e..3f15e0d6 100644 --- a/requirements-dev/py310/requirements-minimal.txt +++ b/requirements-dev/py310/requirements-minimal.txt @@ -34,7 +34,7 @@ deprecated==1.2.15 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -downstream==1.4.0 +downstream==1.5.1 # 
via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -132,6 +132,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py310/requirements-release.txt b/requirements-dev/py310/requirements-release.txt index 139bee5e..14cc613d 100644 --- a/requirements-dev/py310/requirements-release.txt +++ b/requirements-dev/py310/requirements-release.txt @@ -45,7 +45,7 @@ deprecated==1.2.15 # alifedata-phyloinformatics-convert docutils==0.21.2 # via readme-renderer -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -154,6 +154,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py310/requirements-testing.txt b/requirements-dev/py310/requirements-testing.txt index 9daa24b8..a522e317 100644 --- a/requirements-dev/py310/requirements-testing.txt +++ b/requirements-dev/py310/requirements-testing.txt @@ -40,7 +40,7 @@ deprecated==1.2.15 # alifedata-phyloinformatics-convert distlib==0.3.9 # via virtualenv -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) entrypoints==0.3 # via flake8 @@ -183,6 +183,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py311/requirements-all.txt b/requirements-dev/py311/requirements-all.txt index 756e8b17..3097e0eb 100644 --- a/requirements-dev/py311/requirements-all.txt +++ b/requirements-dev/py311/requirements-all.txt @@ -79,7 +79,7 @@ docutils==0.21.2 # readme-renderer 
# sphinx # sphinx-rtd-theme -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) entrypoints==0.3 # via flake8 @@ -302,6 +302,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) prompt-toolkit==3.0.48 diff --git a/requirements-dev/py311/requirements-docs.txt b/requirements-dev/py311/requirements-docs.txt index 92b52cd7..ce902807 100644 --- a/requirements-dev/py311/requirements-docs.txt +++ b/requirements-dev/py311/requirements-docs.txt @@ -67,7 +67,7 @@ docutils==0.21.2 # nbsphinx # sphinx # sphinx-rtd-theme -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -239,6 +239,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) prompt-toolkit==3.0.48 diff --git a/requirements-dev/py311/requirements-jit.txt b/requirements-dev/py311/requirements-jit.txt index 929db4d5..f8488bbe 100644 --- a/requirements-dev/py311/requirements-jit.txt +++ b/requirements-dev/py311/requirements-jit.txt @@ -36,7 +36,7 @@ deprecated==1.2.15 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -145,6 +145,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py311/requirements-minimal.txt b/requirements-dev/py311/requirements-minimal.txt index e1a67843..d883f78c 100644 --- a/requirements-dev/py311/requirements-minimal.txt +++ b/requirements-dev/py311/requirements-minimal.txt @@ -34,7 +34,7 @@ 
deprecated==1.2.15 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -132,6 +132,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py311/requirements-release.txt b/requirements-dev/py311/requirements-release.txt index b96a9bba..8aa8636e 100644 --- a/requirements-dev/py311/requirements-release.txt +++ b/requirements-dev/py311/requirements-release.txt @@ -45,7 +45,7 @@ deprecated==1.2.15 # alifedata-phyloinformatics-convert docutils==0.21.2 # via readme-renderer -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -154,6 +154,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py311/requirements-testing.txt b/requirements-dev/py311/requirements-testing.txt index 4ad5ed04..45b27978 100644 --- a/requirements-dev/py311/requirements-testing.txt +++ b/requirements-dev/py311/requirements-testing.txt @@ -40,7 +40,7 @@ deprecated==1.2.15 # alifedata-phyloinformatics-convert distlib==0.3.9 # via virtualenv -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) entrypoints==0.3 # via flake8 @@ -181,6 +181,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py312/requirements-all.txt b/requirements-dev/py312/requirements-all.txt index 756e8b17..3097e0eb 100644 --- 
a/requirements-dev/py312/requirements-all.txt +++ b/requirements-dev/py312/requirements-all.txt @@ -79,7 +79,7 @@ docutils==0.21.2 # readme-renderer # sphinx # sphinx-rtd-theme -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) entrypoints==0.3 # via flake8 @@ -302,6 +302,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) prompt-toolkit==3.0.48 diff --git a/requirements-dev/py312/requirements-docs.txt b/requirements-dev/py312/requirements-docs.txt index 92b52cd7..ce902807 100644 --- a/requirements-dev/py312/requirements-docs.txt +++ b/requirements-dev/py312/requirements-docs.txt @@ -67,7 +67,7 @@ docutils==0.21.2 # nbsphinx # sphinx # sphinx-rtd-theme -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -239,6 +239,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) prompt-toolkit==3.0.48 diff --git a/requirements-dev/py312/requirements-jit.txt b/requirements-dev/py312/requirements-jit.txt index 929db4d5..2ca2e7e5 100644 --- a/requirements-dev/py312/requirements-jit.txt +++ b/requirements-dev/py312/requirements-jit.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile --python-version=3.11 ../../pyproject.toml --extra jit -o requirements-jit.txt +# uv pip compile --python-version=3.12 ../../pyproject.toml --extra jit -o requirements-jit.txt alifedata-phyloinformatics-convert==0.17.0 # via hstrat (../../pyproject.toml) annotated-types==0.7.0 @@ -36,7 +36,7 @@ deprecated==1.2.15 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via 
alifedata-phyloinformatics-convert @@ -145,6 +145,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py312/requirements-minimal.txt b/requirements-dev/py312/requirements-minimal.txt index e1a67843..d883f78c 100644 --- a/requirements-dev/py312/requirements-minimal.txt +++ b/requirements-dev/py312/requirements-minimal.txt @@ -34,7 +34,7 @@ deprecated==1.2.15 # via # hstrat (../../pyproject.toml) # alifedata-phyloinformatics-convert -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -132,6 +132,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py312/requirements-release.txt b/requirements-dev/py312/requirements-release.txt index b96a9bba..8aa8636e 100644 --- a/requirements-dev/py312/requirements-release.txt +++ b/requirements-dev/py312/requirements-release.txt @@ -45,7 +45,7 @@ deprecated==1.2.15 # alifedata-phyloinformatics-convert docutils==0.21.2 # via readme-renderer -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) ete3==3.1.3 # via alifedata-phyloinformatics-convert @@ -154,6 +154,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 diff --git a/requirements-dev/py312/requirements-testing.txt b/requirements-dev/py312/requirements-testing.txt index 4ad5ed04..45b27978 100644 --- a/requirements-dev/py312/requirements-testing.txt +++ b/requirements-dev/py312/requirements-testing.txt @@ -40,7 +40,7 @@ deprecated==1.2.15 # 
alifedata-phyloinformatics-convert distlib==0.3.9 # via virtualenv -downstream==1.4.0 +downstream==1.5.1 # via hstrat (../../pyproject.toml) entrypoints==0.3 # via flake8 @@ -181,6 +181,8 @@ polars==1.16.0 # hstrat (../../pyproject.toml) # downstream # joinem +polars-u64-idx==1.18.0 + # via hstrat (../../pyproject.toml) prettytable==3.12.0 # via hstrat (../../pyproject.toml) propcache==0.2.0 From 8971b26698d0b1db0bc4a64e39e8541006851268 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Thu, 2 Jan 2025 10:51:44 -0500 Subject: [PATCH 2/5] Configure max columns a simpler way --- hstrat/_auxiliary_lib/_render_polars_snapshot.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hstrat/_auxiliary_lib/_render_polars_snapshot.py b/hstrat/_auxiliary_lib/_render_polars_snapshot.py index f6237434..03b94317 100644 --- a/hstrat/_auxiliary_lib/_render_polars_snapshot.py +++ b/hstrat/_auxiliary_lib/_render_polars_snapshot.py @@ -26,7 +26,7 @@ def render_polars_snapshot( The rendered DataFrame, as a string, if `display` is not used. 
""" with pl.Config() as cfg: - cfg.set_tbl_cols(df.lazy().collect_schema().len()) + cfg.set_tbl_cols(-1) head = repr(df.lazy().head().collect()) message = " ".join( [ From 16e6f15d6034c02a507478df18cd7385bcd900bf Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Thu, 2 Jan 2025 11:05:46 -0500 Subject: [PATCH 3/5] Fix out-of-bounds access error --- tests/test_hstrat/test_dataframe/assets/packed.csv | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_hstrat/test_dataframe/assets/packed.csv b/tests/test_hstrat/test_dataframe/assets/packed.csv index f2471303..54abd2ed 100644 --- a/tests/test_hstrat/test_dataframe/assets/packed.csv +++ b/tests/test_hstrat/test_dataframe/assets/packed.csv @@ -1,3 +1,3 @@ awoo,dstream_algo,downstream_version,data_hex,dstream_storage_bitoffset,dstream_storage_bitwidth,dstream_T_bitoffset,dstream_T_bitwidth,dstream_S -bar,dstream.steady_algo,1.0.1,080001030702050406,8,96,0,8,8 -baz,dstream.steady_algo,1.0.1,0b0001030702050906,8,96,0,8,8 +bar,dstream.steady_algo,1.0.1,080001030702050406,8,64,0,8,8 +baz,dstream.steady_algo,1.0.1,0b0001030702050906,8,64,0,8,8 From 43ffa2fffc799355bb2bdf1400e54ec17bec2f53 Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Thu, 2 Jan 2025 11:32:26 -0500 Subject: [PATCH 4/5] Fix ellipsis --- hstrat/dataframe/_surface_unpack_reconstruct.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hstrat/dataframe/_surface_unpack_reconstruct.py b/hstrat/dataframe/_surface_unpack_reconstruct.py index d9c52321..26c88954 100644 --- a/hstrat/dataframe/_surface_unpack_reconstruct.py +++ b/hstrat/dataframe/_surface_unpack_reconstruct.py @@ -264,7 +264,7 @@ def surface_unpack_reconstruct( res["dstream_S"] = pd.Series(dtype=int) return pl.from_pandas(res) - logging.info("extracting metadata..") + logging.info("extracting metadata...") dstream_storage_bitwidth = get_sole_scalar_value_polars( df, "dstream_storage_bitwidth" ) From ab82e9aa6a7cfb4b3df49c5166174cdc79cf6af5 
Mon Sep 17 00:00:00 2001 From: Matthew Andres Moreno Date: Thu, 2 Jan 2025 11:38:55 -0500 Subject: [PATCH 5/5] Log sort_by time separately --- hstrat/dataframe/_surface_unpack_reconstruct.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/hstrat/dataframe/_surface_unpack_reconstruct.py b/hstrat/dataframe/_surface_unpack_reconstruct.py index 26c88954..615b2843 100644 --- a/hstrat/dataframe/_surface_unpack_reconstruct.py +++ b/hstrat/dataframe/_surface_unpack_reconstruct.py @@ -40,7 +40,14 @@ def _build_records_chunked( ): long_df = dstream_dataframe.explode_lookup_unpacked( df_slice, value_type="uint64" - ).select( + ) + + with log_context_duration( + '.sort_by("dstream_Tbar").over(partition_by="dstream_data_id") ' + f"({i + 1}/{num_slices})", + logging.info, + ): + long_df = long_df.select( pl.col( "dstream_data_id", "dstream_T",