Skip to content

Commit

Permalink
Bump polars from 0.20.31 to 1.1.0 (#1358)
Browse files Browse the repository at this point in the history
* Bump polars from 0.20.31 to 1.1.0

Bumps [polars](https://github.com/pola-rs/polars) from 0.20.31 to 1.1.0.
- [Release notes](https://github.com/pola-rs/polars/releases)
- [Commits](https://github.com/pola-rs/polars/compare/py-0.20.31...py-1.1.0)

---
updated-dependencies:
- dependency-name: polars
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <[email protected]>

* Fix polars upgrade, add implicit orient=row conversions.

* Fix linters.

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Călina Cenan <[email protected]>
  • Loading branch information
dependabot[bot] and calina-c authored Jul 9, 2024
1 parent 632c668 commit a76e30f
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 83 deletions.
2 changes: 1 addition & 1 deletion pdr_backend/lake/csv_data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import polars as pl
from enforce_typing import enforce_types
from polars.type_aliases import SchemaDict
from polars._typing import SchemaDict


@enforce_types
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/lake/duckdb_data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import duckdb
import polars as pl
from enforce_typing import enforce_types
from polars.type_aliases import SchemaDict
from polars._typing import SchemaDict

from pdr_backend.lake.base_data_store import BaseDataStore

Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/lake/test/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def _df_from_raw_data(raw_data: list) -> pl.DataFrame:
"""Return a df for use in rawohlcv_dfs"""
df = initialize_rawohlcv_df(TOHLCV_COLS)

next_df = pl.DataFrame(raw_data, schema=TOHLCV_SCHEMA_PL)
next_df = pl.DataFrame(raw_data, schema=TOHLCV_SCHEMA_PL, orient="row")

df = concat_next_df(df, next_df)

Expand Down
12 changes: 7 additions & 5 deletions pdr_backend/lake/test/test_clean_raw_ohlcv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#
from enforce_typing import enforce_types
import polars as pl
import pytest

from pdr_backend.cli.arg_feed import ArgFeed
from pdr_backend.lake.constants import TOHLCV_SCHEMA_PL
Expand Down Expand Up @@ -76,7 +77,11 @@ def test_schema_interpreter_float_as_integer():
assert isinstance(tohlcv_df, pl.DataFrame)

# Try to create DataFrame with floating-point decimal timestamp instead of integer
try:
# Timestamp written as a float "1624003200000.00" raises error
with pytest.raises(
TypeError,
match="unexpected value while building Series of type Int64; found value of type Float64",
):
tohlcv_data = [
[
1624003200000.00,
Expand All @@ -92,9 +97,6 @@ def test_schema_interpreter_float_as_integer():
[1.0, 2.0, 3.0, 4.0, 5.0],
]
tohlcv_df = pl.DataFrame(tohlcv_data, schema=TOHLCV_SCHEMA_PL)
except TypeError as e:
# Timestamp written as a float "1624003200000.00" raises error
assert str(e) == "'float' object cannot be interpreted as an integer"


@enforce_types
Expand All @@ -111,5 +113,5 @@ def test_fix_schema_interpreter_float_as_integer():
assert type(uts[0]) == int

tohlcv_data = _filter_within_timerange([RAW_TOHLCV], UnixTimeMs(T1), UnixTimeMs(T1))
tohlcv_df = pl.DataFrame(tohlcv_data, schema=TOHLCV_SCHEMA_PL)
tohlcv_df = pl.DataFrame(tohlcv_data, schema=TOHLCV_SCHEMA_PL, orient="row")
assert isinstance(tohlcv_df, pl.DataFrame)
70 changes: 1 addition & 69 deletions pdr_backend/lake/test/test_ohlcv_data_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ async def mock_update(*args, **kwargs): # pylint: disable=unused-argument
[st_ut + s_per_epoch * i] + [ohlcv_val] * 5 for i in range(n_pts)
]
df = initialize_rawohlcv_df()
next_df = pl.DataFrame(raw_tohlcv_data, schema=TOHLCV_SCHEMA_PL)
next_df = pl.DataFrame(raw_tohlcv_data, schema=TOHLCV_SCHEMA_PL, orient="row")
df = concat_next_df(df, next_df)
save_rawohlcv_file(filename, df)

Expand Down Expand Up @@ -325,71 +325,3 @@ def test_get_mergedohlcv_df_calls(
factory._update_rawohlcv_files.assert_called()
factory._load_rawohlcv_files.assert_called()
mock_merge_rawohlcv_dfs.assert_called()


# =======================================================================
# issue #657

# Sample TOHLCV rows: [timestamp_ms, open, high, low, close, volume].
# 13 consecutive 5-minute (300_000 ms) candles; presumably ETH/USDT-scale
# prices (~3900) — TODO confirm against issue #657's original data.
# Used below to show that polars happens to *infer* orient="row" for this
# shape, so pre-fix construction succeeded by luck.
tohlcv_data1 = [
    [1709887500000, 3943.99, 3952.91, 3942.95, 3943.73, 2554.4607],
    [1709887800000, 3943.72, 3950.52, 3943.66, 3943.88, 1373.67],
    [1709888100000, 3943.87, 3950.45, 3939.4, 3940.03, 1514.3544],
    [1709888400000, 3940.03, 3945.4, 3930.0, 3934.19, 1659.3454],
    [1709888700000, 3934.19, 3936.2, 3930.2, 3933.0, 1607.2883],
    [1709889000000, 3933.0, 3950.73, 3930.57, 3948.14, 3421.5831],
    [1709889300000, 3948.15, 3949.29, 3942.35, 3943.61, 2034.7834],
    [1709889600000, 3943.61, 3946.19, 3935.16, 3937.58, 1659.4638],
    [1709889900000, 3937.58, 3941.11, 3934.6, 3938.53, 801.3086],
    [1709890200000, 3938.53, 3948.19, 3935.94, 3941.24, 2356.8294],
    [1709890500000, 3941.25, 3950.49, 3941.24, 3946.01, 2243.4569],
    [1709890800000, 3946.0, 3949.72, 3943.14, 3944.22, 1262.2829],
    [1709891100000, 3944.22, 3946.25, 3944.22, 3946.25, 116.9614],
]

# Second sample of TOHLCV rows: [timestamp_ms, open, high, low, close, volume].
# 6 consecutive 5-minute candles; presumably BTC/USDT-scale prices (~67000) —
# TODO confirm against issue #657. For this shape polars *infers*
# orient="col", which is the bug exercised below: the first row
# [1709887500000, 67289.04, ...] gets treated as the timestamp column.
tohlcv_data2 = [
    [1709887500000, 67289.04, 67390.0, 67274.7, 67300.0, 118.03242],
    [1709887800000, 67300.01, 67320.0, 67256.4, 67256.41, 68.07976],
    [1709888100000, 67256.4, 67291.97, 67200.01, 67208.55, 114.46873],
    [1709888400000, 67208.56, 67213.19, 67040.76, 67070.82, 173.93243],
    [1709888700000, 67070.82, 67120.57, 67050.0, 67100.0, 175.64183],
    [1709889000000, 67100.0, 67223.54, 67095.16, 67216.42, 155.12712],
]


@enforce_types
def test_issue657_infer_orientation():
    """Reproduce issue #657: pre-fix code let polars infer the orientation.

    Whether that inference succeeds depends on the data itself, which is
    exactly why the fix passes orient="row" explicitly everywhere.
    """
    # data1: polars infers orient="row", so construction works by luck
    _ = pl.DataFrame(tohlcv_data1, schema=TOHLCV_SCHEMA_PL)

    # data2: polars infers orient="col" — the bug. It then tries to cast
    # the first row [1709887500000, 67289.04, ...] as the timestamp column,
    # which blows up on the float values.
    with pytest.raises(TypeError):
        _ = pl.DataFrame(tohlcv_data2, schema=TOHLCV_SCHEMA_PL)


@enforce_types
def test_issue657_set_col_orientation():
    """Forcing orient="col" breaks construction for BOTH datasets.

    This is the pre-fix code plus an explicit column orientation: each
    nested list is then treated as a column, so the mixed int/float rows
    cannot satisfy the schema and polars raises TypeError.
    """
    for dataset in (tohlcv_data1, tohlcv_data2):
        with pytest.raises(TypeError):
            pl.DataFrame(dataset, schema=TOHLCV_SCHEMA_PL, orient="col")


@enforce_types
def test_issue657_set_row_orientation():
    """With an explicit orient="row" (the fix), both datasets build fine.

    This is the pre-fix constructor call plus orient="row" instead of
    letting polars infer the orientation — no exception for either dataset.
    """
    for dataset in (tohlcv_data1, tohlcv_data2):
        pl.DataFrame(dataset, schema=TOHLCV_SCHEMA_PL, orient="row")
10 changes: 5 additions & 5 deletions pdr_backend/lake/test/test_plutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_concat_next_df():
cand_dtypes = dict(zip(TOHLCV_COLS, TOHLCV_DTYPES_PL))
schema = {col: cand_dtypes[col] for col in TOHLCV_COLS}

next_df = pl.DataFrame(FOUR_ROWS_RAW_TOHLCV_DATA, schema=schema)
next_df = pl.DataFrame(FOUR_ROWS_RAW_TOHLCV_DATA, schema=schema, orient="row")
assert len(next_df) == 4

# add 4 rows to empty df
Expand All @@ -96,11 +96,11 @@ def test_concat_next_df():
_assert_TOHLCVd_cols_and_types(df)

# assert 1 more row
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema)
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema, orient="row")
assert len(next_df) == 1

# assert that concat verifies schemas match
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema)
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema, orient="row")
assert len(next_df) == 1
assert "datetime" not in next_df.columns

Expand Down Expand Up @@ -164,7 +164,7 @@ def test_load_append(tmpdir):

# verify: doing a manual concat is the same as the load
schema = dict(zip(TOHLCV_COLS, TOHLCV_DTYPES_PL))
df_1_row = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema)
df_1_row = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema, orient="row")
df_5_rows = concat_next_df(df_4_rows, df_1_row)
df_5_rows_loaded = load_rawohlcv_file(filename)

Expand Down Expand Up @@ -205,7 +205,7 @@ def _df_from_raw_data(raw_data: list) -> pl.DataFrame:
df = initialize_rawohlcv_df(TOHLCV_COLS)

schema = dict(zip(TOHLCV_COLS, TOHLCV_DTYPES_PL))
next_df = pl.DataFrame(raw_data, schema=schema)
next_df = pl.DataFrame(raw_data, schema=schema, orient="row")

df = concat_next_df(df, next_df)
return df
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"pandas==2.2.2",
"pathlib",
"plotly==5.22.0",
"polars==0.20.31",
"polars==1.1.0",
"polars[timezone]",
"pyarrow==16.1.0",
"pylint==3.2.5",
Expand Down

0 comments on commit a76e30f

Please sign in to comment.