Skip to content

Commit

Permalink
Bump polars from 0.20.31 to 1.1.0 (#1358)
Browse files Browse the repository at this point in the history
* Bump polars from 0.20.31 to 1.1.0

Bumps [polars](https://github.com/pola-rs/polars) from 0.20.31 to 1.1.0.
- [Release notes](https://github.com/pola-rs/polars/releases)
- [Commits](https://github.com/pola-rs/polars/compare/py-0.20.31...py-1.1.0)

---
updated-dependencies:
- dependency-name: polars
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <[email protected]>

* Fix polars upgrade, add implicit orient=row conversions.

* Fix linters.

---------

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Călina Cenan <[email protected]>
  • Loading branch information
dependabot[bot] and calina-c authored Jul 9, 2024
1 parent 632c668 commit a76e30f
Show file tree
Hide file tree
Showing 7 changed files with 17 additions and 83 deletions.
2 changes: 1 addition & 1 deletion pdr_backend/lake/csv_data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import polars as pl
from enforce_typing import enforce_types
from polars.type_aliases import SchemaDict
from polars._typing import SchemaDict


@enforce_types
Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/lake/duckdb_data_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import duckdb
import polars as pl
from enforce_typing import enforce_types
from polars.type_aliases import SchemaDict
from polars._typing import SchemaDict

from pdr_backend.lake.base_data_store import BaseDataStore

Expand Down
2 changes: 1 addition & 1 deletion pdr_backend/lake/test/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def _df_from_raw_data(raw_data: list) -> pl.DataFrame:
"""Return a df for use in rawohlcv_dfs"""
df = initialize_rawohlcv_df(TOHLCV_COLS)

next_df = pl.DataFrame(raw_data, schema=TOHLCV_SCHEMA_PL)
next_df = pl.DataFrame(raw_data, schema=TOHLCV_SCHEMA_PL, orient="row")

df = concat_next_df(df, next_df)

Expand Down
12 changes: 7 additions & 5 deletions pdr_backend/lake/test/test_clean_raw_ohlcv.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#
from enforce_typing import enforce_types
import polars as pl
import pytest

from pdr_backend.cli.arg_feed import ArgFeed
from pdr_backend.lake.constants import TOHLCV_SCHEMA_PL
Expand Down Expand Up @@ -76,7 +77,11 @@ def test_schema_interpreter_float_as_integer():
assert isinstance(tohlcv_df, pl.DataFrame)

# Try to create DataFrame with floating-point decimal timestamp instead of integer
try:
# Timestamp written as a float "1624003200000.00" raises error
with pytest.raises(
TypeError,
match="unexpected value while building Series of type Int64; found value of type Float64",
):
tohlcv_data = [
[
1624003200000.00,
Expand All @@ -92,9 +97,6 @@ def test_schema_interpreter_float_as_integer():
[1.0, 2.0, 3.0, 4.0, 5.0],
]
tohlcv_df = pl.DataFrame(tohlcv_data, schema=TOHLCV_SCHEMA_PL)
except TypeError as e:
# Timestamp written as a float "1624003200000.00" raises error
assert str(e) == "'float' object cannot be interpreted as an integer"


@enforce_types
Expand All @@ -111,5 +113,5 @@ def test_fix_schema_interpreter_float_as_integer():
assert type(uts[0]) == int

tohlcv_data = _filter_within_timerange([RAW_TOHLCV], UnixTimeMs(T1), UnixTimeMs(T1))
tohlcv_df = pl.DataFrame(tohlcv_data, schema=TOHLCV_SCHEMA_PL)
tohlcv_df = pl.DataFrame(tohlcv_data, schema=TOHLCV_SCHEMA_PL, orient="row")
assert isinstance(tohlcv_df, pl.DataFrame)
70 changes: 1 addition & 69 deletions pdr_backend/lake/test/test_ohlcv_data_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ async def mock_update(*args, **kwargs): # pylint: disable=unused-argument
[st_ut + s_per_epoch * i] + [ohlcv_val] * 5 for i in range(n_pts)
]
df = initialize_rawohlcv_df()
next_df = pl.DataFrame(raw_tohlcv_data, schema=TOHLCV_SCHEMA_PL)
next_df = pl.DataFrame(raw_tohlcv_data, schema=TOHLCV_SCHEMA_PL, orient="row")
df = concat_next_df(df, next_df)
save_rawohlcv_file(filename, df)

Expand Down Expand Up @@ -325,71 +325,3 @@ def test_get_mergedohlcv_df_calls(
factory._update_rawohlcv_files.assert_called()
factory._load_rawohlcv_files.assert_called()
mock_merge_rawohlcv_dfs.assert_called()


# =======================================================================
# issue #657

# Sample TOHLCV rows: [timestamp_ms, open, high, low, close, volume].
# 13 consecutive 5-minute (300_000 ms) candles; presumably ETH/USDT-scale
# prices (~3900) — TODO confirm against issue #657's original data.
# Used below to show that polars happens to *infer* orient="row" for this
# shape, so pre-fix construction succeeded by luck.
tohlcv_data1 = [
    [1709887500000, 3943.99, 3952.91, 3942.95, 3943.73, 2554.4607],
    [1709887800000, 3943.72, 3950.52, 3943.66, 3943.88, 1373.67],
    [1709888100000, 3943.87, 3950.45, 3939.4, 3940.03, 1514.3544],
    [1709888400000, 3940.03, 3945.4, 3930.0, 3934.19, 1659.3454],
    [1709888700000, 3934.19, 3936.2, 3930.2, 3933.0, 1607.2883],
    [1709889000000, 3933.0, 3950.73, 3930.57, 3948.14, 3421.5831],
    [1709889300000, 3948.15, 3949.29, 3942.35, 3943.61, 2034.7834],
    [1709889600000, 3943.61, 3946.19, 3935.16, 3937.58, 1659.4638],
    [1709889900000, 3937.58, 3941.11, 3934.6, 3938.53, 801.3086],
    [1709890200000, 3938.53, 3948.19, 3935.94, 3941.24, 2356.8294],
    [1709890500000, 3941.25, 3950.49, 3941.24, 3946.01, 2243.4569],
    [1709890800000, 3946.0, 3949.72, 3943.14, 3944.22, 1262.2829],
    [1709891100000, 3944.22, 3946.25, 3944.22, 3946.25, 116.9614],
]

# Second sample of TOHLCV rows: [timestamp_ms, open, high, low, close, volume].
# 6 consecutive 5-minute candles; presumably BTC/USDT-scale prices (~67000) —
# TODO confirm against issue #657. For this shape polars *infers*
# orient="col", which is the bug exercised below: the first row
# [1709887500000, 67289.04, ...] gets treated as the timestamp column.
tohlcv_data2 = [
    [1709887500000, 67289.04, 67390.0, 67274.7, 67300.0, 118.03242],
    [1709887800000, 67300.01, 67320.0, 67256.4, 67256.41, 68.07976],
    [1709888100000, 67256.4, 67291.97, 67200.01, 67208.55, 114.46873],
    [1709888400000, 67208.56, 67213.19, 67040.76, 67070.82, 173.93243],
    [1709888700000, 67070.82, 67120.57, 67050.0, 67100.0, 175.64183],
    [1709889000000, 67100.0, 67223.54, 67095.16, 67216.42, 155.12712],
]


@enforce_types
def test_issue657_infer_orientation():
    """Reproduce issue #657: pre-fix code let polars infer the orientation.

    Whether that inference succeeds depends on the data itself, which is
    exactly why the fix passes orient="row" explicitly everywhere.
    """
    # data1: polars infers orient="row", so construction works by luck
    _ = pl.DataFrame(tohlcv_data1, schema=TOHLCV_SCHEMA_PL)

    # data2: polars infers orient="col" — the bug. It then tries to cast
    # the first row [1709887500000, 67289.04, ...] as the timestamp column,
    # which blows up on the float values.
    with pytest.raises(TypeError):
        _ = pl.DataFrame(tohlcv_data2, schema=TOHLCV_SCHEMA_PL)


@enforce_types
def test_issue657_set_col_orientation():
    """Forcing orient="col" breaks construction for BOTH datasets.

    This is the pre-fix code plus an explicit column orientation: each
    nested list is then treated as a column, so the mixed int/float rows
    cannot satisfy the schema and polars raises TypeError.
    """
    for dataset in (tohlcv_data1, tohlcv_data2):
        with pytest.raises(TypeError):
            pl.DataFrame(dataset, schema=TOHLCV_SCHEMA_PL, orient="col")


@enforce_types
def test_issue657_set_row_orientation():
    """With an explicit orient="row" (the fix), both datasets build fine.

    This is the pre-fix constructor call plus orient="row" instead of
    letting polars infer the orientation — no exception for either dataset.
    """
    for dataset in (tohlcv_data1, tohlcv_data2):
        pl.DataFrame(dataset, schema=TOHLCV_SCHEMA_PL, orient="row")
10 changes: 5 additions & 5 deletions pdr_backend/lake/test/test_plutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ def test_concat_next_df():
cand_dtypes = dict(zip(TOHLCV_COLS, TOHLCV_DTYPES_PL))
schema = {col: cand_dtypes[col] for col in TOHLCV_COLS}

next_df = pl.DataFrame(FOUR_ROWS_RAW_TOHLCV_DATA, schema=schema)
next_df = pl.DataFrame(FOUR_ROWS_RAW_TOHLCV_DATA, schema=schema, orient="row")
assert len(next_df) == 4

# add 4 rows to empty df
Expand All @@ -96,11 +96,11 @@ def test_concat_next_df():
_assert_TOHLCVd_cols_and_types(df)

# assert 1 more row
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema)
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema, orient="row")
assert len(next_df) == 1

# assert that concat verifies schemas match
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema)
next_df = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema, orient="row")
assert len(next_df) == 1
assert "datetime" not in next_df.columns

Expand Down Expand Up @@ -164,7 +164,7 @@ def test_load_append(tmpdir):

# verify: doing a manual concat is the same as the load
schema = dict(zip(TOHLCV_COLS, TOHLCV_DTYPES_PL))
df_1_row = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema)
df_1_row = pl.DataFrame(ONE_ROW_RAW_TOHLCV_DATA, schema=schema, orient="row")
df_5_rows = concat_next_df(df_4_rows, df_1_row)
df_5_rows_loaded = load_rawohlcv_file(filename)

Expand Down Expand Up @@ -205,7 +205,7 @@ def _df_from_raw_data(raw_data: list) -> pl.DataFrame:
df = initialize_rawohlcv_df(TOHLCV_COLS)

schema = dict(zip(TOHLCV_COLS, TOHLCV_DTYPES_PL))
next_df = pl.DataFrame(raw_data, schema=schema)
next_df = pl.DataFrame(raw_data, schema=schema, orient="row")

df = concat_next_df(df, next_df)
return df
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"pandas==2.2.2",
"pathlib",
"plotly==5.22.0",
"polars==0.20.31",
"polars==1.1.0",
"polars[timezone]",
"pyarrow==16.1.0",
"pylint==3.2.5",
Expand Down

0 comments on commit a76e30f

Please sign in to comment.