From 4f3356150f72f986e517aa7870ee9a7b7ca7749e Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 5 Sep 2024 16:02:05 +0000 Subject: [PATCH 1/8] feat: add basic geopandas functionality --- .../functions/_remote_function_session.py | 2 +- bigframes/geopandas/__init__.py | 17 +++++ bigframes/geopandas/geoseries.py | 21 ++++++ .../bigframes_vendored/geopandas/LICENSE.txt | 25 +++++++ .../bigframes_vendored/geopandas/geoseries.py | 69 +++++++++++++++++++ 5 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 bigframes/geopandas/__init__.py create mode 100644 bigframes/geopandas/geoseries.py create mode 100644 third_party/bigframes_vendored/geopandas/LICENSE.txt create mode 100644 third_party/bigframes_vendored/geopandas/geoseries.py diff --git a/bigframes/functions/_remote_function_session.py b/bigframes/functions/_remote_function_session.py index 0ab19ca353..fba3b5ba41 100644 --- a/bigframes/functions/_remote_function_session.py +++ b/bigframes/functions/_remote_function_session.py @@ -387,7 +387,7 @@ def wrapper(func): # https://docs.python.org/3/library/inspect.html#inspect.signature signature_kwargs: Mapping[str, Any] = {"eval_str": True} else: - signature_kwargs = {} + signature_kwargs = {} # type: ignore signature = inspect.signature( func, diff --git a/bigframes/geopandas/__init__.py b/bigframes/geopandas/__init__.py new file mode 100644 index 0000000000..08966ba923 --- /dev/null +++ b/bigframes/geopandas/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from bigframes.geopandas.geoseries import GeoSeries + +__all__ = ["GeoSeries"] diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py new file mode 100644 index 0000000000..109b1be6a7 --- /dev/null +++ b/bigframes/geopandas/geoseries.py @@ -0,0 +1,21 @@ +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import bigframes_vendored.geopandas.geoseries as vendored_geoseries + +import bigframes.series + + +class GeoSeries(vendored_geoseries.GeoSeries, bigframes.series.Series): + pass diff --git a/third_party/bigframes_vendored/geopandas/LICENSE.txt b/third_party/bigframes_vendored/geopandas/LICENSE.txt new file mode 100644 index 0000000000..028603be20 --- /dev/null +++ b/third_party/bigframes_vendored/geopandas/LICENSE.txt @@ -0,0 +1,25 @@ +Copyright (c) 2013-2022, GeoPandas developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. 
+ * Neither the name of GeoPandas nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py new file mode 100644 index 0000000000..f0f73e953b --- /dev/null +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -0,0 +1,69 @@ +# contains code from https://github.com/geopandas/geopandas/blob/main/geopandas/geoseries.py +from __future__ import annotations + +from typing import TYPE_CHECKING + +from bigframes import constants + +if TYPE_CHECKING: + import bigframes.series + + +class GeoSeries: + """ + A Series object designed to store geometry objects. + """ + + @property + def x(self) -> bigframes.series.Series: + """Return the x location of point geometries in a GeoSeries + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> from shapely.geometry import Point + >>> import geopandas + + >>> series = bigframes.pandas.Series( + ... [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], + ... dtype=geopandas.array.GeometryDtype() + ... 
) + >>> s.x + 0 1.0 + 1 2.0 + 2 3.0 + dtype: float64 + + Returns: + bigframes.series.Series: + Return the x location (longitude) of point geometries. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + @property + def y(self) -> bigframes.series.Series: + """Return the y location of point geometries in a GeoSeries + + **Examples:** + + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> from shapely.geometry import Point + >>> import geopandas + + >>> series = bigframes.pandas.Series( + ... [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], + ... dtype=geopandas.array.GeometryDtype() + ... ) + >>> s.y + 0 1.0 + 1 2.0 + 2 3.0 + dtype: float64 + + Returns: + bigframes.series.Series: + Return the y location (latitude) of point geometries. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From b0dafb3ef9f05d9f4dfb3c19e0daff60ddcaf8e4 Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 12 Sep 2024 15:23:09 +0000 Subject: [PATCH 2/8] update examples for geoseries --- .../bigframes_vendored/geopandas/geoseries.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index f0f73e953b..37efa7c710 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -22,10 +22,10 @@ def x(self) -> bigframes.series.Series: >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> from shapely.geometry import Point - >>> import geopandas + >>> import geopandas.array + >>> import shapely - >>> series = bigframes.pandas.Series( + >>> series = bpd.Series( ... [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], ... dtype=geopandas.array.GeometryDtype() ... 
) @@ -49,10 +49,10 @@ def y(self) -> bigframes.series.Series: >>> import bigframes.pandas as bpd >>> bpd.options.display.progress_bar = None - >>> from shapely.geometry import Point - >>> import geopandas + >>> import geopandas.array + >>> import shapely - >>> series = bigframes.pandas.Series( + >>> series = bpd.Series( ... [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], ... dtype=geopandas.array.GeometryDtype() ... ) From f58595f9c3d396b5cf837009248267b241b1f2b1 Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 12 Sep 2024 15:48:45 +0000 Subject: [PATCH 3/8] feat: add Series.geo helper to convert Series to a GeoSeries --- bigframes/series.py | 13 +++++++++++++ .../bigframes_vendored/geopandas/geoseries.py | 4 ++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index 5192a9cf49..579d9783eb 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -49,6 +49,7 @@ import bigframes.dataframe import bigframes.dtypes import bigframes.formatting_helpers as formatter +import bigframes.geopandas import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops import bigframes.operations.base @@ -89,6 +90,18 @@ def dtype(self): def dtypes(self): return self._dtype + @property + def geo(self) -> bigframes.geopandas.GeoSeries: + """ + Accessor object for geography properties of the Series values. + + Returns: + bigframes.geopandas.GeoSeries: + An accessor containing geography methods. + + """ + return bigframes.geopandas.GeoSeries(self) + @property @validations.requires_index def loc(self) -> bigframes.core.indexers.LocSeriesIndexer: diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 37efa7c710..814b7281f4 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -29,7 +29,7 @@ def x(self) -> bigframes.series.Series: ... 
[shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], ... dtype=geopandas.array.GeometryDtype() ... ) - >>> s.x + >>> series.geo.x 0 1.0 1 2.0 2 3.0 @@ -56,7 +56,7 @@ def y(self) -> bigframes.series.Series: ... [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], ... dtype=geopandas.array.GeometryDtype() ... ) - >>> s.y + >>> series.geo.y 0 1.0 1 2.0 2 3.0 From fb602522eb70435e62b7fe1f3ea18447917f6189 Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 17 Sep 2024 15:06:42 -0500 Subject: [PATCH 4/8] fix circular import --- bigframes/series.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/bigframes/series.py b/bigframes/series.py index 579d9783eb..0336094569 100644 --- a/bigframes/series.py +++ b/bigframes/series.py @@ -49,7 +49,6 @@ import bigframes.dataframe import bigframes.dtypes import bigframes.formatting_helpers as formatter -import bigframes.geopandas import bigframes.operations as ops import bigframes.operations.aggregations as agg_ops import bigframes.operations.base @@ -59,6 +58,9 @@ import bigframes.operations.strings as strings import bigframes.operations.structs as structs +if typing.TYPE_CHECKING: + import bigframes.geopandas.geoseries + LevelType = typing.Union[str, int] LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]] @@ -91,16 +93,18 @@ def dtypes(self): return self._dtype @property - def geo(self) -> bigframes.geopandas.GeoSeries: + def geo(self) -> bigframes.geopandas.geoseries.GeoSeries: """ Accessor object for geography properties of the Series values. Returns: - bigframes.geopandas.GeoSeries: + bigframes.geopandas.geoseries.GeoSeries: An accessor containing geography methods. 
""" - return bigframes.geopandas.GeoSeries(self) + import bigframes.geopandas.geoseries + + return bigframes.geopandas.geoseries.GeoSeries(self) @property @validations.requires_index From 76eaf4d9ae101af2562ec9033569f530cafb8f47 Mon Sep 17 00:00:00 2001 From: Arwa Date: Tue, 17 Sep 2024 15:40:02 -0500 Subject: [PATCH 5/8] Added a constructor --- bigframes/geopandas/geoseries.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 109b1be6a7..fd90df0edc 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -11,11 +11,16 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from __future__ import annotations import bigframes_vendored.geopandas.geoseries as vendored_geoseries +import geopandas.array import bigframes.series class GeoSeries(vendored_geoseries.GeoSeries, bigframes.series.Series): - pass + def __init__(self, data=None, index=None, **kwargs): + super().__init__( + data=data, index=index, dtype=geopandas.array.GeometryDtype(), **kwargs + ) From c55f66fbc43e2d52d9b02e5e47e4c317e9362b4b Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 31 Oct 2024 15:36:16 +0000 Subject: [PATCH 6/8] add documentation for geoseries --- bigframes/geopandas/geoseries.py | 2 ++ .../bigframes.geopandas/geoseries.rst | 17 ++++++++++++++ docs/reference/bigframes.geopandas/index.rst | 9 ++++++++ docs/reference/index.rst | 5 +++-- .../bigframes_vendored/geopandas/geoseries.py | 22 +++++++++++++++++++ 5 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 docs/reference/bigframes.geopandas/geoseries.rst create mode 100644 docs/reference/bigframes.geopandas/index.rst diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index fd90df0edc..4a409d55bc 100644 --- a/bigframes/geopandas/geoseries.py 
+++ b/bigframes/geopandas/geoseries.py @@ -20,6 +20,8 @@ class GeoSeries(vendored_geoseries.GeoSeries, bigframes.series.Series): + __doc__ = vendored_geoseries.GeoSeries.__doc__ + def __init__(self, data=None, index=None, **kwargs): super().__init__( data=data, index=index, dtype=geopandas.array.GeometryDtype(), **kwargs diff --git a/docs/reference/bigframes.geopandas/geoseries.rst b/docs/reference/bigframes.geopandas/geoseries.rst new file mode 100644 index 0000000000..1819613955 --- /dev/null +++ b/docs/reference/bigframes.geopandas/geoseries.rst @@ -0,0 +1,17 @@ + +========= +GeoSeries +========= + +.. contents:: Table of Contents + :depth: 2 + :local: + :backlinks: none + +Series +------ + +.. autoclass:: bigframes.geopandas.geoseries.GeoSeries + :members: + :inherited-members: + :undoc-members: diff --git a/docs/reference/bigframes.geopandas/index.rst b/docs/reference/bigframes.geopandas/index.rst new file mode 100644 index 0000000000..e33946461c --- /dev/null +++ b/docs/reference/bigframes.geopandas/index.rst @@ -0,0 +1,9 @@ + +=============================== +BigQuery DataFrames (geopandas) +=============================== + +.. toctree:: + :maxdepth: 2 + + geoseries diff --git a/docs/reference/index.rst b/docs/reference/index.rst index eb5a774b29..a0f96f751a 100644 --- a/docs/reference/index.rst +++ b/docs/reference/index.rst @@ -8,7 +8,8 @@ packages. :maxdepth: 2 bigframes/index - bigframes.pandas/index - bigframes.ml/index bigframes.bigquery/index + bigframes.geopandas/index + bigframes.ml/index + bigframes.pandas/index bigframes.streaming/index diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 814b7281f4..f2112ed25b 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -12,6 +12,28 @@ class GeoSeries: """ A Series object designed to store geometry objects. 
+ + **Examples:** + + >>> import bigframes.geopandas + >>> import bigframes.pandas as bpd + >>> bpd.options.display.progress_bar = None + >>> from shapely.geometry import Point + >>> s = bigframes.geopandas.GeoSeries([Point(1, 1), Point(2, 2), Point(3, 3)]) + >>> s + 0 POINT (1 1) + 1 POINT (2 2) + 2 POINT (3 3) + dtype: geometry + + Args: + data (array-like, dict, scalar value, bigframes.pandas.Series): + The geometries to store in the GeoSeries. + index (array-like, pandas.Index, bigframes.pandas.Index): + The index for the GeoSeries. + kwargs (dict): + Additional arguments passed to the Series constructor, + e.g. ``name``. """ @property From cbcc032e6405bfc5e5a3557719a91e05f944d3dd Mon Sep 17 00:00:00 2001 From: Daniela Date: Thu, 31 Oct 2024 15:44:40 +0000 Subject: [PATCH 7/8] remove GeoSeries.x and GeoSeries.y temporarily --- .../bigframes_vendored/geopandas/geoseries.py | 61 ------------------- 1 file changed, 61 deletions(-) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index f2112ed25b..81d4e94600 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -1,13 +1,6 @@ # contains code from https://github.com/geopandas/geopandas/blob/main/geopandas/geoseries.py from __future__ import annotations -from typing import TYPE_CHECKING - -from bigframes import constants - -if TYPE_CHECKING: - import bigframes.series - class GeoSeries: """ @@ -35,57 +28,3 @@ class GeoSeries: Additional arguments passed to the Series constructor, e.g. ``name``. """ - - @property - def x(self) -> bigframes.series.Series: - """Return the x location of point geometries in a GeoSeries - - **Examples:** - - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> import geopandas.array - >>> import shapely - - >>> series = bpd.Series( - ... [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], - ... 
dtype=geopandas.array.GeometryDtype() - ... ) - >>> series.geo.x - 0 1.0 - 1 2.0 - 2 3.0 - dtype: float64 - - Returns: - bigframes.series.Series: - Return the x location (longitude) of point geometries. - """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) - - @property - def y(self) -> bigframes.series.Series: - """Return the y location of point geometries in a GeoSeries - - **Examples:** - - >>> import bigframes.pandas as bpd - >>> bpd.options.display.progress_bar = None - >>> import geopandas.array - >>> import shapely - - >>> series = bpd.Series( - ... [shapely.Point(1, 1), shapely.Point(2, 2), shapely.Point(3, 3)], - ... dtype=geopandas.array.GeometryDtype() - ... ) - >>> series.geo.y - 0 1.0 - 1 2.0 - 2 3.0 - dtype: float64 - - Returns: - bigframes.series.Series: - Return the y location (latitude) of point geometries. - """ - raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) From e351e363b69f54e8a397d27943a8fb659a52cd52 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20Swe=C3=B1a=20=28Swast=29?= Date: Thu, 31 Oct 2024 11:13:50 -0500 Subject: [PATCH 8/8] Update bigframes/geopandas/geoseries.py --- bigframes/geopandas/geoseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 4a409d55bc..959934e2c3 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -14,7 +14,7 @@ from __future__ import annotations import bigframes_vendored.geopandas.geoseries as vendored_geoseries -import geopandas.array +import geopandas.array # type: ignore import bigframes.series