From 5a5602d21a08fc8629436c2a3bd44a31a7e179a9 Mon Sep 17 00:00:00 2001 From: Christian Lamprecht Date: Mon, 7 Feb 2022 11:02:46 +0100 Subject: [PATCH] Meteostat 1.6.1 (#84) * Meteostat 1.6.1 * Linting * Change Monthly tests + examples * Clean DF based on met cols only --- examples/monthly/aggregate.py | 5 +- meteostat/__init__.py | 2 +- meteostat/interface/daily.py | 9 ++- meteostat/interface/hourly.py | 9 ++- .../interface/{meteo.py => meteodata.py} | 57 ++--------------- meteostat/interface/monthly.py | 19 ++---- meteostat/interface/normals.py | 4 +- meteostat/interface/timeseries.py | 28 +++++---- meteostat/utilities/mutations.py | 61 +++++++++++++++++++ setup.py | 2 +- tests/e2e/test_monthly.py | 24 ++++---- 11 files changed, 122 insertions(+), 98 deletions(-) rename meteostat/interface/{meteo.py => meteodata.py} (82%) create mode 100644 meteostat/utilities/mutations.py diff --git a/examples/monthly/aggregate.py b/examples/monthly/aggregate.py index 670c9a7..ad4034c 100644 --- a/examples/monthly/aggregate.py +++ b/examples/monthly/aggregate.py @@ -17,9 +17,10 @@ end = datetime(2018, 12, 31) # Get monthly data -data = Monthly('10637', start, end) +# Then, aggregate annually +data = Monthly('72202', start, end) data = data.normalize().aggregate(freq='1Y').fetch() # Plot chart -data.plot(y=['tavg', 'tmin', 'tmax']) +data.plot(y='tavg') plt.show() diff --git a/meteostat/__init__.py b/meteostat/__init__.py index f7292dc..23beed3 100644 --- a/meteostat/__init__.py +++ b/meteostat/__init__.py @@ -15,7 +15,7 @@ __version__ = '1.6.0' from .interface.base import Base -from .interface.timeseries import Timeseries +from .interface.timeseries import TimeSeries from .interface.stations import Stations from .interface.point import Point from .interface.hourly import Hourly diff --git a/meteostat/interface/daily.py b/meteostat/interface/daily.py index 53a71c3..b500fdd 100644 --- a/meteostat/interface/daily.py +++ b/meteostat/interface/daily.py @@ -13,11 +13,11 @@ import pandas as pd from meteostat.enumerations.granularity import Granularity from meteostat.utilities.aggregations import degree_mean -from meteostat.interface.timeseries import Timeseries +from meteostat.interface.timeseries import TimeSeries from meteostat.interface.point import Point -class Daily(Timeseries): +class Daily(TimeSeries): """ Retrieve daily weather observations for one or multiple weather stations or @@ -33,6 +33,9 @@ class Daily(Timeseries): # Default frequency _freq: str = '1D' + # Flag which represents model data + _model_flag = 'G' + # Columns _columns: list = [ 'date', @@ -94,7 +97,7 @@ def __init__( ) -> None: # Initialize time series - self._init_timeseries(loc, start, end, model, flags) + self._init_time_series(loc, start, end, model, flags) def expected_rows(self) -> int: """ diff --git a/meteostat/interface/hourly.py b/meteostat/interface/hourly.py index 7ffa7d9..950adb3 100644 --- a/meteostat/interface/hourly.py +++ b/meteostat/interface/hourly.py @@ -15,11 +15,11 @@ import pandas as pd from meteostat.enumerations.granularity import Granularity from meteostat.utilities.aggregations import degree_mean -from meteostat.interface.timeseries import Timeseries +from meteostat.interface.timeseries import TimeSeries from meteostat.interface.point import Point -class Hourly(Timeseries): +class Hourly(TimeSeries): """ Retrieve hourly weather observations for one or multiple weather stations or @@ -41,6 +41,9 @@ class Hourly(Timeseries): # Default frequency _freq: str = '1H' + # Flag which represents model data + _model_flag = 'E' + # Raw data columns _columns: list = [ 'date', @@ -155,7 +158,7 @@ def __init__( self._set_time(start, end, timezone) # Initialize time series - self._init_timeseries(loc, start, end, model, flags) + self._init_time_series(loc, start, end, model, flags) def expected_rows(self) -> int: """ diff --git a/meteostat/interface/meteo.py b/meteostat/interface/meteodata.py similarity index 82% rename from meteostat/interface/meteo.py rename to meteostat/interface/meteodata.py index 9b7ae67..4325405 100644 --- a/meteostat/interface/meteo.py +++ b/meteostat/interface/meteodata.py @@ -11,20 +11,19 @@ The code is licensed under the MIT license. """ -from datetime import datetime from typing import Union -import numpy as np import pandas as pd from meteostat.enumerations.granularity import Granularity from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.utilities.mutations import localize, filter_time, adjust_temp from meteostat.utilities.validations import validate_series from meteostat.utilities.aggregations import weighted_average from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.base import Base -class Meteo(Base): +class MeteoData(Base): """ A parent class for both time series and @@ -37,33 +36,6 @@ class Meteo(Base): # The data frame _data: pd.DataFrame = pd.DataFrame() - @staticmethod - def _localize(df: pd.DataFrame, timezone: str) -> pd.DataFrame: - """ - Convert time data to any time zone - """ - - return df.tz_localize( - 'UTC', - level='time' - ).tz_convert( - timezone, - level='time' - ) - - @staticmethod - def _filter_time(df: pd.DataFrame, start: datetime, - end: datetime) -> pd.DataFrame: - """ - Filter time series data based on start and end date - """ - - # Get time index - time = df.index.get_level_values('time') - - # Filter & return - return df.loc[(time >= start) & (time <= end)] - def _load_data( self, station: str, @@ -118,7 +90,7 @@ def _load_data( # Localize time column if self.granularity == Granularity.HOURLY and self._timezone is not None and len( df.index) > 0: - df = Meteo._localize(df, self._timezone) + df = localize(df, self._timezone) # Filter time period and append to DataFrame # pylint: disable=no-else-return @@ -128,7 +100,7 @@ def _load_data( # Filter & return return df.loc[end == self._end] elif not self.granularity == Granularity.NORMALS: - df = Meteo._filter_time(df, self._start, self._end) + df = filter_time(df, self._start, self._end) # Return return df @@ -167,23 +139,6 @@ def _get_data(self) -> None: # Empty DataFrame return pd.DataFrame(columns=[*self._types]) - @staticmethod - def adjust_temp(df: pd.DataFrame, alt: int): - """ - Adjust temperature-like data based on altitude - """ - - # Temperature-like columns - temp_like = ('temp', 'dwpt', 'tavg', 'tmin', 'tmax') - - # Adjust values for all temperature-like data - for col_name in temp_like: - if col_name in df.columns: - df.loc[df[col_name] != np.NaN, col_name] = df[col_name] + \ - ((2 / 3) * ((df['elevation'] - alt) / 100)) - - return df - # pylint: disable=too-many-branches def _resolve_point( self, @@ -208,7 +163,7 @@ def _resolve_point( stations['elevation'], on='station') # Adapt temperature-like data based on altitude - data = Meteo.adjust_temp(data, alt) + data = adjust_temp(data, alt) # Drop elevation & round data = data.drop('elevation', axis=1).round(1) @@ -236,7 +191,7 @@ def _resolve_point( # Adapt temperature-like data based on altitude if adapt_temp: - data = Meteo.adjust_temp(data, alt) + data = adjust_temp(data, alt) # Exclude non-mean data & perform aggregation if not self.granularity == Granularity.NORMALS: diff --git a/meteostat/interface/monthly.py b/meteostat/interface/monthly.py index 6b33441..029eb64 100644 --- a/meteostat/interface/monthly.py +++ b/meteostat/interface/monthly.py @@ -11,13 +11,12 @@ from datetime import datetime from typing import Union import pandas as pd -from meteostat.utilities.aggregations import degree_mean from meteostat.enumerations.granularity import Granularity -from meteostat.interface.timeseries import Timeseries +from meteostat.interface.timeseries import TimeSeries from meteostat.interface.point import Point -class Monthly(Timeseries): +class Monthly(TimeSeries): """ Retrieve monthly weather data for one or multiple weather stations or @@ -33,6 +32,9 @@ class Monthly(Timeseries): # Default frequency _freq: str = '1MS' + # Flag which represents model data + _model_flag = 'I' + # Columns _columns: list = [ 'year', @@ -41,10 +43,7 @@ class Monthly(Timeseries): 'tmin', 'tmax', 'prcp', - 'snow', - 'wdir', 'wspd', - 'wpgt', 'pres', 'tsun' ] @@ -58,10 +57,7 @@ class Monthly(Timeseries): 'tmin': 'float64', 'tmax': 'float64', 'prcp': 'float64', - 'snow': 'float64', - 'wdir': 'float64', 'wspd': 'float64', - 'wpgt': 'float64', 'pres': 'float64', 'tsun': 'float64' } @@ -77,10 +73,7 @@ class Monthly(Timeseries): 'tmin': 'mean', 'tmax': 'mean', 'prcp': 'sum', - 'snow': 'max', - 'wdir': degree_mean, 'wspd': 'mean', - 'wpgt': 'max', 'pres': 'mean', 'tsun': 'sum' } @@ -99,7 +92,7 @@ def __init__( start = start.replace(day=1) # Initialize time series - self._init_timeseries(loc, start, end, model, flags) + self._init_time_series(loc, start, end, model, flags) def expected_rows(self) -> int: """ diff --git a/meteostat/interface/normals.py b/meteostat/interface/normals.py index 90128ec..47ced67 100644 --- a/meteostat/interface/normals.py +++ b/meteostat/interface/normals.py @@ -15,11 +15,11 @@ import pandas as pd from meteostat.enumerations.granularity import Granularity from meteostat.core.warn import warn -from meteostat.interface.meteo import Meteo +from meteostat.interface.meteodata import MeteoData from meteostat.interface.point import Point -class Normals(Meteo): +class Normals(MeteoData): """ Retrieve climate normals for one or multiple weather stations or diff --git a/meteostat/interface/timeseries.py b/meteostat/interface/timeseries.py index 8fa1591..00d97c5 100644 --- a/meteostat/interface/timeseries.py +++ b/meteostat/interface/timeseries.py @@ -1,5 +1,5 @@ """ -Timeseries Class +TimeSeries Class Meteorological data provided by Meteostat (https://dev.meteostat.net) under the terms of the Creative Commons Attribution-NonCommercial @@ -15,16 +15,17 @@ from meteostat.enumerations.granularity import Granularity from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.utilities.mutations import localize, filter_time from meteostat.utilities.validations import validate_series from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.point import Point -from meteostat.interface.meteo import Meteo +from meteostat.interface.meteodata import MeteoData -class Timeseries(Meteo): +class TimeSeries(MeteoData): """ - Timeseries class which provides features which are + TimeSeries class which provides features which are used across all time series classes """ @@ -76,7 +77,9 @@ def _load_flags( self.endpoint, file, self._columns, - None, + { + key: 'string' for key in self._columns[self._first_met_col:] + }, self._parse_dates) # Validate Series @@ -89,11 +92,11 @@ def _load_flags( # Localize time column if self.granularity == Granularity.HOURLY and self._timezone is not None and len( df.index) > 0: - df = Timeseries._localize(df, self._timezone) + df = localize(df, self._timezone) # Filter time period and append to DataFrame if self._start and self._end: - df = Timeseries._filter_time(df, self._start, self._end) + df = filter_time(df, self._start, self._end) return df @@ -126,8 +129,11 @@ def _filter_model(self) -> None: columns = self._columns[self._first_met_col:] for col_name in columns: - self._data.loc[self._data[f'{col_name}_flag'] - == 'M', col_name] = np.NaN + self._data.loc[ + (pd.isna(self._data[f'{col_name}_flag'])) | + (self._data[f'{col_name}_flag'].str.contains(self._model_flag)), + col_name + ] = np.NaN # Conditionally, remove flags from DataFrame if not self._flags: @@ -137,9 +143,9 @@ def _filter_model(self) -> None: inplace=True) # Drop NaN-only rows - self._data.dropna(how='all', inplace=True) + self._data.dropna(how='all', subset=columns, inplace=True) - def _init_timeseries( + def _init_time_series( self, loc: Union[pd.DataFrame, Point, list, str], # Station(s) or geo point start: datetime = None, diff --git a/meteostat/utilities/mutations.py b/meteostat/utilities/mutations.py new file mode 100644 index 0000000..30a22fa --- /dev/null +++ b/meteostat/utilities/mutations.py @@ -0,0 +1,61 @@ +""" +Utilities - DataFrame Mutations + +Meteorological data provided by Meteostat (https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. +""" + +from datetime import datetime +import numpy as np +import pandas as pd + + +def localize( + df: pd.DataFrame, + timezone: str +) -> pd.DataFrame: + """ + Convert time data to any time zone + """ + + return df.tz_localize( + 'UTC', + level='time' + ).tz_convert( + timezone, + level='time' + ) + +def filter_time( + df: pd.DataFrame, + start: datetime, + end: datetime +) -> pd.DataFrame: + """ + Filter time series data based on start and end date + """ + + # Get time index + time = df.index.get_level_values('time') + + # Filter & return + return df.loc[(time >= start) & (time <= end)] + +def adjust_temp(df: pd.DataFrame, alt: int): + """ + Adjust temperature-like data based on altitude + """ + + # Temperature-like columns + temp_like = ('temp', 'dwpt', 'tavg', 'tmin', 'tmax') + + # Adjust values for all temperature-like data + for col_name in temp_like: + if col_name in df.columns: + df.loc[df[col_name] != np.NaN, col_name] = df[col_name] + \ + ((2 / 3) * ((df['elevation'] - alt) / 100)) + + return df diff --git a/setup.py b/setup.py index 9b519ce..db0d6ed 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ # Setup setup( name='meteostat', - version='1.6.0', + version='1.6.1', author='Meteostat', author_email='info@meteostat.net', description='Access and analyze historical weather and climate data with Python.', diff --git a/tests/e2e/test_monthly.py b/tests/e2e/test_monthly.py index 2db42dc..0756f05 100644 --- a/tests/e2e/test_monthly.py +++ b/tests/e2e/test_monthly.py @@ -15,9 +15,10 @@ def test_normalize(): # Get 2018 monthly data for Frankfurt Airport data = Monthly( - ['10637'], start=datetime( - 2018, 1, 1), end=datetime( - 2018, 12, 31)) + '72202', + start=datetime(2018, 1, 1), + end=datetime(2018, 12, 31) + ) count = data.normalize().count() # Check if count matches 12 @@ -29,13 +30,13 @@ def test_aggregate(): """ # Get 2018 monthly data for Frankfurt Airport - data = Monthly( - ['10637'], start=datetime( - 2018, 1, 1), end=datetime( - 2018, 12, 31)) + data = Monthly('72202', + start=datetime(2018, 1, 1), + end=datetime(2018, 12, 31) + ) count = data.normalize().aggregate('1Y').count() - # Check if count matches 53 + # Check if count matches 1 assert count == 1 def test_coverage(): @@ -45,9 +46,10 @@ def test_coverage(): # Get 2018 monthly data for Frankfurt Airport data = Monthly( - ['10637'], start=datetime( - 2018, 1, 1), end=datetime( - 2018, 12, 31)) + '72202', + start=datetime(2018, 1, 1), + end=datetime(2018, 12, 31) + ) coverage = data.normalize().coverage() # Check if coverage is 100%