From 96766a0c3ba96998ebc568c2bf58e1aa2e4d53bc Mon Sep 17 00:00:00 2001 From: Christian Lamprecht Date: Sat, 8 Jan 2022 12:30:42 +0100 Subject: [PATCH] Meteostat 1.5.11 (#82) * Bugfix and new endpoint path function (#80) * Fixes bug with chunked datasets in hourly requests and introduce a function to generate the endpoint path within a tested function * Update linter.yml Workaround for super-linter bug Co-authored-by: Christian Lamprecht * Minor adaptions to better understand cache behaviour (#81) * Restructuring & Linting * misplaced-comparison-constant * Wrap up v1.5.11 * Finalize 1.5.11 Co-authored-by: Daniel Lassahn --- .github/workflows/linter.yml | 2 +- meteostat/__init__.py | 2 +- meteostat/core/cache.py | 5 +- meteostat/core/loader.py | 3 +- meteostat/enumerations/__init__.py | 0 meteostat/enumerations/granularity.py | 22 +++++++ meteostat/interface/daily.py | 20 +++--- meteostat/interface/hourly.py | 94 +++++++++++++++++---------- meteostat/interface/monthly.py | 25 +++---- meteostat/interface/normals.py | 22 ++++--- meteostat/interface/stations.py | 4 +- meteostat/utilities/endpoint.py | 37 +++++++++++ setup.py | 2 +- tests/core/__init__.py | 0 tests/core/test_cache.py | 37 +++++++++++ tests/utilities/__init__.py | 0 tests/utilities/test_endpoint.py | 84 ++++++++++++++++++++++++ 17 files changed, 283 insertions(+), 76 deletions(-) create mode 100644 meteostat/enumerations/__init__.py create mode 100644 meteostat/enumerations/granularity.py create mode 100644 meteostat/utilities/endpoint.py create mode 100644 tests/core/__init__.py create mode 100644 tests/core/test_cache.py create mode 100644 tests/utilities/__init__.py create mode 100644 tests/utilities/test_endpoint.py diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index a6382fc..4f18312 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -48,7 +48,7 @@ jobs: # Run Linter against code base # ################################ - name: Lint Code Base - 
uses: github/super-linter@v3 + uses: github/super-linter@v3.17.0 env: VALIDATE_ALL_CODEBASE: false DEFAULT_BRANCH: master diff --git a/meteostat/__init__.py b/meteostat/__init__.py index bf87925..0f330c8 100644 --- a/meteostat/__init__.py +++ b/meteostat/__init__.py @@ -12,7 +12,7 @@ """ __appname__ = 'meteostat' -__version__ = '1.5.10' +__version__ = '1.5.11' from .interface.base import Base from .interface.timeseries import Timeseries diff --git a/meteostat/core/cache.py b/meteostat/core/cache.py index 3b843ff..72553ee 100644 --- a/meteostat/core/cache.py +++ b/meteostat/core/cache.py @@ -13,7 +13,7 @@ import hashlib -def get_file_path( +def get_local_file_path( cache_dir: str, cache_subdir: str, path: str @@ -25,8 +25,7 @@ def get_file_path( # Get file ID file = hashlib.md5(path.encode('utf-8')).hexdigest() - # Return path - return cache_dir + os.sep + cache_subdir + os.sep + file + return f"{cache_dir}/{cache_subdir}/{file}" def file_in_cache( diff --git a/meteostat/core/loader.py b/meteostat/core/loader.py index 78eeeb8..4e730e8 100644 --- a/meteostat/core/loader.py +++ b/meteostat/core/loader.py @@ -87,7 +87,8 @@ def load_handler( compression='gzip', names=columns, dtype=types, - parse_dates=parse_dates) + parse_dates=parse_dates + ) # Force datetime conversion if coerce_dates: diff --git a/meteostat/enumerations/__init__.py b/meteostat/enumerations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/meteostat/enumerations/granularity.py b/meteostat/enumerations/granularity.py new file mode 100644 index 0000000..d02a8a7 --- /dev/null +++ b/meteostat/enumerations/granularity.py @@ -0,0 +1,22 @@ +""" +Granularity Enumeration + +Meteorological data provided by Meteostat (https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. 
+""" + +from enum import Enum + + +class Granularity(Enum): + """ + The different levels of time series granularity + """ + + HOURLY = 'hourly' + DAILY = 'daily' + MONTHLY = 'monthly' + NORMALS = 'normals' diff --git a/meteostat/interface/daily.py b/meteostat/interface/daily.py index 4e4b94c..b9c980b 100644 --- a/meteostat/interface/daily.py +++ b/meteostat/interface/daily.py @@ -12,10 +12,12 @@ from typing import Union import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.enumerations.granularity import Granularity from meteostat.utilities.validations import validate_series from meteostat.utilities.aggregations import degree_mean, weighted_average +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.timeseries import Timeseries from meteostat.interface.point import Point @@ -93,11 +95,14 @@ def _load( """ # File name - file = 'daily/' + ('full' if self._model else 'obs') + \ - '/' + station + '.csv.gz' + file = generate_endpoint_path( + Granularity.DAILY, + station, + self._model + ) # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -142,12 +147,7 @@ def _get_data(self) -> None: if len(self._stations) > 0: # List of datasets - datasets = [] - - for station in self._stations: - datasets.append(( - str(station), - )) + datasets = [(str(station),) for station in self._stations] # Data Processing return processing_handler( diff --git a/meteostat/interface/hourly.py b/meteostat/interface/hourly.py index a2d41a9..47d745c 100644 --- a/meteostat/interface/hourly.py +++ b/meteostat/interface/hourly.py @@ -9,15 +9,17 @@ """ from math import floor -from 
datetime import datetime +from datetime import datetime, timedelta from typing import Union import pytz import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.enumerations.granularity import Granularity from meteostat.utilities.validations import validate_series from meteostat.utilities.aggregations import degree_mean, weighted_average +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.timeseries import Timeseries from meteostat.interface.point import Point @@ -32,7 +34,7 @@ class Hourly(Timeseries): # The cache subdirectory cache_subdir: str = 'hourly' - # Specify if the library should use chunks or full dumps + # Download data as annual chunks chunked: bool = True # The time zone @@ -118,13 +120,15 @@ def _set_time( # Set start date self._start = timezone.localize( - start, is_dst=None).astimezone( - pytz.utc) + start, + is_dst=None + ).astimezone(pytz.utc) # Set end date self._end = timezone.localize( - end, is_dst=None).astimezone( - pytz.utc) + end, + is_dst=None + ).astimezone(pytz.utc) else: @@ -134,21 +138,25 @@ def _set_time( # Set end date self._end = end + self._annual_steps = [ + ( + self._start + timedelta(days=365 * i) + ).year for i in range( + self._end.year - self._start.year + 1 + ) + ] + def _load( self, station: str, - year: str = None + file: str ) -> None: """ Load file from Meteostat """ - # File name - file = 'hourly/' + ('full' if self._model else 'obs') + '/' + \ - (year + '/' if year else '') + station + '.csv.gz' - # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -164,7 +172,8 @@ def _load( file, self._columns, 
self._types, - self._parse_dates) + self._parse_dates + ) # Validate Series df = validate_series(df, station) @@ -176,8 +185,12 @@ def _load( # Localize time column if self._timezone is not None and len(df.index) > 0: df = df.tz_localize( - 'UTC', level='time').tz_convert( - self._timezone, level='time') + 'UTC', + level='time' + ).tz_convert( + self._timezone, + level='time' + ) # Filter time period and append to DataFrame if self._start and self._end: @@ -198,29 +211,40 @@ def _get_data(self) -> None: if len(self._stations) > 0: - # List of datasets - datasets = [] - - for station in self._stations: - - if self.chunked and self._start and self._end: - - for year in range(self._start.year, self._end.year + 1): - datasets.append(( - str(station), - str(year) - )) - - else: + # Create list of datasets + if self.chunked: + datasets = [ + ( + str(station), + generate_endpoint_path( + Granularity.HOURLY, + station, + self._model, + year + ) + ) + for station in self._stations for year in self._annual_steps + ] - datasets.append(( + else: + datasets = [ + ( str(station), - None - )) + generate_endpoint_path( + Granularity.HOURLY, + station, + self._model + ) + ) + for station in self._stations + ] - # Data Processing return processing_handler( - datasets, self._load, self.processes, self.threads) + datasets, + self._load, + self.processes, + self.threads + ) return pd.DataFrame(columns=[*self._types]) diff --git a/meteostat/interface/monthly.py b/meteostat/interface/monthly.py index 92f7600..3c9e69a 100644 --- a/meteostat/interface/monthly.py +++ b/meteostat/interface/monthly.py @@ -12,10 +12,12 @@ from typing import Union import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.utilities.validations import 
validate_series from meteostat.utilities.aggregations import degree_mean, weighted_average +from meteostat.enumerations.granularity import Granularity from meteostat.interface.timeseries import Timeseries from meteostat.interface.point import Point @@ -94,11 +96,14 @@ def _load( """ # File name - file = 'monthly/' + ('full' if self._model else 'obs') + \ - '/' + station + '.csv.gz' + file = generate_endpoint_path( + Granularity.MONTHLY, + station, + self._model + ) # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -143,15 +148,11 @@ def _get_data(self) -> None: if len(self._stations) > 0: # List of datasets - datasets = [] - - for station in self._stations: - datasets.append(( - str(station), - )) - + datasets = [(str(station),) for station in self._stations] # Data Processing - return processing_handler(datasets, self._load, self.processes, self.threads) + return processing_handler( + datasets, self._load, self.processes, self.threads + ) # Empty DataFrame return pd.DataFrame(columns=[*self._types]) diff --git a/meteostat/interface/normals.py b/meteostat/interface/normals.py index d7d21dc..e526bc7 100644 --- a/meteostat/interface/normals.py +++ b/meteostat/interface/normals.py @@ -13,10 +13,12 @@ from datetime import datetime import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache +from meteostat.enumerations.granularity import Granularity from meteostat.core.loader import processing_handler, load_handler from meteostat.core.warn import warn from meteostat.utilities.aggregations import weighted_average +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.base import Base from meteostat.interface.point import Point @@ -78,10 
+80,13 @@ def _load( """ # File name - file = f'normals/{station}.csv.gz' + file = generate_endpoint_path( + Granularity.NORMALS, + station + ) # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -97,11 +102,13 @@ def _load( file, self._columns, self._types, - None) + None + ) if df.index.size > 0: # Add weather station ID + # pylint: disable=unsupported-assignment-operation df['station'] = station # Set index @@ -130,12 +137,7 @@ def _get_data(self) -> None: if len(self._stations) > 0: # List of datasets - datasets = [] - - for station in self._stations: - datasets.append(( - str(station), - )) + datasets = [(str(station),) for station in self._stations] # Data Processing return processing_handler( diff --git a/meteostat/interface/stations.py b/meteostat/interface/stations.py index dc70875..16f8503 100644 --- a/meteostat/interface/stations.py +++ b/meteostat/interface/stations.py @@ -13,7 +13,7 @@ from typing import Union import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import load_handler from meteostat.interface.base import Base @@ -76,7 +76,7 @@ def _load(self) -> None: file = 'stations/slim.csv.gz' # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): diff --git a/meteostat/utilities/endpoint.py b/meteostat/utilities/endpoint.py new file mode 100644 index 0000000..e1e0cb6 --- /dev/null +++ b/meteostat/utilities/endpoint.py @@ -0,0 +1,37 @@ +""" +Utilities - Endpoint Helpers + +Meteorological data provided by Meteostat 
(https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. +""" + +from typing import Union +from meteostat.enumerations.granularity import Granularity + + +def generate_endpoint_path( + granularity: Granularity, + station: str, + model: bool = True, + year: Union[int, None] = None +) -> str: + """ + Generate Meteostat Bulk path + """ + + # Base path + path = f"{granularity.value}/" + + if granularity != Granularity.NORMALS: + if model: + path += 'full/' + else: + path += 'obs/' + + if granularity == Granularity.HOURLY and year: + path += f"{year}/" + + return f"{path}{station}.csv.gz" diff --git a/setup.py b/setup.py index b103257..bcd9828 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ # Setup setup( name='meteostat', - version='1.5.10', + version='1.5.11', author='Meteostat', author_email='info@meteostat.net', description='Access and analyze historical weather and climate data with Python.', diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py new file mode 100644 index 0000000..1a20e38 --- /dev/null +++ b/tests/core/test_cache.py @@ -0,0 +1,37 @@ +""" +Cache Tests + +Meteorological data provided by Meteostat (https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. 
+""" + +from meteostat.core.cache import get_local_file_path + +EXPECTED_FILE_PATH = "cache/hourly/6dfc35c47756e962ef055d1049f1f8ec" + + +def test_get_local_file_path(): + """ + Test local file path + """ + + assert get_local_file_path( + 'cache', + 'hourly', + '10101' + ) == EXPECTED_FILE_PATH + + +def test_get_local_file_path_chunked(): + """ + Test local file path II + """ + + assert get_local_file_path( + 'cache', + 'hourly', + '10101_2022' + ) != EXPECTED_FILE_PATH diff --git a/tests/utilities/__init__.py b/tests/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utilities/test_endpoint.py b/tests/utilities/test_endpoint.py new file mode 100644 index 0000000..b5951a6 --- /dev/null +++ b/tests/utilities/test_endpoint.py @@ -0,0 +1,84 @@ +""" +Endpoint Utility Tests + +Meteorological data provided by Meteostat (https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. 
+""" + +from meteostat.utilities.endpoint import generate_endpoint_path +from meteostat.enumerations.granularity import Granularity + + +def test_generate_endpoint_path_normals(): + """ + Generate endpoint path for climate normals + """ + + assert generate_endpoint_path( + Granularity.NORMALS, + '10286' + ) == 'normals/10286.csv.gz' + + +def test_generate_endpoint_path_hourly_full(): + """ + Generate endpoint path for full hourly data + """ + + assert generate_endpoint_path( + Granularity.HOURLY, + '10286', + True + ) == 'hourly/full/10286.csv.gz' + + +def test_generate_endpoint_path_hourly_full_obs(): + """ + Generate endpoint path for hourly observation data + """ + + assert generate_endpoint_path( + Granularity.HOURLY, + '10286', + False + ) == 'hourly/obs/10286.csv.gz' + + +def test_generate_endpoint_path_hourly_subset(): + """ + Generate endpoint path for hourly chunk + """ + + assert generate_endpoint_path( + Granularity.HOURLY, + '10286', + True, + 2021 + ) == 'hourly/full/2021/10286.csv.gz' + + +def test_generate_endpoint_path_daily_subset(): + """ + Generate endpoint path for full daily data + """ + + assert generate_endpoint_path( + Granularity.DAILY, + '10286', + True + ) == 'daily/full/10286.csv.gz' + + +def test_generate_endpoint_path_monthly_subset(): + """ + Generate endpoint path for full monthly data + """ + + assert generate_endpoint_path( + Granularity.MONTHLY, + '10286', + True + ) == 'monthly/full/10286.csv.gz'