From 96766a0c3ba96998ebc568c2bf58e1aa2e4d53bc Mon Sep 17 00:00:00 2001 From: Christian Lamprecht Date: Sat, 8 Jan 2022 12:30:42 +0100 Subject: [PATCH] Meteostat 1.5.11 (#82) * Bugfix and new endpoint path function (#80) * Fixes bug with chunked datasets in hourly requests and introduce a function to generate the endpoint path within a tested function * Update linter.yml Workaround for super-linter bug Co-authored-by: Christian Lamprecht * Minor adaptions to better understand cache behaviour (#81) * Restructuring & Linting * misplaced-comparison-constant * Wrap up v1.5.11 * Finalize 1.5.11 Co-authored-by: Daniel Lassahn --- .github/workflows/linter.yml | 2 +- meteostat/__init__.py | 2 +- meteostat/core/cache.py | 5 +- meteostat/core/loader.py | 3 +- meteostat/enumerations/__init__.py | 0 meteostat/enumerations/granularity.py | 22 +++++++ meteostat/interface/daily.py | 20 +++--- meteostat/interface/hourly.py | 94 +++++++++++++++++---------- meteostat/interface/monthly.py | 25 +++---- meteostat/interface/normals.py | 22 ++++--- meteostat/interface/stations.py | 4 +- meteostat/utilities/endpoint.py | 37 +++++++++++ setup.py | 2 +- tests/core/__init__.py | 0 tests/core/test_cache.py | 37 +++++++++++ tests/utilities/__init__.py | 0 tests/utilities/test_endpoint.py | 84 ++++++++++++++++++++++++ 17 files changed, 283 insertions(+), 76 deletions(-) create mode 100644 meteostat/enumerations/__init__.py create mode 100644 meteostat/enumerations/granularity.py create mode 100644 meteostat/utilities/endpoint.py create mode 100644 tests/core/__init__.py create mode 100644 tests/core/test_cache.py create mode 100644 tests/utilities/__init__.py create mode 100644 tests/utilities/test_endpoint.py diff --git a/.github/workflows/linter.yml b/.github/workflows/linter.yml index a6382fc..4f18312 100644 --- a/.github/workflows/linter.yml +++ b/.github/workflows/linter.yml @@ -48,7 +48,7 @@ jobs: # Run Linter against code base # ################################ - name: Lint Code Base - 
uses: github/super-linter@v3 + uses: github/super-linter@v3.17.0 env: VALIDATE_ALL_CODEBASE: false DEFAULT_BRANCH: master diff --git a/meteostat/__init__.py b/meteostat/__init__.py index bf87925..0f330c8 100644 --- a/meteostat/__init__.py +++ b/meteostat/__init__.py @@ -12,7 +12,7 @@ """ __appname__ = 'meteostat' -__version__ = '1.5.10' +__version__ = '1.5.11' from .interface.base import Base from .interface.timeseries import Timeseries diff --git a/meteostat/core/cache.py b/meteostat/core/cache.py index 3b843ff..72553ee 100644 --- a/meteostat/core/cache.py +++ b/meteostat/core/cache.py @@ -13,7 +13,7 @@ import hashlib -def get_file_path( +def get_local_file_path( cache_dir: str, cache_subdir: str, path: str @@ -25,8 +25,7 @@ def get_file_path( # Get file ID file = hashlib.md5(path.encode('utf-8')).hexdigest() - # Return path - return cache_dir + os.sep + cache_subdir + os.sep + file + return f"{cache_dir}/{cache_subdir}/{file}" def file_in_cache( diff --git a/meteostat/core/loader.py b/meteostat/core/loader.py index 78eeeb8..4e730e8 100644 --- a/meteostat/core/loader.py +++ b/meteostat/core/loader.py @@ -87,7 +87,8 @@ def load_handler( compression='gzip', names=columns, dtype=types, - parse_dates=parse_dates) + parse_dates=parse_dates + ) # Force datetime conversion if coerce_dates: diff --git a/meteostat/enumerations/__init__.py b/meteostat/enumerations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/meteostat/enumerations/granularity.py b/meteostat/enumerations/granularity.py new file mode 100644 index 0000000..d02a8a7 --- /dev/null +++ b/meteostat/enumerations/granularity.py @@ -0,0 +1,22 @@ +""" +Granularity Enumeration + +Meteorological data provided by Meteostat (https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. 
+""" + +from enum import Enum + + +class Granularity(Enum): + """ + The different levels of time series granularity + """ + + HOURLY = 'hourly' + DAILY = 'daily' + MONTHLY = 'monthly' + NORMALS = 'normals' diff --git a/meteostat/interface/daily.py b/meteostat/interface/daily.py index 4e4b94c..b9c980b 100644 --- a/meteostat/interface/daily.py +++ b/meteostat/interface/daily.py @@ -12,10 +12,12 @@ from typing import Union import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.enumerations.granularity import Granularity from meteostat.utilities.validations import validate_series from meteostat.utilities.aggregations import degree_mean, weighted_average +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.timeseries import Timeseries from meteostat.interface.point import Point @@ -93,11 +95,14 @@ def _load( """ # File name - file = 'daily/' + ('full' if self._model else 'obs') + \ - '/' + station + '.csv.gz' + file = generate_endpoint_path( + Granularity.DAILY, + station, + self._model + ) # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -142,12 +147,7 @@ def _get_data(self) -> None: if len(self._stations) > 0: # List of datasets - datasets = [] - - for station in self._stations: - datasets.append(( - str(station), - )) + datasets = [(str(station),) for station in self._stations] # Data Processing return processing_handler( diff --git a/meteostat/interface/hourly.py b/meteostat/interface/hourly.py index a2d41a9..47d745c 100644 --- a/meteostat/interface/hourly.py +++ b/meteostat/interface/hourly.py @@ -9,15 +9,17 @@ """ from math import floor -from 
datetime import datetime +from datetime import datetime, timedelta from typing import Union import pytz import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.enumerations.granularity import Granularity from meteostat.utilities.validations import validate_series from meteostat.utilities.aggregations import degree_mean, weighted_average +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.timeseries import Timeseries from meteostat.interface.point import Point @@ -32,7 +34,7 @@ class Hourly(Timeseries): # The cache subdirectory cache_subdir: str = 'hourly' - # Specify if the library should use chunks or full dumps + # Download data as annual chunks chunked: bool = True # The time zone @@ -118,13 +120,15 @@ def _set_time( # Set start date self._start = timezone.localize( - start, is_dst=None).astimezone( - pytz.utc) + start, + is_dst=None + ).astimezone(pytz.utc) # Set end date self._end = timezone.localize( - end, is_dst=None).astimezone( - pytz.utc) + end, + is_dst=None + ).astimezone(pytz.utc) else: @@ -134,21 +138,25 @@ def _set_time( # Set end date self._end = end + self._annual_steps = [ + ( + self._start + timedelta(days=365 * i) + ).year for i in range( + self._end.year - self._start.year + 1 + ) + ] + def _load( self, station: str, - year: str = None + file: str ) -> None: """ Load file from Meteostat """ - # File name - file = 'hourly/' + ('full' if self._model else 'obs') + '/' + \ - (year + '/' if year else '') + station + '.csv.gz' - # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -164,7 +172,8 @@ def _load( file, self._columns, 
self._types, - self._parse_dates) + self._parse_dates + ) # Validate Series df = validate_series(df, station) @@ -176,8 +185,12 @@ def _load( # Localize time column if self._timezone is not None and len(df.index) > 0: df = df.tz_localize( - 'UTC', level='time').tz_convert( - self._timezone, level='time') + 'UTC', + level='time' + ).tz_convert( + self._timezone, + level='time' + ) # Filter time period and append to DataFrame if self._start and self._end: @@ -198,29 +211,40 @@ def _get_data(self) -> None: if len(self._stations) > 0: - # List of datasets - datasets = [] - - for station in self._stations: - - if self.chunked and self._start and self._end: - - for year in range(self._start.year, self._end.year + 1): - datasets.append(( - str(station), - str(year) - )) - - else: + # Create list of datasets + if self.chunked: + datasets = [ + ( + str(station), + generate_endpoint_path( + Granularity.HOURLY, + station, + self._model, + year + ) + ) + for station in self._stations for year in self._annual_steps + ] - datasets.append(( + else: + datasets = [ + ( str(station), - None - )) + generate_endpoint_path( + Granularity.HOURLY, + station, + self._model + ) + ) + for station in self._stations + ] - # Data Processing return processing_handler( - datasets, self._load, self.processes, self.threads) + datasets, + self._load, + self.processes, + self.threads + ) return pd.DataFrame(columns=[*self._types]) diff --git a/meteostat/interface/monthly.py b/meteostat/interface/monthly.py index 92f7600..3c9e69a 100644 --- a/meteostat/interface/monthly.py +++ b/meteostat/interface/monthly.py @@ -12,10 +12,12 @@ from typing import Union import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import processing_handler, load_handler +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.utilities.validations import 
validate_series from meteostat.utilities.aggregations import degree_mean, weighted_average +from meteostat.enumerations.granularity import Granularity from meteostat.interface.timeseries import Timeseries from meteostat.interface.point import Point @@ -94,11 +96,14 @@ def _load( """ # File name - file = 'monthly/' + ('full' if self._model else 'obs') + \ - '/' + station + '.csv.gz' + file = generate_endpoint_path( + Granularity.MONTHLY, + station, + self._model + ) # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -143,15 +148,11 @@ def _get_data(self) -> None: if len(self._stations) > 0: # List of datasets - datasets = [] - - for station in self._stations: - datasets.append(( - str(station), - )) - + datasets = [(str(station),) for station in self._stations] # Data Processing - return processing_handler(datasets, self._load, self.processes, self.threads) + return processing_handler( + datasets, self._load, self.processes, self.threads + ) # Empty DataFrame return pd.DataFrame(columns=[*self._types]) diff --git a/meteostat/interface/normals.py b/meteostat/interface/normals.py index d7d21dc..e526bc7 100644 --- a/meteostat/interface/normals.py +++ b/meteostat/interface/normals.py @@ -13,10 +13,12 @@ from datetime import datetime import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache +from meteostat.enumerations.granularity import Granularity from meteostat.core.loader import processing_handler, load_handler from meteostat.core.warn import warn from meteostat.utilities.aggregations import weighted_average +from meteostat.utilities.endpoint import generate_endpoint_path from meteostat.interface.base import Base from meteostat.interface.point import Point @@ -78,10 
+80,13 @@ def _load( """ # File name - file = f'normals/{station}.csv.gz' + file = generate_endpoint_path( + Granularity.NORMALS, + station + ) # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): @@ -97,11 +102,13 @@ def _load( file, self._columns, self._types, - None) + None + ) if df.index.size > 0: # Add weather station ID + # pylint: disable=unsupported-assignment-operation df['station'] = station # Set index @@ -130,12 +137,7 @@ def _get_data(self) -> None: if len(self._stations) > 0: # List of datasets - datasets = [] - - for station in self._stations: - datasets.append(( - str(station), - )) + datasets = [(str(station),) for station in self._stations] # Data Processing return processing_handler( diff --git a/meteostat/interface/stations.py b/meteostat/interface/stations.py index dc70875..16f8503 100644 --- a/meteostat/interface/stations.py +++ b/meteostat/interface/stations.py @@ -13,7 +13,7 @@ from typing import Union import numpy as np import pandas as pd -from meteostat.core.cache import get_file_path, file_in_cache +from meteostat.core.cache import get_local_file_path, file_in_cache from meteostat.core.loader import load_handler from meteostat.interface.base import Base @@ -76,7 +76,7 @@ def _load(self) -> None: file = 'stations/slim.csv.gz' # Get local file path - path = get_file_path(self.cache_dir, self.cache_subdir, file) + path = get_local_file_path(self.cache_dir, self.cache_subdir, file) # Check if file in cache if self.max_age > 0 and file_in_cache(path, self.max_age): diff --git a/meteostat/utilities/endpoint.py b/meteostat/utilities/endpoint.py new file mode 100644 index 0000000..e1e0cb6 --- /dev/null +++ b/meteostat/utilities/endpoint.py @@ -0,0 +1,37 @@ +""" +Utilities - Endpoint Helpers + +Meteorological data provided by Meteostat 
(https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. +""" + +from typing import Union +from meteostat.enumerations.granularity import Granularity + + +def generate_endpoint_path( + granularity: Granularity, + station: str, + model: bool = True, + year: Union[int, None] = None +) -> str: + """ + Generate Meteostat Bulk path + """ + + # Base path + path = f"{granularity.value}/" + + if granularity != Granularity.NORMALS: + if model: + path += 'full/' + else: + path += 'obs/' + + if granularity == Granularity.HOURLY and year: + path += f"{year}/" + + return f"{path}{station}.csv.gz" diff --git a/setup.py b/setup.py index b103257..bcd9828 100644 --- a/setup.py +++ b/setup.py @@ -15,7 +15,7 @@ # Setup setup( name='meteostat', - version='1.5.10', + version='1.5.11', author='Meteostat', author_email='info@meteostat.net', description='Access and analyze historical weather and climate data with Python.', diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/core/test_cache.py b/tests/core/test_cache.py new file mode 100644 index 0000000..1a20e38 --- /dev/null +++ b/tests/core/test_cache.py @@ -0,0 +1,37 @@ +""" +Cache Tests + +Meteorological data provided by Meteostat (https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. 
+""" + +from meteostat.core.cache import get_local_file_path + +EXPECTED_FILE_PATH = "cache/hourly/6dfc35c47756e962ef055d1049f1f8ec" + + +def test_get_local_file_path(): + """ + Test local file path + """ + + assert get_local_file_path( + 'cache', + 'hourly', + '10101' + ) == EXPECTED_FILE_PATH + + +def test_get_local_file_path_chunked(): + """ + Test local file path II + """ + + assert get_local_file_path( + 'cache', + 'hourly', + '10101_2022' + ) != EXPECTED_FILE_PATH diff --git a/tests/utilities/__init__.py b/tests/utilities/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/utilities/test_endpoint.py b/tests/utilities/test_endpoint.py new file mode 100644 index 0000000..b5951a6 --- /dev/null +++ b/tests/utilities/test_endpoint.py @@ -0,0 +1,84 @@ +""" +Endpoint Utility Tests + +Meteorological data provided by Meteostat (https://dev.meteostat.net) +under the terms of the Creative Commons Attribution-NonCommercial +4.0 International Public License. + +The code is licensed under the MIT license. 
+""" + +from meteostat.utilities.endpoint import generate_endpoint_path +from meteostat.enumerations.granularity import Granularity + + +def test_generate_endpoint_path_normals(): + """ + Generate endpoint path for climate normals + """ + + assert generate_endpoint_path( + Granularity.NORMALS, + '10286' + ) == 'normals/10286.csv.gz' + + +def test_generate_endpoint_path_hourly_full(): + """ + Generate endpoint path for full hourly data + """ + + assert generate_endpoint_path( + Granularity.HOURLY, + '10286', + True + ) == 'hourly/full/10286.csv.gz' + + +def test_generate_endpoint_path_hourly_full_obs(): + """ + Generate endpoint path for hourly observation data + """ + + assert generate_endpoint_path( + Granularity.HOURLY, + '10286', + False + ) == 'hourly/obs/10286.csv.gz' + + +def test_generate_endpoint_path_hourly_subset(): + """ + Generate endpoint path for hourly chunk + """ + + assert generate_endpoint_path( + Granularity.HOURLY, + '10286', + True, + 2021 + ) == 'hourly/full/2021/10286.csv.gz' + + +def test_generate_endpoint_path_daily_subset(): + """ + Generate endpoint path for full daily data + """ + + assert generate_endpoint_path( + Granularity.DAILY, + '10286', + True + ) == 'daily/full/10286.csv.gz' + + +def test_generate_endpoint_path_monthly_subset(): + """ + Generate endpoint path for full monthly data + """ + + assert generate_endpoint_path( + Granularity.MONTHLY, + '10286', + True + ) == 'monthly/full/10286.csv.gz'