Add optional HTTP(S) proxy support to the Meteostat bulk data loader.

load_handler() gains a `proxy` argument. When it is set, a ProxyHandler is
registered with the urllib opener that downloads the gzipped CSV file, so
the request is routed through the proxy. Base exposes a class-level `proxy`
attribute (default: None) which the interface classes forward to
load_handler().

NOTE(review): line breaks, indentation and blank context lines in this patch
were reconstructed from a whitespace-collapsed copy - verify the context
lines against the target tree before applying.

diff --git a/meteostat/core/loader.py b/meteostat/core/loader.py
index f552740..aed2d7c 100644
--- a/meteostat/core/loader.py
+++ b/meteostat/core/loader.py
@@ -8,6 +8,9 @@
 The code is licensed under the MIT license.
 """
 
+from io import BytesIO
+from gzip import GzipFile
+from urllib.request import urlopen, Request, ProxyHandler, build_opener
 from urllib.error import HTTPError
 from multiprocessing import Pool
 from multiprocessing.pool import ThreadPool
@@ -70,28 +73,36 @@ def load_handler(
     columns: list,
     types: Union[dict, None],
     parse_dates: list,
-    coerce_dates: bool = False,
+    proxy: Union[str, None] = None,
+    coerce_dates: bool = False
 ) -> pd.DataFrame:
     """
     Load a single CSV file into a DataFrame
     """
 
     try:
+        handlers = []
+
+        # Route HTTP(S) requests through the proxy, if one is configured
+        if proxy:
+            handlers.append(ProxyHandler({'http': proxy, 'https': proxy}))
 
         # Read CSV file from Meteostat endpoint
-        df = pd.read_csv(
-            endpoint + path,
-            compression="gzip",
-            names=columns,
-            dtype=types,
-            parse_dates=parse_dates,
-        )
-
-        # Force datetime conversion
-        if coerce_dates:
-            df.iloc[:, parse_dates] = df.iloc[:, parse_dates].apply(
-                pd.to_datetime, errors="coerce"
-            )
+        with build_opener(*handlers).open(Request(endpoint + path)) as response:
+            # Decompress the content
+            with GzipFile(fileobj=BytesIO(response.read()), mode='rb') as file:
+                df = pd.read_csv(
+                    file,
+                    names=columns,
+                    dtype=types,
+                    parse_dates=parse_dates,
+                )
+
+                # Force datetime conversion
+                if coerce_dates:
+                    df.iloc[:, parse_dates] = df.iloc[:, parse_dates].apply(
+                        pd.to_datetime, errors="coerce"
+                    )
 
     except (FileNotFoundError, HTTPError):
 
diff --git a/meteostat/interface/base.py b/meteostat/interface/base.py
index 7e33009..bb1a423 100644
--- a/meteostat/interface/base.py
+++ b/meteostat/interface/base.py
@@ -20,6 +20,9 @@ class Base:
     # Base URL of the Meteostat bulk data interface
     endpoint: str = "https://bulk.meteostat.net/v2/"
 
+    # Proxy URL for the Meteostat bulk data interface
+    proxy: str = None
+
     # Location of the cache directory
     cache_dir: str = os.path.expanduser("~") + os.sep + ".meteostat" + os.sep + "cache"
 
diff --git a/meteostat/interface/meteodata.py b/meteostat/interface/meteodata.py
index e6c372c..224ad8a 100644
--- a/meteostat/interface/meteodata.py
+++ b/meteostat/interface/meteodata.py
@@ -57,7 +57,12 @@ def _load_data(self, station: str, year: Union[int, None] = None) -> None:
 
             # Get data from Meteostat
             df = load_handler(
-                self.endpoint, file, self._columns, self._types, self._parse_dates
+                self.endpoint,
+                file,
+                self._columns,
+                self._types,
+                self._parse_dates,
+                self.proxy
             )
 
             # Validate and prepare data for further processing
diff --git a/meteostat/interface/stations.py b/meteostat/interface/stations.py
index 801e37b..41e8286 100644
--- a/meteostat/interface/stations.py
+++ b/meteostat/interface/stations.py
@@ -88,7 +88,13 @@ def _load(self) -> None:
 
             # Get data from Meteostat
             df = load_handler(
-                self.endpoint, file, self._columns, self._types, self._parse_dates, True
+                self.endpoint,
+                file,
+                self._columns,
+                self._types,
+                self._parse_dates,
+                self.proxy,
+                True
             )
 
             # Add index
diff --git a/meteostat/interface/timeseries.py b/meteostat/interface/timeseries.py
index e1967d2..b89c875 100644
--- a/meteostat/interface/timeseries.py
+++ b/meteostat/interface/timeseries.py
@@ -70,6 +70,7 @@ def _load_flags(self, station: str, year: Union[int, None] = None) -> None:
                 self._columns,
                 {key: "string" for key in self._columns[self._first_met_col :]},
                 self._parse_dates,
+                self.proxy
             )
 
             # Validate Series