Skip to content

Commit

Permalink
Parquet, Units, Cache Refactoring
Browse files: browse the repository at this point in the history
  • Loading branch information
clampr committed Nov 20, 2020
1 parent 2fd002e commit 330f5ac
Show file tree
Hide file tree
Showing 16 changed files with 321 additions and 122 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ The Meteostat Python library provides a simple API for accessing open weather an
## Installation

The Meteostat Python package is available through [PyPI](https://pypi.org/project/meteostat/):

```
pip install meteostat
```
Expand Down Expand Up @@ -38,7 +39,7 @@ data = Daily(station, start = datetime(2018, 1, 1), end = datetime(2018, 12, 31)
data = data.fetch()

# Plot line chart including average, minimum and maximum temperature
data.plot(x = 'time', y = ['tavg', 'tmin', 'tmax'], kind = 'line')
data.plot(y = ['tavg', 'tmin', 'tmax'], kind = 'line')
plt.show()
```

Expand Down
3 changes: 2 additions & 1 deletion examples/daily/aggregation-regional.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from datetime import datetime
import matplotlib.pyplot as plt

stations = Stations(country = 'US', daily = datetime(2005, 1, 1)).sample(5).fetch()
stations = Stations(country = 'US', daily = datetime(2005, 1, 1))
stations = stations.fetch(limit = 5, sample = True)

data = Daily(stations, max_threads = 5, start = datetime(1980, 1, 1), end = datetime(2019, 12, 31))
data = data.normalize().aggregate(freq = '1Y', spatial = True).fetch()
Expand Down
4 changes: 2 additions & 2 deletions examples/daily/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from datetime import datetime
import matplotlib.pyplot as plt

data = Daily(['10637'], start = datetime(2018, 1, 1), end = datetime(2018, 12, 31))
data = Daily('10637', start = datetime(2018, 1, 1), end = datetime(2018, 12, 31))
data = data.normalize().aggregate(freq = '1W').fetch()

data.plot(x = 'time', y = ['tavg', 'tmin', 'tmax'], kind = 'line')
data.plot(y = ['tavg', 'tmin', 'tmax'], kind = 'line')
plt.show()
6 changes: 4 additions & 2 deletions examples/daily/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from datetime import datetime
import matplotlib.pyplot as plt

# Hourly
# Get a weather station
stations = Stations(lat = 49.2497, lon = -123.1193)
station = stations.fetch(1)

# Get daily data
data = Daily(station, start = datetime(2018, 1, 1), end = datetime(2018, 12, 31))
data = data.fetch()

data.plot(x = 'time', y = ['tavg', 'tmin', 'tmax'], kind = 'line')
# Plot chart
data.plot(y = ['tavg', 'tmin', 'tmax'], kind = 'line')
plt.show()
2 changes: 1 addition & 1 deletion examples/daily/compare-aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

# Plot data
fig, ax = plt.subplots(figsize = (8, 6))
data.groupby(['station']).plot(x = 'time', y = 'tmax', kind = 'line', legend = True, ax = ax, style='.-', ylabel = 'Max. Annual Temperature (°C)', title = 'Max. Temperature Report')
data.unstack('station')['tmax'].plot(kind = 'line', legend = True, ax = ax, style='.-', ylabel = 'Max. Annual Temperature (°C)', title = 'Max. Temperature Report')
plt.legend(names)

# Show plot
Expand Down
2 changes: 1 addition & 1 deletion examples/daily/compare.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
data = data.fetch()

# Plot data
ax = data.set_index('time').groupby(['station'])['tavg'].plot(kind = 'line', legend = True, ylabel = 'Avg. Daily Temperature °C', title = 'Average Temperature Report for 2019')
data.unstack('station')['tavg'].plot(kind = 'line', legend = True, ylabel = 'Avg. Daily Temperature °C', title = 'Average Temperature Report for 2019')
plt.legend(names)

# Show plot
Expand Down
6 changes: 4 additions & 2 deletions examples/hourly/chart.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from datetime import datetime
import matplotlib.pyplot as plt

# Hourly
# Get a weather station
stations = Stations(lat = 50, lon = 8)
station = stations.fetch(1)

# Get hourly data
data = Hourly(station, start = datetime(2010, 1, 1), end = datetime(2020, 1, 1, 23, 59))
data = data.fetch()

data.plot(x = 'time', y = ['temp'], kind = 'line')
# Plot chart
data.plot(y = 'temp', kind = 'line')
plt.show()
7 changes: 3 additions & 4 deletions examples/hourly/interpolation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
import matplotlib.pyplot as plt

# Hourly
station = ['10637']

data = Hourly(station, start = datetime(2020, 8, 1), end = datetime(2020, 8, 4, 23, 59))
data = Hourly('10730', start = datetime(2020, 8, 1), end = datetime(2020, 8, 4, 23, 59))
data = data.normalize()
data = data.interpolate().fetch()
data.plot(x = 'time', y = ['temp'], kind = 'line')
data = data.interpolate(limit = 6).fetch()
data.plot(y = 'temp', kind = 'line')
plt.show()
5 changes: 4 additions & 1 deletion examples/hourly/simple.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from meteostat import Stations, Hourly
from meteostat.units import fahrenheit, direction, condition
from datetime import datetime

# Hourly
stations = Stations(lat = 50, lon = 8)
station = stations.fetch(1)

data = Hourly(station, start = datetime(2020, 1, 1), end = datetime(2020, 1, 1, 23, 59))
print(data.fetch())
data = data.convert({ 'temp': fahrenheit, 'wdir': direction, 'coco': condition })
data = data.fetch()
print(data)
5 changes: 4 additions & 1 deletion meteostat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
"""
█▀▄▀█ █▀▀ ▀█▀ █▀▀ █▀█ █▀ ▀█▀ ▄▀█ ▀█▀
█░▀░█ ██▄ ░█░ ██▄ █▄█ ▄█ ░█░ █▀█ ░█░
A Python library for accessing open weather and climate data
Meteorological data provided by Meteostat (https://dev.meteostat.net)
Expand All @@ -9,7 +12,7 @@
"""

__appname__ = "meteostat"
__version__ = "0.2.0"
__version__ = "0.3.0"

from .core import Core
from .stations import Stations
Expand Down
34 changes: 18 additions & 16 deletions meteostat/core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
"""
█▀▄▀█ █▀▀ ▀█▀ █▀▀ █▀█ █▀ ▀█▀ ▄▀█ ▀█▀
█░▀░█ ██▄ ░█░ ██▄ █▄█ ▄█ ░█░ █▀█ ░█░
Core Class
Base class that provides methods which are used across the package
Expand All @@ -11,6 +14,7 @@
"""

import os
import errno
import time
import hashlib
import pandas as pd
Expand All @@ -19,9 +23,6 @@

class Core:

# Temporary class storage
_temp = None

# Base URL of the Meteostat bulk data interface
_endpoint = 'https://bulk.meteostat.net/'

Expand All @@ -40,19 +41,22 @@ def _get_file_path(self, path = False):
# Get file ID
file_id = hashlib.md5(path.encode('utf-8')).hexdigest()
# Return path
return self._cache_dir + os.sep + file_id
return self._cache_dir + os.sep + self._cache_subdir + os.sep + file_id
else:
# Return false
return False

def _file_in_cache(self, file_path = False):

# Make sure the cache directory exists
if not os.path.exists(self._cache_dir):
if not os.path.exists(self._cache_dir + os.sep + self._cache_subdir):
try:
os.makedirs(self._cache_dir)
except:
raise Exception('Cannot create cache directory')
os.makedirs(self._cache_dir + os.sep + self._cache_subdir)
except OSError as e:
if e.errno == errno.EEXIST:
pass
else:
raise Exception('Cannot create cache directory')

if file_path:
# Return the file path if it exists
Expand All @@ -75,17 +79,15 @@ def _download_file(self, path = None):
if path[-6:-3] == 'csv':

# Read CSV file from Meteostat endpoint
try:
df = pd.read_csv(self._endpoint + path, compression = 'gzip', names = self._columns, parse_dates = self._parse_dates)
except:
return False
df = pd.read_csv(self._endpoint + path, compression = 'gzip', names = self._columns, dtype = self._types, parse_dates = self._parse_dates)

# Set weather station ID
if self.__class__.__name__ == 'Hourly' or self.__class__.__name__ == 'Daily':
df['station'] = path[-12:-7]
df = df.set_index(['station', 'time'])

# Save as Feather
df.to_feather(local_path)
# Save as Parquet
df.to_parquet(local_path)

return {
'path': local_path,
Expand Down Expand Up @@ -131,10 +133,10 @@ def clear_cache(self, max_age = None):
now = time.time()

# Go through all files
for file in os.listdir(self._cache_dir):
for file in os.listdir(self._cache_dir + os.sep + self._cache_subdir):

# Get full path
path = os.path.join(self._cache_dir, file)
path = os.path.join(self._cache_dir + os.sep + self._cache_subdir, file)

# Check if file is older than max_age
if now - os.path.getmtime(path) > max_age and os.path.isfile(path):
Expand Down
Loading

0 comments on commit 330f5ac

Please sign in to comment.