Skip to content

Commit

Permalink
Merge pull request #1102 from metno/pyaro-multi-ts
Browse files Browse the repository at this point in the history
Adds possibility of multiple ts per station
  • Loading branch information
jgriesfeller authored May 6, 2024
2 parents 7662eff + 46eb8a3 commit eaecbe0
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 20 deletions.
58 changes: 43 additions & 15 deletions pyaerocom/io/pyaro/read_pyaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
logger = logging.getLogger(__name__)


Metadata = NewType("Metadata", dict[str, dict[str, Union[str, list[str]]]])
MetadataEntry = NewType("MetadataEntry", dict[str, Union[str, list[str]]])
Metadata = NewType("Metadata", dict[str, MetadataEntry])


class ReadPyaro(ReadUngriddedBase):
Expand Down Expand Up @@ -132,22 +133,23 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat
vars = list(pyaro_data.keys())
total_size = sum(list(var_size.values()))
units = {var: {"units": pyaro_data[var]._units} for var in pyaro_data}
ts_types: dict[str, Optional[TsType]] = {k: None for k in stations}
ts_types: dict[str, dict[str, Optional[TsType]]] = {}

# Object necessary for ungriddeddata
var_idx = {var: i for i, var in enumerate(vars)}
metadata = self._make_ungridded_metadata(stations=stations, var_idx=var_idx, units=units)
meta_idx = {s: {v: [] for v in vars} for s in metadata}
metadata: Metadata = {}
meta_idx: dict = {} # = {s: {v: [] for v in vars} for s in metadata}
data_array = np.zeros([total_size, 12])

# Helper objects
station_idx = {metadata[idx]["station_name"]: idx for idx in metadata}
station_idx = {}

idx = 0
metadata_idx = 0
for var, var_data in pyaro_data.items():
size = var_size[var]
for i in range(
1, size
0, size
): # The 1 start is a temp fix for the empty first row of the current Data implementation from pyaro
data_line = var_data[i]
current_station = data_line["stations"]
Expand All @@ -159,18 +161,25 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat

# Finds the ts_type of the stations. Raises error of same station has different types
start, stop = data_line["start_times"], data_line["end_times"]
ts_type = self._calculate_ts_type(start, stop)
if ts_types[current_station] is None:
ts_types[current_station] = ts_type
elif ts_types[current_station] != ts_type:
msg = f"TS type {ts_type} of station {current_station} is different from already found value {ts_types[current_station]}"
logger.error(msg)
raise ValueError(msg)
ts_type = str(self._calculate_ts_type(start, stop))

if current_station not in station_idx:
station_idx[current_station] = {}

if ts_type not in station_idx[current_station]:
station_idx[current_station][ts_type] = metadata_idx
metadata[metadata_idx] = self._make_single_ungridded_metadata(
stations[current_station], current_station, ts_type, units
)
metadata_idx += 1

data_array[idx, :] = ungriddeddata_line

# Fills meta_idx
meta_idx[station_idx[current_station]][var].append(idx)
if station_idx[current_station][ts_type] not in meta_idx:
meta_idx[station_idx[current_station][ts_type]] = {v: [] for v in vars}

meta_idx[station_idx[current_station][ts_type]][var].append(idx)

idx += 1

Expand All @@ -182,7 +191,7 @@ def _convert_to_ungriddeddata(self, pyaro_data: dict[str, Data]) -> UngriddedDat

self.data._data = data_array
self.data.meta_idx = new_meta_idx
self.data.metadata = self._add_ts_type_to_metadata(metadata, ts_types)
self.data.metadata = metadata
self.data.var_idx = var_idx

return self.data
Expand All @@ -209,6 +218,25 @@ def _get_metadata_from_pyaro(self, station: Station) -> list[dict[str, str]]:

return metadata

def _make_single_ungridded_metadata(
    self, station: Station, name: str, ts_type: Optional[TsType], units: dict[str, str]
) -> MetadataEntry:
    """Build the metadata entry for one station / ts_type combination.

    The entry is assembled from the reader configuration, the variables
    reported by ``get_variables()``, the positional and descriptive fields
    of ``station``, and finally whatever ``_get_metadata_from_pyaro``
    returns for that station.

    Parameters
    ----------
    station
        Station record; indexed by ``"latitude"``, ``"longitude"``,
        ``"altitude"``, ``"long_name"`` and ``"country"``.
    name
        Identifier stored under ``station_id``.
    ts_type
        Time resolution of the entry. ``None`` is stored as the string
        ``"undefined"``; any other value is stored as ``str(ts_type)``.
    units
        Stored unchanged (same object, not copied) under ``var_info``.

    Returns
    -------
    MetadataEntry
        The assembled metadata dictionary.
    """
    station_meta = {
        "data_id": self.config.name,
        "variables": list(self.get_variables()),
        "var_info": units,
        "latitude": station["latitude"],
        "longitude": station["longitude"],
        "altitude": station["altitude"],
        "station_name": station["long_name"],
        "station_id": name,
        "country": station["country"],
        "ts_type": "undefined" if ts_type is None else str(ts_type),
    }

    # Applied last, so keys coming from pyaro override the ones set above.
    extra_meta = self._get_metadata_from_pyaro(station)
    station_meta.update(extra_meta)

    return MetadataEntry(station_meta)

def _make_ungridded_metadata(
self, stations: dict[str, Station], var_idx: dict[str, int], units: dict[str, str]
) -> Metadata:
Expand Down
7 changes: 5 additions & 2 deletions tests/fixtures/pyaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,12 @@ def make_csv_test_file(tmp_path: Path) -> Path:
with open(file, "w") as f:
for s in species:
for i, station in enumerate(stations):
for date in dates:
for j, date in enumerate(dates):
delta_t = ["1h", "3D", "2D", "2h"][
j % 4
] # Rotates over the freqs in a deterministic fashion
f.write(
f"{s}, {station}, {coords[i][1]}, {coords[i][0]}, {np.random.normal(10, 5)}, Gg, {date}, {date+pd.Timedelta('1D')}, {countries[i]} \n"
f"{s}, {station}, {coords[i][1]}, {coords[i][0]}, {np.random.normal(10, 5)}, Gg, {date}, {date+pd.Timedelta(delta_t)}, {countries[i]} \n"
)

return file
Expand Down
8 changes: 5 additions & 3 deletions tests/io/pyaro/test_read_pyaro.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ def test_variables(pyaro_testdata):


def test_pyarotoungriddeddata_reading(pyaro_testdata):
from math import ceil

obj = pyaro_testdata.converter
data = obj.read()
assert isinstance(data, UngriddedData)
Expand All @@ -39,16 +41,16 @@ def test_pyarotoungriddeddata_reading(pyaro_testdata):
assert len(data.unique_station_names) == 2

# Tests the found stations
all_stations = data.to_station_data_all("concso4")
all_stations = data.to_station_data_all("concso4", ts_type_preferred="daily")

assert all_stations["stats"][0]["ts_type"] == "daily"
assert all_stations["stats"][0]["ts_type"] in ["hourly", "3daily", "2hourly", "2daily"]
assert all_stations["stats"][0]["country"] == "NO"

# Tests the dates
start = pd.to_datetime("01.01.2015", dayfirst=True)
end = pd.to_datetime("31.12.2015", dayfirst=True)
dates = pd.date_range(start, end, freq="D")
assert (all_stations["stats"][0].dtime == dates).all()
assert len(all_stations["stats"][0].dtime) == ceil(len(dates) / 2)


def test_pyarotoungriddeddata_reading_kwargs(pyaro_testdata_kwargs):
Expand Down

0 comments on commit eaecbe0

Please sign in to comment.