From 598baf3cf803bec92d7ed21efa0a25852cb32e00 Mon Sep 17 00:00:00 2001 From: dmey Date: Sun, 14 Aug 2022 19:49:19 +0200 Subject: [PATCH 1/2] cast to float64 --- cfgrib/dataset.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py index c8ea895..a54d956 100644 --- a/cfgrib/dataset.py +++ b/cfgrib/dataset.py @@ -345,11 +345,11 @@ class OnDiskArray: ) missing_value: float geo_ndim: int = attr.attrib(default=1, repr=False) - dtype = np.dtype("float32") + dtype = np.dtype("float64") def build_array(self) -> np.ndarray: """Helper method used to test __getitem__""" - array = np.full(self.shape, fill_value=np.nan, dtype="float32") + array = np.full(self.shape, fill_value=np.nan, dtype="float64") for header_indexes, message_ids in self.field_id_index.items(): # NOTE: fill a single field as found in the message message = self.index.get_field(message_ids[0]) # type: ignore @@ -363,7 +363,7 @@ def __getitem__(self, item): header_item_list = expand_item(item[: -self.geo_ndim], self.shape) header_item = [{ix: i for i, ix in enumerate(it)} for it in header_item_list] array_field_shape = tuple(len(i) for i in header_item_list) + self.shape[-self.geo_ndim :] - array_field = np.full(array_field_shape, fill_value=np.nan, dtype="float32") + array_field = np.full(array_field_shape, fill_value=np.nan, dtype="float64") for header_indexes, message_ids in self.field_id_index.items(): try: array_field_indexes = [it[ix] for it, ix in zip(header_item, header_indexes)] From 93e7f62dd9ef2cc3d2045fb2e4b88d3005882739 Mon Sep 17 00:00:00 2001 From: Juniper Tyree <50025784+juntyr@users.noreply.github.com> Date: Tue, 30 Jul 2024 12:30:56 +0300 Subject: [PATCH 2/2] Determine dtype from grib messages --- cfgrib/dataset.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/cfgrib/dataset.py b/cfgrib/dataset.py index a54d956..1117ef7 100644 --- a/cfgrib/dataset.py +++ b/cfgrib/dataset.py @@ -345,11 +345,11 @@ class OnDiskArray: ) missing_value: float geo_ndim: int = attr.attrib(default=1, repr=False) - dtype = np.dtype("float64") + dtype: np.dtype = attr.attrib(default=np.dtype("float32"), repr=False) def build_array(self) -> np.ndarray: """Helper method used to test __getitem__""" - array = np.full(self.shape, fill_value=np.nan, dtype="float64") + array = np.full(self.shape, fill_value=np.nan, dtype=self.dtype) for header_indexes, message_ids in self.field_id_index.items(): # NOTE: fill a single field as found in the message message = self.index.get_field(message_ids[0]) # type: ignore @@ -363,7 +363,7 @@ def __getitem__(self, item): header_item_list = expand_item(item[: -self.geo_ndim], self.shape) header_item = [{ix: i for i, ix in enumerate(it)} for it in header_item_list] array_field_shape = tuple(len(i) for i in header_item_list) + self.shape[-self.geo_ndim :] - array_field = np.full(array_field_shape, fill_value=np.nan, dtype="float64") + array_field = np.full(array_field_shape, fill_value=np.nan, dtype=self.dtype) for header_indexes, message_ids in self.field_id_index.items(): try: array_field_indexes = [it[ix] for it, ix in zip(header_item, header_indexes)] @@ -595,12 +595,22 @@ def build_variable_components( extra_coords_data[coord_name][header_value] = coord_value offsets[tuple(header_indexes)] = message_ids missing_value = data_var_attrs.get("missingValue", messages.MISSING_VAUE_INDICATOR) + if len(offsets) > 0: + # Infer the dtype from the first data message + header_indexes, message_ids = next(iter(offsets.items())) + message = index.get_field(message_ids[0]) + values = get_values_in_order(message, np.empty(shape)[header_indexes].shape) + dtype = values.dtype + else: + # Fall back to a reasonable default dtype + dtype = np.dtype("float32") on_disk_array = OnDiskArray( index=index, shape=shape, field_id_index=offsets, missing_value=missing_value, geo_ndim=len(geo_dims), + dtype=dtype, ) if "time" in coord_vars and "step" in coord_vars: