From f1cb079a020dd253d02aabb49f74cfbe17b53263 Mon Sep 17 00:00:00 2001 From: "randy.pierce" Date: Mon, 29 Jan 2024 16:05:31 -0700 Subject: [PATCH 1/2] better handle vegetation_type when vegetation_type is missing from grib file and also convert vegetation_type to string --- src/vxingest/grib2_to_cb/grib_builder.py | 47 ++++++++++- .../grib2_to_cb/grib_builder_parent.py | 80 +++++++++---------- .../test_unit_metar_obs_netcdf.py | 7 ++ 3 files changed, 88 insertions(+), 46 deletions(-) diff --git a/src/vxingest/grib2_to_cb/grib_builder.py b/src/vxingest/grib2_to_cb/grib_builder.py index 11c1f2e..cf90ccb 100644 --- a/src/vxingest/grib2_to_cb/grib_builder.py +++ b/src/vxingest/grib2_to_cb/grib_builder.py @@ -60,6 +60,7 @@ def __init__( self.cadence = ingest_document["validTimeInterval"] self.template = ingest_document["template"] self.subset = self.template["subset"] + self.land_use_types = None # self.do_profiling = True # set to True to enable build_document profiling self.do_profiling = False # set to True to enable build_document profiling @@ -167,6 +168,8 @@ def handle_ceiling(self, params_dict): "Orography" ].values surface_values = [] + if self.ds_translate_item_variables_map["Cloud ceiling"] is None: + return None ceil_var_values = self.ds_translate_item_variables_map[ "Cloud ceiling" ].values @@ -285,6 +288,9 @@ def handle_wind_speed(self, params_dict): [int]: translated wind speed """ # interpolated value cannot use rounded gridpoints + if self.ds_translate_item_variables_map["10 metre U wind component"] is None: + return None + values = self.ds_translate_item_variables_map[ "10 metre U wind component" ].values @@ -335,6 +341,8 @@ def handle_wind_direction(self, params_dict): Returns: [int]: wind direction """ + if self.ds_translate_item_variables_map["10 metre U wind component"] is None: + return None u_values = self.ds_translate_item_variables_map[ "10 metre U wind component" ].values @@ -396,6 +404,8 @@ def handle_wind_dir_u(self, params_dict): Returns: float: wind direction U component """ + if self.ds_translate_item_variables_map["10 metre U wind component"] is None: + return None u_values = self.ds_translate_item_variables_map[ "10 metre U wind component" ].values @@ -419,6 +429,8 @@ def handle_wind_dir_v(self, params_dict): Returns: float: wind direction V component """ + if self.ds_translate_item_variables_map["10 metre V wind component"] is None: + return None v_values = self.ds_translate_item_variables_map[ "10 metre V wind component" ].values @@ -442,6 +454,8 @@ def handle_specific_humidity(self, params_dict): Returns: float: specific humidity """ + if self.ds_translate_item_variables_map["2 metre specific humidity"] is None: + return None values = self.ds_translate_item_variables_map[ "2 metre specific humidity" ].values @@ -462,17 +476,46 @@ def handle_vegetation_type(self, params_dict): Returns: string: vegetation_type """ + if self.ds_translate_item_variables_map["Vegetation Type"] is None: + return None values = self.ds_translate_item_variables_map["Vegetation Type"].values vegetation_type = [] + # I don't know which land_use_type to use i.e. USGS,MODIFIED_IGBP_MODIS_NOAH,NLCD40,USGS-RUC, or MODI-RUC + # or which land_use_type_index i.e. "0" + land_use_type = "USGS" + land_use_type_index = "0" + # using lazy initialization get the land use types from the metadata, if not there set them to {} + if self.land_use_types is None: + try: + # get the land use types from the metadata + land_use_metadata = ( + self.load_spec["collection"] + .get("MD:LAND_USE_TYPES:COMMON:V01") + .content_as[dict] + ) + self.land_use_types = land_use_metadata[land_use_type][ + land_use_type_index + ] + except Exception as _e: + logger.error( + "%s handle_vegetation_type: Exception error: %s", + self.__class__.__name__, + str(_e), + ) + self.land_use_types = {} for station in self.domain_stations: geo_index = get_geo_index( self.ds_translate_item_variables_map["fcst_valid_epoch"], station["geo"] ) x_gridpoint = station["geo"][geo_index]["x_gridpoint"] y_gridpoint = station["geo"][geo_index]["y_gridpoint"] - vegetation_type.append( - self.interp_grid_box(values, y_gridpoint, x_gridpoint) + vegetation_type_USGS_index = str( + round(self.interp_grid_box(values, y_gridpoint, x_gridpoint)) + ) + vegetation_type_str = self.land_use_types.get( + vegetation_type_USGS_index, None ) + vegetation_type.append(vegetation_type_str) return vegetation_type def getName(self, params_dict): diff --git a/src/vxingest/grib2_to_cb/grib_builder_parent.py b/src/vxingest/grib2_to_cb/grib_builder_parent.py index 2c2b2a2..93bb462 100644 --- a/src/vxingest/grib2_to_cb/grib_builder_parent.py +++ b/src/vxingest/grib2_to_cb/grib_builder_parent.py @@ -665,70 +665,62 @@ def build_document(self, queue_element): self.ds_translate_item_variables_map = { "2 metre temperature": ds_hgt_2_metre_temperature.variables[ list(ds_hgt_2_metre_temperature.data_vars.keys())[0] - if len(list(ds_hgt_2_metre_temperature.data_vars.keys())) > 0 - else None - ], + ] + if len(list(ds_hgt_2_metre_temperature.data_vars.keys())) > 0 + else None, "2 metre dewpoint temperature": ds_hgt_2_metre_dewpoint_temperature.variables[ list(ds_hgt_2_metre_dewpoint_temperature.data_vars.keys())[0] - if len( - list(ds_hgt_2_metre_dewpoint_temperature.data_vars.keys()) - ) - > 0 - else None - ], + ] + if len(list(ds_hgt_2_metre_dewpoint_temperature.data_vars.keys())) + > 0 + else None, "2 metre relative humidity": ds_hgt_2_metre_relative_humidity.variables[ list(ds_hgt_2_metre_relative_humidity.data_vars.keys())[0] - if len(list(ds_hgt_2_metre_relative_humidity.data_vars.keys())) - > 0 - else None - ], + ] + if len(list(ds_hgt_2_metre_relative_humidity.data_vars.keys())) > 0 + else None, "2 metre specific humidity": ds_hgt_2_metre_specific_humidity.variables[ list(ds_hgt_2_metre_specific_humidity.data_vars.keys())[0] - if len(list(ds_hgt_2_metre_specific_humidity.data_vars.keys())) - > 0 - else None - ], + ] + if len(list(ds_hgt_2_metre_specific_humidity.data_vars.keys())) > 0 + else None, "10 metre U wind component": ds_hgt_10_metre_u_component_of_wind.variables[ list(ds_hgt_10_metre_u_component_of_wind.data_vars.keys())[0] - if len( - list(ds_hgt_10_metre_u_component_of_wind.data_vars.keys()) - ) - > 0 - else None - ], + ] + if len(list(ds_hgt_10_metre_u_component_of_wind.data_vars.keys())) + > 0 + else None, "10 metre V wind component": ds_hgt_10_metre_v_component_of_wind.variables[ list(ds_hgt_10_metre_v_component_of_wind.data_vars.keys())[0] - if len( - list(ds_hgt_10_metre_v_component_of_wind.data_vars.keys()) - ) - > 0 - else None - ], + ] + if len(list(ds_hgt_10_metre_v_component_of_wind.data_vars.keys())) + > 0 + else None, "Surface pressure": ds_surface_pressure.variables[ list(ds_surface_pressure.data_vars.keys())[0] - if len(list(ds_surface_pressure.data_vars.keys())) > 0 - else None - ], + ] + if len(list(ds_surface_pressure.data_vars.keys())) > 0 + else None, "Visibility": ds_surface_visibility.variables[ list(ds_surface_visibility.data_vars.keys())[0] - if len(list(ds_surface_visibility.data_vars.keys())) > 0 - else None - ], + ] + if len(list(ds_surface_visibility.data_vars.keys())) > 0 + else None, "Orography": ds_surface_orog.variables[ list(ds_surface_orog.data_vars.keys())[0] - if len(list(ds_surface_orog.data_vars.keys())) > 0 - else None - ], + ] + if len(list(ds_surface_orog.data_vars.keys())) > 0 + else None, "Cloud ceiling": ds_cloud_ceiling.variables[ list(ds_cloud_ceiling.data_vars.keys())[0] - if len(list(ds_cloud_ceiling.data_vars.keys())) > 0 - else None - ], + ] + if len(list(ds_cloud_ceiling.data_vars.keys())) > 0 + else None, "Vegetation Type": ds_surface_vegetation_type.variables[ list(ds_surface_vegetation_type.data_vars.keys())[0] - if len(list(ds_surface_vegetation_type.data_vars.keys())) > 0 - else None - ], + ] + if len(list(ds_surface_vegetation_type.data_vars.keys())) > 0 + else None, "fcst_valid_epoch": ds_fcst_valid_epoch, "fcst_len": ds_fcst_len, "proj_params": proj_params_dict, diff --git a/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py b/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py index 5ff3592..5418e60 100644 --- a/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py +++ b/tests/vxingest/netcdf_to_cb/test_unit_metar_obs_netcdf.py @@ -156,6 +156,9 @@ def test_vxingest_get_file_list(tmp_path): "url": str(tmp_path / "f_fred_01"), } vx_ingest.collection.upsert("DF:metar:grib2:HRRR_OPS:f_fred_01", df_record) + # make sure the get_file_list is returning the files and ignoring the .prev and .tmp directories + Path.mkdir(tmp_path / ".prev") + Path.mkdir(tmp_path / ".tmp") # order is important to see if the files are getting returned sorted by mtime Path(tmp_path / "f_fred_01").touch() Path(tmp_path / "f_fred_02").touch() @@ -163,6 +166,10 @@ def test_vxingest_get_file_list(tmp_path): Path(tmp_path / "f_fred_05").touch() Path(tmp_path / "f_fred_03").touch() Path(tmp_path / "f_1_fred_01").touch() + Path(tmp_path / ".prev_fred").touch() + Path(tmp_path / ".prev/prev_fred").touch() + Path(tmp_path / ".tmp/tmp_fred").touch() + Path(tmp_path / "f_1_fred_01").touch() Path(tmp_path / "f_2_fred_01").touch() Path(tmp_path / "f_3_fred_01").touch() query = f""" SELECT url, mtime From dfd4f5637d4fd372dba2b9ed678e795eadcdf25c Mon Sep 17 00:00:00 2001 From: "randy.pierce" Date: Tue, 30 Jan 2024 12:39:11 -0700 Subject: [PATCH 2/2] modify comment --- src/vxingest/grib2_to_cb/grib_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vxingest/grib2_to_cb/grib_builder.py b/src/vxingest/grib2_to_cb/grib_builder.py index cf90ccb..ad0e612 100644 --- a/src/vxingest/grib2_to_cb/grib_builder.py +++ b/src/vxingest/grib2_to_cb/grib_builder.py @@ -481,7 +481,7 @@ def handle_vegetation_type(self, params_dict): values = self.ds_translate_item_variables_map["Vegetation Type"].values vegetation_type = [] # I don't know which land_use_type to use i.e. USGS,MODIFIED_IGBP_MODIS_NOAH,NLCD40,USGS-RUC, or MODI-RUC - # or which land_use_type_index i.e. "0" + # or which land_use_type_index i.e. "0". Jeff said to use these values but we should better understand why. land_use_type = "USGS" land_use_type_index = "0" # using lazy initialization get the land use types from the metadata, if not there set them to {}