From 10e62e935fd824db599045aa5d8d630c389503ad Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Tue, 15 Feb 2022 17:18:21 -0500 Subject: [PATCH 1/9] add new boolean flag and note for implementing fix --- icepyx/core/read.py | 11 ++++++++++- icepyx/core/variables.py | 9 ++++++++- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/icepyx/core/read.py b/icepyx/core/read.py index e1624c2d6..65c37bebf 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -338,9 +338,18 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): wanted_vars = list(wanted_dict.keys()) + print(grp_path) + print(wanted_groups_tiered) + print(wanted_dict) + print(wanted_vars) + if grp_path in ["orbit_info", "ancillary_data"]: + # print(grp_path) + # print(wanted_groups_tiered[0]) + # # print(wanted_groups_tiered) + # print(wanted_vars) grp_spec_vars = [ - wanted_vars[i] + print(wanted_vars[i]) for i, x in enumerate(wanted_groups_tiered[0]) if x == grp_path ] diff --git a/icepyx/core/variables.py b/icepyx/core/variables.py index ef71dc737..f50880bed 100644 --- a/icepyx/core/variables.py +++ b/icepyx/core/variables.py @@ -135,7 +135,7 @@ def visitor_func(name, node): return self._avail @staticmethod - def parse_var_list(varlist, tiered=True): + def parse_var_list(varlist, tiered=True, tiered_vars=False): """ Parse a list of path strings into tiered lists and names of variables @@ -149,6 +149,13 @@ def parse_var_list(varlist, tiered=True): (e.g. [['orbit_info', 'ancillary_data', 'gt1l'],['none','none','land_ice_segments']]) or a single list of path strings (e.g. ['orbit_info','ancillary_data','gt1l/land_ice_segments']) + tiered_vars : boolean, default False + Whether or not to append a list of the variable names to the nested list of component strings + (e.g. [['orbit_info', 'ancillary_data', 'gt1l'],['none','none','land_ice_segments'], + ['sc_orient','atlas_sdp_gps_epoch','h_li']])) + + Jessica NOTE: add this tiered vars functionality, then use it in read ln 582 (set to true) and ultimately circa line 339 to avoid the index error + Examples -------- >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28'], version='1') # doctest: +SKIP From dd6b2df1a42faee53982339a37e3ac4b0cfaa28c Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Wed, 16 Feb 2022 11:20:27 -0500 Subject: [PATCH 2/9] implement boolean flag for dealing with index error from mistmatched length var and varpath lists --- icepyx/core/read.py | 20 +++++++++----------- icepyx/core/variables.py | 9 ++++++--- 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/icepyx/core/read.py b/icepyx/core/read.py index 65c37bebf..5b3ad02b2 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -336,23 +336,19 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): Xarray Dataset with variables from the ds variable group added. """ - wanted_vars = list(wanted_dict.keys()) + # wanted_vars = list(wanted_dict.keys()) print(grp_path) - print(wanted_groups_tiered) - print(wanted_dict) - print(wanted_vars) + # print(wanted_groups_tiered) + # print(wanted_dict) if grp_path in ["orbit_info", "ancillary_data"]: - # print(grp_path) - # print(wanted_groups_tiered[0]) - # # print(wanted_groups_tiered) - # print(wanted_vars) grp_spec_vars = [ - print(wanted_vars[i]) + wanted_groups_tiered[-1][i] for i, x in enumerate(wanted_groups_tiered[0]) if x == grp_path ] + print(grp_spec_vars) for var in grp_spec_vars: is2ds = is2ds.assign({var: ("gran_idx", ds[var].data)}) @@ -392,7 +388,7 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): grp_spec_vars = [ k for k, v in wanted_dict.items() if any(grp_path in x for x in v) ] - # print(grp_spec_vars) + print(grp_spec_vars) ds = ( ds.reset_coords(drop=False) @@ -579,7 +575,9 @@ def _build_single_file_dataset(self, file, groups_list): # orbit_info is used automatically as the first group path so the info is available for the rest of the groups wanted_groups_set.remove("orbit_info") # returns the wanted groups as a list of lists with group path string elements separated - _, wanted_groups_tiered = Variables.parse_var_list(groups_list, tiered=True) + _, wanted_groups_tiered = Variables.parse_var_list( + groups_list, tiered=True, tiered_vars=True + ) for grp_path in ["orbit_info"] + list(wanted_groups_set): ds = self._read_single_var(file, grp_path) diff --git a/icepyx/core/variables.py b/icepyx/core/variables.py index f50880bed..ae12d4d3c 100644 --- a/icepyx/core/variables.py +++ b/icepyx/core/variables.py @@ -154,8 +154,6 @@ def parse_var_list(varlist, tiered=True, tiered_vars=False): (e.g. [['orbit_info', 'ancillary_data', 'gt1l'],['none','none','land_ice_segments'], ['sc_orient','atlas_sdp_gps_epoch','h_li']])) - Jessica NOTE: add this tiered vars functionality, then use it in read ln 582 (set to true) and ultimately circa line 339 to avoid the index error - Examples -------- >>> reg_a = ipx.Query('ATL06',[-55, 68, -48, 71],['2019-02-20','2019-02-28'], version='1') # doctest: +SKIP @@ -222,7 +220,10 @@ def parse_var_list(varlist, tiered=True, tiered_vars=False): else: num = np.max([v.count("/") for v in varlist]) # print('max needed: ' + str(num)) - paths = [[] for i in range(num)] + if tiered_vars == True: + paths = [[] for i in range(num + 1)] + else: + paths = [[] for i in range(num)] # print(self._cust_options['variables']) for vn in varlist: @@ -244,6 +245,8 @@ def parse_var_list(varlist, tiered=True, tiered_vars=False): for i in range(j, num): paths[i].append("none") i = i + 1 + if tiered_vars == True: + paths[num].append(vkey) return vgrp, paths From 5f7850e011cdd64be3ccd0c628566899db6cd28c Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Wed, 16 Feb 2022 12:08:33 -0500 Subject: [PATCH 3/9] fix generation of group specific variable list for multiple variable levels --- icepyx/core/read.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/icepyx/core/read.py b/icepyx/core/read.py index 5b3ad02b2..fe1a94f45 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -348,7 +348,6 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): for i, x in enumerate(wanted_groups_tiered[0]) if x == grp_path ] - print(grp_spec_vars) for var in grp_spec_vars: is2ds = is2ds.assign({var: ("gran_idx", ds[var].data)}) @@ -385,19 +384,24 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): spot = is2ref.gt2spot(gt_str, is2ds.sc_orient.values[0]) # add a test for the new function (called here)! + # print(wanted_dict) + grp_spec_vars = [ - k for k, v in wanted_dict.items() if any(grp_path in x for x in v) + k + for k, v in wanted_dict.items() + if any(f"{grp_path}/{k}" in x for x in v) ] print(grp_spec_vars) + print(ds) + ds = ( ds.reset_coords(drop=False) .expand_dims(dim=["spot", "gran_idx"]) .assign_coords(spot=("spot", [spot])) .assign(gt=(("gran_idx", "spot"), [[gt_str]])) ) - - # print(ds) + # print(ds[grp_spec_vars]) grp_spec_vars.append("gt") is2ds = is2ds.merge( ds[grp_spec_vars], join="outer", combine_attrs="no_conflicts" From c3b5c73c7cbfac628b1f6e301915985c7161a49d Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Mon, 21 Feb 2022 13:22:38 -0500 Subject: [PATCH 4/9] set up structure to handle merge conflicts caused by more highly nested variables --- icepyx/core/query.py | 2 +- icepyx/core/read.py | 41 +++++++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 13 deletions(-) diff --git a/icepyx/core/query.py b/icepyx/core/query.py index 244e75978..db8219f8e 100644 --- a/icepyx/core/query.py +++ b/icepyx/core/query.py @@ -1017,7 +1017,7 @@ def download_granules( by default when subset=True, but additional subsetting options are available. Spatial subsetting returns all data that are within the area of interest (but not complete granules. This eliminates false-positive granules returned by the metadata-level search) - restart: boolean, default false + restart : boolean, default false If previous download was terminated unexpectedly. Run again with restart set to True to continue. **kwargs : key-value pairs Additional parameters to be passed to the subsetter. diff --git a/icepyx/core/read.py b/icepyx/core/read.py index fe1a94f45..b9bb82123 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -311,9 +311,9 @@ def _check_source_for_pattern(source, filename_pattern): return False, None @staticmethod - def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): + def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): """ - Add the new variable group to the dataset template. + Add the new variables in the group to the dataset template. Parameters ---------- @@ -338,7 +338,7 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): # wanted_vars = list(wanted_dict.keys()) - print(grp_path) + # print(grp_path) # print(wanted_groups_tiered) # print(wanted_dict) @@ -391,10 +391,10 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v) ] - print(grp_spec_vars) - print(ds) + # print(ds) + # DevNOTE: the issue seems to be that the incoming ds has mismatching delta time lengths, and they're not brought in as coordinates for the canopy/canopy_hy ds = ( ds.reset_coords(drop=False) .expand_dims(dim=["spot", "gran_idx"]) @@ -403,9 +403,26 @@ def _add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): ) # print(ds[grp_spec_vars]) grp_spec_vars.append("gt") + + # Use this to handle issues specific to group paths that are more nested + tiers = len(wanted_groups_tiered) + if tiers > 3 and grp_path.count("/") == tiers - 2: + # Handle attribute conflicts that arose from data descriptions during merging + for var in grp_spec_vars: + ds[var].attrs = ds.attrs + for k in ds[var].attrs.keys(): + ds.attrs.pop(k) + # warnings.warn( + # "Due to the number of layers of variable group paths, some attributes have been dropped from your DataSet during merging", + # UserWarning, + # ) + + # assign delta-time coordinates for the deeper layer variable + is2ds = is2ds.merge( ds[grp_spec_vars], join="outer", combine_attrs="no_conflicts" ) + # print(is2ds) # re-cast some dtypes to make array smaller @@ -486,7 +503,7 @@ def _build_dataset_template(self, file): ) return is2ds - def _read_single_var(self, file, grp_path): + def _read_single_grp(self, file, grp_path): """ For a given file and variable group path, construct an Intake catalog and use it to read in the data. @@ -520,12 +537,10 @@ def _read_single_var(self, file, grp_path): grp_paths=grp_path, extra_engine_kwargs={"phony_dims": "access"}, ) - ds = grpcat[self._source_type].read() return ds - # NOTE: for non-gridded datasets only def _build_single_file_dataset(self, file, groups_list): """ Create a single xarray dataset with all of the wanted variables/groups from the wanted var list for a single data file/url. @@ -545,7 +560,7 @@ def _build_single_file_dataset(self, file, groups_list): Xarray Dataset """ - file_product = self._read_single_var(file, "/").attrs["identifier_product_type"] + file_product = self._read_single_grp(file, "/").attrs["identifier_product_type"] assert ( file_product == self._prod ), "Your product specification does not match the product specification within your files." @@ -582,10 +597,12 @@ def _build_single_file_dataset(self, file, groups_list): _, wanted_groups_tiered = Variables.parse_var_list( groups_list, tiered=True, tiered_vars=True ) - + print(wanted_groups_set) for grp_path in ["orbit_info"] + list(wanted_groups_set): - ds = self._read_single_var(file, grp_path) - is2ds = Read._add_var_to_ds( + print(grp_path) + ds = self._read_single_grp(file, grp_path) + # print(ds) + is2ds = Read._add_vars_to_ds( is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict ) From 57fd93d68779a03206ca7b84c78167c1057e729c Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Thu, 24 Feb 2022 17:04:25 -0500 Subject: [PATCH 5/9] working prototype for deeply nested dataset merging --- icepyx/core/read.py | 154 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 136 insertions(+), 18 deletions(-) diff --git a/icepyx/core/read.py b/icepyx/core/read.py index b9bb82123..281443a61 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -1,4 +1,5 @@ import fnmatch +import grp import os import warnings @@ -404,20 +405,29 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): # print(ds[grp_spec_vars]) grp_spec_vars.append("gt") - # Use this to handle issues specific to group paths that are more nested - tiers = len(wanted_groups_tiered) - if tiers > 3 and grp_path.count("/") == tiers - 2: - # Handle attribute conflicts that arose from data descriptions during merging - for var in grp_spec_vars: - ds[var].attrs = ds.attrs - for k in ds[var].attrs.keys(): - ds.attrs.pop(k) - # warnings.warn( - # "Due to the number of layers of variable group paths, some attributes have been dropped from your DataSet during merging", - # UserWarning, - # ) + # # Use this to handle issues specific to group paths that are more nested + # tiers = len(wanted_groups_tiered) + # if tiers > 3 and grp_path.count("/") == tiers - 2: + # # Handle attribute conflicts that arose from data descriptions during merging + # for var in grp_spec_vars: + # ds[var].attrs = ds.attrs + # for k in ds[var].attrs.keys(): + # ds.attrs.pop(k) + # # warnings.warn( + # # "Due to the number of layers of variable group paths, some attributes have been dropped from your DataSet during merging", + # # UserWarning, + # # ) + + # # assign delta-time coordinates for the deeper layer variable + # up_grp_path = grp_path.rsplit("/")[0] + + # print(is2ds.sel(spot=spot).delta_time) + + # # ds.assign_coords(delta_time=is2ds.sel(spot=spot).delta_time) + # print(is2ds) - # assign delta-time coordinates for the deeper layer variable + # ds=ds.sel(spot=spot).assign_coords({'delta_time':is2ds.sel(spot=spot).delta_time.data}) + # # print(ds) is2ds = is2ds.merge( ds[grp_spec_vars], join="outer", combine_attrs="no_conflicts" @@ -429,6 +439,92 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): is2ds["gt"] = is2ds.gt.astype(str) is2ds["spot"] = is2ds.spot.astype(np.uint8) + return is2ds, ds[grp_spec_vars] + + @staticmethod + def _combine_nested_vars(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): + """ + Add the new variables in the group to the dataset template. + + Parameters + ---------- + is2ds : Xarray dataset + Template dataset to add new variables to. + ds : Xarray dataset + Dataset containing the group to add + grp_path : str + hdf5 group path read into ds + wanted_groups_tiered : list of lists + A list of lists of deconstructed group + variable paths. + The first list contains the first portion of the group name (between consecutive "/"), + the second list contains the second portion of the group name, etc. + "none" is used to fill in where paths are shorter than the longest path. + wanted_dict : dict + Dictionary with variable names as keys and a list of group + variable paths containing those variables as values. + + Returns + ------- + Xarray Dataset with variables from the ds variable group added. + """ + + # wanted_vars = list(wanted_dict.keys()) + + # print(grp_path) + # print(wanted_groups_tiered) + # print(wanted_dict) + + # print(wanted_dict) + + grp_spec_vars = [ + k for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v) + ] + + # print(ds) + + # DevNOTE: the issue seems to be that the incoming ds has mismatching delta time lengths, and they're not brought in as coordinates for the canopy/canopy_hy + # ds = ( + # ds.reset_coords(drop=False) + # .expand_dims(dim=["spot", "gran_idx"]) + # .assign_coords(spot=("spot", [spot])) + # .assign(gt=(("gran_idx", "spot"), [[gt_str]])) + # ) + # # print(ds[grp_spec_vars]) + # grp_spec_vars.append("gt") + + # # Use this to handle issues specific to group paths that are more nested + # tiers = len(wanted_groups_tiered) + # if tiers > 3 and grp_path.count("/") == tiers - 2: + # # Handle attribute conflicts that arose from data descriptions during merging + # for var in grp_spec_vars: + # ds[var].attrs = ds.attrs + # for k in ds[var].attrs.keys(): + # ds.attrs.pop(k) + # # warnings.warn( + # # "Due to the number of layers of variable group paths, some attributes have been dropped from your DataSet during merging", + # # UserWarning, + # # ) + + # # assign delta-time coordinates for the deeper layer variable + # up_grp_path = grp_path.rsplit("/")[0] + + # print(is2ds.sel(spot=spot).delta_time) + + # # ds.assign_coords(delta_time=is2ds.sel(spot=spot).delta_time) + # print(is2ds) + + # ds=ds.sel(spot=spot).assign_coords({'delta_time':is2ds.sel(spot=spot).delta_time.data}) + # # print(ds) + + print(grp_spec_vars) + + is2ds = is2ds.assign(ds[grp_spec_vars]) + + # print(is2ds) + + # re-cast some dtypes to make array smaller + # is2ds["gt"] = is2ds.gt.astype(str) + # is2ds["spot"] = is2ds.spot.astype(np.uint8) + return is2ds def load(self): @@ -593,17 +689,39 @@ def _build_single_file_dataset(self, file, groups_list): wanted_groups_set = set(wanted_groups) # orbit_info is used automatically as the first group path so the info is available for the rest of the groups wanted_groups_set.remove("orbit_info") + # Note: the sorting is critical for datasets with highly nested groups + wanted_groups_list = ["orbit_info"] + sorted(wanted_groups_set) # returns the wanted groups as a list of lists with group path string elements separated _, wanted_groups_tiered = Variables.parse_var_list( groups_list, tiered=True, tiered_vars=True ) - print(wanted_groups_set) - for grp_path in ["orbit_info"] + list(wanted_groups_set): - print(grp_path) + + while wanted_groups_list: + grp_path = wanted_groups_list[0] + wanted_groups_list = wanted_groups_list[1:] + # Note this will fail with an index error on the last run ds = self._read_single_grp(file, grp_path) - # print(ds) - is2ds = Read._add_vars_to_ds( + print(grp_path) + is2ds, ds = Read._add_vars_to_ds( is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict ) + # if there are any deeper nested variables, get those so they have actual coordinates and add them + if any(grp_path in grp_path2 for grp_path2 in wanted_groups_list): + print("deep nested paths") + for grp_path2 in wanted_groups_list: + if grp_path in grp_path2: + sub_ds = self._read_single_grp(file, grp_path2) + # print(ds) + # print(sub_ds) + ds = Read._combine_nested_vars( + ds, sub_ds, grp_path2, wanted_groups_tiered, wanted_dict + ) + wanted_groups_list.remove(grp_path2) + is2ds = is2ds.merge(ds, join="outer", combine_attrs="no_conflicts") + + print(is2ds) + + # Notes (next steps): test on ATL06; reset kernal and try again; figure out gran_idx generation to be unique for ATL08files + return is2ds From 6d9cec66578c0d400aeb77f76b402b183389f1a8 Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Fri, 25 Feb 2022 10:43:40 -0500 Subject: [PATCH 6/9] fix docstring typo --- icepyx/core/is2ref.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/icepyx/core/is2ref.py b/icepyx/core/is2ref.py index 3fed2ef4f..5f4e455c6 100644 --- a/icepyx/core/is2ref.py +++ b/icepyx/core/is2ref.py @@ -259,7 +259,7 @@ def _default_varlists(product): else: print( - "THE REQUESTED PRODUCT DOES NOT YET HAVE A DEFAULT LIST SET UP. ONLY DELTA_TIME, LATITUTDE, AND LONGITUDE WILL BE RETURNED" + "THE REQUESTED PRODUCT DOES NOT YET HAVE A DEFAULT LIST SET UP. ONLY DELTA_TIME, LATITUDE, AND LONGITUDE WILL BE RETURNED" ) return common_list From 763305b6db9b01ffdabc28a50b02d9187b8bf9c8 Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Fri, 25 Feb 2022 10:44:26 -0500 Subject: [PATCH 7/9] finish debugging ATL08 read-in issue --- .../example_notebooks/IS2_data_read-in.ipynb | 2 +- icepyx/core/read.py | 81 +------------------ 2 files changed, 3 insertions(+), 80 deletions(-) diff --git a/doc/source/example_notebooks/IS2_data_read-in.ipynb b/doc/source/example_notebooks/IS2_data_read-in.ipynb index 6537777de..836381698 100644 --- a/doc/source/example_notebooks/IS2_data_read-in.ipynb +++ b/doc/source/example_notebooks/IS2_data_read-in.ipynb @@ -426,7 +426,7 @@ "\n", "***ATTENTION: icepyx loads your data by creating an Xarray DataSet for each input granule and then merging them. In some cases, the automatic merge fails and needs to be handled manually. In these cases, icepyx will return a warning with the error message from the failed Xarray merge and a list of per-granule DataSets***\n", "\n", - "This can happen if you unintentionally provide the same granule multiple times with different filenames." + "This can happen if you unintentionally provide the same granule multiple times with different filenames or in segmented products where the rgt+cycle automatically generated `gran_idx` values match. In this latter case, you can simply provide unique `gran_idx` values for each DataSet in `ds` and run `import xarray as xr` and `ds_merged = xr.merge(ds)` to create one merged DataSet." ] }, { diff --git a/icepyx/core/read.py b/icepyx/core/read.py index 281443a61..2f3ab5891 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -385,56 +385,25 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): spot = is2ref.gt2spot(gt_str, is2ds.sc_orient.values[0]) # add a test for the new function (called here)! - # print(wanted_dict) - grp_spec_vars = [ k for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v) ] - # print(ds) - - # DevNOTE: the issue seems to be that the incoming ds has mismatching delta time lengths, and they're not brought in as coordinates for the canopy/canopy_hy ds = ( ds.reset_coords(drop=False) .expand_dims(dim=["spot", "gran_idx"]) .assign_coords(spot=("spot", [spot])) .assign(gt=(("gran_idx", "spot"), [[gt_str]])) ) - # print(ds[grp_spec_vars]) - grp_spec_vars.append("gt") - - # # Use this to handle issues specific to group paths that are more nested - # tiers = len(wanted_groups_tiered) - # if tiers > 3 and grp_path.count("/") == tiers - 2: - # # Handle attribute conflicts that arose from data descriptions during merging - # for var in grp_spec_vars: - # ds[var].attrs = ds.attrs - # for k in ds[var].attrs.keys(): - # ds.attrs.pop(k) - # # warnings.warn( - # # "Due to the number of layers of variable group paths, some attributes have been dropped from your DataSet during merging", - # # UserWarning, - # # ) - - # # assign delta-time coordinates for the deeper layer variable - # up_grp_path = grp_path.rsplit("/")[0] - - # print(is2ds.sel(spot=spot).delta_time) - - # # ds.assign_coords(delta_time=is2ds.sel(spot=spot).delta_time) - # print(is2ds) - # ds=ds.sel(spot=spot).assign_coords({'delta_time':is2ds.sel(spot=spot).delta_time.data}) - # # print(ds) + grp_spec_vars.append("gt") is2ds = is2ds.merge( ds[grp_spec_vars], join="outer", combine_attrs="no_conflicts" ) - # print(is2ds) - # re-cast some dtypes to make array smaller is2ds["gt"] = is2ds.gt.astype(str) is2ds["spot"] = is2ds.spot.astype(np.uint8) @@ -467,30 +436,10 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) Xarray Dataset with variables from the ds variable group added. """ - # wanted_vars = list(wanted_dict.keys()) - - # print(grp_path) - # print(wanted_groups_tiered) - # print(wanted_dict) - - # print(wanted_dict) - grp_spec_vars = [ k for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v) ] - # print(ds) - - # DevNOTE: the issue seems to be that the incoming ds has mismatching delta time lengths, and they're not brought in as coordinates for the canopy/canopy_hy - # ds = ( - # ds.reset_coords(drop=False) - # .expand_dims(dim=["spot", "gran_idx"]) - # .assign_coords(spot=("spot", [spot])) - # .assign(gt=(("gran_idx", "spot"), [[gt_str]])) - # ) - # # print(ds[grp_spec_vars]) - # grp_spec_vars.append("gt") - # # Use this to handle issues specific to group paths that are more nested # tiers = len(wanted_groups_tiered) # if tiers > 3 and grp_path.count("/") == tiers - 2: @@ -504,27 +453,8 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) # # UserWarning, # # ) - # # assign delta-time coordinates for the deeper layer variable - # up_grp_path = grp_path.rsplit("/")[0] - - # print(is2ds.sel(spot=spot).delta_time) - - # # ds.assign_coords(delta_time=is2ds.sel(spot=spot).delta_time) - # print(is2ds) - - # ds=ds.sel(spot=spot).assign_coords({'delta_time':is2ds.sel(spot=spot).delta_time.data}) - # # print(ds) - - print(grp_spec_vars) - is2ds = is2ds.assign(ds[grp_spec_vars]) - # print(is2ds) - - # re-cast some dtypes to make array smaller - # is2ds["gt"] = is2ds.gt.astype(str) - # is2ds["spot"] = is2ds.spot.astype(np.uint8) - return is2ds def load(self): @@ -699,29 +629,22 @@ def _build_single_file_dataset(self, file, groups_list): while wanted_groups_list: grp_path = wanted_groups_list[0] wanted_groups_list = wanted_groups_list[1:] - # Note this will fail with an index error on the last run ds = self._read_single_grp(file, grp_path) - print(grp_path) is2ds, ds = Read._add_vars_to_ds( is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict ) # if there are any deeper nested variables, get those so they have actual coordinates and add them if any(grp_path in grp_path2 for grp_path2 in wanted_groups_list): - print("deep nested paths") for grp_path2 in wanted_groups_list: if grp_path in grp_path2: sub_ds = self._read_single_grp(file, grp_path2) - # print(ds) - # print(sub_ds) ds = Read._combine_nested_vars( ds, sub_ds, grp_path2, wanted_groups_tiered, wanted_dict ) wanted_groups_list.remove(grp_path2) is2ds = is2ds.merge(ds, join="outer", combine_attrs="no_conflicts") - print(is2ds) - - # Notes (next steps): test on ATL06; reset kernal and try again; figure out gran_idx generation to be unique for ATL08files + # Notes (next steps): open an issue; maybe add a fn to generate unique gran ids return is2ds From 1e8b785de49fdec907856b67f4c139a17e04b7b4 Mon Sep 17 00:00:00 2001 From: Jessica Scheick Date: Fri, 25 Feb 2022 10:58:43 -0500 Subject: [PATCH 8/9] clean up code after viewing dif --- icepyx/core/read.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/icepyx/core/read.py b/icepyx/core/read.py index 2f3ab5891..184d2b7c6 100644 --- a/icepyx/core/read.py +++ b/icepyx/core/read.py @@ -1,5 +1,4 @@ import fnmatch -import grp import os import warnings @@ -337,12 +336,6 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): Xarray Dataset with variables from the ds variable group added. """ - # wanted_vars = list(wanted_dict.keys()) - - # print(grp_path) - # print(wanted_groups_tiered) - # print(wanted_dict) - if grp_path in ["orbit_info", "ancillary_data"]: grp_spec_vars = [ wanted_groups_tiered[-1][i] @@ -399,7 +392,6 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): ) grp_spec_vars.append("gt") - is2ds = is2ds.merge( ds[grp_spec_vars], join="outer", combine_attrs="no_conflicts" ) @@ -411,23 +403,18 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): return is2ds, ds[grp_spec_vars] @staticmethod - def _combine_nested_vars(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict): + def _combine_nested_vars(is2ds, ds, grp_path, wanted_dict): """ - Add the new variables in the group to the dataset template. + Add the deeply nested variables to a dataset with appropriate coordinate information. Parameters ---------- is2ds : Xarray dataset - Template dataset to add new variables to. + Dataset to add deeply nested variables to. ds : Xarray dataset - Dataset containing the group to add + Dataset containing proper dimensions for the variables being added grp_path : str hdf5 group path read into ds - wanted_groups_tiered : list of lists - A list of lists of deconstructed group + variable paths. - The first list contains the first portion of the group name (between consecutive "/"), - the second list contains the second portion of the group name, etc. - "none" is used to fill in where paths are shorter than the longest path. wanted_dict : dict Dictionary with variable names as keys and a list of group + variable paths containing those variables as values. @@ -640,11 +627,9 @@ def _build_single_file_dataset(self, file, groups_list): if grp_path in grp_path2: sub_ds = self._read_single_grp(file, grp_path2) ds = Read._combine_nested_vars( - ds, sub_ds, grp_path2, wanted_groups_tiered, wanted_dict + ds, sub_ds, grp_path2, wanted_dict ) wanted_groups_list.remove(grp_path2) is2ds = is2ds.merge(ds, join="outer", combine_attrs="no_conflicts") - # Notes (next steps): open an issue; maybe add a fn to generate unique gran ids - return is2ds From 5ee425fa575fd12cebb5a314751664ca1c1a05bc Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Thu, 17 Mar 2022 01:21:19 +0000 Subject: [PATCH 9/9] GitHub action UML generation auto-update --- .../documentation/classes_dev_uml.svg | 343 +++++++++--------- .../documentation/classes_user_uml.svg | 6 +- 2 files changed, 175 insertions(+), 174 deletions(-) diff --git a/doc/source/user_guide/documentation/classes_dev_uml.svg b/doc/source/user_guide/documentation/classes_dev_uml.svg index a494bc220..a73320ea1 100644 --- a/doc/source/user_guide/documentation/classes_dev_uml.svg +++ b/doc/source/user_guide/documentation/classes_dev_uml.svg @@ -4,11 +4,11 @@ - + classes_dev_uml - + icepyx.core.Earthdata.Earthdata @@ -18,7 +18,7 @@ capability_url email netrc : NoneType -pswd : NoneType, str +pswd : str, NoneType session : Session uid @@ -29,17 +29,17 @@ icepyx.core.query.GenQuery - -GenQuery - -_end : datetime -_geom_filepath : NoneType -_spat_extent -_start : datetime -extent_type : str - -__init__(spatial_extent, date_range, start_time, end_time) -__str__() + +GenQuery + +_end : datetime +_geom_filepath : NoneType +_spat_extent +_start : datetime +extent_type : str + +__init__(spatial_extent, date_range, start_time, end_time) +__str__() @@ -58,104 +58,104 @@ icepyx.core.query.Query - -Query - -CMRparams -_CMRparams -_about_product -_cust_options : dict -_cycles : list -_email -_file_vars -_granules -_order_vars -_prod : NoneType, str -_readable_granule_name : list -_reqparams -_s3login_credentials -_session : Session -_source : str -_subsetparams : NoneType -_tracks : list -_version -cycles -dataset -dates -end_time -file_vars -granules -order_vars -product -product_version -reqparams -spatial_extent -start_time -tracks - -__init__(product, spatial_extent, date_range, start_time, end_time, version, cycles, tracks, files) -__str__() -avail_granules(ids, cycles, tracks, s3urls) -download_granules(path, verbose, subset, restart) -earthdata_login(uid, email, s3token) -latest_version() -order_granules(verbose, subset, email) -product_all_info() -product_summary_info() -show_custom_options(dictview) -subsetparams() -visualize_elevation() -visualize_spatial_extent() + +Query + +CMRparams +_CMRparams +_about_product +_cust_options : dict +_cycles : list +_email +_file_vars +_granules +_order_vars +_prod : str, NoneType +_readable_granule_name : list +_reqparams +_s3login_credentials +_session : Session +_source : str +_subsetparams : NoneType +_tracks : list +_version +cycles +dataset +dates +end_time +file_vars +granules +order_vars +product +product_version +reqparams +spatial_extent +start_time +tracks + +__init__(product, spatial_extent, date_range, start_time, end_time, version, cycles, tracks, files) +__str__() +avail_granules(ids, cycles, tracks, s3urls) +download_granules(path, verbose, subset, restart) +earthdata_login(uid, email, s3token) +latest_version() +order_granules(verbose, subset, email) +product_all_info() +product_summary_info() +show_custom_options(dictview) +subsetparams() +visualize_elevation() +visualize_spatial_extent() icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.granules.Granules->icepyx.core.query.Query - - -_granules + + +_granules icepyx.core.icesat2data.Icesat2Data - -Icesat2Data - - -__init__() + +Icesat2Data + + +__init__() icepyx.core.exceptions.NsidcQueryError - -NsidcQueryError - -errmsg -msgtxt : str - -__init__(errmsg, msgtxt) -__str__() + +NsidcQueryError + +errmsg +msgtxt : str + +__init__(errmsg, msgtxt) +__str__() icepyx.core.exceptions.QueryError - -QueryError - - - + +QueryError + + + icepyx.core.exceptions.NsidcQueryError->icepyx.core.exceptions.QueryError - - + + @@ -163,9 +163,9 @@ Parameters -_fmted_keys : NoneType, dict +_fmted_keys : dict, NoneType _poss_keys : dict -_reqtype : str, NoneType +_reqtype : NoneType, str fmted_keys partype poss_keys @@ -180,136 +180,137 @@ icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - + + _CMRparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - + + _reqparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.APIformatting.Parameters->icepyx.core.query.Query - - -_subsetparams + + +_subsetparams icepyx.core.query.Query->icepyx.core.query.GenQuery - - + + icepyx.core.read.Read - -Read - -_catalog_path : NoneType -_filelist : list, NoneType -_is2catalog : Catalog -_out_obj : Dataset -_pattern : str -_prod : str, NoneType -_read_vars -_source_type : str -data_source : NoneType -is2catalog -vars - -__init__(data_source, product, filename_pattern, catalog, out_obj_type) -_add_var_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) -_build_dataset_template(file) -_build_single_file_dataset(file, groups_list) -_check_source_for_pattern(source, filename_pattern) -_read_single_var(file, grp_path) -load() + +Read + +_catalog_path : NoneType +_filelist : NoneType, list +_is2catalog : Catalog +_out_obj : Dataset +_pattern : str +_prod : str, NoneType +_read_vars +_source_type : str +data_source : NoneType +is2catalog +vars + +__init__(data_source, product, filename_pattern, catalog, out_obj_type) +_add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict) +_build_dataset_template(file) +_build_single_file_dataset(file, groups_list) +_check_source_for_pattern(source, filename_pattern) +_combine_nested_vars(is2ds, ds, grp_path, wanted_dict) +_read_single_grp(file, grp_path) +load() icepyx.core.variables.Variables - -Variables - -_avail : NoneType, list -_session : NoneType -_vartype -_version : NoneType -path : NoneType -product : NoneType -wanted : NoneType, dict - -__init__(vartype, avail, wanted, session, product, version, path) -_check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list) -_get_combined_list(beam_list, keyword_list) -_get_sum_varlist(var_list, all_vars, defaults) -_iter_paths(sum_varlist, req_vars, vgrp, beam_list, keyword_list) -_iter_vars(sum_varlist, req_vars, vgrp) -append(defaults, var_list, beam_list, keyword_list) -avail(options, internal) -parse_var_list(varlist, tiered) -remove(all, var_list, beam_list, keyword_list) + +Variables + +_avail : NoneType, list +_session : NoneType +_vartype +_version : NoneType +path : NoneType +product : NoneType +wanted : dict, NoneType + +__init__(vartype, avail, wanted, session, product, version, path) +_check_valid_lists(vgrp, allpaths, var_list, beam_list, keyword_list) +_get_combined_list(beam_list, keyword_list) +_get_sum_varlist(var_list, all_vars, defaults) +_iter_paths(sum_varlist, req_vars, vgrp, beam_list, keyword_list) +_iter_vars(sum_varlist, req_vars, vgrp) +append(defaults, var_list, beam_list, keyword_list) +avail(options, internal) +parse_var_list(varlist, tiered, tiered_vars) +remove(all, var_list, beam_list, keyword_list) icepyx.core.variables.Variables->icepyx.core.query.Query - - + + _order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - + + _order_vars icepyx.core.variables.Variables->icepyx.core.query.Query - - + + _file_vars icepyx.core.variables.Variables->icepyx.core.read.Read - - + + _read_vars icepyx.core.visualization.Visualize - -Visualize - -bbox : list -cycles : NoneType -date_range : NoneType -product : NoneType, str -tracks : NoneType - -__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) -generate_OA_parameters(): list -grid_bbox(binsize): list -make_request(base_url, payload) -parallel_request_OA(): -query_icesat2_filelist(): tuple -request_OA_data(paras): -viz_elevation(): + +Visualize + +bbox : list +cycles : NoneType +date_range : NoneType +product : str, NoneType +tracks : NoneType + +__init__(query_obj, product, spatial_extent, date_range, cycles, tracks) +generate_OA_parameters(): list +grid_bbox(binsize): list +make_request(base_url, payload) +parallel_request_OA(): +query_icesat2_filelist(): tuple +request_OA_data(paras): +viz_elevation(): diff --git a/doc/source/user_guide/documentation/classes_user_uml.svg b/doc/source/user_guide/documentation/classes_user_uml.svg index 6ad75574f..5901d2fb6 100644 --- a/doc/source/user_guide/documentation/classes_user_uml.svg +++ b/doc/source/user_guide/documentation/classes_user_uml.svg @@ -197,11 +197,11 @@ path : NoneType product : NoneType -wanted : NoneType, dict +wanted : dict, NoneType append(defaults, var_list, beam_list, keyword_list) avail(options, internal) -parse_var_list(varlist, tiered) +parse_var_list(varlist, tiered, tiered_vars) remove(all, var_list, beam_list, keyword_list) @@ -241,7 +241,7 @@ bbox : list cycles : NoneType date_range : NoneType -product : NoneType, str +product : str, NoneType tracks : NoneType generate_OA_parameters(): list