From b36757b13056ecca1b7b43681ce2e80f0b6db7f3 Mon Sep 17 00:00:00 2001
From: "Angeline G. Burrell"
Date: Thu, 6 Jun 2024 17:41:59 -0400
Subject: [PATCH 1/7] STY: generalized `concat_data` method

Extracted the general portion of the GUVI `concat_data` method to allow
use with the DMSP SSUSI data.

---
 pysatNASA/instruments/methods/jhuapl.py | 73 +++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/pysatNASA/instruments/methods/jhuapl.py b/pysatNASA/instruments/methods/jhuapl.py
index 51d0be22..738446b8 100644
--- a/pysatNASA/instruments/methods/jhuapl.py
+++ b/pysatNASA/instruments/methods/jhuapl.py
@@ -444,3 +444,76 @@ def clean_by_dqi(inst):
         inst.data[dat_var].values[dqi_bad] = np.nan
         inst.meta[dat_var] = {inst.meta.labels.fill_val: np.nan}
     return
+
+
+def concat_data(inst, time_dims, new_data, combine_times=False, **kwargs):
+    """Concatenate data to inst.data for JHU APL SDR data.
+
+    Parameters
+    ----------
+    inst : pysat.Instrument
+        Object containing a JHU APL Instrument with data
+    time_dims : list
+        List of the time dimensions
+    new_data : xarray.Dataset or list of such objects
+        New data objects to be concatenated
+    combine_times : bool
+        For SDR data, optionally combine the different datetime coordinates
+        into a single time coordinate (default=False)
+    **kwargs : dict
+        Optional keyword arguments passed to xr.concat
+
+    Note
+    ----
+    For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
+    unless the user includes a value for dim as a keyword argument.
+
+    """
+    # Concatenate using the appropriate method for the number of time
+    # dimensions
+    if len(time_dims) == 1:
+        # There is only one time dimension, but other dimensions may
+        # need to be adjusted
+        new_data = pysat.utils.coords.expand_xarray_dims(
+            new_data, inst.meta, exclude_dims=time_dims)
+
+        # Combine the data
+        inst.data = xr.combine_by_coords(new_data, **kwargs)
+    else:
+        inners = None
+        for ndata in new_data:
+            # Separate into inner datasets
+            inner_keys = {dim: [key for key in ndata.keys()
+                                if dim in ndata[key].dims] for dim in time_dims}
+            inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}
+
+            # Add 'single_var's into 'time' dataset to keep track
+            sv_keys = [val.name for val in ndata.values()
+                       if 'single_var' in val.dims]
+            singlevar_set = ndata.get(sv_keys)
+            inner_dat[inst.index.name] = xr.merge([inner_dat[inst.index.name],
+                                                   singlevar_set])
+
+            # Concatenate along desired dimension with previous data
+            if inners is None:
+                # No previous data, assign the data separated by dimension
+                inners = dict(inner_dat)
+            else:
+                # Concatenate with existing data
+                inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
+                                         dim=dim) for dim in time_dims}
+
+        # Combine all time dimensions
+        if inners is not None:
+            if combine_times:
+                data_list = pysat.utils.coords.expand_xarray_dims(
+                    [inners[dim] if dim == inst.index.name else
+                     inners[dim].rename_dims({dim: inst.index.name})
+                     for dim in time_dims if len(inners[dim].dims) > 0],
+                    inst.meta, dims_equal=False)
+            else:
+                data_list = [inners[dim] for dim in time_dims]
+
+            # Combine all the data, indexing along time
+            inst.data = xr.merge(data_list)
+    return

From 73bf6bab3ae62d12b9d67142ddae260ea9f7dad0 Mon Sep 17 00:00:00 2001
From: "Angeline G. Burrell"
Date: Thu, 6 Jun 2024 17:42:28 -0400
Subject: [PATCH 2/7] STY: use JHU APL concat

Use the new JHU APL concat function in the local method.
---
 pysatNASA/instruments/timed_guvi.py | 48 ++---------------------------
 1 file changed, 2 insertions(+), 46 deletions(-)

diff --git a/pysatNASA/instruments/timed_guvi.py b/pysatNASA/instruments/timed_guvi.py
index 6087ce6e..9b5db962 100644
--- a/pysatNASA/instruments/timed_guvi.py
+++ b/pysatNASA/instruments/timed_guvi.py
@@ -60,7 +60,6 @@
 import datetime as dt
 import functools
-import xarray as xr
 
 import pysat
 from pysat.instruments.methods import general as mm_gen
 
@@ -158,51 +157,8 @@ def concat_data(self, new_data, combine_times=False, **kwargs):
 
     # Concatonate using the appropriate method for the number of time
     # dimensions
-    if len(time_dims) == 1:
-        # There is only one time dimensions, but other dimensions may
-        # need to be adjusted
-        new_data = pysat.utils.coords.expand_xarray_dims(
-            new_data, self.meta, exclude_dims=time_dims)
-
-        # Combine the data
-        self.data = xr.combine_by_coords(new_data, **kwargs)
-    else:
-        inners = None
-        for ndata in new_data:
-            # Separate into inner datasets
-            inner_keys = {dim: [key for key in ndata.keys()
-                                if dim in ndata[key].dims] for dim in time_dims}
-            inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}
-
-            # Add 'single_var's into 'time' dataset to keep track
-            sv_keys = [val.name for val in ndata.values()
-                       if 'single_var' in val.dims]
-            singlevar_set = ndata.get(sv_keys)
-            inner_dat[self.index.name] = xr.merge([inner_dat[self.index.name],
-                                                   singlevar_set])
-
-            # Concatenate along desired dimension with previous data
-            if inners is None:
-                # No previous data, assign the data separated by dimension
-                inners = dict(inner_dat)
-            else:
-                # Concatenate with existing data
-                inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
-                                         dim=dim) for dim in time_dims}
-
-        # Combine all time dimensions
-        if inners is not None:
-            if combine_times:
-                data_list = pysat.utils.coords.expand_xarray_dims(
-                    [inners[dim] if dim == self.index.name else
-                     inners[dim].rename_dims({dim: self.index.name})
-                     for dim in time_dims if len(inners[dim].dims) > 0],
-                    self.meta, dims_equal=False)
-            else:
-                data_list = [inners[dim] for dim in time_dims]
-
-            # Combine all the data, indexing along time
-            self.data = xr.merge(data_list)
+    jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
+                       **kwargs)
     return

From 672b6253bfb228f8f8a5f643ca451a559218516e Mon Sep 17 00:00:00 2001
From: "Angeline G. Burrell"
Date: Thu, 6 Jun 2024 17:43:10 -0400
Subject: [PATCH 3/7] BUG: add custom `concat_data` method

Use the JHU APL `concat_data` function in this Instrument.

---
 pysatNASA/instruments/dmsp_ssusi.py | 32 +++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/pysatNASA/instruments/dmsp_ssusi.py b/pysatNASA/instruments/dmsp_ssusi.py
index 0b4a892c..71593319 100644
--- a/pysatNASA/instruments/dmsp_ssusi.py
+++ b/pysatNASA/instruments/dmsp_ssusi.py
@@ -118,6 +118,38 @@ def clean(self):
     return
 
 
+def concat_data(self, new_data, combine_times=False, **kwargs):
+    """Concatenate data to self.data for DMSP SSUSI data.
+
+    Parameters
+    ----------
+    new_data : xarray.Dataset or list of such objects
+        New data objects to be concatenated
+    combine_times : bool
+        For SDR data, optionally combine the different datetime coordinates
+        into a single time coordinate (default=False)
+    **kwargs : dict
+        Optional keyword arguments passed to xr.concat
+
+    Note
+    ----
+    For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
+    unless the user includes a value for dim as a keyword argument.
+
+    """
+    # Establish the time dimensions by data type
+    time_dims = [self.index.name]
+
+    if self.tag in ['sdr-disk', 'sdr2-disk']:
+        time_dims.append('time_auroral')
+
+    # Concatenate using the appropriate method for the number of time
+    # dimensions
+    jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
+                       **kwargs)
+    return
+
+
 # ----------------------------------------------------------------------------
 # Instrument functions
 #

From 9c33f07777e8ed7f4845b2dca1c32f0121e5ec9f Mon Sep 17 00:00:00 2001
From: "Angeline G. Burrell"
Date: Thu, 6 Jun 2024 17:43:39 -0400
Subject: [PATCH 4/7] DOC: updated changelog

Modified changelog to reflect changes in this pull request.

---
 CHANGELOG.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6d7abf91..207fc21a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,10 +9,11 @@ This project adheres to [Semantic Versioning](https://semver.org/).
   * MAVEN SEP
   * MAVEN in situ key parameters
   * REACH Dosimeter
-  * DMSP SSUSI SDR-disk data
+  * DMSP SSUSI SDR-disk and SDR2-disk data
 * New Features
   * Allow files to be unzipped after download
-  * Added custom `concat_data` method to TIMED-GUVI data
+  * Added custom `concat_data` method to JHUAPL methods, for TIMED-GUVI and
+    DMSP-SSUSI data
   * Added cleaning to TIMED-GUVI SDR imaging data
 * Bug Fixes
   * Fix general clean routine to skip transformation matrices

From db5559c151128bc5382fbd6b5e58e794910aaf42 Mon Sep 17 00:00:00 2001
From: "Angeline G. Burrell"
Date: Fri, 7 Jun 2024 12:36:16 -0400
Subject: [PATCH 5/7] BUG: update DMSP SSUSI test flags

Updated the DMSP SSUSI test flags to only test for warnings in the clean
routine when appropriate and to skip the new tests for the high
resolution imaging data.

---
 pysatNASA/instruments/dmsp_ssusi.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pysatNASA/instruments/dmsp_ssusi.py b/pysatNASA/instruments/dmsp_ssusi.py
index 71593319..14f21743 100644
--- a/pysatNASA/instruments/dmsp_ssusi.py
+++ b/pysatNASA/instruments/dmsp_ssusi.py
@@ -88,6 +88,12 @@
                          for tag in inst_ids[inst_id]
                          if tag not in ['sdr-disk', 'sdr2-disk']}
              for inst_id in inst_ids.keys()}
+# TODO(#218): Remove when compliant with multi-day load tests
+_new_tests = {inst_id: {'sdr-disk': False} for inst_id in inst_ids.keys()}
+_clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
+                         for tag in inst_ids[inst_id]
+                         if tag not in ['sdr-disk', 'sdr2-disk']}
+              for inst_id in inst_ids.keys()}
 
 # ----------------------------------------------------------------------------
 # Instrument methods
 #

From c05d594380a21ea7fe232af5297cb0b3fea0b187 Mon Sep 17 00:00:00 2001
From: "Angeline G. Burrell"
Date: Fri, 7 Jun 2024 14:37:36 -0400
Subject: [PATCH 6/7] TST: expand test skipping

Tests are still failing due to excessive memory use, so add another
skip.
---
 pysatNASA/instruments/dmsp_ssusi.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pysatNASA/instruments/dmsp_ssusi.py b/pysatNASA/instruments/dmsp_ssusi.py
index 14f21743..33898a93 100644
--- a/pysatNASA/instruments/dmsp_ssusi.py
+++ b/pysatNASA/instruments/dmsp_ssusi.py
@@ -89,7 +89,8 @@
                          if tag not in ['sdr-disk', 'sdr2-disk']}
              for inst_id in inst_ids.keys()}
 # TODO(#218): Remove when compliant with multi-day load tests
-_new_tests = {inst_id: {'sdr-disk': False} for inst_id in inst_ids.keys()}
+_new_tests = {inst_id: {'sdr-disk': False, 'sdr2-disk': False}
+              for inst_id in inst_ids.keys()}
 _clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
                          for tag in inst_ids[inst_id]
                          if tag not in ['sdr-disk', 'sdr2-disk']}

From b8fc91f4774ea0e54c5c5ff4fd9a76df5b5c91f0 Mon Sep 17 00:00:00 2001
From: Angeline Burrell
Date: Wed, 3 Jul 2024 17:52:29 -0400
Subject: [PATCH 7/7] BUG: fixed large hours

Fixed a bug with roll-over hours.

---
 pysatNASA/instruments/methods/jhuapl.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pysatNASA/instruments/methods/jhuapl.py b/pysatNASA/instruments/methods/jhuapl.py
index 738446b8..d2456126 100644
--- a/pysatNASA/instruments/methods/jhuapl.py
+++ b/pysatNASA/instruments/methods/jhuapl.py
@@ -38,7 +38,8 @@ def build_dtimes(data, var, epoch=None, epoch_var='time'):
     skey = 'TIME{:s}'.format(var)
 
     if epoch is None:
-        hours = [int(np.floor(sec / 3600.0)) for sec in data[skey].values]
+        hours = np.array([int(np.floor(sec / 3600.0))
+                          for sec in data[skey].values])
         mins = [int(np.floor((sec - hours[i] * 3600) / 60.0))
                 for i, sec in enumerate(data[skey].values)]
         secs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60)))
                 for i, sec in enumerate(data[skey].values)]
         microsecs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60
                                    - secs[i]) * 1.0e6))
                      for i, sec in enumerate(data[skey].values)]
+        days = np.array([int(dval) for dval in data[dkey].values])
+
+        # Ensure hours are within a realistic range. Datetime can handle day
+        # of year roll-over for non-leap years.
+        days[hours >= 24] += 1
+        hours[hours >= 24] -= 24
+
         dtimes = [
             dt.datetime.strptime(
                 "{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
-                    int(data[ykey].values[i]), int(data[dkey].values[i]),
-                    hours[i], mins[i], secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
+                    int(data[ykey].values[i]), days[i], hours[i], mins[i],
+                    secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
             for i, microsec in enumerate(microsecs)]
     else:
         dtimes = [
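
Usage sketch (an editorial illustration, not part of the patch series): a
multi-day load is what drives the custom `concat_data` methods added above.
The 'sdr-imaging'/'low_res' tag pair and the dates below are placeholders,
and the example assumes the matching SDR files have already been downloaded.

    import datetime as dt

    import pysat
    import pysatNASA

    # Make the pysatNASA instrument modules visible to pysat
    pysat.utils.registry.register_by_module(pysatNASA.instruments)

    # Placeholder tag/inst_id; DMSP SSUSI SDR products follow the same path
    guvi = pysat.Instrument(platform='timed', name='guvi', tag='sdr-imaging',
                            inst_id='low_res')

    # Loading a date range makes pysat call the Instrument's `concat_data`,
    # which now delegates to `jhuapl.concat_data` to join the per-file
    # xarray Datasets along their multiple time dimensions
    guvi.load(date=dt.datetime(2007, 12, 30), end_date=dt.datetime(2008, 1, 1))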