diff --git a/CHANGELOG.md b/CHANGELOG.md
index 116b2df7..88a0e1e4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,10 +9,11 @@ This project adheres to [Semantic Versioning](https://semver.org/).
   * MAVEN SEP
   * MAVEN in situ key parameters
   * REACH Dosimeter
-  * DMSP SSUSI SDR-disk data
+  * DMSP SSUSI SDR-disk and SDR2-disk data
 * New Features
   * Allow files to be unzipped after download
-  * Added custom `concat_data` method to TIMED-GUVI data
+  * Added custom `concat_data` method to JHUAPL methods, for TIMED-GUVI and
+    DMSP-SSUSI data
   * Added cleaning to TIMED-GUVI SDR imaging data
 * Bug Fixes
   * Fix general clean routine to skip transformation matrices
diff --git a/pysatNASA/instruments/dmsp_ssusi.py b/pysatNASA/instruments/dmsp_ssusi.py
index f5c0ba0b..8550b8e6 100644
--- a/pysatNASA/instruments/dmsp_ssusi.py
+++ b/pysatNASA/instruments/dmsp_ssusi.py
@@ -84,6 +84,9 @@
 _test_dates = {inst_id: {tag: dt.datetime(2015, 1, 1) for tag in tags.keys()}
                for inst_id in inst_ids.keys()}
 
+# TODO(#218, #222): Remove when compliant with multi-day load tests
+_new_tests = {inst_id: {tag: False for tag in tags.keys()}
+              for inst_id in inst_ids.keys()}
 # TODO(pysat#1196): Un-comment when pysat bug is fixed and released
 # _clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
 #                          for tag in inst_ids[inst_id]
@@ -93,12 +96,8 @@
 # ----------------------------------------------------------------------------
 # Instrument methods
 
-
 # Use standard init routine
 init = functools.partial(mm_nasa.init, module=mm_dmsp, name=name)
-# TODO(#218, #222): Remove when compliant with multi-day load tests
-_new_tests = {inst_id: {tag: False for tag in tags.keys()}
-              for inst_id in inst_ids.keys()}
 
 
 def clean(self):
@@ -122,6 +121,38 @@ def clean(self):
     return
 
 
+def concat_data(self, new_data, combine_times=False, **kwargs):
+    """Concatenate data to self.data for DMSP SSUSI data.
+
+    Parameters
+    ----------
+    new_data : xarray.Dataset or list of such objects
+        New data objects to be concatenated
+    combine_times : bool
+        For SDR data, optionally combine the different datetime coordinates
+        into a single time coordinate (default=False)
+    **kwargs : dict
+        Optional keyword arguments passed to xr.concat
+
+    Note
+    ----
+    For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
+    unless the user includes a value for `dim` as a keyword argument.
+
+    """
+    # Establish the time dimensions by data type
+    time_dims = [self.index.name]
+
+    if self.tag in ['sdr-disk', 'sdr2-disk']:
+        time_dims.append('time_auroral')
+
+    # Concatenate using the appropriate method for the number of time
+    # dimensions
+    jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
+                       **kwargs)
+    return
+
+
 # ----------------------------------------------------------------------------
 # Instrument functions
 #
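The new `concat_data` method above is what pysat invokes when a load spans more than one day. A minimal usage sketch, not part of the diff; it assumes pysatNASA is installed and that the F16 SDR-disk files for these dates have already been downloaded.

```python
import datetime as dt

import pysat
import pysatNASA

# Loading a multi-day range triggers the custom concat_data, which joins
# each day's xarray.Dataset along both time dimensions for SDR-disk data
ssusi = pysat.Instrument(inst_module=pysatNASA.instruments.dmsp_ssusi,
                         tag='sdr-disk', inst_id='f16')
ssusi.load(date=dt.datetime(2015, 1, 1), end_date=dt.datetime(2015, 1, 3))

# Both the main time index and the auroral time dimension should be present
print(ssusi.data.sizes)
```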
diff --git a/pysatNASA/instruments/methods/jhuapl.py b/pysatNASA/instruments/methods/jhuapl.py
index 51d0be22..d2456126 100644
--- a/pysatNASA/instruments/methods/jhuapl.py
+++ b/pysatNASA/instruments/methods/jhuapl.py
@@ -38,7 +38,8 @@ def build_dtimes(data, var, epoch=None, epoch_var='time'):
     skey = 'TIME{:s}'.format(var)
     if epoch is None:
-        hours = [int(np.floor(sec / 3600.0)) for sec in data[skey].values]
+        hours = np.array([int(np.floor(sec / 3600.0))
+                          for sec in data[skey].values])
         mins = [int(np.floor((sec - hours[i] * 3600) / 60.0))
                 for i, sec in enumerate(data[skey].values)]
         secs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60)))
                 for i, sec in enumerate(data[skey].values)]
@@ -46,11 +47,18 @@
         microsecs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60
                                    - secs[i]) * 1.0e6))
                      for i, sec in enumerate(data[skey].values)]
+        days = np.array([int(dval) for dval in data[dkey].values])
+
+        # Ensure hours are within a realistic range. Datetime can handle
+        # day-of-year roll-over for non-leap years.
+        days[hours >= 24] += 1
+        hours[hours >= 24] -= 24
+
         dtimes = [
             dt.datetime.strptime(
                 "{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
-                    int(data[ykey].values[i]), int(data[dkey].values[i]),
-                    hours[i], mins[i], secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
+                    int(data[ykey].values[i]), days[i], hours[i], mins[i],
+                    secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
             for i, microsec in enumerate(microsecs)]
     else:
         dtimes = [
@@ -444,3 +452,76 @@ def clean_by_dqi(inst):
             inst.data[dat_var].values[dqi_bad] = np.nan
             inst.meta[dat_var] = {inst.meta.labels.fill_val: np.nan}
     return
+
+
+def concat_data(inst, time_dims, new_data, combine_times=False, **kwargs):
+    """Concatenate data to inst.data for JHU APL SDR data.
+
+    Parameters
+    ----------
+    inst : pysat.Instrument
+        Object containing a JHU APL Instrument with data
+    time_dims : list
+        List of the time dimensions
+    new_data : xarray.Dataset or list of such objects
+        New data objects to be concatenated
+    combine_times : bool
+        For SDR data, optionally combine the different datetime coordinates
+        into a single time coordinate (default=False)
+    **kwargs : dict
+        Optional keyword arguments passed to xr.concat
+
+    Note
+    ----
+    For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
+    unless the user includes a value for `dim` as a keyword argument.
+
+    """
+    # Concatenate using the appropriate method for the number of time
+    # dimensions
+    if len(time_dims) == 1:
+        # There is only one time dimension, but other dimensions may
+        # need to be adjusted
+        new_data = pysat.utils.coords.expand_xarray_dims(
+            new_data, inst.meta, exclude_dims=time_dims)
+
+        # Combine the data
+        inst.data = xr.combine_by_coords(new_data, **kwargs)
+    else:
+        inners = None
+        for ndata in new_data:
+            # Separate into inner datasets
+            inner_keys = {dim: [key for key in ndata.keys()
+                                if dim in ndata[key].dims] for dim in time_dims}
+            inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}
+
+            # Add 'single_var's into 'time' dataset to keep track
+            sv_keys = [val.name for val in ndata.values()
+                       if 'single_var' in val.dims]
+            singlevar_set = ndata.get(sv_keys)
+            inner_dat[inst.index.name] = xr.merge([inner_dat[inst.index.name],
+                                                   singlevar_set])
+
+            # Concatenate along desired dimension with previous data
+            if inners is None:
+                # No previous data, assign the data separated by dimension
+                inners = dict(inner_dat)
+            else:
+                # Concatenate with existing data
+                inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
+                                         dim=dim) for dim in time_dims}
+
+        # Combine all time dimensions
+        if inners is not None:
+            if combine_times:
+                data_list = pysat.utils.coords.expand_xarray_dims(
+                    [inners[dim] if dim == inst.index.name else
+                     inners[dim].rename_dims({dim: inst.index.name})
+                     for dim in time_dims if len(inners[dim].dims) > 0],
+                    inst.meta, dims_equal=False)
+            else:
+                data_list = [inners[dim] for dim in time_dims]
+
+            # Combine all the data, indexing along time
+            inst.data = xr.merge(data_list)
+    return
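To make the multi-time-dimension branch easier to review, here is a self-contained sketch of the pattern `concat_data` implements: variables are grouped by their time dimension, each group is concatenated along its own dimension, and the per-dimension results are merged. The variable and dimension names are invented for the illustration.

```python
import numpy as np
import xarray as xr


def make_day(t0):
    # Toy stand-in for one day of SDR data with two time dimensions
    return xr.Dataset(
        {'disk_radiance': (('time',), np.random.rand(3)),
         'auroral_index': (('time_auroral',), np.random.rand(2))},
        coords={'time': t0 + np.arange(3),
                'time_auroral': t0 + np.arange(2)})


days = [make_day(0), make_day(10)]
time_dims = ['time', 'time_auroral']

# Group variables by their time dimension, concatenate each group along
# its own dimension, then merge the results into a single Dataset
inners = {dim: xr.concat([day[[var for var in day.data_vars
                               if dim in day[var].dims]] for day in days],
                         dim=dim)
          for dim in time_dims}
merged = xr.merge(inners.values())
print(merged.sizes)  # time: 6, time_auroral: 4
```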
diff --git a/pysatNASA/instruments/timed_guvi.py b/pysatNASA/instruments/timed_guvi.py
index 06d81faf..4f7a98bd 100644
--- a/pysatNASA/instruments/timed_guvi.py
+++ b/pysatNASA/instruments/timed_guvi.py
@@ -60,7 +60,6 @@
 
 import datetime as dt
 import functools
-import xarray as xr
 
 import pysat
 from pysat.instruments.methods import general as mm_gen
@@ -159,51 +158,8 @@ def concat_data(self, new_data, combine_times=False, **kwargs):
 
     # Concatonate using the appropriate method for the number of time
     # dimensions
-    if len(time_dims) == 1:
-        # There is only one time dimensions, but other dimensions may
-        # need to be adjusted
-        new_data = pysat.utils.coords.expand_xarray_dims(
-            new_data, self.meta, exclude_dims=time_dims)
-
-        # Combine the data
-        self.data = xr.combine_by_coords(new_data, **kwargs)
-    else:
-        inners = None
-        for ndata in new_data:
-            # Separate into inner datasets
-            inner_keys = {dim: [key for key in ndata.keys()
-                                if dim in ndata[key].dims] for dim in time_dims}
-            inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}
-
-            # Add 'single_var's into 'time' dataset to keep track
-            sv_keys = [val.name for val in ndata.values()
-                       if 'single_var' in val.dims]
-            singlevar_set = ndata.get(sv_keys)
-            inner_dat[self.index.name] = xr.merge([inner_dat[self.index.name],
-                                                   singlevar_set])
-
-            # Concatenate along desired dimension with previous data
-            if inners is None:
-                # No previous data, assign the data separated by dimension
-                inners = dict(inner_dat)
-            else:
-                # Concatenate with existing data
-                inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
-                                         dim=dim) for dim in time_dims}
-
-        # Combine all time dimensions
-        if inners is not None:
-            if combine_times:
-                data_list = pysat.utils.coords.expand_xarray_dims(
-                    [inners[dim] if dim == self.index.name else
-                     inners[dim].rename_dims({dim: self.index.name})
-                     for dim in time_dims if len(inners[dim].dims) > 0],
-                    self.meta, dims_equal=False)
-            else:
-                data_list = [inners[dim] for dim in time_dims]
-
-            # Combine all the data, indexing along time
-            self.data = xr.merge(data_list)
+    jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
+                       **kwargs)
     return
 
 
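With the shared helper in place, TIMED-GUVI multi-day loads behave as before, now routing through `jhuapl.concat_data`; passing `combine_times=True` renames the auxiliary time dimensions to the main time index before merging. A usage sketch, not part of the diff; the tag, inst_id, and dates are illustrative and the corresponding files must already be downloaded.

```python
import datetime as dt

import pysat
import pysatNASA

# Multi-day SDR-imaging loads exercise the shared concatenation helper
guvi = pysat.Instrument(inst_module=pysatNASA.instruments.timed_guvi,
                        tag='sdr-imaging', inst_id='high_res')
guvi.load(date=dt.datetime(2005, 6, 28), end_date=dt.datetime(2005, 6, 30))
```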