Commit

Merge pull request #231 from pysat/ssusi_bug
SSUSI bug
aburrell authored Jul 12, 2024
2 parents 5005613 + 1654369 commit 22de06c
Showing 4 changed files with 124 additions and 55 deletions.
5 changes: 3 additions & 2 deletions CHANGELOG.md
@@ -9,10 +9,11 @@ This project adheres to [Semantic Versioning](https://semver.org/).
* MAVEN SEP
* MAVEN in situ key parameters
* REACH Dosimeter
* DMSP SSUSI SDR-disk data
* DMSP SSUSI SDR-disk and SDR2-disk data
* New Features
* Allow files to be unzipped after download
* Added custom `concat_data` method to TIMED-GUVI data
* Added custom `concat_data` method to JHUAPL methods, for TIMED-GUVI and
DMSP-SSUSI data
* Added cleaning to TIMED-GUVI SDR imaging data
* Bug Fixes
* Fix general clean routine to skip transformation matrices
39 changes: 35 additions & 4 deletions pysatNASA/instruments/dmsp_ssusi.py
@@ -84,6 +84,9 @@

_test_dates = {inst_id: {tag: dt.datetime(2015, 1, 1) for tag in tags.keys()}
for inst_id in inst_ids.keys()}
# TODO(#218, #222): Remove when compliant with multi-day load tests
_new_tests = {inst_id: {tag: False for tag in tags.keys()}
for inst_id in inst_ids.keys()}
# TODO(pysat#1196): Un-comment when pysat bug is fixed and released
# _clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
# for tag in inst_ids[inst_id]
@@ -93,12 +96,8 @@
# ----------------------------------------------------------------------------
# Instrument methods


# Use standard init routine
init = functools.partial(mm_nasa.init, module=mm_dmsp, name=name)
# TODO(#218, #222): Remove when compliant with multi-day load tests
_new_tests = {inst_id: {tag: False for tag in tags.keys()}
for inst_id in inst_ids.keys()}


def clean(self):
Expand All @@ -122,6 +121,38 @@ def clean(self):
return


def concat_data(self, new_data, combine_times=False, **kwargs):
"""Concatonate data to self.data for DMSP SSUSI data.
Parameters
----------
new_data : xarray.Dataset or list of such objects
New data objects to be concatonated
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
**kwargs : dict
Optional keyword arguments passed to xr.concat
Note
----
For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
except if the user includes a value for dim as a keyword argument.
"""
# Establish the time dimensions by data type
time_dims = [self.index.name]

if self.tag in ['sdr-disk', 'sdr2-disk']:
time_dims.append('time_auroral')

# Concatenate using the appropriate method for the number of time
# dimensions
jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
**kwargs)
return
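
For context, a hedged sketch of when this custom `concat_data` is exercised: pysat calls `Instrument.concat_data` internally when a load spans more than one day, so a multi-day SDR-disk load is what routes through the shared JHU APL helper. The inst_id and dates below are illustrative assumptions, and the files would need to be downloaded first.

import datetime as dt
import pysat

# Assumed inst_id and date range; data must already be available locally.
ssusi = pysat.Instrument('dmsp', 'ssusi', tag='sdr-disk', inst_id='f18')
ssusi.load(date=dt.datetime(2015, 1, 1), end_date=dt.datetime(2015, 1, 3))

# Both the main time dimension and 'time_auroral' are concatenated.
print(ssusi.data.dims)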


# ----------------------------------------------------------------------------
# Instrument functions
#
87 changes: 84 additions & 3 deletions pysatNASA/instruments/methods/jhuapl.py
@@ -38,19 +38,27 @@ def build_dtimes(data, var, epoch=None, epoch_var='time'):
skey = 'TIME{:s}'.format(var)

if epoch is None:
hours = [int(np.floor(sec / 3600.0)) for sec in data[skey].values]
hours = np.array([int(np.floor(sec / 3600.0))
for sec in data[skey].values])
mins = [int(np.floor((sec - hours[i] * 3600) / 60.0))
for i, sec in enumerate(data[skey].values)]
secs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60)))
for i, sec in enumerate(data[skey].values)]
microsecs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60
- secs[i]) * 1.0e6))
for i, sec in enumerate(data[skey].values)]
days = np.array([int(dval) for dval in data[dkey].values])

# Ensure hours are within a realistic range. Datetime can handle the
# day-of-year roll-over for non-leap years.
days[hours >= 24] += 1
hours[hours >= 24] -= 24

dtimes = [
dt.datetime.strptime(
"{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
int(data[ykey].values[i]), int(data[dkey].values[i]),
hours[i], mins[i], secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
int(data[ykey].values[i]), days[i], hours[i], mins[i],
secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
for i, microsec in enumerate(microsecs)]
else:
dtimes = [
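
As an aside on the fix above: SSUSI seconds-of-day values can reach or exceed 86400, which yields an hour of 24 that `strptime` rejects, so the new lines bump the day-of-year and wrap the hour before formatting. A minimal standalone illustration with assumed values (not data from the instrument):

import datetime as dt
import numpy as np

secs_of_day = np.array([86399.5, 86400.2])  # assumed seconds-of-day samples
days = np.array([123, 123])                 # assumed day-of-year values
hours = np.array([int(np.floor(sec / 3600.0)) for sec in secs_of_day])  # [23, 24]

# Same roll-over logic as the fix: hour 24 becomes hour 0 of the next day
days[hours >= 24] += 1
hours[hours >= 24] -= 24

print(dt.datetime.strptime("2015-{:03d}-{:02d}".format(days[1], hours[1]),
                           '%Y-%j-%H'))  # 2015-05-04 00:00:00
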
@@ -444,3 +452,76 @@ def clean_by_dqi(inst):
inst.data[dat_var].values[dqi_bad] = np.nan
inst.meta[dat_var] = {inst.meta.labels.fill_val: np.nan}
return


def concat_data(inst, time_dims, new_data, combine_times=False, **kwargs):
"""Concatonate data to inst.data for JHU APL SDR data.
Parameters
----------
inst : pysat.Instrument
Object containing a JHU APL Instrument with data
time_dims : list
List of the time dimensions
new_data : xarray.Dataset or list of such objects
New data objects to be concatonated
combine_times : bool
For SDR data, optionally combine the different datetime coordinates
into a single time coordinate (default=False)
**kwargs : dict
Optional keyword arguments passed to xr.concat
Note
----
For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
except if the user includes a value for dim as a keyword argument.
"""
# Concatenate using the appropriate method for the number of time
# dimensions
if len(time_dims) == 1:
# There is only one time dimension, but other dimensions may
# need to be adjusted
new_data = pysat.utils.coords.expand_xarray_dims(
new_data, inst.meta, exclude_dims=time_dims)

# Combine the data
inst.data = xr.combine_by_coords(new_data, **kwargs)
else:
inners = None
for ndata in new_data:
# Separate into inner datasets
inner_keys = {dim: [key for key in ndata.keys()
if dim in ndata[key].dims] for dim in time_dims}
inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}

# Add 'single_var's into 'time' dataset to keep track
sv_keys = [val.name for val in ndata.values()
if 'single_var' in val.dims]
singlevar_set = ndata.get(sv_keys)
inner_dat[inst.index.name] = xr.merge([inner_dat[inst.index.name],
singlevar_set])

# Concatenate along desired dimension with previous data
if inners is None:
# No previous data, assign the data separated by dimension
inners = dict(inner_dat)
else:
# Concatenate with existing data
inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
dim=dim) for dim in time_dims}

# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = pysat.utils.coords.expand_xarray_dims(
[inners[dim] if dim == inst.index.name else
inners[dim].rename_dims({dim: inst.index.name})
for dim in time_dims if len(inners[dim].dims) > 0],
inst.meta, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Combine all the data, indexing along time
inst.data = xr.merge(data_list)
return
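
To make the multi-dimension branch above concrete, here is a hedged toy example (variable names and values are assumptions, not SSUSI or GUVI data): variables that share the main time dimension and variables on a second time dimension are concatenated along their own dimension for each day, then merged into a single Dataset, which is the essence of what `concat_data` does for the SDR imaging products.

import numpy as np
import pandas as pd
import xarray as xr

day1 = xr.Dataset(
    {"radiance": ("time", np.ones(2)),
     "aur_flux": ("time_auroral", np.ones(3))},
    coords={"time": pd.date_range("2015-01-01", periods=2, freq="h"),
            "time_auroral": pd.date_range("2015-01-01", periods=3, freq="min")})
day2 = day1.assign_coords(
    time=day1["time"] + pd.Timedelta(days=1),
    time_auroral=day1["time_auroral"] + pd.Timedelta(days=1))

# Concatenate each group of variables along its own time dimension, then merge
merged = xr.merge(
    [xr.concat([day1[["radiance"]], day2[["radiance"]]], dim="time"),
     xr.concat([day1[["aur_flux"]], day2[["aur_flux"]]], dim="time_auroral")])
print(dict(merged.sizes))  # {'time': 4, 'time_auroral': 6}
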
48 changes: 2 additions & 46 deletions pysatNASA/instruments/timed_guvi.py
@@ -60,7 +60,6 @@

import datetime as dt
import functools
import xarray as xr

import pysat
from pysat.instruments.methods import general as mm_gen
@@ -159,51 +158,8 @@ def concat_data(self, new_data, combine_times=False, **kwargs):

# Concatenate using the appropriate method for the number of time
# dimensions
if len(time_dims) == 1:
# There is only one time dimensions, but other dimensions may
# need to be adjusted
new_data = pysat.utils.coords.expand_xarray_dims(
new_data, self.meta, exclude_dims=time_dims)

# Combine the data
self.data = xr.combine_by_coords(new_data, **kwargs)
else:
inners = None
for ndata in new_data:
# Separate into inner datasets
inner_keys = {dim: [key for key in ndata.keys()
if dim in ndata[key].dims] for dim in time_dims}
inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}

# Add 'single_var's into 'time' dataset to keep track
sv_keys = [val.name for val in ndata.values()
if 'single_var' in val.dims]
singlevar_set = ndata.get(sv_keys)
inner_dat[self.index.name] = xr.merge([inner_dat[self.index.name],
singlevar_set])

# Concatenate along desired dimension with previous data
if inners is None:
# No previous data, assign the data separated by dimension
inners = dict(inner_dat)
else:
# Concatenate with existing data
inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
dim=dim) for dim in time_dims}

# Combine all time dimensions
if inners is not None:
if combine_times:
data_list = pysat.utils.coords.expand_xarray_dims(
[inners[dim] if dim == self.index.name else
inners[dim].rename_dims({dim: self.index.name})
for dim in time_dims if len(inners[dim].dims) > 0],
self.meta, dims_equal=False)
else:
data_list = [inners[dim] for dim in time_dims]

# Combine all the data, indexing along time
self.data = xr.merge(data_list)
jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
**kwargs)
return

