SSUSI bug #231

Merged 9 commits on Jul 12, 2024
5 changes: 3 additions & 2 deletions CHANGELOG.md
@@ -9,10 +9,11 @@ This project adheres to [Semantic Versioning](https://semver.org/).
  * MAVEN SEP
  * MAVEN in situ key parameters
  * REACH Dosimeter
-  * DMSP SSUSI SDR-disk data
+  * DMSP SSUSI SDR-disk and SDR2-disk data
* New Features
  * Allow files to be unzipped after download
-  * Added custom `concat_data` method to TIMED-GUVI data
+  * Added custom `concat_data` method to JHUAPL methods, for TIMED-GUVI and
+    DMSP-SSUSI data
  * Added cleaning to TIMED-GUVI SDR imaging data
* Bug Fixes
  * Fix general clean routine to skip transformation matrices
39 changes: 35 additions & 4 deletions pysatNASA/instruments/dmsp_ssusi.py
@@ -84,6 +84,9 @@

_test_dates = {inst_id: {tag: dt.datetime(2015, 1, 1) for tag in tags.keys()}
               for inst_id in inst_ids.keys()}
+# TODO(#218, #222): Remove when compliant with multi-day load tests
+_new_tests = {inst_id: {tag: False for tag in tags.keys()}
+              for inst_id in inst_ids.keys()}
# TODO(pysat#1196): Un-comment when pysat bug is fixed and released
# _clean_warn = {inst_id: {tag: mm_nasa.clean_warnings
#                          for tag in inst_ids[inst_id]
@@ -93,12 +96,8 @@
# ----------------------------------------------------------------------------
# Instrument methods


# Use standard init routine
init = functools.partial(mm_nasa.init, module=mm_dmsp, name=name)
-# TODO(#218, #222): Remove when compliant with multi-day load tests
-_new_tests = {inst_id: {tag: False for tag in tags.keys()}
-              for inst_id in inst_ids.keys()}


def clean(self):
@@ -122,6 +121,38 @@ def clean(self):
    return


+def concat_data(self, new_data, combine_times=False, **kwargs):
+    """Concatenate data to self.data for DMSP SSUSI data.
+
+    Parameters
+    ----------
+    new_data : xarray.Dataset or list of such objects
+        New data objects to be concatenated
+    combine_times : bool
+        For SDR data, optionally combine the different datetime coordinates
+        into a single time coordinate (default=False)
+    **kwargs : dict
+        Optional keyword arguments passed to xr.concat
+
+    Note
+    ----
+    For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
+    unless the user includes a value for dim as a keyword argument.
+
+    """
+    # Establish the time dimensions by data type
+    time_dims = [self.index.name]
+
+    if self.tag in ['sdr-disk', 'sdr2-disk']:
+        time_dims.append('time_auroral')
+
+    # Concatenate using the appropriate method for the number of time
+    # dimensions
+    jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
+                       **kwargs)
+    return

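A multi-day load is what exercises this method. A minimal usage sketch, assuming the relevant SSUSI files have already been downloaded locally (the dates, tag, and inst_id below are illustrative):

```python
# Hypothetical usage sketch: pysat calls the custom concat_data above
# whenever a load spans more than one day of DMSP SSUSI data.
import datetime as dt

import pysat
import pysatNASA

# Register the pysatNASA instruments once per pysat installation
pysat.utils.registry.register_by_module(pysatNASA.instruments)

ssusi = pysat.Instrument('dmsp', 'ssusi', tag='sdr-disk', inst_id='f18')
ssusi.load(date=dt.datetime(2015, 1, 1), end_date=dt.datetime(2015, 1, 3))
print(ssusi.data.dims)  # expect both the main time and 'time_auroral' dims
```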

# ----------------------------------------------------------------------------
# Instrument functions
#
87 changes: 84 additions & 3 deletions pysatNASA/instruments/methods/jhuapl.py
@@ -38,19 +38,27 @@ def build_dtimes(data, var, epoch=None, epoch_var='time'):
    skey = 'TIME{:s}'.format(var)

    if epoch is None:
-        hours = [int(np.floor(sec / 3600.0)) for sec in data[skey].values]
+        hours = np.array([int(np.floor(sec / 3600.0))
+                          for sec in data[skey].values])
        mins = [int(np.floor((sec - hours[i] * 3600) / 60.0))
                for i, sec in enumerate(data[skey].values)]
        secs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60)))
                for i, sec in enumerate(data[skey].values)]
        microsecs = [int(np.floor((sec - hours[i] * 3600 - mins[i] * 60
                                   - secs[i]) * 1.0e6))
                     for i, sec in enumerate(data[skey].values)]
+        days = np.array([int(dval) for dval in data[dkey].values])
+
+        # Ensure hours are within a realistic range. Datetime can handle day of
+        # roll-over for non-leap years.
+        days[hours >= 24] += 1
+        hours[hours >= 24] -= 24
+
        dtimes = [
            dt.datetime.strptime(
                "{:4d}-{:03d}-{:02d}-{:02d}-{:02d}-{:06d}".format(
-                    int(data[ykey].values[i]), int(data[dkey].values[i]),
-                    hours[i], mins[i], secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
+                    int(data[ykey].values[i]), days[i], hours[i], mins[i],
+                    secs[i], microsec), '%Y-%j-%H-%M-%S-%f')
            for i, microsec in enumerate(microsecs)]
    else:
        dtimes = [
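The roll-over guard added above is the heart of the bug fix: an auroral seconds-of-day value at or past 86400 floors to hour 24, which `strptime` rejects for the `%H` field. A small sketch of the new behavior with made-up values:

```python
# Sketch of the roll-over handling with hypothetical seconds-of-day values;
# 86401.2 s floors to hour 24, so the day advances and the hour wraps to 0.
import numpy as np

secs_of_day = np.array([86399.5, 86401.2])
days = np.array([32, 32])  # day of year for each sample

hours = np.array([int(np.floor(sec / 3600.0)) for sec in secs_of_day])
days[hours >= 24] += 1
hours[hours >= 24] -= 24

print(days, hours)  # [32 33] [23  0]
```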
@@ -444,3 +452,76 @@ def clean_by_dqi(inst):
            inst.data[dat_var].values[dqi_bad] = np.nan
            inst.meta[dat_var] = {inst.meta.labels.fill_val: np.nan}
    return


+def concat_data(inst, time_dims, new_data, combine_times=False, **kwargs):
+    """Concatenate data to inst.data for JHU APL SDR data.
+
+    Parameters
+    ----------
+    inst : pysat.Instrument
+        Object containing a JHU APL Instrument with data
+    time_dims : list
+        List of the time dimensions
+    new_data : xarray.Dataset or list of such objects
+        New data objects to be concatenated
+    combine_times : bool
+        For SDR data, optionally combine the different datetime coordinates
+        into a single time coordinate (default=False)
+    **kwargs : dict
+        Optional keyword arguments passed to xr.concat
+
+    Note
+    ----
+    For xarray, `dim=Instrument.index.name` is passed along to xarray.concat
+    unless the user includes a value for dim as a keyword argument.
+
+    """
+    # Concatenate using the appropriate method for the number of time
+    # dimensions
+    if len(time_dims) == 1:
+        # There is only one time dimension, but other dimensions may
+        # need to be adjusted
+        new_data = pysat.utils.coords.expand_xarray_dims(
+            new_data, inst.meta, exclude_dims=time_dims)
+
+        # Combine the data
+        inst.data = xr.combine_by_coords(new_data, **kwargs)
+    else:
+        inners = None
+        for ndata in new_data:
+            # Separate into inner datasets
+            inner_keys = {dim: [key for key in ndata.keys()
+                                if dim in ndata[key].dims] for dim in time_dims}
+            inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}
+
+            # Add 'single_var's into 'time' dataset to keep track
+            sv_keys = [val.name for val in ndata.values()
+                       if 'single_var' in val.dims]
+            singlevar_set = ndata.get(sv_keys)
+            inner_dat[inst.index.name] = xr.merge([inner_dat[inst.index.name],
+                                                   singlevar_set])
+
+            # Concatenate along desired dimension with previous data
+            if inners is None:
+                # No previous data, assign the data separated by dimension
+                inners = dict(inner_dat)
+            else:
+                # Concatenate with existing data
+                inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
+                                         dim=dim) for dim in time_dims}
+
+        # Combine all time dimensions
+        if inners is not None:
+            if combine_times:
+                data_list = pysat.utils.coords.expand_xarray_dims(
+                    [inners[dim] if dim == inst.index.name else
+                     inners[dim].rename_dims({dim: inst.index.name})
+                     for dim in time_dims if len(inners[dim].dims) > 0],
+                    inst.meta, dims_equal=False)
+            else:
+                data_list = [inners[dim] for dim in time_dims]
+
+            # Combine all the data, indexing along time
+            inst.data = xr.merge(data_list)
+    return
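For reviewers unfamiliar with the multi-dimension branch: variables are grouped by their time dimension, each group is concatenated along its own dimension, and the groups are merged back into a single Dataset at the end. A toy illustration with synthetic data (the variable and dimension names here are placeholders, not the SSUSI names):

```python
import numpy as np
import xarray as xr

# Two "days" of data with two independent time dimensions
day1 = xr.Dataset({'disk': ('time', np.arange(3.0))},
                  coords={'time': [0, 1, 2]})
aur1 = xr.Dataset({'aurora': ('time_auroral', np.arange(2.0))},
                  coords={'time_auroral': [0, 1]})
day2 = xr.Dataset({'disk': ('time', np.arange(3.0))},
                  coords={'time': [3, 4, 5]})
aur2 = xr.Dataset({'aurora': ('time_auroral', np.arange(2.0))},
                  coords={'time_auroral': [2, 3]})

# Concatenate each group along its own dimension, then merge
disk = xr.concat([day1, day2], dim='time')
aurora = xr.concat([aur1, aur2], dim='time_auroral')
combined = xr.merge([disk, aurora])
print(dict(combined.sizes))  # {'time': 6, 'time_auroral': 4}
```

With `combine_times=True`, the secondary time dimensions are instead renamed onto the main index and combined into a single time coordinate.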
48 changes: 2 additions & 46 deletions pysatNASA/instruments/timed_guvi.py
@@ -60,7 +60,6 @@

import datetime as dt
import functools
-import xarray as xr

import pysat
from pysat.instruments.methods import general as mm_gen
@@ -159,51 +158,8 @@ def concat_data(self, new_data, combine_times=False, **kwargs):

    # Concatenate using the appropriate method for the number of time
    # dimensions
-    if len(time_dims) == 1:
-        # There is only one time dimensions, but other dimensions may
-        # need to be adjusted
-        new_data = pysat.utils.coords.expand_xarray_dims(
-            new_data, self.meta, exclude_dims=time_dims)
-
-        # Combine the data
-        self.data = xr.combine_by_coords(new_data, **kwargs)
-    else:
-        inners = None
-        for ndata in new_data:
-            # Separate into inner datasets
-            inner_keys = {dim: [key for key in ndata.keys()
-                                if dim in ndata[key].dims] for dim in time_dims}
-            inner_dat = {dim: ndata.get(inner_keys[dim]) for dim in time_dims}
-
-            # Add 'single_var's into 'time' dataset to keep track
-            sv_keys = [val.name for val in ndata.values()
-                       if 'single_var' in val.dims]
-            singlevar_set = ndata.get(sv_keys)
-            inner_dat[self.index.name] = xr.merge([inner_dat[self.index.name],
-                                                   singlevar_set])
-
-            # Concatenate along desired dimension with previous data
-            if inners is None:
-                # No previous data, assign the data separated by dimension
-                inners = dict(inner_dat)
-            else:
-                # Concatenate with existing data
-                inners = {dim: xr.concat([inners[dim], inner_dat[dim]],
-                                         dim=dim) for dim in time_dims}
-
-        # Combine all time dimensions
-        if inners is not None:
-            if combine_times:
-                data_list = pysat.utils.coords.expand_xarray_dims(
-                    [inners[dim] if dim == self.index.name else
-                     inners[dim].rename_dims({dim: self.index.name})
-                     for dim in time_dims if len(inners[dim].dims) > 0],
-                    self.meta, dims_equal=False)
-            else:
-                data_list = [inners[dim] for dim in time_dims]
-
-            # Combine all the data, indexing along time
-            self.data = xr.merge(data_list)
+    jhuapl.concat_data(self, time_dims, new_data, combine_times=combine_times,
+                       **kwargs)
    return
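The refactor leaves each instrument module with only a thin wrapper, so any future JHU APL instrument can follow the same pattern and concatenation fixes only need to land once in `jhuapl.py`. A minimal sketch of that pattern (the tag check here is illustrative, not a real module):

```python
# Hypothetical wrapper for a JHU APL-style instrument module: declare the
# time dimensions for the data product, then defer to the shared method.
from pysatNASA.instruments.methods import jhuapl


def concat_data(self, new_data, combine_times=False, **kwargs):
    """Concatenate data to self.data for a JHU APL SDR instrument."""
    time_dims = [self.index.name]
    if self.tag == 'sdr-imaging':  # illustrative tag with a second time dim
        time_dims.append('time_auroral')
    jhuapl.concat_data(self, time_dims, new_data,
                       combine_times=combine_times, **kwargs)
    return
```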

