diff --git a/CHANGELOG.md b/CHANGELOG.md index 0018f066..6d7abf91 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,13 +2,14 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/). -## [0.0.6] - 2023-XX-XX +## [0.0.6] - 2024-XX-XX * New Instruments * DE2 VEFIMAGB - electric and magnetic field on the same cadence * MAVEN mag * MAVEN SEP * MAVEN in situ key parameters * REACH Dosimeter + * DMSP SSUSI SDR-disk data * New Features * Allow files to be unzipped after download * Added custom `concat_data` method to TIMED-GUVI data diff --git a/docs/index.rst b/docs/index.rst index 926c4f76..a2b0ca12 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,7 +9,7 @@ routines to NASA space science data as pysat.Instrument objects through the CDAWeb interface. .. toctree:: - :maxdepth: -1 + :maxdepth: 2 overview.rst installation.rst diff --git a/pysatNASA/instruments/dmsp_ssusi.py b/pysatNASA/instruments/dmsp_ssusi.py index 268985a8..0b4a892c 100644 --- a/pysatNASA/instruments/dmsp_ssusi.py +++ b/pysatNASA/instruments/dmsp_ssusi.py @@ -29,6 +29,8 @@ 'ssusi' tag 'edr-aurora' + 'sdr-disk' + 'sdr2-disk' inst_id 'f16', 'f17', 'f18', 'f19' @@ -52,6 +54,7 @@ import datetime as dt import functools +import pysat from pysat.instruments.methods import general as mm_gen from pysatNASA.instruments.methods import cdaweb as cdw @@ -67,7 +70,9 @@ tags = {'edr-aurora': ''.join(['Electron energy flux and mean energy, auroral', ' boundaries, identified discrete auroral arcs,', ' hemispheric power, and magnetic field lines ', - 'traced to 4 Earth radii'])} + 'traced to 4 Earth radii']), + 'sdr-disk': 'Fine resolution gridded disk radiances', + 'sdr2-disk': 'Coarse resolution gridded disk radiances'} inst_ids = {sat_id: list(tags.keys()) for sat_id in ['f16', 'f17', 'f18', 'f19']} @@ -79,6 +84,10 @@ _test_dates = {inst_id: {tag: dt.datetime(2015, 1, 1) for tag in tags.keys()} for inst_id in inst_ids.keys()} 
+_clean_warn = {inst_id: {tag: mm_nasa.clean_warnings + for tag in inst_ids[inst_id] + if tag not in ['sdr-disk', 'sdr2-disk']} + for inst_id in inst_ids.keys()} # ---------------------------------------------------------------------------- # Instrument methods @@ -87,8 +96,27 @@ # Use standard init routine init = functools.partial(mm_nasa.init, module=mm_dmsp, name=name) -# No cleaning, use standard warning function instead -clean = mm_nasa.clean_warn + +def clean(self): + """Clean DMSP SSUSI imaging data. + + Note + ---- + Supports 'clean', 'dusty', 'dirty', 'none'. Method is + not called by pysat if clean_level is None or 'none'. + + """ + if self.tag in ["sdr-disk", "sdr2-disk"]: + jhuapl.clean_by_dqi(self) + else: + # Follow the same warning format as the general clean warning, but + # with additional information. + pysat.logger.warning(' '.join(['No cleaning routines available for', + self.platform, self.name, self.tag, + self.inst_id, 'at clean level', + self.clean_level])) + return + # ---------------------------------------------------------------------------- # Instrument functions @@ -105,10 +133,6 @@ list_files = functools.partial(mm_gen.list_files, supported_tags=supported_tags) -# Set the load routine -load = functools.partial(jhuapl.load_edr_aurora, pandas_format=pandas_format, - strict_dim_check=False) - # Set the download routine basic_tag = {'remote_dir': ''.join(('/pub/data/dmsp/dmsp{inst_id:s}/ssusi/', '/data/{tag:s}/{{year:4d}}/{{day:03d}}/')), @@ -122,3 +146,63 @@ # Set the list_remote_files routine list_remote_files = functools.partial(cdw.list_remote_files, supported_tags=download_tags) + + +# Set the load routine +def load(fnames, tag='', inst_id='', combine_times=False): + """Load DMSP SSUSI data into `xarray.DataSet` and `pysat.Meta` objects. + + This routine is called as needed by pysat. It is not intended + for direct user interaction. 
+ + Parameters + ---------- + fnames : array-like + Iterable of filename strings, full path, to data files to be loaded. + This input is nominally provided by pysat itself. + tag : str + Tag name used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. + inst_id : str + Satellite ID used to identify particular data set to be loaded. + This input is nominally provided by pysat itself. + combine_times : bool + For SDR data, optionally combine the different datetime coordinates + into a single time coordinate (default=False) + + Returns + ------- + data : xr.DataSet + A xarray DataSet with data prepared for the pysat.Instrument + meta : pysat.Meta + Metadata formatted for a pysat.Instrument object. + + Raises + ------ + ValueError + If temporal dimensions are not consistent + + Note + ---- + Any additional keyword arguments passed to pysat.Instrument + upon instantiation are passed along to this routine. + + Examples + -------- + :: + + inst = pysat.Instrument('dmsp', 'ssusi', tag='sdr-disk', inst_id='f16') + inst.load(2015, 1) + + """ + if tag == 'edr-aurora': + data, meta = jhuapl.load_edr_aurora(fnames, tag, inst_id, + pandas_format=pandas_format, + strict_dim_check=False) + elif tag.find('disk') > 0: + data, meta = jhuapl.load_sdr_aurora(fnames, name, tag, inst_id, + pandas_format=pandas_format, + strict_dim_check=False, + combine_times=combine_times) + + return data, meta diff --git a/pysatNASA/instruments/methods/jhuapl.py b/pysatNASA/instruments/methods/jhuapl.py index 2b20cc53..51d0be22 100644 --- a/pysatNASA/instruments/methods/jhuapl.py +++ b/pysatNASA/instruments/methods/jhuapl.py @@ -158,7 +158,7 @@ def load_edr_aurora(fnames, tag='', inst_id='', pandas_format=False, return data, mdata -def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, +def load_sdr_aurora(fnames, name='', tag='', inst_id='', pandas_format=False, strict_dim_check=True, combine_times=False): """Load JHU APL SDR data and meta 
data. @@ -166,6 +166,8 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, ---------- fnames : array-like Iterable of filename strings, full path, to data files to be loaded. + name : str + Instrument name used to identify the data set to be loaded (default='') tag : str Tag name used to identify particular data set to be loaded (default='') inst_id : str @@ -193,13 +195,6 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, Logger warning 'Epoch label: TIME is not a dimension.' is raised due to the data format and pysat file expectations. - Examples - -------- - :: - - inst = pysat.Instrument('timed', 'guvi', tag='edr-aur') - inst.load(2003, 1) - """ # Initialize the output mdata = pysat.Meta() @@ -216,9 +211,14 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, 'PIERCEPOINT_DAY_LATITUDE', 'PIERCEPOINT_DAY_LONGITUDE', 'PIERCEPOINT_DAY_ALTITUDE', 'PIERCEPOINT_DAY_SZA'] time_dims = ['time'] - rename_dims = {'nAlongDay': 'nAlong', 'nAlongNight': 'nAlong'} + if name == 'guvi': + rename_dims = {'nAlongDay': 'nAlong', 'nAlongNight': 'nAlong'} + swap_dims = {'nAlong': 'time'} + else: + rename_dims = {} + swap_dims = {'nAlongDay': 'time'} - if tag == 'sdr-imaging': + if tag in ['sdr-imaging', 'sdr-disk', 'sdr2-disk']: time_vars.extend(["YEAR_DAY_AURORAL", "DOY_DAY_AURORAL", "TIME_DAY_AURORAL", "TIME_EPOCH_DAY_AURORAL"]) coords.extend(['PIERCEPOINT_DAY_LATITUDE_AURORAL', @@ -226,9 +226,13 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, 'PIERCEPOINT_DAY_ALTITUDE_AURORAL', 'PIERCEPOINT_DAY_SZA_AURORAL']) time_dims.append('time_auroral') - rename_dims['nCrossDay'] = 'nCross' - rename_dims['nCrossNight'] = 'nCross' rename_dims['nAlongDayAur'] = 'time_auroral' + if name == 'guvi': + rename_dims['nCrossDay'] = 'nCross' + rename_dims['nCrossNight'] = 'nCross' + else: + time_dims.append('time_night') + rename_dims['nAlongNight'] = 'time_night' elif tag == 'sdr-spectrograph': 
coords.extend(['PIERCEPOINT_NIGHT_ZENITH_ANGLE', 'PIERCEPOINT_NIGHT_SAZIMUTH', @@ -260,18 +264,22 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, # the UNIX epoch as the date offset ftime = build_dtimes(sdata, '_DAY', dt.datetime(1970, 1, 1)) - # Ensure identical day and night dimensions - if sdata.dims['nAlongDay'] != sdata.dims['nAlongNight']: - raise ValueError('Along-track day and night dimensions differ') + # Ensure identical day and night dimensions for GUVI + if name == 'guvi': + if sdata.dims['nAlongDay'] != sdata.dims['nAlongNight']: + raise ValueError('Along-track day and night dimensions differ') - if 'nCrossDay' in rename_dims.keys(): - if sdata.dims['nCrossDay'] != sdata.dims['nCrossNight']: - raise ValueError('Cross-track day and night dimensions differ') + if 'nCrossDay' in rename_dims.keys(): + if sdata.dims['nCrossDay'] != sdata.dims['nCrossNight']: + raise ValueError(''.join([ + 'Cross-track day and night dimensions differ ', + '{:} != {:}'.format(sdata.dims['nCrossDay'], + sdata.dims['nCrossNight'])])) - # Combine identical dimensions and rename 'nAlong' to 'time' + # Combine identical dimensions and rename some time dimensions sdata = sdata.rename_dims(rename_dims) - if tag == 'sdr-imaging': + if tag in ['sdr-imaging', 'sdr-disk', 'sdr2-disk']: sdata = sdata.assign(time_auroral=build_dtimes(sdata, '_DAY_AURORAL')) elif tag == 'sdr-spectrograph' and inst_id == 'low_res': @@ -279,13 +287,18 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, sdata, '_GAIM_DAY'), time_gaim_night=build_dtimes( sdata, '_GAIM_NIGHT')) - # Test that day and night times are consistent to the nearest second - for i, ntime in enumerate(build_dtimes(sdata, '_NIGHT')): - if abs(ntime - ftime[i]).total_seconds() > 1.0: - raise ValueError('Day and night times differ') + # Test that day and night times are consistent + if name == 'guvi': + max_diff = 1.0 + for i, ntime in enumerate(build_dtimes(sdata, '_NIGHT')): + diff_sec = 
abs(ntime - ftime[i]).total_seconds() + if diff_sec > max_diff: + raise ValueError(''.join(['Day and night times differ by ', + '{:.3f} s >= {:.3f} s'.format( + diff_sec, max_diff)])) # Remove redundant time variables and rname the 'nAlong' dimension - sdata = sdata.drop_vars(time_vars).swap_dims({'nAlong': 'time'}) + sdata = sdata.drop_vars(time_vars).swap_dims(swap_dims) # Assign time as a coordinate for combining files indexing sdata['time'] = ftime @@ -334,16 +347,41 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, # Combine all the data, indexing along time data = xr.merge(data_list) + if name == 'ssusi': + # Data may not contain both day and night values + bad_coords = [coord for coord in coords + if coord not in data.data_vars.keys()] + + if len(bad_coords) == 4: + # Ensure the time of day is consistent + tod = bad_coords[0].split("_")[1] + if np.any([coord.find(tod) < 0 for coord in bad_coords]): + raise ValueError('Some {:} coordinates are missing'.format( + tod)) + + coords = [coord for coord in coords if coord not in bad_coords] + elif len(bad_coords) == len(coords): + raise ValueError('All coordinates are missing from data') + elif len(bad_coords) > 0: + raise ValueError(''.join(['Unexpected amount of coordinates ', + 'missing from data: {:}'.format( + bad_coords)])) + # Set additional coordinates data = data.set_coords(coords).assign_coords({'time': data['time']}) - if tag == 'sdr-imaging': - data = data.assign_coords( - {'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort", - "LBHlong"], - "nchanAur": ["121.6nm", "130.4nm", "135.6nm", "LBHshort", - "LBHlong"], - "nCross": sdata.nCross.data, - "nCrossDayAur": sdata.nCrossDayAur.data}) + if tag in ['sdr-imaging', 'sdr-disk', 'sdr2-disk']: + # Get the additional coordinates to assign + add_coords = {'nchan': ["121.6nm", "130.4nm", "135.6nm", "LBHshort", + "LBHlong"], + "nchanAur": ["121.6nm", "130.4nm", "135.6nm", + "LBHshort", "LBHlong"]} + for dvar in sdata.data_vars.keys(): + if 
dvar.find('nCross') == 0: + # Identify all cross-track variables + add_coords[dvar] = sdata[dvar].data + + # Assign the additional coordinates + data = data.assign_coords(add_coords) elif tag == 'sdr-spectrograph': data = data.assign_coords({"nchan": ["121.6nm", "130.4nm", "135.6nm", "LBHshort", @@ -353,3 +391,56 @@ def load_sdr_aurora(fnames, tag='', inst_id='', pandas_format=False, data = data.sortby('time') return data, mdata + + +def clean_by_dqi(inst): + """Clean JHU APL data to different levels using the DQI flags. + + Parameters + ---------- + inst : pysat.Instrument + Object containing a JHU APL Instrument with data + + Note + ---- + 0: MeV noise in pixel, (allowed at clean) + 1: SAA, (allowed at dusty/dirty) + 2: unknown mirror/bit position (allowed at none) + 3: LBH Thresh exceeded (allowed at none) + + """ + # Find the flag variables + dqi_vars = [var for var in inst.variables if var.find('DQI') == 0] + + # Find the variables affected by each flag + dat_vars = dict() + for dqi in dqi_vars: + dqi_dims = inst.data[dqi].dims + dat_vars[dqi] = [var for var in inst.variables + if dqi_dims == inst.data[var].dims[:len(dqi_dims)] + and var not in inst.data.coords.keys() + and var.find("IN_SAA") < 0 and var not in dqi_vars] + + for dqi in dqi_vars: + if inst.clean_level == 'clean': + # For clean, require DQI of zero (MeV noise only) + dqi_bad = inst.data[dqi].values > 0 + elif inst.clean_level in ['dusty', 'dirty']: + # For dusty and dirty, allow the SAA region as well + dqi_bad = inst.data[dqi].values > 1 + else: + # For none, allow all to pass + dqi_bad = np.full(inst.data[dqi].values.shape, False) + + # Apply the DQI mask to the data, replacing bad values with + # appropriate fill values if there are bad values + if dqi_bad.any(): + for dat_var in dat_vars[dqi]: + fill_val = inst.meta[dat_var, inst.meta.labels.fill_val] + try: + inst.data[dat_var].values[dqi_bad] = fill_val + except ValueError: + # Try again with NaN, a bad fill value was set + 
inst.data[dat_var].values[dqi_bad] = np.nan + inst.meta[dat_var] = {inst.meta.labels.fill_val: np.nan} + return diff --git a/pysatNASA/instruments/timed_guvi.py b/pysatNASA/instruments/timed_guvi.py index cdaa28d4..6087ce6e 100644 --- a/pysatNASA/instruments/timed_guvi.py +++ b/pysatNASA/instruments/timed_guvi.py @@ -100,9 +100,6 @@ _clean_warn = {inst_id: {tag: mm_nasa.clean_warnings for tag in inst_ids[inst_id] if tag != 'sdr-imaging'} for inst_id in inst_ids.keys()} -for inst_id in ['high_res', 'low_res']: - _clean_warn[inst_id]['sdr-imaging'] = {'dirty': mm_nasa.clean_warnings[ - 'dirty']} # ---------------------------------------------------------------------------- # Instrument methods @@ -120,32 +117,8 @@ def clean(self): not called by pysat if clean_level is None or 'none'. """ - if self.tag == "sdr-imaging" and self.clean_level in ['clean', 'dusty']: - # Find the flag variables - dqi_vars = [var for var in self.variables if var.find('DQI') == 0] - - # Find the variables affected by each flag - dat_vars = {dqi: [var for var in self.variables if var.find(dqi) > 0] - if dqi.find('AURORAL') >= 0 else - [var for var in self.variables if var.find('AURORAL') < 0 - and var.find(dqi) > 0] for dqi in dqi_vars} - - for dqi in dqi_vars: - if self.clean_level == 'clean': - # For clean, require DQI of zero (MeV noise only) - dqi_bad = self.data[dqi].values > 0 - else: - # For dusty, allow the SAA region as well - dqi_bad = self.data[dqi].values > 1 - - # Apply the DQI mask to the data, replacing bad values with - # appropriate fill values - for dat_var in dat_vars[dqi]: - if self.data[dat_var].shape == dqi_bad.shape or self.data[ - dat_var].shape[:-1] == dqi_bad.shape: - # Only apply to data with the correct dimensions - fill_val = self.meta[dat_var, self.meta.labels.fill_val] - self.data[dat_var].values[dqi_bad] = fill_val + if self.tag == "sdr-imaging": + jhuapl.clean_by_dqi(self) else: # Follow the same warning format as the general clean warning, but # with 
additional information. @@ -323,7 +296,7 @@ def load(fnames, tag='', inst_id='', combine_times=False): pandas_format=pandas_format, strict_dim_check=False) else: - data, meta = jhuapl.load_sdr_aurora(fnames, tag, inst_id, + data, meta = jhuapl.load_sdr_aurora(fnames, name, tag, inst_id, pandas_format=pandas_format, strict_dim_check=False, combine_times=combine_times) diff --git a/pysatNASA/tests/test_methods_jhuapl.py b/pysatNASA/tests/test_methods_jhuapl.py new file mode 100644 index 00000000..b3d26cff --- /dev/null +++ b/pysatNASA/tests/test_methods_jhuapl.py @@ -0,0 +1,145 @@ +"""Unit tests for the JHU APL instrument methods.""" + +import datetime as dt +import numpy as np +import pandas as pds + +import pytest + +import pysat +from pysatNASA.instruments.methods import jhuapl + + +class TestJHUAPL(object): + """Unit tests for `pysat.instrument.methods.jhuapl`.""" + + def setup_method(self): + """Set up the unit test environment for each method.""" + + self.test_inst = pysat.Instrument( + inst_module=pysat.instruments.pysat_ndtesting) + self.var_list = ["images", "variable_profiles"] + self.out = None + self.comp = None + self.epoch_date = dt.datetime(1970, 1, 1) + return + + def teardown_method(self): + """Clean up the unit test environment after each method.""" + + del self.test_inst, self.var_list, self.out, self.comp + return + + def set_jhuapl(self): + """Update the test instrument to have JHUAPL variables.""" + + if self.test_inst.empty: + self.test_inst.load( + date=pysat.instruments.pysat_ndtesting._test_dates['']['']) + + # Create the common string for this time using `var` + self.test_inst['YEAR{:s}'.format(self.var_list[0])] = np.full( + shape=self.test_inst['time'].shape, fill_value=self.test_inst.yr) + self.test_inst['DOY{:s}'.format(self.var_list[0])] = np.full( + shape=self.test_inst['time'].shape, fill_value=self.test_inst.doy) + self.test_inst['TIME{:s}'.format(self.var_list[0])] = self.test_inst[ 'uts'] + self.test_inst['EPOCH'] = 
[self.epoch_date + dt.timedelta(seconds=sec) + for sec in self.test_inst['uts'].values] + + # Add DQI masks for the multi-dim data variables + self.test_inst['DQI_Z'] = (('time', 'z'), np.zeros(shape=( + self.test_inst.data.sizes['time'], self.test_inst.data.sizes['z']))) + self.test_inst['DQI_X'] = (('time', 'x'), np.zeros(shape=( + self.test_inst.data.sizes['time'], self.test_inst.data.sizes['x']))) + + # Set some bad values for the DQI data + self.test_inst['DQI_Z'][:, 0] = 3 + self.test_inst['DQI_Z'][:, 1] = 2 + self.test_inst['DQI_Z'][:, 2] = 1 + self.test_inst['DQI_X'][:, 0] = 3 + self.test_inst['DQI_X'][:, 1] = 2 + self.test_inst['DQI_X'][:, 2] = 1 + + return + + @pytest.mark.parametrize("epoch_var", ['time', 'EPOCH']) + def test_build_dtimes(self, epoch_var): + """Test creation of datetime list from JHU APL times. + + Parameters + ---------- + epoch_var : str + Epoch variable containing time data that seconds of day will be + obtained from if `epoch` != None + + """ + # Set up the test instrument with necessary times + self.set_jhuapl() + + # Get the time list + epoch = None if epoch_var == 'time' else self.epoch_date + self.out = jhuapl.build_dtimes(self.test_inst.data, self.var_list[0], + epoch=epoch, epoch_var=epoch_var) + + # Get the comparison from the Instrument index + self.comp = [pds.to_datetime(tval).to_pydatetime() + for tval in self.test_inst.index] + + # Ensure the lists are equal + pysat.utils.testing.assert_lists_equal(self.out, self.comp) + return + + @pytest.mark.parametrize("clean_level", ['clean', 'dusty', 'dirty', 'none']) + def test_clean_by_dqi(self, clean_level): + """Test cleaning of data to different levels using the DQI flags. 
+ + Parameters + ---------- + clean_level : str + String denoting desired clean level + + """ + # Set up the test instrument with necessary flag arrays + self.set_jhuapl() + + # Update the clean level + self.test_inst.clean_level = clean_level + self.out = {'clean': 2, 'dusty': 1, 'dirty': 1} + + # Clean the data to the desired level + jhuapl.clean_by_dqi(self.test_inst) + + # Test that there are fill values only in the appropriate variables and + # places + for var in self.test_inst.variables: + # Get the fill value for this variable + if var in self.test_inst.meta.keys(): + self.comp = self.test_inst.meta[ + var, self.test_inst.meta.labels.fill_val] + + try: + isnan = True if np.isnan(self.comp) else False + except TypeError: + isnan = False + + if var in self.var_list and clean_level != 'none': + # This variable will have been cleaned + estr = "unmasked values in {:}".format(var) + if isnan: + assert np.isnan(self.test_inst[var][:, self.out[ + clean_level]].values).all(), estr + else: + assert np.all( + self.test_inst[var][:, self.out[clean_level]].values + == self.comp), estr + else: + # This variable should not have any fill values + estr = "masked values ({:}) in {:}".format(self.comp, var) + if isnan: + assert not np.isnan( + self.test_inst[var].values).all(), estr + else: + assert not np.all( + self.test_inst[var].values == self.comp), estr + return