WIP: Update Solo and SDO data downloaders #3

Merged
18 commits merged on Dec 4, 2023
1 change: 1 addition & 0 deletions .env.example
@@ -0,0 +1 @@
[email protected]
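The new `.env.example` holds the email credential as an environment variable. Below is a minimal sketch of reading it at runtime; python-dotenv is an assumption (it is not declared by this PR), and the `SDO_EMAIL` variable name is taken from the lookup in the downloader's `main()` further down.

```python
# Sketch only: python-dotenv is an assumption, not a dependency of this PR.
import os

from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory
email = os.getenv("SDO_EMAIL")  # variable name taken from the downloader's main()
if email is None:
    raise RuntimeError("Set SDO_EMAIL in .env or in the environment.")
```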
5 changes: 4 additions & 1 deletion .gitignore
@@ -146,4 +146,7 @@ datasets/*
outputs/
jbook/_build/
_build/
.idea/
.idea/

**notebooks/*data*
**notebooks/*png*
19 changes: 14 additions & 5 deletions README.md
@@ -45,23 +45,32 @@ See [notebook](./notebooks/3.2_rastervision.ipynb) for details.
We can install it directly through pip

```bash
pip install git+https://github.com/jejjohnson/helio_tools
pip install git+https://github.com/spaceml-org/helio_tools
```

We also use poetry for the development environment.

```bash
git clone https://github.com/jejjohnson/helio_tools.git
git clone https://github.com/spaceml-org/helio_tools
cd helio_tools
conda create -n helio_tools python=3.11 poetry
conda activate helio_tools
poetry install
```



---
## References

**Software**

* [InstrumentToInstrument](https://github.com/RobertJaro/InstrumentToInstrument/tree/master) - Instrument-to-Instrument Translation.
* [InstrumentToInstrument](https://github.com/RobertJaro/InstrumentToInstrument/tree/master) - Instrument-to-Instrument Translation.

**Glossary**

* [SDO](https://sdo.gsfc.nasa.gov/) - Solar Dynamics Observatory.
* [AIA](https://sdo.gsfc.nasa.gov/data/) - Atmospheric Imaging Assembly.
* [HMI](https://sdo.gsfc.nasa.gov/data/) - Helioseismic and Magnetic Imager.
* [EVE](https://lasp.colorado.edu/home/eve/data/) - Extreme Ultraviolet Variability Experiment.
* [SolO](https://sci.esa.int/web/solar-orbiter) - Solar Orbiter.
* FSI - Full Sun Imager.
* [SOHO](https://soho.nascom.nasa.gov//) - Solar and Heliospheric Observatory.
2 changes: 2 additions & 0 deletions environment.yaml
@@ -9,9 +9,11 @@ dependencies:
- scikit-image
- astropy
- sunpy
- sunpy-soar
- conda-forge::aiapy
- pandas # Data structure
- scikit-learn # Machine Learning
- joblib # Parallelization
# PLOTTING LIBRARY
- matplotlib # standard plotting library
- seaborn # Stats viz library
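The new `sunpy-soar` dependency registers the Solar Orbiter Archive (SOAR) as a Fido client, presumably for the SolO downloader updated in this PR. Below is a minimal sketch of a SolO/EUI query; the product name and time range are illustrative, and the attribute names follow the sunpy-soar documentation rather than anything shown in this diff.

```python
# Sketch of a SOAR query via Fido; product and time range are illustrative only.
import sunpy_soar  # noqa: F401  (importing registers the SOAR client with Fido)
from sunpy.net import Fido
from sunpy.net import attrs as a

result = Fido.search(
    a.Time("2022-03-01", "2022-03-01 01:00"),
    a.Instrument("EUI"),
    a.Level(2),
    a.soar.Product("EUI-FSI174-IMAGE"),  # Full Sun Imager, 174 angstrom
)
files = Fido.fetch(result, path="~/solo-data")
```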
@@ -4,17 +4,17 @@
DEFAULT_WAVELENGTHS = [171, 193, 211, 304]



class SDOData:
email: str
base_path: str
wavelengths: List[str | int | float]=DEFAULT_WAVELENGTHS
wavelengths: List[str | int | float] = DEFAULT_WAVELENGTHS
n_workers: int

def download_soho(

def download_sdo(
email: str, base_path: str,
wavelengths: List[str | int | float]=DEFAULT_WAVELENGTHS,
n_workers: int=5
wavelengths: List[str | int | float] = DEFAULT_WAVELENGTHS,
n_workers: int = 5
) -> None:
"""A simple download script do down

@@ -24,4 +24,4 @@ def download_soho(
wavelengths (list[int|str|float]): the wavelengths we would like to download
n_workers (int): the number of workers for the download.
"""
pass
pass
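The stub above sketches the intended public entry point. A hypothetical call of the same shape is shown below; the import path is an assumption, since the file name is not visible in this diff.

```python
# Hypothetical usage of the download_sdo stub; the import path is an assumption.
from helio_tools import download_sdo

download_sdo(
    email="registered@example.com",     # placeholder for a JSOC-registered address
    base_path="~/sdo-data",
    wavelengths=[171, 193, 211, 304],   # DEFAULT_WAVELENGTHS
    n_workers=5,
)
```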
@@ -1,3 +1,10 @@
"""
Script to download data products from the SDO database at http://jsoc.stanford.edu/
to a local directory. Uses the drms python package, the default package for downloading SDO data.

Documentation for DRMS: https://docs.sunpy.org/projects/drms/en/latest/
"""

import argparse
import logging
import multiprocessing
@@ -15,21 +22,76 @@

DEFAULT_WAVELENGTHS = [171, 193, 211, 304]


class SDODownloader:
def __init__(self, base_path: str = None,
email: str = None,
wavelengths: list[str | int | float] = DEFAULT_WAVELENGTHS,
n_workers: int = 5) -> None:
"""The SDO Downloader is an efficent way to download data from the SDO database.

Args:
base_path (str): the base path where the data should be downloaded to.
email (str): the email address registered with JSOC, needed for data export requests
wavelengths (list[int|str|float]): the wavelengths we would like to download
n_workers (int): the number of workers for the download.

def __init__(self, base_path, email, wavelengths=DEFAULT_WAVELENGTHS, n_workers=5):
Example Usage:

>>> downloader_sdo = SDODownloader(...)
>>> downloader_sdo.downloadDate(datetime(2022, 3, 1))

"""
self.ds_path = base_path
self.wavelengths = [str(wl) for wl in wavelengths]
self.n_workers = n_workers
[os.makedirs(os.path.join(base_path, wl), exist_ok=True) for wl in self.wavelengths + ['6173']]
[os.makedirs(os.path.join(base_path, wl), exist_ok=True)
for wl in self.wavelengths + ['6173']]

self.drms_client = drms.Client(email=email)

def download(self, sample):
def downloadDate(self, date: datetime):
"""Download FITS data for a specific date.
"""
id = date.isoformat()
logging.info('Start download: %s' % id)
time_param = '%sZ' % date.isoformat('_', timespec='seconds')

# query Magnetogram Instrument
ds_hmi = 'hmi.M_720s[%s]{magnetogram}' % time_param
keys_hmi = self.drms_client.keys(ds_hmi)
header_hmi, segment_hmi = self.drms_client.query(
ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
if len(header_hmi) != 1 or np.any(header_hmi.QUALITY != 0):
self.fetchDataFallback(date)
return

# query EUV Instrument
ds_euv = 'aia.lev1_euv_12s[%s][%s]{image}' % (
time_param, ','.join(self.wavelengths))
keys_euv = self.drms_client.keys(ds_euv)
header_euv, segment_euv = self.drms_client.query(
ds_euv, key=','.join(keys_euv), seg='image')
if len(header_euv) != len(self.wavelengths) or np.any(header_euv.QUALITY != 0):
self.fetchDataFallback(date)
return

queue = []
for (idx, h), s in zip(header_hmi.iterrows(), segment_hmi.magnetogram):
queue += [(h.to_dict(), s, date)]
for (idx, h), s in zip(header_euv.iterrows(), segment_euv.image):
queue += [(h.to_dict(), s, date)]

with multiprocessing.Pool(self.n_workers) as p:
p.map(self.download, queue)
logging.info('Finished: %s' % id)
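`downloadDate` builds JSOC record-set strings for `hmi.M_720s` and `aia.lev1_euv_12s` and falls back whenever a record's `QUALITY` keyword is non-zero. The block below is a standalone sketch of the same drms query pattern, not part of the PR; the email is a placeholder for a JSOC-registered address.

```python
# Minimal drms query sketch mirroring the pattern above; not part of the PR.
import drms

client = drms.Client(email="registered@example.com")  # placeholder address

# Same record-set syntax as above: series[time][wavelengths]{segment}
ds = "aia.lev1_euv_12s[2022-03-01_00:00:00Z][171,193]{image}"
keys, segments = client.query(ds, key="T_REC, WAVELNTH, QUALITY", seg="image")

good = keys[keys.QUALITY == 0]   # QUALITY != 0 is what triggers the fallback above
print(good[["T_REC", "WAVELNTH"]])
print(segments.image.iloc[0])    # path fragment served by jsoc.stanford.edu
```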

def download(self, sample: tuple[dict, str, datetime]):
header, segment, t = sample
try:
dir = os.path.join(self.ds_path, '%d' % header['WAVELNTH'])
map_path = os.path.join(dir, '%s.fits' % t.isoformat('T', timespec='seconds'))
map_path = os.path.join(dir, '%s.fits' %
t.isoformat('T', timespec='seconds'))
if os.path.exists(map_path):
return map_path
# load map
@@ -52,50 +114,22 @@ def download(self, sample):
logging.info(ex)
raise ex

def downloadDate(self, date):
id = date.isoformat()

logging.info('Start download: %s' % id)
# query Magnetogram
time_param = '%sZ' % date.isoformat('_', timespec='seconds')
ds_hmi = 'hmi.M_720s[%s]{magnetogram}' % time_param
keys_hmi = self.drms_client.keys(ds_hmi)
header_hmi, segment_hmi = self.drms_client.query(ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
if len(header_hmi) != 1 or np.any(header_hmi.QUALITY != 0):
self.fetchDataFallback(date)
return

# query EUV
time_param = '%sZ' % date.isoformat('_', timespec='seconds')
ds_euv = 'aia.lev1_euv_12s[%s][%s]{image}' % (time_param, ','.join(self.wavelengths))
keys_euv = self.drms_client.keys(ds_euv)
header_euv, segment_euv = self.drms_client.query(ds_euv, key=','.join(keys_euv), seg='image')
if len(header_euv) != len(self.wavelengths) or np.any(header_euv.QUALITY != 0):
self.fetchDataFallback(date)
return

queue = []
for (idx, h), s in zip(header_hmi.iterrows(), segment_hmi.magnetogram):
queue += [(h.to_dict(), s, date)]
for (idx, h), s in zip(header_euv.iterrows(), segment_euv.image):
queue += [(h.to_dict(), s, date)]

with multiprocessing.Pool(self.n_workers) as p:
p.map(self.download, queue)
logging.info('Finished: %s' % id)

def fetchDataFallback(self, date):
def fetchDataFallback(self, date: datetime):
id = date.isoformat()

logging.info('Fallback download: %s' % id)
# query Magnetogram
t = date - timedelta(hours=24)
ds_hmi = 'hmi.M_720s[%sZ/12h@720s]{magnetogram}' % t.replace(tzinfo=None).isoformat('_', timespec='seconds')
ds_hmi = 'hmi.M_720s[%sZ/12h@720s]{magnetogram}' % t.replace(
tzinfo=None).isoformat('_', timespec='seconds')
keys_hmi = self.drms_client.keys(ds_hmi)
header_tmp, segment_tmp = self.drms_client.query(ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
header_tmp, segment_tmp = self.drms_client.query(
ds_hmi, key=','.join(keys_hmi), seg='magnetogram')
assert len(header_tmp) != 0, 'No data found!'
date_str = header_tmp['DATE__OBS'].replace('MISSING', '').str.replace('60', '59') # fix date format
date_diff = np.abs(pd.to_datetime(date_str).dt.tz_localize(None) - date)
date_str = header_tmp['DATE__OBS'].replace(
'MISSING', '').str.replace('60', '59') # fix date format
date_diff = np.abs(pd.to_datetime(
date_str).dt.tz_localize(None) - date)
# sort and filter
header_tmp['date_diff'] = date_diff
header_tmp.sort_values('date_diff')
@@ -116,10 +150,13 @@ def fetchDataFallback(self, date):
euv_ds = 'aia.lev1_euv_12s[%sZ/12h@12s][%s]{image}' % (
t.replace(tzinfo=None).isoformat('_', timespec='seconds'), wl)
keys_euv = self.drms_client.keys(euv_ds)
header_tmp, segment_tmp = self.drms_client.query(euv_ds, key=','.join(keys_euv), seg='image')
header_tmp, segment_tmp = self.drms_client.query(
euv_ds, key=','.join(keys_euv), seg='image')
assert len(header_tmp) != 0, 'No data found!'
date_str = header_tmp['DATE__OBS'].replace('MISSING', '').str.replace('60', '59') # fix date format
date_diff = (pd.to_datetime(date_str).dt.tz_localize(None) - date).abs()
date_str = header_tmp['DATE__OBS'].replace(
'MISSING', '').str.replace('60', '59') # fix date format
date_diff = (pd.to_datetime(
date_str).dt.tz_localize(None) - date).abs()
# sort and filter
header_tmp['date_diff'] = date_diff
header_tmp.sort_values('date_diff')
@@ -145,9 +182,12 @@ def fetchDataFallback(self, date):


def main():
email = "[email protected]"
base_path = "/home/juanjohn/data/helio/sdo"
downloader_sdo = SDODownloader(base_path=base_path, email=email, n_workers=8)
import os
email = os.getenv('SDO_EMAIL')
base_path = os.path.join(os.path.expanduser('~'), 'sdo-data')

downloader_sdo = SDODownloader(
base_path=base_path, email=email, n_workers=8)

start_date = datetime(2022, 3, 1)
end_date = datetime(2023, 3, 2)
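The loop that drives the downloader between `start_date` and `end_date` is collapsed in this diff. A plausible sketch, assuming a daily cadence and the module's existing imports (`logging`, `timedelta`), would be:

```python
# Sketch only: the real loop is collapsed in this diff and its cadence may differ.
current = start_date
while current <= end_date:
    try:
        downloader_sdo.downloadDate(current)
    except Exception as ex:
        logging.error("Failed %s: %s", current.isoformat(), ex)
    current += timedelta(days=1)
```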
Empty file.