Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pyspextool ingest script #498

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions scripts/ingests/pyspextool/practice_ingest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
"""
Script for ingesting IRTF SpeX spectra processed by pyspextool into the SIMPLE database.

Expects spectra files to be uploaded to the S3 bucket in the following structure:
https://bdnyc.s3.amazonaws.com/SpeX/pyspextool/{filename}
"""

import logging
from pathlib import Path
from astropy.io import fits
from astrodb_scripts import load_astrodb, find_source_in_db, AstroDBError
from simple.schema import *
from simple.schema import REFERENCE_TABLES
from simple.utils.spectra import ingest_spectrum, spectrum_plottable

# Run-time switches for this ingest script.
# SAVE_DB is not referenced anywhere in the visible part of this script.
SAVE_DB = False  # save the data files in addition to modifying the .db file
RECREATE_DB = False  # recreates the .db file from the data files

# Shared "AstroDB" logger. At WARNING level the logger.info() calls in this
# script are filtered out unless the level is lowered here.
logger = logging.getLogger("AstroDB")
logger.setLevel(logging.WARNING)

db = load_astrodb(
    "SIMPLE.sqlite",
    recreatedb=RECREATE_DB,
    reference_tables=REFERENCE_TABLES,
)

# Local directory holding the processed spectra (machine-specific path).
data_directory = "/Users/kelle/Desktop/processed data set"  # proc directory
logger.info("Data directory: %s", data_directory)

# TODO - only ingest files renamed and converted to Spectrum1D fits
# Path.glob returns a lazy generator, so it can only be iterated once.
data_path = Path(data_directory)
fits_files = data_path.glob("calspec*.fits")

# Bookkeeping: number of files seen, and the file names that were
# ingested or skipped.
total_files = 0
ingested, skipped = [], []

# Process every candidate FITS file: match it to a database source using its
# header, validate the header metadata, check that the uploaded copy is
# plottable, then ingest it. Every path through the loop body records the file
# name in exactly one of `ingested` or `skipped`, which the bookkeeping check
# after the loop relies on.
for file in fits_files:
    total_files += 1
    logger.info(f"Processing {file}")
    hdr = fits.getheader(file)

    # Check if source is in the database; only an unambiguous single match
    # is accepted.
    matches = find_source_in_db(db, hdr["OBJECT"], ra=hdr["RA"], dec=hdr["DEC"])
    if len(matches) == 0:
        skipped.append(file.name)
        msg = f"Source {hdr['OBJECT']} not found in the database. Skipping."
        logger.warning(msg)
        continue  # go to next file
    if len(matches) > 1:
        skipped.append(file.name)
        msg = f"Multiple matches found for {hdr['OBJECT']}. Skipping."
        logger.warning(msg)
        continue  # go to next file

    source = matches[0]
    msg = f"Source {hdr['OBJECT']} found in the database as {source}."
    print(msg)
    # Use the configured "AstroDB" logger rather than the root logger.
    logger.info(msg)

    # Source is in the database, get other needed keywords from the header
    regime = "nir"

    if hdr["TELESCOP"] != "NASA IRTF":
        skipped.append(file.name)
        msg = f"Telescope {hdr['TELESCOP']} not expected. Skipping. Expected NASA IRTF."
        logger.warning(msg)
        continue
    telescope = "IRTF"

    # Report the same keyword that is tested ("INSTR"); formatting a
    # different keyword in the message could raise KeyError on the skip path.
    if hdr["INSTR"] != "SpeX":
        skipped.append(file.name)
        msg = f"Instrument {hdr['INSTR']} not expected. Skipping. Expected SpeX."
        logger.warning(msg)
        continue
    instrument = "SpeX"

    mode = hdr["MODE"]
    if mode not in ("Prism", "SXD"):
        skipped.append(file.name)
        msg = f"Mode {mode} not expected. Skipping. Expected Prism or SXD."
        logger.warning(msg)
        # Without this `continue` the file would be counted as skipped and
        # then still be passed on to the ingest step below.
        continue

    obs_date = hdr["AVE_DATE"]

    # TODO: make a reference for the pyspextool team
    reference = "Missing"

    other_references = f"{hdr['PROG_ID']}: {hdr['OBSERVER']}"

    # The database stores the public S3 URL, not the local path.
    spectrum = f"https://bdnyc.s3.amazonaws.com/SpeX/pyspextool/{file.name}"

    # check if the spectrum is plottable; the return value is not needed
    # because failures are handled through the AstroDBError caught below.
    try:
        spectrum_plottable(spectrum, raise_error=True)
    except AstroDBError as e:
        skipped.append(file.name)
        logger.warning(f"Spectrum not plottable. Skipping {file.name}")
        logger.debug(e)
        continue

    # Ingest the spectrum
    # TODO - ingest the new way.
    try:
        ingest_spectrum(
            db,
            source=source,
            spectrum=spectrum,
            regime=regime,
            obs_date=obs_date,
            telescope=telescope,
            instrument=instrument,
            mode=mode,
            reference=reference,
            other_references=other_references,
        )
    except AstroDBError as e:
        skipped.append(file.name)
        logger.warning(f"Error ingesting {file.name}. Skipping.")
        logger.debug(e)
        continue

    ingested.append(file.name)

# Bookkeeping check: the ingested and skipped lists together must account for
# every file the loop visited. A mismatch means some file was recorded twice
# or not at all, so stop with an error instead of reporting wrong totals.
if len(ingested) + len(skipped) != total_files:
    msg = (
        f"Some files were not ingested or skipped. \n"
        f"n_ingested = {len(ingested)}, n_skipped = {len(skipped)}, total_files = {total_files}"
    )
    logger.error(msg)
    # Attach the message so the traceback carries the counts as well.
    raise AstroDBError(msg)

# Final report: list the skipped file names if there are any, otherwise
# confirm that everything was ingested.
if skipped:
    logger.warning(f"Skipped {len(skipped)} out of {total_files} files: \n {skipped}")
else:
    logger.info(f"Ingested {len(ingested)} out of {total_files} files.")