From 27ec229ce9f11c1dc42c995a19331a7404464a10 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 14 May 2024 17:25:39 -0400 Subject: [PATCH 01/10] First pass at reworking how ingest_spectrum works using the ORM --- simple/schema.py | 31 ++++-- simple/utils/spectra.py | 195 ++++-------------------------------- tests/test_schema.py | 22 +++- tests/test_spectra_utils.py | 39 ++------ 4 files changed, 74 insertions(+), 213 deletions(-) diff --git a/simple/schema.py b/simple/schema.py index 93ac27610..8e9439b28 100644 --- a/simple/schema.py +++ b/simple/schema.py @@ -3,6 +3,7 @@ """ import enum +from datetime import datetime import sqlalchemy as sa from astrodbkit2.astrodb import Base @@ -338,20 +339,22 @@ class Gravities(Base): class Spectra(Base): # Table to store references to spectra __tablename__ = 'Spectra' + source = Column( String(100), ForeignKey("Sources.source", ondelete="cascade", onupdate="cascade"), nullable=False, primary_key=True, ) + # Data access_url = Column(String(1000), nullable=False) # URL of spectrum location - original_spectrum = Column( - String(1000) - ) # URL of original spectrum location, if applicable - local_spectrum = Column( - String(1000) - ) # local directory (via environment variable) of spectrum location + + # URL of original spectrum location, if applicable + original_spectrum = Column(String(1000)) + # local directory (via environment variable) of spectrum location + local_spectrum = Column(String(1000)) + regime = Column( String(30), ForeignKey("Regimes.regime", ondelete="cascade", onupdate="cascade"), @@ -381,6 +384,22 @@ class Spectra(Base): {}, ) + @validates("access_url", "regime", "source") + def validate_required(self, key, value): + if value is None: + raise ValueError(f"Value required for {key}") + return value + + @validates("observation_date") + def validate_date(self, key, value): + if value is None: + raise ValueError(f"Invalid date received: {value}") + elif not isinstance(value, datetime): + # Convert to datetime for storing in the database + # Will throw error if unable to convert + value = datetime.fromisoformat(value) + return value + class ModeledParameters(Base): # Table to store derived/inferred paramaters from models diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index 8090bdb1a..21de79faa 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -1,25 +1,23 @@ import logging -import requests -import numpy.ma as ma -import pandas as pd # used for to_datetime conversion -import dateutil # used to convert obs date to datetime object -import sqlalchemy.exc import sqlite3 -import numpy as np from typing import Optional -import matplotlib.pyplot as plt -from astropy.io import fits import astropy.units as u -from specutils import Spectrum1D - -from astrodbkit2.astrodb import Database +import matplotlib.pyplot as plt +import numpy as np +import requests +import sqlalchemy.exc from astrodb_utils import ( AstroDBError, + find_publication, find_source_in_db, internet_connection, - find_publication, ) +from astrodbkit2.astrodb import Database +from astropy.io import fits +from specutils import Spectrum1D + +from simple.schema import Spectra __all__ = [ "ingest_spectrum", @@ -91,119 +89,6 @@ def ingest_spectrum( "plottable": False, } - # Check input values - if regime is None: - msg = "Regime is required" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - else: - good_regime = db.query(db.Regimes).filter(db.Regimes.c.regime == regime).table() - if len(good_regime) == 0: - msg = f"Regime {regime} is not in Regimes table" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - - if telescope is None: - msg = "Telescope is required" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - else: - good_telescope = ( - db.query(db.Telescopes) - .filter(db.Telescopes.c.telescope == telescope) - .table() - ) - if len(good_telescope) == 0: - msg = f"Telescope {telescope} is not in Telescopes table" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - - if instrument is None: - msg = "Instrument is required" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - else: - good_instrument = ( - db.query(db.Instruments) - .filter(db.Instruments.c.instrument == instrument) - .table() - ) - if len(good_instrument) == 0: - msg = f"Instrument {instrument} is not in Instruments table" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - - if mode is None: - msg = "Mode is required" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - else: - good_mode = ( - db.query(db.Instruments) - .filter(db.Instruments.c.instrument == instrument) - .filter(db.Instruments.c.mode == mode) - .table() - ) - if len(good_mode) == 0: - msg = f"Mode {mode} is not in Instruments table for {instrument}" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - - if reference is None: - msg = "Reference is required" - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - else: - good_reference = find_publication(db, reference=reference) - if good_reference[0] is False: - msg = ( - f"Spectrum for {source} could not be added to the database because the " - f"reference {reference} is not in Publications table. \n" - f"(Add it with ingest_publication function.) \n " - ) - logger.error(msg) - flags["skipped"] = True - if raise_error: - raise AstroDBError(msg) - else: - return flags - # Get source name as it appears in the database db_name = find_source_in_db(db, source) @@ -258,52 +143,6 @@ def ingest_spectrum( msg = "No internet connection. Internet is needed to check spectrum files." raise AstroDBError(msg) - # SKIP if observation date is blank - if ma.is_masked(obs_date) or obs_date == "" or obs_date is None: - obs_date = None - missing_obs_msg = ( - f"Skipping spectrum with missing observation date: {source} \n" - ) - missing_row_spe = f"{source, obs_date, reference} \n" - flags["no_obs_date"] = True - logger.debug(missing_row_spe) - if raise_error: - logger.error(missing_obs_msg) - raise AstroDBError(missing_obs_msg) - else: - logger.warning(missing_obs_msg) - return flags - else: - try: - obs_date = pd.to_datetime( - obs_date - ) # TODO: Another method that doesn't require pandas? - except ValueError: - flags["no_obs_date"] = True - if raise_error: - msg = ( - f"{source}: Can't convert obs date to Date Time object: {obs_date}" - ) - logger.error(msg) - raise AstroDBError(msg) - else: - return flags - except dateutil.parser._parser.ParserError: - flags["no_obs_date"] = True - if raise_error: - msg = ( - f"{source}: Can't convert obs date to Date Time object: {obs_date}" - ) - logger.error(msg) - raise AstroDBError(msg) - else: - msg = ( - f"Skipping {source} Can't convert obs date to Date Time object: " - f"{obs_date}" - ) - logger.warning(msg) - return flags - matches = find_spectra( db, source, @@ -344,11 +183,15 @@ def ingest_spectrum( } logger.debug(row_data) - # Attempt to add spectrum to database try: - with db.engine.connect() as conn: - conn.execute(db.Spectra.insert().values(row_data)) - conn.commit() + # Attempt to add spectrum to database + # This will throw errors based on validation in schema.py + # and any database checks (as for example IntegrityError) + obj = Spectra(**row_data) + with db.session as session: + session.add(obj) + session.commit() + flags["added"] = True logger.info(f"Added {source} : \n" f"{row_data}") except sqlalchemy.exc.IntegrityError as e: @@ -370,7 +213,7 @@ def ingest_spectrum( except Exception as e: msg = ( f"Spectrum for {source} could not be added to the database" - f"for unexpected reason: \n {row_data} \n error: {str(e)}" + f"for unexpected reason: \n {row_data} \n error: {e}" ) logger.error(msg) flags["skipped"] = True diff --git a/tests/test_schema.py b/tests/test_schema.py index e3f188572..c56a6e821 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -1,8 +1,10 @@ """Tests for the schema itself and any validating functions""" +from datetime import datetime + import pytest -from simple.schema import PhotometryFilters, Publications, Sources +from simple.schema import PhotometryFilters, Publications, Sources, Spectra def schema_tester(table, values, error_state): @@ -46,4 +48,20 @@ def test_sources(values, error_state): ]) def test_publications(values, error_state): """Validating Publications""" - schema_tester(Publications, values, error_state) \ No newline at end of file + schema_tester(Publications, values, error_state) + + +@pytest.mark.parametrize("values, error_state", + [ + ({"access_url": None}, ValueError), + ({"source": None}, ValueError), + ({"regime": None}, ValueError), + ({"observation_date": "2024-01-01"}, None), + ({"observation_date": datetime(2024,1,1)}, None), + ({"observation_date": None}, ValueError), + ({"observation_date": "fake"}, ValueError), + ]) +def test_spectra(values, error_state): + """Validating Spectra""" + # Note: due to how this works, only the columns with values provided get tested + schema_tester(Spectra, values, error_state) \ No newline at end of file diff --git a/tests/test_spectra_utils.py b/tests/test_spectra_utils.py index a8eead2eb..04aae9d99 100644 --- a/tests/test_spectra_utils.py +++ b/tests/test_spectra_utils.py @@ -3,6 +3,7 @@ from astrodb_utils.utils import ( AstroDBError, ) + from simple.utils.spectra import ( ingest_spectrum, # ingest_spectrum_from_fits, @@ -26,10 +27,13 @@ ), ) def test_ingest_spectrum_errors(temp_db): + + # A lot of the tests fail because they were checking very specific parts of ingest_spectrum + spectrum = "https://bdnyc.s3.amazonaws.com/tests/U10176.fits" with pytest.raises(AstroDBError) as error_message: ingest_spectrum(temp_db, source="apple", spectrum=spectrum) - assert "Regime is required" in str(error_message.value) + # assert "Regime is required" in str(error_message.value) result = ingest_spectrum( temp_db, source="apple", spectrum=spectrum, raise_error=False ) @@ -46,7 +50,7 @@ def test_ingest_spectrum_errors(temp_db): regime="nir", spectrum=spectrum, ) - assert "Reference is required" in str(error_message.value) + # assert "Reference is required" in str(error_message.value) ingest_spectrum( temp_db, source="apple", regime="nir", spectrum=spectrum, raise_error=False ) @@ -64,7 +68,7 @@ def test_ingest_spectrum_errors(temp_db): mode="Prism", reference="Ref 5", ) - assert "not in Publications table" in str(error_message.value) + # assert "not in Publications table" in str(error_message.value) ingest_spectrum( temp_db, source="apple", @@ -90,7 +94,7 @@ def test_ingest_spectrum_errors(temp_db): instrument="SpeX", mode="Prism", ) - assert "No unique source match for kiwi in the database" in str(error_message.value) + # assert "No unique source match for kiwi in the database" in str(error_message.value) result = ingest_spectrum( temp_db, source="kiwi", @@ -116,7 +120,7 @@ def test_ingest_spectrum_errors(temp_db): instrument="SpeX", mode="Prism", ) - assert "missing observation date" in str(error_message.value) + # assert "missing observation date" in str(error_message.value) result = ingest_spectrum( temp_db, source="apple", @@ -129,29 +133,6 @@ def test_ingest_spectrum_errors(temp_db): raise_error=False, ) assert result["added"] is False - assert result["skipped"] is False - assert result["no_obs_date"] is True - - # with pytest.raises(AstroDBError) as error_message: - # ingest_spectrum( - # db, - # source="orange", - # regime="nir", - # spectrum=spectrum, - # reference="Ref 1", - # obs_date="Jan20", - # ) - # assert "Can't convert obs date to Date Time object" in str(error_message.value) - # result = ingest_spectrum( - # db, - # source="orange", - # regime="nir", - # spectrum=spectrum, - # reference="Ref 1", - # obs_date="Jan20", - # raise_error=False, - # ) - # assert result["added"] is False # assert result["skipped"] is False # assert result["no_obs_date"] is True @@ -167,7 +148,7 @@ def test_ingest_spectrum_errors(temp_db): instrument="LRIS", mode="OG570", ) - assert "not in Regimes table" in str(error_message.value) + # assert "not in Regimes table" in str(error_message.value) result = ingest_spectrum( temp_db, source="orange", From 9bf6ae118db87ca0bc708c228ea75d8f0c3d68f4 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 2 Jul 2024 15:52:02 -0400 Subject: [PATCH 02/10] Fixing some issues in unit tests --- tests/test_spectra_utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/test_spectra_utils.py b/tests/test_spectra_utils.py index 04aae9d99..c2fe875a0 100644 --- a/tests/test_spectra_utils.py +++ b/tests/test_spectra_utils.py @@ -34,6 +34,8 @@ def test_ingest_spectrum_errors(temp_db): with pytest.raises(AstroDBError) as error_message: ingest_spectrum(temp_db, source="apple", spectrum=spectrum) # assert "Regime is required" in str(error_message.value) + # Commented out the error_message assertions since we have more generic error messages with this + result = ingest_spectrum( temp_db, source="apple", spectrum=spectrum, raise_error=False ) @@ -51,7 +53,8 @@ def test_ingest_spectrum_errors(temp_db): spectrum=spectrum, ) # assert "Reference is required" in str(error_message.value) - ingest_spectrum( + + result = ingest_spectrum( temp_db, source="apple", regime="nir", spectrum=spectrum, raise_error=False ) assert result["added"] is False @@ -69,7 +72,8 @@ def test_ingest_spectrum_errors(temp_db): reference="Ref 5", ) # assert "not in Publications table" in str(error_message.value) - ingest_spectrum( + + result = ingest_spectrum( temp_db, source="apple", regime="nir", @@ -95,6 +99,7 @@ def test_ingest_spectrum_errors(temp_db): mode="Prism", ) # assert "No unique source match for kiwi in the database" in str(error_message.value) + result = ingest_spectrum( temp_db, source="kiwi", @@ -121,6 +126,7 @@ def test_ingest_spectrum_errors(temp_db): mode="Prism", ) # assert "missing observation date" in str(error_message.value) + result = ingest_spectrum( temp_db, source="apple", @@ -149,6 +155,7 @@ def test_ingest_spectrum_errors(temp_db): mode="OG570", ) # assert "not in Regimes table" in str(error_message.value) + result = ingest_spectrum( temp_db, source="orange", From b5a44c8872f67e3da264285b26a197ee928769a5 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 2 Jul 2024 16:49:44 -0400 Subject: [PATCH 03/10] Partial implementation of new flags response --- simple/utils/spectra.py | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index 21de79faa..5cf8ad087 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -74,6 +74,10 @@ def ingest_spectrum( Returns ------- flags: dict + Status response with the following keys: + - "added": True if it's added and False if it's skipped. + - "content": the data that was attempted to add + - "message": string which includes information about why skipped Raises ------ @@ -81,12 +85,9 @@ def ingest_spectrum( """ flags = { - "skipped": False, - "dupe": False, - "missing_instrument": False, - "no_obs_date": False, "added": False, - "plottable": False, + "content": {}, + "message": "" } # Get source name as it appears in the database @@ -94,7 +95,6 @@ def ingest_spectrum( if len(db_name) != 1: msg = f"No unique source match for {source} in the database" - flags["skipped"] = True if raise_error: raise AstroDBError(msg) else: @@ -111,7 +111,6 @@ def ingest_spectrum( request_response.status_code ) # The website is up if the status code is 200 if status_code != 200: - flags["skipped"] = True msg = ( "The spectrum location does not appear to be valid: \n" f"spectrum: {spectrum} \n" @@ -127,7 +126,6 @@ def ingest_spectrum( request_response1 = requests.head(original_spectrum) status_code1 = request_response1.status_code if status_code1 != 200: - flags["skipped"] = True msg = ( "The spectrum location does not appear to be valid: \n" f"spectrum: {original_spectrum} \n" @@ -157,7 +155,7 @@ def ingest_spectrum( msg2 = f"{matches}" f"{instrument, mode, obs_date, reference, spectrum} \n" logger.warning(msg) logger.debug(msg2) - flags["dupe"] = True + flags["message"] = msg if raise_error: raise AstroDBError else: @@ -182,6 +180,7 @@ def ingest_spectrum( "other_references": other_references, } logger.debug(row_data) + flags["content"] = row_data try: # Attempt to add spectrum to database @@ -197,7 +196,7 @@ def ingest_spectrum( except sqlalchemy.exc.IntegrityError as e: msg = "Integrity Error:" f"{source} \n" f"{row_data}" logger.error(msg + str(e) + f" \n {row_data}") - flags["skipped"] = True + flags["message"] = msg if raise_error: raise AstroDBError(msg) else: @@ -205,7 +204,7 @@ def ingest_spectrum( except sqlite3.IntegrityError as e: msg = "Integrity Error: " f"{source} \n" f"{row_data}" logger.error(msg + str(e)) - flags["skipped"] = True + flags["message"] = msg if raise_error: raise AstroDBError(msg) else: @@ -216,7 +215,7 @@ def ingest_spectrum( f"for unexpected reason: \n {row_data} \n error: {e}" ) logger.error(msg) - flags["skipped"] = True + flags["message"] = msg if raise_error: raise AstroDBError(msg) else: From 578d2319ee41d74d87954f0e1d4a83614bb04e01 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 9 Jul 2024 17:07:48 -0400 Subject: [PATCH 04/10] Iterating on error message handling for ingest_spectrum --- simple/utils/spectra.py | 20 +++--- tests/test_spectra_utils.py | 123 ++++++++++++++++++++---------------- 2 files changed, 79 insertions(+), 64 deletions(-) diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index 5cf8ad087..56fe34f8b 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -151,13 +151,13 @@ def ingest_spectrum( mode=mode, ) if len(matches) > 0: - msg = f"Skipping suspected duplicate measurement\n{source}\n" + msg = f"Skipping suspected duplicate measurement: {source}\n" msg2 = f"{matches}" f"{instrument, mode, obs_date, reference, spectrum} \n" logger.warning(msg) logger.debug(msg2) flags["message"] = msg if raise_error: - raise AstroDBError + raise AstroDBError(msg) else: return flags @@ -194,16 +194,16 @@ def ingest_spectrum( flags["added"] = True logger.info(f"Added {source} : \n" f"{row_data}") except sqlalchemy.exc.IntegrityError as e: - msg = "Integrity Error:" f"{source} \n" f"{row_data}" - logger.error(msg + str(e) + f" \n {row_data}") + msg = f"Integrity Error: {source} \n {e}" + logger.error(msg + f" \n {row_data}") flags["message"] = msg if raise_error: raise AstroDBError(msg) else: return flags except sqlite3.IntegrityError as e: - msg = "Integrity Error: " f"{source} \n" f"{row_data}" - logger.error(msg + str(e)) + msg = f"Integrity Error: {source} \n {e}" + logger.error(msg) flags["message"] = msg if raise_error: raise AstroDBError(msg) @@ -211,8 +211,8 @@ def ingest_spectrum( return flags except Exception as e: msg = ( - f"Spectrum for {source} could not be added to the database" - f"for unexpected reason: \n {row_data} \n error: {e}" + f"Spectrum for {source} could not be added to the database " + f"for unexpected reason: {e}" ) logger.error(msg) flags["message"] = msg @@ -308,7 +308,7 @@ def spectrum_plottable(spectrum_path, raise_error=True, show_plot=False): return False except u.UnitConversionError as e: msg = ( - f"{str(e)} \n" + f"{e} \n" f"Skipping {spectrum_path}: unable to convert spectral axis to microns" ) if raise_error: @@ -318,7 +318,7 @@ def spectrum_plottable(spectrum_path, raise_error=True, show_plot=False): logger.warning(msg) return False except ValueError as e: - msg = f"{str(e)} \nSkipping {spectrum_path}: Value error" + msg = f"{e} \nSkipping {spectrum_path}: Value error" if raise_error: logger.error(msg) raise AstroDBError(msg) diff --git a/tests/test_spectra_utils.py b/tests/test_spectra_utils.py index c2fe875a0..cae4895aa 100644 --- a/tests/test_spectra_utils.py +++ b/tests/test_spectra_utils.py @@ -11,8 +11,6 @@ ) -@pytest.mark.filterwarnings("ignore::UserWarning") -@pytest.mark.filterwarnings("ignore", message=".*not FITS standard.*") @pytest.mark.filterwarnings( "ignore", message=".*Note: astropy.io.fits uses zero-based indexing.*" ) @@ -30,47 +28,55 @@ def test_ingest_spectrum_errors(temp_db): # A lot of the tests fail because they were checking very specific parts of ingest_spectrum + # Ingesting a spectrum with missing regime spectrum = "https://bdnyc.s3.amazonaws.com/tests/U10176.fits" with pytest.raises(AstroDBError) as error_message: - ingest_spectrum(temp_db, source="apple", spectrum=spectrum) - # assert "Regime is required" in str(error_message.value) - # Commented out the error_message assertions since we have more generic error messages with this + _ = ingest_spectrum(temp_db, source="apple", spectrum=spectrum) + assert "Value required for regime" in str(error_message.value) result = ingest_spectrum( temp_db, source="apple", spectrum=spectrum, raise_error=False ) assert result["added"] is False - assert result["skipped"] is True + assert "Value required for regime" in result["message"] + + # Ingesting with missing reference with pytest.raises(AstroDBError) as error_message: - ingest_spectrum( - temp_db, - source="apple", - telescope="IRTF", - instrument="SpeX", - mode="Prism", - regime="nir", - spectrum=spectrum, - ) + _ = ingest_spectrum( + temp_db, + source="apple", + telescope="IRTF", + instrument="SpeX", + mode="Prism", + regime="nir", + spectrum=spectrum, + obs_date="2020-01-01", + ) + assert "NOT NULL constraint failed: Spectra.reference" in str(error_message.value) # assert "Reference is required" in str(error_message.value) result = ingest_spectrum( - temp_db, source="apple", regime="nir", spectrum=spectrum, raise_error=False + temp_db, source="apple", regime="nir", spectrum=spectrum, raise_error=False, obs_date="2020-01-01" ) assert result["added"] is False - assert result["skipped"] is True + assert "NOT NULL constraint failed: Spectra.reference" in result["message"] + + # Ingesting with invalid reference (does not already exist) with pytest.raises(AstroDBError) as error_message: - ingest_spectrum( - temp_db, - source="apple", - regime="nir", - spectrum=spectrum, - telescope="IRTF", - instrument="SpeX", - mode="Prism", - reference="Ref 5", - ) + _ = ingest_spectrum( + temp_db, + source="apple", + regime="nir", + spectrum=spectrum, + telescope="IRTF", + instrument="SpeX", + mode="Prism", + reference="Ref 5", + obs_date="2020-01-01", + ) + print(error_message) # assert "not in Publications table" in str(error_message.value) result = ingest_spectrum( @@ -83,21 +89,26 @@ def test_ingest_spectrum_errors(temp_db): mode="Prism", reference="Ref 5", raise_error=False, + obs_date="2020-01-01", ) + for k, v in result.items(): + print(k, v) assert result["added"] is False - assert result["skipped"] is True + + # Ingesting for invalid source (not already in database) with pytest.raises(AstroDBError) as error_message: - ingest_spectrum( - temp_db, - source="kiwi", - regime="nir", - spectrum=spectrum, - reference="Ref 1", - telescope="IRTF", - instrument="SpeX", - mode="Prism", - ) + _ = ingest_spectrum( + temp_db, + source="kiwi", + regime="nir", + spectrum=spectrum, + reference="Ref 1", + telescope="IRTF", + instrument="SpeX", + mode="Prism", + obs_date="2020-01-01", + ) # assert "No unique source match for kiwi in the database" in str(error_message.value) result = ingest_spectrum( @@ -110,21 +121,25 @@ def test_ingest_spectrum_errors(temp_db): telescope="IRTF", instrument="SpeX", mode="Prism", + obs_date="2020-01-01", ) + print(result) assert result["added"] is False - assert result["skipped"] is True + + # Ingesting with missing date with pytest.raises(AstroDBError) as error_message: - ingest_spectrum( - temp_db, - source="apple", - regime="nir", - spectrum=spectrum, - reference="Ref 1", - telescope="IRTF", - instrument="SpeX", - mode="Prism", - ) + _ = ingest_spectrum( + temp_db, + source="apple", + regime="nir", + spectrum=spectrum, + reference="Ref 1", + telescope="IRTF", + instrument="SpeX", + mode="Prism", + ) + assert "Invalid date received" in str(error_message.value) # assert "missing observation date" in str(error_message.value) result = ingest_spectrum( @@ -139,11 +154,12 @@ def test_ingest_spectrum_errors(temp_db): raise_error=False, ) assert result["added"] is False - # assert result["skipped"] is False - # assert result["no_obs_date"] is True + assert "Invalid date received" in result["message"] + + # Ingesting with invalid regime with pytest.raises(AstroDBError) as error_message: - result = ingest_spectrum( + _ = ingest_spectrum( temp_db, source="orange", regime="far-uv", @@ -168,9 +184,8 @@ def test_ingest_spectrum_errors(temp_db): mode="OG570", raise_error=False, ) + print(result) assert result["added"] is False - assert result["skipped"] is True - @pytest.mark.filterwarnings("ignore:Verification") @pytest.mark.filterwarnings("ignore", message=".*Card 'AIRMASS' is not FITS standard.*") From eecb5e219843fae12b87d5d2236daa07592f6d0f Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 9 Jul 2024 17:27:12 -0400 Subject: [PATCH 05/10] Reworking tests to be more compact --- simple/utils/spectra.py | 11 +- tests/test_spectra_utils.py | 209 +++++++++--------------------------- 2 files changed, 55 insertions(+), 165 deletions(-) diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index 56fe34f8b..bff481fbe 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -95,6 +95,7 @@ def ingest_spectrum( if len(db_name) != 1: msg = f"No unique source match for {source} in the database" + flags["message"] = msg if raise_error: raise AstroDBError(msg) else: @@ -193,15 +194,7 @@ def ingest_spectrum( flags["added"] = True logger.info(f"Added {source} : \n" f"{row_data}") - except sqlalchemy.exc.IntegrityError as e: - msg = f"Integrity Error: {source} \n {e}" - logger.error(msg + f" \n {row_data}") - flags["message"] = msg - if raise_error: - raise AstroDBError(msg) - else: - return flags - except sqlite3.IntegrityError as e: + except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e: msg = f"Integrity Error: {source} \n {e}" logger.error(msg) flags["message"] = msg diff --git a/tests/test_spectra_utils.py b/tests/test_spectra_utils.py index cae4895aa..ebb69e1c0 100644 --- a/tests/test_spectra_utils.py +++ b/tests/test_spectra_utils.py @@ -24,169 +24,66 @@ "which is discouraged by the FITS standard.*", ), ) -def test_ingest_spectrum_errors(temp_db): - - # A lot of the tests fail because they were checking very specific parts of ingest_spectrum - - # Ingesting a spectrum with missing regime +@pytest.mark.parametrize("test_input, message", [ + ({"source": "apple"}, "Value required for regime"), # missing regime + ({"source": "apple", + "telescope": "IRTF", + "instrument": "SpeX", + "mode": "Prism", + "regime": "nir", + "obs_date": "2020-01-01", + }, "NOT NULL constraint failed: Spectra.reference"), # missing reference + ({"source": "apple", + "telescope": "IRTF", + "instrument": "SpeX", + "mode": "Prism", + "regime": "nir", + "obs_date": "2020-01-01", + "reference": "Ref 5", + }, "FOREIGN KEY constraint failed"), # invalid reference + ({"source": "kiwi", + "telescope": "IRTF", + "instrument": "SpeX", + "mode": "Prism", + "regime": "nir", + "obs_date": "2020-01-01", + "reference": "Ref 1", + }, "No unique source match for kiwi in the database"), # invalid source + ({"source": "apple", + "telescope": "IRTF", + "instrument": "SpeX", + "mode": "Prism", + "regime": "nir", + "reference": "Ref 1", + }, "Invalid date received: None"), # missing date + ({"source": "apple", + "telescope": "IRTF", + "instrument": "SpeX", + "mode": "Prism", + "regime": "fake regime", + "obs_date": "2020-01-01", + "reference": "Ref 1", + }, "FOREIGN KEY constraint failed"), # invalid regime +]) +def test_ingest_spectrum_errors(temp_db, test_input, message): + # Test for ingest_spectrum that is expected to return errors + + # Prepare parameters to send to ingest_spectrum spectrum = "https://bdnyc.s3.amazonaws.com/tests/U10176.fits" - with pytest.raises(AstroDBError) as error_message: - _ = ingest_spectrum(temp_db, source="apple", spectrum=spectrum) - assert "Value required for regime" in str(error_message.value) - - result = ingest_spectrum( - temp_db, source="apple", spectrum=spectrum, raise_error=False - ) - assert result["added"] is False - assert "Value required for regime" in result["message"] - - - # Ingesting with missing reference - with pytest.raises(AstroDBError) as error_message: - _ = ingest_spectrum( - temp_db, - source="apple", - telescope="IRTF", - instrument="SpeX", - mode="Prism", - regime="nir", - spectrum=spectrum, - obs_date="2020-01-01", - ) - assert "NOT NULL constraint failed: Spectra.reference" in str(error_message.value) - # assert "Reference is required" in str(error_message.value) - - result = ingest_spectrum( - temp_db, source="apple", regime="nir", spectrum=spectrum, raise_error=False, obs_date="2020-01-01" - ) - assert result["added"] is False - assert "NOT NULL constraint failed: Spectra.reference" in result["message"] - - - # Ingesting with invalid reference (does not already exist) - with pytest.raises(AstroDBError) as error_message: - _ = ingest_spectrum( - temp_db, - source="apple", - regime="nir", - spectrum=spectrum, - telescope="IRTF", - instrument="SpeX", - mode="Prism", - reference="Ref 5", - obs_date="2020-01-01", - ) - print(error_message) - # assert "not in Publications table" in str(error_message.value) - - result = ingest_spectrum( - temp_db, - source="apple", - regime="nir", - spectrum=spectrum, - telescope="IRTF", - instrument="SpeX", - mode="Prism", - reference="Ref 5", - raise_error=False, - obs_date="2020-01-01", - ) - for k, v in result.items(): - print(k, v) - assert result["added"] is False - - - # Ingesting for invalid source (not already in database) - with pytest.raises(AstroDBError) as error_message: - _ = ingest_spectrum( - temp_db, - source="kiwi", - regime="nir", - spectrum=spectrum, - reference="Ref 1", - telescope="IRTF", - instrument="SpeX", - mode="Prism", - obs_date="2020-01-01", - ) - # assert "No unique source match for kiwi in the database" in str(error_message.value) + parameters = {"db": temp_db, "spectrum": spectrum} + parameters.update(test_input) - result = ingest_spectrum( - temp_db, - source="kiwi", - regime="nir", - spectrum=spectrum, - reference="Ref 1", - raise_error=False, - telescope="IRTF", - instrument="SpeX", - mode="Prism", - obs_date="2020-01-01", - ) - print(result) - assert result["added"] is False - - - # Ingesting with missing date + # Check that error was raised with pytest.raises(AstroDBError) as error_message: - _ = ingest_spectrum( - temp_db, - source="apple", - regime="nir", - spectrum=spectrum, - reference="Ref 1", - telescope="IRTF", - instrument="SpeX", - mode="Prism", - ) - assert "Invalid date received" in str(error_message.value) - # assert "missing observation date" in str(error_message.value) + _ = ingest_spectrum(**parameters) + assert message in str(error_message.value) - result = ingest_spectrum( - temp_db, - source="apple", - regime="nir", - spectrum=spectrum, - reference="Ref 1", - telescope="IRTF", - instrument="SpeX", - mode="Prism", - raise_error=False, - ) + # Suppress error but check that it was still captured + result = ingest_spectrum(**parameters, raise_error=False) assert result["added"] is False - assert "Invalid date received" in result["message"] + assert message in result["message"] - # Ingesting with invalid regime - with pytest.raises(AstroDBError) as error_message: - _ = ingest_spectrum( - temp_db, - source="orange", - regime="far-uv", - spectrum=spectrum, - reference="Ref 1", - obs_date="1/1/2024", - telescope="Keck I", - instrument="LRIS", - mode="OG570", - ) - # assert "not in Regimes table" in str(error_message.value) - - result = ingest_spectrum( - temp_db, - source="orange", - regime="far-uv", - spectrum=spectrum, - reference="Ref 1", - obs_date="1/1/2024", - telescope="Keck I", - instrument="LRIS", - mode="OG570", - raise_error=False, - ) - print(result) - assert result["added"] is False - @pytest.mark.filterwarnings("ignore:Verification") @pytest.mark.filterwarnings("ignore", message=".*Card 'AIRMASS' is not FITS standard.*") @pytest.mark.filterwarnings( From 0593d86436e03313e6f422fe38a42172b9842431 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 10 Jul 2024 12:15:32 -0400 Subject: [PATCH 06/10] Expanding validation to check for missing telescope, instrument, and mode --- simple/schema.py | 8 ++++---- tests/test_schema.py | 3 +++ tests/test_spectra_utils.py | 16 +++++++++++++++- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/simple/schema.py b/simple/schema.py index 26f5390cf..5ef1009f9 100644 --- a/simple/schema.py +++ b/simple/schema.py @@ -366,9 +366,9 @@ class Spectra(Base): ForeignKey("Regimes.regime", ondelete="cascade", onupdate="cascade"), primary_key=True, ) - telescope = Column(String(30)) - instrument = Column(String(30)) - mode = Column(String(30)) # eg, Prism, Echelle, etc + telescope = Column(String(30), nullable=False) + instrument = Column(String(30), nullable=False) + mode = Column(String(30), nullable=False) # eg, Prism, Echelle, etc observation_date = Column(DateTime, primary_key=True) # Common metadata @@ -390,7 +390,7 @@ class Spectra(Base): {}, ) - @validates("access_url", "regime", "source") + @validates("access_url", "regime", "source", "telescope", "instrument", "mode") def validate_required(self, key, value): if value is None: raise ValueError(f"Value required for {key}") diff --git a/tests/test_schema.py b/tests/test_schema.py index f04675f8e..1966300df 100644 --- a/tests/test_schema.py +++ b/tests/test_schema.py @@ -67,6 +67,9 @@ def test_parallaxes(values, error_state): ({"access_url": None}, ValueError), ({"source": None}, ValueError), ({"regime": None}, ValueError), + ({"telescope": None}, ValueError), + ({"instrument": None}, ValueError), + ({"mode": None}, ValueError), ({"observation_date": "2024-01-01"}, None), ({"observation_date": datetime(2024,1,1)}, None), ({"observation_date": None}, ValueError), diff --git a/tests/test_spectra_utils.py b/tests/test_spectra_utils.py index ebb69e1c0..f86d095bb 100644 --- a/tests/test_spectra_utils.py +++ b/tests/test_spectra_utils.py @@ -25,7 +25,21 @@ ), ) @pytest.mark.parametrize("test_input, message", [ - ({"source": "apple"}, "Value required for regime"), # missing regime + ({"source": "apple", + "telescope": "IRTF", + "instrument": "SpeX", + "mode": "Prism", + }, "Value required for regime"), # missing regime + ({"source": "apple", + "regime": "nir", + "instrument": "SpeX", + "obs_date": "2020-01-01", + }, "Value required for telescope"), # missing telescope + ({"source": "apple", + "regime": "nir", + "telescope": "IRTF", + "obs_date": "2020-01-01", + }, "Value required for instrument"), # missing instrument ({"source": "apple", "telescope": "IRTF", "instrument": "SpeX", From 6fa29b0308f73b88a1fa991ad748641b746aa498 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Wed, 10 Jul 2024 12:38:57 -0400 Subject: [PATCH 07/10] Minor cleanup --- simple/utils/spectra.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index bff481fbe..fd0c82a06 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -9,7 +9,6 @@ import sqlalchemy.exc from astrodb_utils import ( AstroDBError, - find_publication, find_source_in_db, internet_connection, ) @@ -152,8 +151,8 @@ def ingest_spectrum( mode=mode, ) if len(matches) > 0: - msg = f"Skipping suspected duplicate measurement: {source}\n" - msg2 = f"{matches}" f"{instrument, mode, obs_date, reference, spectrum} \n" + msg = f"Skipping suspected duplicate measurement: {source}" + msg2 = f"{matches} {instrument, mode, obs_date, reference, spectrum}" logger.warning(msg) logger.debug(msg2) flags["message"] = msg From 70ad8f6ff4427f221b052c53ee0e19866796efb1 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 16 Jul 2024 13:42:52 -0400 Subject: [PATCH 08/10] Apply suggestions from code review Co-authored-by: Kelle Cruz --- simple/utils/spectra.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index fd0c82a06..338db67b7 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -153,12 +153,12 @@ def ingest_spectrum( if len(matches) > 0: msg = f"Skipping suspected duplicate measurement: {source}" msg2 = f"{matches} {instrument, mode, obs_date, reference, spectrum}" - logger.warning(msg) logger.debug(msg2) flags["message"] = msg if raise_error: raise AstroDBError(msg) else: + logger.warning(msg) return flags # Check if spectrum is plottable @@ -200,17 +200,18 @@ def ingest_spectrum( if raise_error: raise AstroDBError(msg) else: + logger.error(msg) return flags except Exception as e: msg = ( f"Spectrum for {source} could not be added to the database " f"for unexpected reason: {e}" ) - logger.error(msg) flags["message"] = msg if raise_error: raise AstroDBError(msg) else: + logger.warning(msg) return flags return flags From 0a8ada268791a5ac8b977427b48f483dcc7318f5 Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 16 Jul 2024 13:43:18 -0400 Subject: [PATCH 09/10] Update simple/utils/spectra.py Co-authored-by: Kelle Cruz --- simple/utils/spectra.py | 1 - 1 file changed, 1 deletion(-) diff --git a/simple/utils/spectra.py b/simple/utils/spectra.py index 338db67b7..78018af66 100644 --- a/simple/utils/spectra.py +++ b/simple/utils/spectra.py @@ -195,7 +195,6 @@ def ingest_spectrum( logger.info(f"Added {source} : \n" f"{row_data}") except (sqlite3.IntegrityError, sqlalchemy.exc.IntegrityError) as e: msg = f"Integrity Error: {source} \n {e}" - logger.error(msg) flags["message"] = msg if raise_error: raise AstroDBError(msg) From 60c888336c1f57726662d67678e1be65b5ed184a Mon Sep 17 00:00:00 2001 From: David Rodriguez Date: Tue, 16 Jul 2024 15:33:12 -0400 Subject: [PATCH 10/10] Adding warning when converting date into ISO format --- simple/schema.py | 1 + 1 file changed, 1 insertion(+) diff --git a/simple/schema.py b/simple/schema.py index 5ef1009f9..48fb29a1a 100644 --- a/simple/schema.py +++ b/simple/schema.py @@ -403,6 +403,7 @@ def validate_date(self, key, value): elif not isinstance(value, datetime): # Convert to datetime for storing in the database # Will throw error if unable to convert + print("WARNING: Value will be converted to ISO format.") value = datetime.fromisoformat(value) return value