From 797ae1006e99926a7a290910aabfd0a235d94438 Mon Sep 17 00:00:00 2001 From: Kelle Cruz Date: Tue, 30 Jan 2024 17:43:09 -0500 Subject: [PATCH] Reorganize scripts into new package (#457) * reorganize scripts to new package * add astrodb_scripts to tests and github actions * add astrodb_scripts to devcontainer and environment --- .devcontainer/devcontainer.json | 2 +- .github/workflows/gen-db.yml | 5 +- .github/workflows/python-app.yml | 5 +- .github/workflows/scheduled-tests.yml | 5 +- environment.yml | 1 + {simple => schema}/__init__.py | 0 {simple => schema}/schema.py | 0 scripts/examples/plotting_example.py | 263 +++--- .../{ingest_utils.py => simple_utils.py} | 798 +++-------------- scripts/ingests/utils.py | 563 ------------ scripts/tutorials/generate_database.py | 58 +- scripts/tutorials/single_object_example.py | 125 +-- tests/scheduled_checks.py | 12 +- tests/test_data.py | 815 +++++++++++------- tests/test_integrity.py | 4 +- tests/test_utils.py | 249 +----- 16 files changed, 922 insertions(+), 1983 deletions(-) rename {simple => schema}/__init__.py (100%) rename {simple => schema}/schema.py (100%) rename scripts/ingests/{ingest_utils.py => simple_utils.py} (67%) delete mode 100644 scripts/ingests/utils.py diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index e69b35352..890237a05 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -13,7 +13,7 @@ "forwardPorts": [5432], // Use 'postCreateCommand' to run commands after the container is created. - "postCreateCommand": "pip install astrodbkit2 pytest psycopg2 ads", + "postCreateCommand": "pip install astrodbkit2 pytest psycopg2 ads git+https://github.com/astrodbtoolkit/astrodb_scripts.git@main", // python scripts/tutorials/generate_database.py sqlite // python scripts/tutorials/generate_database.py postgres postgres://postgres@localhost:5432 diff --git a/.github/workflows/gen-db.yml b/.github/workflows/gen-db.yml index b7038df90..cba6d4cbf 100644 --- a/.github/workflows/gen-db.yml +++ b/.github/workflows/gen-db.yml @@ -19,10 +19,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.10 - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: '3.10' @@ -30,6 +30,7 @@ jobs: run: | python -m pip install --upgrade pip pip install astrodbkit2 + pip install git+https://github.com/astrodbtoolkit/astrodb_scripts.git@main - name: Generate sqlite (file) database run: | diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 962c9165d..31e3a2c2f 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -14,10 +14,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.10 - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: '3.10' @@ -26,6 +26,7 @@ jobs: python -m pip install --upgrade pip pip install pytest ads pip install astrodbkit2 + pip install git+https://github.com/astrodbtoolkit/astrodb_scripts.git@main - name: Test with pytest run: | diff --git a/.github/workflows/scheduled-tests.yml b/.github/workflows/scheduled-tests.yml index 70c9d662d..97ac034ac 100644 --- a/.github/workflows/scheduled-tests.yml +++ b/.github/workflows/scheduled-tests.yml @@ -14,10 +14,10 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Python 3.10 - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: '3.10' @@ -26,6 +26,7 @@ jobs: python -m pip install --upgrade pip pip install pytest ads pip install astrodbkit2 + pip install git+https://github.com/astrodbtoolkit/astrodb_scripts.git@main - name: Test with pytest run: | diff --git a/environment.yml b/environment.yml index 0e3461bf8..8b65afed3 100644 --- a/environment.yml +++ b/environment.yml @@ -75,3 +75,4 @@ dependencies: - webencodings==0.5.1 - werkzeug==2.2.2 - zipp==3.13.0 + - git+https://github.com/astrodbtoolkit/astrodb_scripts.git@main \ No newline at end of file diff --git a/simple/__init__.py b/schema/__init__.py similarity index 100% rename from simple/__init__.py rename to schema/__init__.py diff --git a/simple/schema.py b/schema/schema.py similarity index 100% rename from simple/schema.py rename to schema/schema.py diff --git a/scripts/examples/plotting_example.py b/scripts/examples/plotting_example.py index fa5ff78c4..e6e364aab 100644 --- a/scripts/examples/plotting_example.py +++ b/scripts/examples/plotting_example.py @@ -5,7 +5,7 @@ import astropy.units as u from sqlalchemy import func, and_, Integer, cast from scripts.ingests.utils import load_simpledb, logger -from scripts.ingests.ingest_utils import convert_spt_code_to_string_to_code +from scripts.ingests.simple_utils import convert_spt_code_to_string_to_code from astrodbkit2.spectra import load_spectrum plt.interactive(False) @@ -13,75 +13,87 @@ INCLUDE_VERSION = True -db = load_simpledb('SIMPLE.db', recreatedb=False) +db = load_simpledb("SIMPLE.db", recreatedb=False) # =============================================================================================== # Get version number -t = db.query(db.Versions.c.version). \ - filter(db.Versions.c.end_date != None). \ - order_by(db.Versions.c.end_date.desc()). \ - limit(1). \ - astropy() -version = t['version'][0] +t = ( + db.query(db.Versions.c.version) + .filter(db.Versions.c.end_date != None) + .order_by(db.Versions.c.end_date.desc()) + .limit(1) + .astropy() +) +version = t["version"][0] # =============================================================================================== # Counts of spectral types # Query for counts grouped by spectral type -t = db.query(cast(db.SpectralTypes.c.spectral_type_code, Integer()).label('spectral_type'), - func.count(db.SpectralTypes.c.source).label('counts')). \ - group_by(cast(db.SpectralTypes.c.spectral_type_code, Integer())). \ - having(func.count(cast(db.SpectralTypes.c.spectral_type_code, Integer())) > 0). \ - astropy() +t = ( + db.query( + cast(db.SpectralTypes.c.spectral_type_code, Integer()).label("spectral_type"), + func.count(db.SpectralTypes.c.source).label("counts"), + ) + .group_by(cast(db.SpectralTypes.c.spectral_type_code, Integer())) + .having(func.count(cast(db.SpectralTypes.c.spectral_type_code, Integer())) > 0) + .astropy() +) # Making strings out of the numeric codes -t['spectral_type'] = convert_spt_code_to_string_to_code(t['spectral_type'], decimals=0) +t["spectral_type"] = convert_spt_code_to_string_to_code(t["spectral_type"], decimals=0) # Bar chart of counts vs spectral types fig, ax = plt.subplots(figsize=(8, 6)) index = np.arange(len(t)) bar_width = 0.95 -plt.bar(index, t['counts'], bar_width, alpha=0.8) -plt.xlabel('Spectral Type') -plt.ylabel('Counts') -plt.xticks(index, t['spectral_type']) +plt.bar(index, t["counts"], bar_width, alpha=0.8) +plt.xlabel("Spectral Type") +plt.ylabel("Counts") +plt.xticks(index, t["spectral_type"]) if INCLUDE_VERSION: - plt.title(f'SIMPLE Spectral Types; Version {version}') -plt.yscale('linear') + plt.title(f"SIMPLE Spectral Types; Version {version}") +plt.yscale("linear") ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90) plt.tight_layout() # plt.show() -plt.savefig('documentation/figures/sptypes_counts.png') +plt.savefig("documentation/figures/sptypes_counts.png") # =============================================================================================== # Representative spectra (1 per spectral type) # Query for sources and spectral types that have SpeX Prism spectra -t = db.query(db.SpectralTypes.c.source, db.SpectralTypes.c.spectral_type_string, - db.SpectralTypes.c.spectral_type_code, - db.Spectra.c.instrument, db.Spectra.c.spectrum). \ - join(db.Spectra, db.Spectra.c.source == db.SpectralTypes.c.source). \ - filter(db.Spectra.c.instrument == 'SpeX'). \ - filter(db.Spectra.c.spectrum.ilike('%.fits')). \ - order_by(db.SpectralTypes.c.spectral_type_code). \ - astropy() +t = ( + db.query( + db.SpectralTypes.c.source, + db.SpectralTypes.c.spectral_type_string, + db.SpectralTypes.c.spectral_type_code, + db.Spectra.c.instrument, + db.Spectra.c.spectrum, + ) + .join(db.Spectra, db.Spectra.c.source == db.SpectralTypes.c.source) + .filter(db.Spectra.c.instrument == "SpeX") + .filter(db.Spectra.c.spectrum.ilike("%.fits")) + .order_by(db.SpectralTypes.c.spectral_type_code) + .astropy() +) spectra_dict = {} -spectra_dict['spt'] = [] +spectra_dict["spt"] = [] for row in t: # Only consider those from spectral types that haven't been fetched yet - spt = convert_spt_code_to_string_to_code(row['spectral_type_code'], decimals=1)[0] - if spt in spectra_dict.get('spt') or not spt.endswith('.0'): + spt = convert_spt_code_to_string_to_code(row["spectral_type_code"], decimals=1)[0] + if spt in spectra_dict.get("spt") or not spt.endswith(".0"): continue - spec = load_spectrum(row['spectrum'], spectra_format='Spex Prism') + spec = load_spectrum(row["spectrum"], spectra_format="Spex Prism") if isinstance(spec, str): # Failed to get spectrum continue # Store results - spectra_dict['spt'].append(spt) - spectra_dict[f'{spt}_data'] = [row['source'], spt, spec] + spectra_dict["spt"].append(spt) + spectra_dict[f"{spt}_data"] = [row["source"], spt, spec] # Make plot fig, ax = plt.subplots(figsize=(8, 6)) @@ -90,7 +102,7 @@ minwave, maxwave = 1.2, 1.3 # normalisation region bounds # minwave, maxwave = 2.1, 2.25 for k, v in spectra_dict.items(): - if k == 'spt': + if k == "spt": continue print(v[0], v[1]) @@ -108,151 +120,182 @@ ind += 1 plt.legend() -plt.xlabel('Wavelength') -plt.ylabel('Nomalized Flux') +plt.xlabel("Wavelength") +plt.ylabel("Nomalized Flux") if INCLUDE_VERSION: - plt.title(f'SIMPLE Spectra; Version {version}') + plt.title(f"SIMPLE Spectra; Version {version}") plt.tight_layout() # plt.show() -plt.savefig('documentation/figures/spectra_sample.png') +plt.savefig("documentation/figures/spectra_sample.png") # =============================================================================================== # Counts of spectra grouped by telescope/instrument # Query for counts grouped by telescope/instrument -t = db.query(db.Spectra.c.telescope, db.Spectra.c.instrument, - func.count(db.Spectra.c.source).label('counts')). \ - group_by(db.Spectra.c.telescope, db.Spectra.c.instrument). \ - filter(db.Spectra.c.instrument.is_not(None)). \ - astropy() - -t['telins'] = [f"{row['telescope']}/{row['instrument']}" for row in t] -t.sort('counts', reverse=True) +t = ( + db.query( + db.Spectra.c.telescope, + db.Spectra.c.instrument, + func.count(db.Spectra.c.source).label("counts"), + ) + .group_by(db.Spectra.c.telescope, db.Spectra.c.instrument) + .filter(db.Spectra.c.instrument.is_not(None)) + .astropy() +) + +t["telins"] = [f"{row['telescope']}/{row['instrument']}" for row in t] +t.sort("counts", reverse=True) # Bar chart of counts fig, ax = plt.subplots(figsize=(8, 6)) index = np.arange(len(t)) bar_width = 0.95 -plt.bar(index, t['counts'], bar_width, alpha=0.8) -plt.xlabel('Telescope/Instrument') -plt.ylabel('Counts') -plt.xticks(index, t['telins']) +plt.bar(index, t["counts"], bar_width, alpha=0.8) +plt.xlabel("Telescope/Instrument") +plt.ylabel("Counts") +plt.xticks(index, t["telins"]) if INCLUDE_VERSION: - plt.title(f'SIMPLE Spectra; Version {version}') + plt.title(f"SIMPLE Spectra; Version {version}") # plt.yscale('log') ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90) plt.tight_layout() # plt.show() -plt.savefig('documentation/figures/spectra_telins_counts.png') +plt.savefig("documentation/figures/spectra_telins_counts.png") # Only by instrument -t = db.query(db.Spectra.c.instrument, - func.count(db.Spectra.c.source).label('counts')). \ - group_by(db.Spectra.c.instrument). \ - filter(db.Spectra.c.instrument.is_not(None)). \ - astropy() +t = ( + db.query(db.Spectra.c.instrument, func.count(db.Spectra.c.source).label("counts")) + .group_by(db.Spectra.c.instrument) + .filter(db.Spectra.c.instrument.is_not(None)) + .astropy() +) -t.sort('counts', reverse=True) +t.sort("counts", reverse=True) # Bar chart of counts fig, ax = plt.subplots(figsize=(8, 6)) index = np.arange(len(t)) bar_width = 0.95 -plt.bar(index, t['counts'], bar_width, alpha=0.8) -plt.xlabel('Instrument') -plt.ylabel('Counts') -plt.xticks(index, t['instrument']) +plt.bar(index, t["counts"], bar_width, alpha=0.8) +plt.xlabel("Instrument") +plt.ylabel("Counts") +plt.xticks(index, t["instrument"]) if INCLUDE_VERSION: - plt.title(f'SIMPLE Spectra; Version {version}') + plt.title(f"SIMPLE Spectra; Version {version}") # plt.yscale('log') ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90) plt.tight_layout() # plt.show() -plt.savefig('documentation/figures/spectra_ins_counts.png') +plt.savefig("documentation/figures/spectra_ins_counts.png") # =============================================================================================== # Counts of photometry grouped by band # Query for counts grouped by telescope/instrument -t = db.query(db.Photometry.c.band, - func.count(db.Photometry.c.source).label('counts')). \ - group_by(db.Photometry.c.band). \ - astropy() -t.sort('counts', reverse=True) +t = ( + db.query(db.Photometry.c.band, func.count(db.Photometry.c.source).label("counts")) + .group_by(db.Photometry.c.band) + .astropy() +) +t.sort("counts", reverse=True) fig, ax = plt.subplots(figsize=(8, 6)) index = np.arange(len(t)) bar_width = 0.85 -plt.bar(index, t['counts'], bar_width, alpha=0.8) -plt.xlabel('Photometry') -plt.ylabel('Counts') -plt.yscale('log') -plt.xticks(index, t['band']) +plt.bar(index, t["counts"], bar_width, alpha=0.8) +plt.xlabel("Photometry") +plt.ylabel("Counts") +plt.yscale("log") +plt.xticks(index, t["band"]) if INCLUDE_VERSION: - plt.title(f'SIMPLE Photometry; Version {version}') -plt.yscale('log') + plt.title(f"SIMPLE Photometry; Version {version}") +plt.yscale("log") ax.set_xticklabels(ax.xaxis.get_majorticklabels(), rotation=90, fontsize=8) plt.tight_layout() # plt.show() -plt.savefig('documentation/figures/photometry_counts.png') +plt.savefig("documentation/figures/photometry_counts.png") # =============================================================================================== # Color/magnitude diagrams # Note using pandas for easier table manipulation -t = db.query(db.Photometry.c.source, db.Photometry.c.band, db.Photometry.c.magnitude, - db.SpectralTypes.c.spectral_type_code). \ - join(db.SpectralTypes, db.SpectralTypes.c.source == db.Photometry.c.source). \ - filter(db.Photometry.c.band.in_(['2MASS.J', '2MASS.H', '2MASS.Ks'])). \ - pandas() +t = ( + db.query( + db.Photometry.c.source, + db.Photometry.c.band, + db.Photometry.c.magnitude, + db.SpectralTypes.c.spectral_type_code, + ) + .join(db.SpectralTypes, db.SpectralTypes.c.source == db.Photometry.c.source) + .filter(db.Photometry.c.band.in_(["2MASS.J", "2MASS.H", "2MASS.Ks"])) + .pandas() +) # Pivoting and handling table t_pivoted = t.pivot_table(index="source", columns="band", values="magnitude") -df = t[['source', 'spectral_type_code']].merge(t_pivoted, left_on='source', right_on='source') -df = df.drop_duplicates(['source', 'spectral_type_code']) # multiple values of spectral types result in duplicate rows +df = t[["source", "spectral_type_code"]].merge( + t_pivoted, left_on="source", right_on="source" +) +df = df.drop_duplicates( + ["source", "spectral_type_code"] +) # multiple values of spectral types result in duplicate rows fig, ax = plt.subplots(figsize=(8, 6)) -plt.scatter(df['spectral_type_code'], df['2MASS.J'] - df['2MASS.Ks'], alpha=0.8) -plt.xlabel('Spectral Type') -plt.ylabel('J-Ks (mag)') +plt.scatter(df["spectral_type_code"], df["2MASS.J"] - df["2MASS.Ks"], alpha=0.8) +plt.xlabel("Spectral Type") +plt.ylabel("J-Ks (mag)") if INCLUDE_VERSION: - plt.title(f'SIMPLE Sources; Version {version}') + plt.title(f"SIMPLE Sources; Version {version}") # Custom tick labels for spectral type ax.set_xticks([65, 70, 75, 80, 85, 90]) -ax.set_xticklabels(['M5', 'L0', 'L5', 'T0', 'T5', 'Y0']) +ax.set_xticklabels(["M5", "L0", "L5", "T0", "T5", "Y0"]) plt.tight_layout() # plt.show() -plt.savefig('documentation/figures/color_spectra_counts.png') +plt.savefig("documentation/figures/color_spectra_counts.png") # --------------------------------------------------------------------- # Color-magnitude with parallax -t = db.query(db.Photometry.c.source, db.Photometry.c.band, db.Photometry.c.magnitude, - db.Parallaxes.c.parallax, db.SpectralTypes.c.spectral_type_code). \ - join(db.Parallaxes, db.Parallaxes.c.source == db.Photometry.c.source). \ - join(db.SpectralTypes, db.SpectralTypes.c.source == db.Photometry.c.source). \ - filter(db.Photometry.c.band.in_(['2MASS.J', '2MASS.H', '2MASS.Ks'])). \ - filter(db.Parallaxes.c.adopted == 1). \ - pandas() +t = ( + db.query( + db.Photometry.c.source, + db.Photometry.c.band, + db.Photometry.c.magnitude, + db.Parallaxes.c.parallax, + db.SpectralTypes.c.spectral_type_code, + ) + .join(db.Parallaxes, db.Parallaxes.c.source == db.Photometry.c.source) + .join(db.SpectralTypes, db.SpectralTypes.c.source == db.Photometry.c.source) + .filter(db.Photometry.c.band.in_(["2MASS.J", "2MASS.H", "2MASS.Ks"])) + .filter(db.Parallaxes.c.adopted == 1) + .pandas() +) # Pivoting and creating helper columns t_pivoted = t.pivot_table(index="source", columns="band", values="magnitude") -df = t[['source', 'parallax', 'spectral_type_code']].merge(t_pivoted, left_on='source', right_on='source') -df = df.drop_duplicates(['source', 'spectral_type_code']) -df['distance'] = 1000./df['parallax'] -df['dm'] = 5 * np.log10(df['distance'] / 10) # distance modulus +df = t[["source", "parallax", "spectral_type_code"]].merge( + t_pivoted, left_on="source", right_on="source" +) +df = df.drop_duplicates(["source", "spectral_type_code"]) +df["distance"] = 1000.0 / df["parallax"] +df["dm"] = 5 * np.log10(df["distance"] / 10) # distance modulus fig, ax = plt.subplots(figsize=(8, 6)) -plt.scatter(df['2MASS.J'] - df['2MASS.Ks'], df['2MASS.Ks'] - df['dm'], c=df['spectral_type_code'], alpha=0.8) -plt.xlabel('J-Ks (mag)') -plt.ylabel('Absolute Ks (mag)') +plt.scatter( + df["2MASS.J"] - df["2MASS.Ks"], + df["2MASS.Ks"] - df["dm"], + c=df["spectral_type_code"], + alpha=0.8, +) +plt.xlabel("J-Ks (mag)") +plt.ylabel("Absolute Ks (mag)") if INCLUDE_VERSION: - plt.title(f'SIMPLE Sources; Version {version}') + plt.title(f"SIMPLE Sources; Version {version}") ax.invert_yaxis() cbar = plt.colorbar() -cbar.set_label('Spectral Type') +cbar.set_label("Spectral Type") cbar.set_ticks([65, 70, 75, 80, 85]) -cbar.set_ticklabels(['M5', 'L0', 'L5', 'T0', 'T5']) +cbar.set_ticklabels(["M5", "L0", "L5", "T0", "T5"]) plt.tight_layout() # plt.show() -plt.savefig('documentation/figures/colormag_counts.png') +plt.savefig("documentation/figures/colormag_counts.png") diff --git a/scripts/ingests/ingest_utils.py b/scripts/ingests/simple_utils.py similarity index 67% rename from scripts/ingests/ingest_utils.py rename to scripts/ingests/simple_utils.py index 5ce4ab7eb..d80fb9356 100644 --- a/scripts/ingests/ingest_utils.py +++ b/scripts/ingests/simple_utils.py @@ -1,316 +1,33 @@ """ Utils functions for use in ingests """ -from astroquery.simbad import Simbad -from astropy.coordinates import SkyCoord -import astropy.units as u from astroquery.gaia import Gaia from astropy.table import Table -from typing import List, Union, Optional import numpy as np import numpy.ma as ma import pandas as pd -from sqlalchemy import func, null +from sqlalchemy import and_ from astropy.io import fits import dateutil import re import requests import logging -from sqlalchemy import or_, and_ import sqlalchemy.exc -from scripts.ingests.utils import ( - SimpleError, +from astrodb_scripts.utils import ( + AstroDBError, find_source_in_db, - find_publication, check_internet_connection, ) __all__ = [ - "ingest_names", - "ingest_source", - "ingest_sources", "ingest_spectral_types", "ingest_parallaxes", "ingest_proper_motions", "ingest_photometry", "ingest_spectra", - "ingest_instrument", - "find_survey_name_in_simbad", ] -logger = logging.getLogger("SIMPLE") - - -# NAMES -def ingest_names(db, source, other_name): - """ - This function ingests an other name into the Names table - - Parameters - ---------- - db: astrodbkit2.astrodb.Database - Database object created by astrodbkit2 - source: str - Name of source as it appears in sources table - - other_name: str - Name of the source different than that found in source table - - Returns - ------- - None - """ - names_data = [{"source": source, "other_name": other_name}] - try: - with db.engine.connect() as conn: - conn.execute(db.Names.insert().values(names_data)) - conn.commit() - logger.info(f" Name added to database: {names_data}\n") - except sqlalchemy.exc.IntegrityError as e: - msg = f"Could not add {names_data} to database. Name is likely a duplicate." - logger.warning(msg) - raise SimpleError(msg + "\n" + str(e) + "\n") - - -# SOURCES -def ingest_sources( - db, - sources, - references=None, - ras=None, - decs=None, - comments=None, - epochs=None, - equinoxes=None, - other_references=None, - raise_error=True, - search_db=True, -): - """ - Script to ingest sources - TODO: better support references=None - Parameters - ---------- - db: astrodbkit2.astrodb.Database - Database object created by astrodbkit2 - sources: list[str] - Names of sources - references: str or list[strings] - Discovery references of sources - ras: list[floats], optional - Right ascensions of sources. Decimal degrees. - decs: list[floats], optional - Declinations of sources. Decimal degrees. - comments: list[strings], optional - Comments - epochs: str or list[str], optional - Epochs of coordinates - equinoxes: str or list[string], optional - Equinoxes of coordinates - other_references: str or list[strings] - raise_error: bool, optional - True (default): Raise an error if a source cannot be ingested - False: Log a warning but skip sources which cannot be ingested - search_db: bool, optional - True (default): Search database to see if source is already ingested - False: Ingest source without searching the database - - Returns - ------- - - None - - """ - # TODO: add example - - # SETUP INPUTS - if ras is None and decs is None: - coords_provided = False - else: - coords_provided = True - - if isinstance(sources, str): - n_sources = 1 - else: - n_sources = len(sources) - - # Convert single element input values into lists - input_values = [ - sources, - references, - ras, - decs, - epochs, - equinoxes, - comments, - other_references, - ] - for i, input_value in enumerate(input_values): - if input_value is None: - input_values[i] = [None] * n_sources - elif isinstance(input_value, (str, float)): - input_values[i] = [input_value] * n_sources - ( - sources, - references, - ras, - decs, - epochs, - equinoxes, - comments, - other_references, - ) = input_values - - # TODO: figure out counting - # n_added = 0 - # n_existing = 0 - # n_names = 0 - # n_alt_names = 0 - # n_skipped = 0 - # n_multiples = 0 - - if n_sources > 1: - logger.info(f"Trying to add {n_sources} sources") - - # Loop over each source and decide to ingest, skip, or add alt name - for source_counter, source in enumerate(sources): - - logger.debug(f"{source_counter}: Trying to ingest {source}") - - reference = references[source_counter] - other_reference = other_references[source_counter] - comment = ( - None if ma.is_masked(comments[source_counter]) else comments[source_counter] - ) - - if coords_provided: - ra = ras[source_counter] - dec = decs[source_counter] - epoch = ( - None if ma.is_masked(epochs[source_counter]) else epochs[source_counter] - ) - equinox = ( - None - if ma.is_masked(equinoxes[source_counter]) - else equinoxes[source_counter] - ) - - ingest_source( - db, - source, - reference=reference, - ra=ra, - dec=dec, - epoch=epoch, - equinox=equinox, - other_reference=other_reference, - comment=comment, - raise_error=raise_error, - search_db=search_db, - ) - else: - ingest_source( - db, - source, - reference=reference, - other_reference=other_reference, - comment=comment, - raise_error=raise_error, - search_db=search_db, - ) - - # if n_sources > 1: - # logger.info(f"Sources added to database: {n_added}") - # logger.info(f"Names added to database: {n_names} \n") - # logger.info(f"Sources already in database: {n_existing}") - # logger.info(f"Alt Names added to database: {n_alt_names}") - # logger.info( - # f"Sources NOT added to database because multiple matches: {n_multiples}" - # ) - # logger.info(f"Sources NOT added to database: {n_skipped} \n") - - # if n_added != n_names: - # msg = f"Number added should equal names added." - # raise SimpleError(msg) - - # if n_added + n_existing + n_multiples + n_skipped != n_sources: - # msg = f"Number added + Number skipped doesn't add up to total sources" - # raise SimpleError(msg) - - return - - -# SURVEY DATA -def find_survey_name_in_simbad(sources, desig_prefix, source_id_index=None): - """ - Function to extract source designations from SIMBAD - - Parameters - ---------- - sources: astropy.table.Table - Sources names to search for in SIMBAD - desig_prefix - prefix to search for in list of identifiers - source_id_index - After a designation is split, this index indicates source id suffix. - For example, source_id_index = 2 to extract suffix from "Gaia DR2" designations. - source_id_index = 1 to exctract suffix from "2MASS" designations. - Returns - ------- - Astropy table - """ - - n_sources = len(sources) - - Simbad.reset_votable_fields() - Simbad.add_votable_fields("typed_id") # keep search term in result table - Simbad.add_votable_fields("ids") # add all SIMBAD identifiers as an output column - - logger.info("simbad query started") - result_table = Simbad.query_objects(sources["source"]) - logger.info("simbad query ended") - - ind = result_table["SCRIPT_NUMBER_ID"] > 0 # find indexes which contain results - simbad_ids = result_table["TYPED_ID", "IDS"][ind] - - db_names = [] - simbad_designations = [] - source_ids = [] - - for row in simbad_ids: - db_name = row["TYPED_ID"] - ids = row["IDS"].split("|") - designation = [i for i in ids if desig_prefix in i] - - if designation: - logger.debug(f"{db_name}, {designation[0]}") - db_names.append(db_name) - if len(designation) == 1: - simbad_designations.append(designation[0]) - else: - simbad_designations.append(designation[0]) - logger.warning(f"more than one designation matched, {designation}") - - if source_id_index is not None: - source_id = designation[0].split()[source_id_index] - source_ids.append(int(source_id)) # convert to int since long in Gaia - - n_matches = len(db_names) - logger.info( - f"Found, {n_matches}, {desig_prefix}, sources for, {n_sources}, sources" - ) - - if source_id_index is not None: - result_table = Table( - [db_names, simbad_designations, source_ids], - names=("db_names", "designation", "source_id"), - ) - else: - result_table = Table( - [db_names, simbad_designations], names=("db_names", "designation") - ) - - return result_table +logger = logging.getLogger("AstroDB") # SPECTRAL TYPES @@ -388,7 +105,7 @@ def ingest_spectral_types( f"No unique source match for {source} in the database " f"(with SpT: {spectral_types[i]} from {references[i]})" ) - raise SimpleError(msg) + raise AstroDBError(msg) else: db_name = db_name[0] @@ -511,17 +228,17 @@ def ingest_spectral_types( == 0 ): msg = f"The publication {references[i]} does not exist in the database" - msg1 = f"Add it with ingest_publication function." - logger.debug(msg + msg1) - raise SimpleError(msg) + "Add it with ingest_publication function." + logger.debug(msg) + raise AstroDBError(msg) elif "NOT NULL constraint failed: SpectralTypes.regime" in str(e): msg = f"The regime was not provided for {source}" logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) else: msg = "Other error\n" logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) msg = f"Spectral types added: {n_added} \n" f"Spectral Types skipped: {n_skipped}" logger.info(msg) @@ -672,7 +389,7 @@ def ingest_parallaxes(db, sources, plxs, plx_errs, plx_refs, comments=None): if len(db_name) != 1: msg = f"No unique source match for {source} in the database" - raise SimpleError(msg) + raise AstroDBError(msg) else: db_name = db_name[0] @@ -704,7 +421,8 @@ def ingest_parallaxes(db, sources, plxs, plx_errs, plx_refs, comments=None): adopted_ind = source_plx_data["adopted"] == 1 if sum(adopted_ind): old_adopted = source_plx_data[adopted_ind] - # if errors of new data are less than other measurements, set Adopted = True. + # if errors of new data are less than other measurements, + # set Adopted = True. if plx_errs[i] < min(source_plx_data["parallax_error"]): adopted = True @@ -775,7 +493,7 @@ def ingest_parallaxes(db, sources, plxs, plx_errs, plx_refs, comments=None): "The parallax measurement may be a duplicate." ) logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) logger.info(f"Total Parallaxes added to database: {n_added} \n") @@ -807,7 +525,8 @@ def ingest_proper_motions( Examples ---------- - > ingest_proper_motions(db, my_sources, my_pm_ra, my_pm_ra_unc, my_pm_dec, my_pm_dec_unc, my_pm_refs, + > ingest_proper_motions(db, my_sources, my_pm_ra, my_pm_ra_unc, + my_pm_dec, my_pm_dec_unc, my_pm_refs, verbose = True) """ @@ -832,7 +551,7 @@ def ingest_proper_motions( if len(db_name) != 1: msg = f"No unique source match for {source} in the database" - raise SimpleError(msg) + raise AstroDBError(msg) else: db_name = db_name[0] @@ -945,7 +664,7 @@ def ingest_proper_motions( "The proper motion measurement may be a duplicate." ) logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) updated_source_pm_data = ( db.query(db.ProperMotions) @@ -1044,7 +763,7 @@ def ingest_photometry( if len(db_name) != 1: msg = f"No unique source match for {source} in the database" - raise SimpleError(msg) + raise AstroDBError(msg) else: db_name = db_name[0] @@ -1082,7 +801,7 @@ def ingest_photometry( msg = "The measurement may be a duplicate." if raise_error: logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) else: logger.warning(msg) continue @@ -1093,7 +812,7 @@ def ingest_photometry( "Add it with add_publication function." ) logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) logger.info(f"Total photometry measurements added to database: {n_added} \n") @@ -1212,7 +931,7 @@ def ingest_spectra( if len(db_name) != 1: msg = f"No unique source match for {source} in the database" - raise SimpleError(msg) + raise AstroDBError(msg) else: db_name = db_name[0] @@ -1233,7 +952,7 @@ def ingest_spectra( ) logger.error(msg) if raise_error: - raise SimpleError(msg) + raise AstroDBError(msg) else: continue else: @@ -1251,7 +970,7 @@ def ingest_spectra( ) logger.error(msg) if raise_error: - raise SimpleError(msg) + raise AstroDBError(msg) else: continue else: @@ -1259,7 +978,7 @@ def ingest_spectra( logger.debug(msg) else: msg = "No internet connection. Internet is needed to check spectrum files." - raise SimpleError(msg) + raise AstroDBError(msg) # Find what spectra already exists in database for this source source_spec_data = ( @@ -1286,17 +1005,20 @@ def ingest_spectra( except ValueError: n_skipped += 1 if raise_error: - msg = f"{source}: Can't convert obs date to Date Time object: {obs_dates[i]}" + msg = f"{source}: Can't convert obs date to Date Time object: " + "{obs_dates[i]}" logger.error(msg) - raise SimpleError + raise AstroDBError except dateutil.parser._parser.ParserError: n_skipped += 1 if raise_error: - msg = f"{source}: Can't convert obs date to Date Time object: {obs_dates[i]}" + msg = f"{source}: Can't convert obs date to " + f"Date Time object: {obs_dates[i]}" logger.error(msg) - raise SimpleError + raise AstroDBError else: - msg = f"Skipping {source} Can't convert obs date to Date Time object: {obs_dates[i]}" + msg = f"Skipping {source} Can't convert obs date to " + f"Date Time object: {obs_dates[i]}" logger.warning(msg) continue @@ -1305,9 +1027,11 @@ def ingest_spectra( { "source": db_name, "spectrum": spectra[i], - "original_spectrum": None, # if ma.is_masked(original_spectra[i]) or isinstance(original_spectra,None) + "original_spectrum": None, # if ma.is_masked(original_spectra[i]) or + # isinstance(original_spectra,None) # else original_spectra[i], - "local_spectrum": None, # if ma.is_masked(local_spectra[i]) else local_spectra[i], + "local_spectrum": None, # if ma.is_masked(local_spectra[i]) + # else local_spectra[i], "regime": regimes[i], "telescope": telescopes[i], "instrument": None if ma.is_masked(instruments[i]) else instruments[i], @@ -1339,7 +1063,7 @@ def ingest_spectra( msg = f"Regime provided is not in schema: {regimes[i]}" logger.error(msg) if raise_error: - raise SimpleError(msg) + raise AstroDBError(msg) else: continue if ( @@ -1349,12 +1073,14 @@ def ingest_spectra( == 0 ): msg = ( - f"Spectrum for {source} could not be added to the database because the reference {references[i]} is not in Publications table. \n" + f"Spectrum for {source} could not be added to the database " + "because the reference {references[i]} is not in " + "Publications table. \n" f"(Add it with ingest_publication function.) \n " ) logger.warning(msg) if raise_error: - raise SimpleError(msg) + raise AstroDBError(msg) else: continue # check telescope, instrument, mode exists @@ -1388,44 +1114,52 @@ def ingest_spectra( ): msg = f"Skipping suspected duplicate measurement\n{source}\n" msg2 = f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference']}" - msg3 = f"{instruments[i], modes[i], obs_date, references[i], spectra[i]} \n" + msg3 = f"{instruments[i], modes[i]} " + f"{obs_date, references[i], spectra[i]} \n" logger.warning(msg) logger.debug(msg2 + msg3 + str(e)) n_dupes += 1 if raise_error: - raise SimpleError + raise AstroDBError else: continue # Skip duplicate measurement # else: - # msg = f'Spectrum could not be added to the database (other data exist): \n ' \ - # f"{source, instruments[i], modes[i], obs_date, references[i], spectra[i]} \n" + # msg = f'Spectrum could not be added to the database + # "(other data exist): \n ' \ + # f"{source, instruments[i], modes[i], obs_date, references[i], + # "spectra[i]} \n" # msg2 = f"Existing Data: \n " - # # f"{source_spec_data[ref_dupe_ind]['source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum']}" + # # f"{source_spec_data[ref_dupe_ind]['source', 'instrument', + # 'mode', 'observation_date', 'reference', 'spectrum']}" # msg3 = f"Data not able to add: \n {row_data} \n " # logger.warning(msg + msg2) # source_spec_data[ref_dupe_ind][ - # 'source', 'instrument', 'mode', 'observation_date', 'reference', 'spectrum'].pprint_all() + # 'source', 'instrument', 'mode', 'observation_date', + # 'reference', 'spectrum'].pprint_all() # logger.debug(msg3) # n_skipped += 1 # continue if len(instrument) == 0 or len(mode) == 0 or len(telescope) == 0: msg = ( f"Spectrum for {source} could not be added to the database. \n" - f" Telescope, Instrument, and/or Mode need to be added to the appropriate table. \n" - f" Trying to find telescope: {row_data[0]['telescope']}, instrument: {row_data[0]['instrument']}, " + f" Telescope, Instrument, and/or Mode need to be added " + "to the appropriate table. \n" + f" Trying to find telescope: {row_data[0]['telescope']} " + f"instrument: {row_data[0]['instrument']}, " f" mode: {row_data[0]['mode']} \n" f" Telescope: {telescope}, Instrument: {instrument}, Mode: {mode} \n" ) logger.error(msg) n_missing_instrument += 1 if raise_error: - raise SimpleError + raise AstroDBError else: continue else: - msg = f"Spectrum for {source} could not be added to the database for unknown reason: \n {row_data} \n " + msg = f"Spectrum for {source} could not be added to the database " + "for unknown reason: \n {row_data} \n " logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) msg = ( f"SPECTRA ADDED: {n_added} \n" @@ -1442,121 +1176,32 @@ def ingest_spectra( if n_added + n_dupes + n_blank + n_skipped + n_missing_instrument != n_spectra: msg = "Numbers don't add up: " logger.error(msg) - raise SimpleError(msg) - - spec_count = ( - db.query(Spectra.regime, func.count(Spectra.regime)) - .group_by(Spectra.regime) - .all() - ) - - spec_ref_count = ( - db.query(Spectra.reference, func.count(Spectra.reference)) - .group_by(Spectra.reference) - .order_by(func.count(Spectra.reference).desc()) - .limit(20) - .all() - ) - - telescope_spec_count = ( - db.query(Spectra.telescope, func.count(Spectra.telescope)) - .group_by(Spectra.telescope) - .order_by(func.count(Spectra.telescope).desc()) - .limit(20) - .all() - ) - - # logger.info(f'Spectra in the database: \n {spec_count} \n {spec_ref_count} \n {telescope_spec_count}') - - return - - -def ingest_instrument(db, telescope=None, instrument=None, mode=None): - """ - Script to ingest instrumentation - TODO: Add option to ingest references for the telescope and instruments - - Parameters - ---------- - db: astrodbkit2.astrodb.Database - Database object created by astrodbkit2 - telescope: str - instrument: str - mode: str - - Returns - ------- - - None - - """ - - # Make sure enough inputs are provided - if telescope is None and (instrument is None or mode is None): - msg = "Telescope, Instrument, and Mode must be provided" - logger.error(msg) - raise SimpleError(msg) - - msg_search = f"Searching for {telescope}, {instrument}, {mode} in database" - logger.info(msg_search) - - # Search for the inputs in the database - telescope_db = ( - db.query(db.Telescopes).filter(db.Telescopes.c.telescope == telescope).table() - ) - mode_db = ( - db.query(db.Instruments) - .filter( - and_( - db.Instruments.c.mode == mode, - db.Instruments.c.instrument == instrument, - db.Instruments.c.telescope == telescope, - ) - ) - .table() - ) - - if len(telescope_db) == 1 and len(mode_db) == 1: - msg_found = ( - f"{telescope}, {instrument}, and {mode} are already in the database." - ) - logger.info(msg_found) - return - - # Ingest telescope entry if not already present - if telescope is not None and len(telescope_db) == 0: - telescope_add = [{"telescope": telescope}] - try: - with db.engine.connect() as conn: - conn.execute(db.Telescopes.insert().values(telescope_add)) - conn.commit() - msg_telescope = f"{telescope} was successfully ingested in the database" - logger.info(msg_telescope) - except sqlalchemy.exc.IntegrityError as e: # pylint: disable=invalid-name - msg = "Telescope could not be ingested" - logger.error(msg) - raise SimpleError(msg + "\n" + str(e)) - - # Ingest instrument+mode (requires telescope) if not already present - if ( - telescope is not None - and instrument is not None - and mode is not None - and len(mode_db) == 0 - ): - instrument_add = [ - {"instrument": instrument, "mode": mode, "telescope": telescope} - ] - try: - with db.engine.connect() as conn: - conn.execute(db.Instruments.insert().values(instrument_add)) - conn.commit() - msg_instrument = f"{instrument} was successfully ingested in the database." - logger.info(msg_instrument) - except sqlalchemy.exc.IntegrityError as e: # pylint: disable=invalid-name - msg = "Instrument/Mode could not be ingested" - logger.error(msg) - raise SimpleError(msg + "\n" + str(e)) + raise AstroDBError(msg) + + # spec_count = ( + # db.query(Spectra.regime, func.count(Spectra.regime)) + # .group_by(Spectra.regime) + # .all() + # ) + + # spec_ref_count = ( + # db.query(Spectra.reference, func.count(Spectra.reference)) + # .group_by(Spectra.reference) + # .order_by(func.count(Spectra.reference).desc()) + # .limit(20) + # .all() + # ) + + # telescope_spec_count = ( + # db.query(Spectra.telescope, func.count(Spectra.telescope)) + # .group_by(Spectra.telescope) + # .order_by(func.count(Spectra.telescope).desc()) + # .limit(20) + # .all() + # ) + + # logger.info(f'Spectra in the database: \n {spec_count} + # \n {spec_ref_count} \n {telescope_spec_count}') return @@ -1777,8 +1422,9 @@ def ingest_companion_relationships( - *Child*: The source is lower mass/fainter than the companion - *Sibling*: The source is similar to the companion - *Parent*: The source is higher mass/brighter than the companion - - *Unresolved Parent*: The source is the unresolved, combined light source of an unresolved - multiple system which includes the companion + - *Unresolved Parent*: The source is the unresolved, + combined light source of an unresolved + multiple system which includes the companion """ # checking relationship entered @@ -1786,37 +1432,42 @@ def ingest_companion_relationships( # check captialization if relationship.title() != relationship: logger.info( - f"Relationship captilization changed from {relationship} to {relationship.title()} " + f"Relationship captilization changed from " + f"{relationship} to {relationship.title()} " ) relationship = relationship.title() if relationship not in possible_relationships: - msg = f"Relationship given for {source}, {companion_name}: {relationship} NOT one of the constrained relationships \n {possible_relationships}" + msg = f"Relationship given for {source}, {companion_name}: {relationship} " + f"NOT one of the constrained relationships \n {possible_relationships}" logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) # source canot be same as companion if source == companion_name: msg = f"{source}: Source cannot be the same as companion name" logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) if source == companion_name: msg = f"{source}: Source cannot be the same as companion name" logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) - if projected_separation_arcsec != None and projected_separation_arcsec < 0: + if projected_separation_arcsec is not None and projected_separation_arcsec < 0: msg = f"Projected separation: {projected_separation_arcsec}, cannot be negative" logger.error(msg) - raise SimpleError(msg) - if projected_separation_error != None and projected_separation_error < 0: - msg = f"Projected separation error: {projected_separation_error}, cannot be negative" + raise AstroDBError(msg) + if projected_separation_error is not None and projected_separation_error < 0: + msg = ( + f"Projected separation error: {projected_separation_error}, " + "cannot be negative" + ) logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) # check other names # make sure companion name is included in the list - if other_companion_names == None: + if other_companion_names is None: other_companion_names = companion_name else: companion_name_list = other_companion_names.split(", ") @@ -1842,7 +1493,7 @@ def ingest_companion_relationships( ) conn.commit() logger.info( - f"ComapnionRelationship added: ", + "ComapnionRelationship added: ", [ source, companion_name, @@ -1857,7 +1508,7 @@ def ingest_companion_relationships( if "UNIQUE constraint failed:" in str(e): msg = "The companion may be a duplicate." logger.error(msg) - raise SimpleError(msg) + raise AstroDBError(msg) else: msg = ( @@ -1866,235 +1517,4 @@ def ingest_companion_relationships( "or the reference may not exist in the Publications table. " ) logger.error(msg) - raise SimpleError(msg) - - -def ingest_source( - db, - source, - reference: str = None, - ra: float = None, - dec: float = None, - epoch: str = None, - equinox: str = None, - other_reference: str = None, - comment: str = None, - raise_error: bool = True, - search_db: bool = True, -): - """ - Parameters - ---------- - db: astrodbkit2.astrodb.Database - Database object created by astrodbkit2 - sources: str - Names of sources - references: str - Discovery references of sources - ras: float, optional - Right ascensions of sources. Decimal degrees. - decs: float, optional - Declinations of sources. Decimal degrees. - comments: string, optional - Comments - epochs: str, optional - Epochs of coordinates - equinoxes: str, optional - Equinoxes of coordinates - other_references: str - raise_error: bool, optional - True (default): Raise an error if a source cannot be ingested - False: Log a warning but skip sources which cannot be ingested - search_db: bool, optional - True (default): Search database to see if source is already ingested - False: Ingest source without searching the database - - Returns - ------- - - None - - """ - - if ra is None and dec is None: - coords_provided = False - else: - coords_provided = True - ra = ra - dec = dec - epoch = epoch - equinox = equinox - logger.debug(f"coords_provided:{coords_provided}") - - # Find out if source is already in database or not - if coords_provided and search_db: - logger.debug(f"Checking database for: {source} at ra: {ra}, dec: {dec}") - name_matches = find_source_in_db(db, source, ra=ra, dec=dec) - elif search_db: - logger.debug(f"Checking database for: {source}") - name_matches = find_source_in_db(db, source) - elif not search_db: - name_matches = [] - else: - name_matches = None - - logger.debug(f"Source matches in database: {name_matches}") - - # Source is already in database - # Checking out alternate names - if len(name_matches) == 1 and search_db: - # Figure out if source name provided is an alternate name - db_source_matches = db.search_object( - source, output_table="Sources", fuzzy_search=False - ) - - # Try to add alternate source name to Names table - if len(db_source_matches) == 0: - alt_names_data = [{"source": name_matches[0], "other_name": source}] - try: - with db.engine.connect() as conn: - conn.execute(db.Names.insert().values(alt_names_data)) - conn.commit() - logger.info(f" Name added to database: {alt_names_data}\n") - except sqlalchemy.exc.IntegrityError as e: - msg = f" Could not add {alt_names_data} to database" - logger.warning(msg) - if raise_error: - raise SimpleError(msg + "\n" + str(e)) - else: - return - - msg = f"Not ingesting {source}. Already in database as {name_matches[0]}. \n " - if raise_error: - raise SimpleError(msg) - else: - logger.info(msg) - return # Source is already in database, nothing new to ingest - - # Multiple source matches in the database so unable to ingest source - elif len(name_matches) > 1 and search_db: - msg1 = f" Not ingesting {source}." - msg = f" More than one match for {source}\n {name_matches}\n" - logger.warning(msg1 + msg) - if raise_error: - raise SimpleError(msg) - else: - return - - # No match in the database, INGEST! - elif len(name_matches) == 0 or not search_db: - # Make sure reference is provided and in References table - if reference is None or ma.is_masked(reference): - msg = f"Not ingesting {source}. Discovery reference is blank. \n" - logger.warning(msg) - if raise_error: - raise SimpleError(msg) - else: - return - - ref_check = find_publication(db, name=reference) - logger.debug(f"ref_check: {ref_check}") - - if ref_check is False: - msg = ( - f"Skipping: {source}. Discovery reference {reference} is not in Publications table. \n" - f"(Add it with ingest_publication function.)" - ) - logger.warning(msg) - if raise_error: - raise SimpleError(msg + msg2) - else: - return - - # Try to get coordinates from SIMBAD if they were not provided - if not coords_provided: - # Try to get coordinates from SIMBAD - simbad_result_table = Simbad.query_object(source) - - if simbad_result_table is None: - msg = f"Not ingesting {source}. Coordinates are needed and could not be retrieved from SIMBAD. \n" - logger.warning(msg) - if raise_error: - raise SimpleError(msg) - else: - return - # One SIMBAD match! Using those coordinates for source. - elif len(simbad_result_table) == 1: - simbad_coords = ( - simbad_result_table["RA"][0] + " " + simbad_result_table["DEC"][0] - ) - simbad_skycoord = SkyCoord(simbad_coords, unit=(u.hourangle, u.deg)) - ra = simbad_skycoord.to_string(style="decimal").split()[0] - dec = simbad_skycoord.to_string(style="decimal").split()[1] - epoch = "2000" # Default coordinates from SIMBAD are epoch 2000. - equinox = "J2000" # Default frame from SIMBAD is IRCS and J2000. - msg = f"Coordinates retrieved from SIMBAD {ra}, {dec}" - logger.debug(msg) - else: - msg = f"Not ingesting {source}. Coordinates are needed and could not be retrieved from SIMBAD. \n" - logger.warning(msg) - if raise_error: - raise SimpleError(msg) - else: - return - - # Just in case other conditionals not met - else: - msg = f"Unexpected condition encountered ingesting {source}" - logger.error(msg) - raise SimpleError(msg) - - logger.debug(f" Ingesting {source}.") - - # Construct data to be added - source_data = [ - { - "source": source, - "ra": ra, - "dec": dec, - "reference": reference, - "epoch": epoch, - "equinox": equinox, - "other_references": other_reference, - "comments": comment, - } - ] - names_data = [{"source": source, "other_name": source}] - - # Try to add the source to the database - try: - with db.engine.connect() as conn: - conn.execute(db.Sources.insert().values(source_data)) - conn.commit() - msg = f"Added {str(source_data)}" - logger.info(f"Added {source}") - logger.debug(msg) - except sqlalchemy.exc.IntegrityError: - msg = ( - f"Not ingesting {source}. Not sure why. \n" - "The reference may not exist in Publications table. \n" - "Add it with ingest_publication function. \n" - ) - msg2 = f" {str(source_data)} " - logger.warning(msg) - logger.debug(msg2) - if raise_error: - raise SimpleError(msg + msg2) - else: - return - - # Try to add the source name to the Names table - try: - with db.engine.connect() as conn: - conn.execute(db.Names.insert().values(names_data)) - conn.commit() - logger.debug(f" Name added to database: {names_data}\n") - except sqlalchemy.exc.IntegrityError: - msg = f" Could not add {names_data} to database" - logger.warning(msg) - if raise_error: - raise SimpleError(msg) - else: - return - - return + raise AstroDBError(msg) diff --git a/scripts/ingests/utils.py b/scripts/ingests/utils.py deleted file mode 100644 index 205a8927b..000000000 --- a/scripts/ingests/utils.py +++ /dev/null @@ -1,563 +0,0 @@ -""" -Utils functions for use in ingests -""" -import logging -import os -import sys -import re -import warnings -from pathlib import Path -from astrodbkit2.astrodb import create_database, Database -from simple.schema import * -import ads -from astropy.coordinates import SkyCoord -import astropy.units as u -from astropy.table import Table, unique -from sqlalchemy import or_, and_ -import sqlalchemy.exc -from astroquery.simbad import Simbad -from astropy.coordinates import SkyCoord -import astropy.units as u -import socket -from scripts import REFERENCE_TABLES - -__all__ = [ - "SimpleError", - "load_simpledb", - "find_source_in_db", - "find_publication", - "ingest_publication", - "check_internet_connection", -] - -warnings.filterwarnings("ignore", module="astroquery.simbad") -logger = logging.getLogger("SIMPLE") - -# Logger setup -# This will stream all logger messages to the standard output and apply formatting for that -logger.propagate = False # prevents duplicated logging messages -LOGFORMAT = logging.Formatter( - "%(asctime)s %(levelname)s: %(message)s", datefmt="%m/%d/%Y %I:%M:%S%p" -) -ch = logging.StreamHandler(stream=sys.stdout) -ch.setFormatter(LOGFORMAT) -# To prevent duplicate handlers, only add if they haven't been set previously -if not len(logger.handlers): - logger.addHandler(ch) -logger.setLevel(logging.INFO) - - -class SimpleError(Exception): - pass - - -# TODO: commented out as not using with the new custom error -# @contextmanager -# def disable_exception_traceback(): -# """ -# All traceback information is suppressed and only the exception type and value are printed -# """ -# default_value = getattr(sys, "tracebacklimit", 1000) # `1000` is a Python's default value -# sys.tracebacklimit = 0 -# yield -# sys.tracebacklimit = default_value # revert changes - - -def load_simpledb(db_file, recreatedb=True, reference_tables=REFERENCE_TABLES): - # Utility function to load the database - - db_file_path = Path(db_file) - db_connection_string = "sqlite:///" + db_file - - if recreatedb and db_file_path.exists(): - os.remove(db_file) # removes the current .db file if one already exists - - if not db_file_path.exists(): - try: # Use fancy in-memory database, if supported by astrodbkit2 - db = Database( - "sqlite://", reference_tables=REFERENCE_TABLES - ) # creates and connects to a temporary in-memory database - db.load_database( - "data/" - ) # loads the data from the data files into the database - db.dump_sqlite(db_file) # dump in-memory database to file - db = Database( - db_connection_string, reference_tables=REFERENCE_TABLES - ) # replace database object with new file version - except RuntimeError: - # use in-file database - create_database( - db_connection_string - ) # creates empty database based on the simple schema - db = Database( - db_connection_string, reference_tables=REFERENCE_TABLES - ) # connects to the empty database - db.load_database( - "data/" - ) # loads the data from the data files into the database - else: - db = Database( - db_connection_string, reference_tables=REFERENCE_TABLES - ) # if database already exists, connects to .db file - - return db - - -def find_source_in_db(db, source, ra=None, dec=None, search_radius=60.0): - """ - Find a source in the database given a source name and optional coordinates. - - Parameters - ---------- - db - source: str - Source name - ra: float - Right ascensions of sources. Decimal degrees. - dec: float - Declinations of sources. Decimal degrees. - search_radius - radius in arcseconds to use for source matching - - Returns - ------- - List of strings. - - one match: Single element list with one database source name - multiple matches: List of possible database names - no matches: Empty list - - """ - - # TODO: In astrodbkit2, convert verbose to using logger - - if ra and dec: - coords = True - else: - coords = False - - source = source.strip() - - logger.debug(f"{source}: Searching for match in database.") - - db_name_matches = db.search_object( - source, output_table="Sources", fuzzy_search=False, verbose=False - ) - - # NO MATCHES - # If no matches, try fuzzy search - if len(db_name_matches) == 0: - logger.debug(f"{source}: No name matches, trying fuzzy search") - db_name_matches = db.search_object( - source, output_table="Sources", fuzzy_search=True, verbose=False - ) - - # If still no matches, try to resolve the name with Simbad - if len(db_name_matches) == 0: - logger.debug(f"{source}: No name matches, trying Simbad search") - db_name_matches = db.search_object( - source, resolve_simbad=True, fuzzy_search=False, verbose=False - ) - - # if still no matches, try spatial search using coordinates, if provided - if len(db_name_matches) == 0 and coords: - location = SkyCoord(ra, dec, frame="icrs", unit="deg") - radius = u.Quantity(search_radius, unit="arcsec") - logger.info( - f"{source}: No Simbad match, trying coord search around {location.ra.degree}, {location.dec}" - ) - db_name_matches = db.query_region(location, radius=radius) - - # If still no matches, try to get the coords from SIMBAD - if len(db_name_matches) == 0: - simbad_result_table = Simbad.query_object(source) - if simbad_result_table is not None and len(simbad_result_table) == 1: - simbad_coords = ( - simbad_result_table["RA"][0] + " " + simbad_result_table["DEC"][0] - ) - simbad_skycoord = SkyCoord(simbad_coords, unit=(u.hourangle, u.deg)) - ra = simbad_skycoord.to_string(style="decimal").split()[0] - dec = simbad_skycoord.to_string(style="decimal").split()[1] - msg = f"Coordinates retrieved from SIMBAD {ra}, {dec}" - logger.debug(msg) - # Search database around that coordinate - radius = u.Quantity(search_radius, unit="arcsec") - msg2 = ( - f"Finding SIMBAD matches around {simbad_skycoord} with radius {radius}" - ) - logger.debug(msg2) - db_name_matches = db.query_region(simbad_skycoord, radius=radius) - - if len(db_name_matches) == 1: - db_names = db_name_matches["source"].tolist() - logger.debug(f"One match found for {source}: {db_names[0]}") - elif len(db_name_matches) > 1: - db_names = db_name_matches["source"].tolist() - logger.debug(f"More than match found for {source}: {db_names}") - # TODO: Find way for user to choose correct match - elif len(db_name_matches) == 0: - db_names = [] - logger.debug(f" {source}: No match found") - else: - raise SimpleError(f"Unexpected condition searching for {source}") - - return db_names - - -def find_publication(db, name: str = None, doi: str = None, bibcode: str = None): - """ - Find publications in the database by matching on the publication name, doi, or bibcode - - Parameters - ---------- - db - Variable referencing the database to search - name: str - Name of publication to search - doi: str - DOI of publication to search - bibcode: str - ADS Bibcode of publication to search - - Returns - ------- - True, str: if only one match - False, 0: No matches - False, N_matches: Multiple matches - - Examples - ------- - >>> test = search_publication(db, name='Cruz') - Found 8 matching publications for Cruz or None or None - - >>> test = search_publication(db, name='Kirk19') - Found 1 matching publications for Kirk19 or None or None - name bibcode doi - ------ ------------------- ------------------------ - Kirk19 2019ApJS..240...19K 10.3847/1538-4365/aaf6af - description - ----------------------------------------------------------------------------- - Preliminary Trigonometric Parallaxes of 184 Late-T and Y Dwarfs and an - Analysis of the Field Substellar Mass Function into the Planetary Mass Regime - - >>> test = search_publication(db, name='Smith') - No matching publications for Smith, Trying Smit - No matching publications for Smit - Use add_publication() to add it to the database. - - See Also - -------- - add_publication: Function to add publications in the database - - """ - - # Make sure a search term is provided - if name is None and doi is None and bibcode is None: - logger.error("Name, Bibcode, or DOI must be provided") - return False, 0 - - not_null_pub_filters = [] - if name: - # fuzzy_query_name = '%' + name + '%' - not_null_pub_filters.append(db.Publications.c.reference.ilike(name)) - if doi: - not_null_pub_filters.append(db.Publications.c.doi.ilike(doi)) - if bibcode: - not_null_pub_filters.append(db.Publications.c.bibcode.ilike(bibcode)) - pub_search_table = Table() - if len(not_null_pub_filters) > 0: - pub_search_table = ( - db.query(db.Publications).filter(or_(*not_null_pub_filters)).table() - ) - - n_pubs_found = len(pub_search_table) - - if n_pubs_found == 1: - logger.info( - f"Found {n_pubs_found} matching publications for " - f"{name} or {doi} or {bibcode}: {pub_search_table['reference'].data}" - ) - if logger.level <= 10: # debug - pub_search_table.pprint_all() - return True, pub_search_table["reference"].data[0] - - if n_pubs_found > 1: - logger.warning( - f"Found {n_pubs_found} matching publications for {name} or {doi} or {bibcode}" - ) - if logger.level <= 30: # warning - pub_search_table.pprint_all() - return False, n_pubs_found - - # If no matches found, search using first four characters of input name - if n_pubs_found == 0 and name: - shorter_name = name[:4] - logger.debug(f"No matching publications for {name}, Trying {shorter_name}.") - fuzzy_query_shorter_name = "%" + shorter_name + "%" - pub_search_table = ( - db.query(db.Publications) - .filter(db.Publications.c.reference.ilike(fuzzy_query_shorter_name)) - .table() - ) - n_pubs_found_short = len(pub_search_table) - if n_pubs_found_short == 0: - logger.warning(f"No matching publications for {name} or {shorter_name}") - logger.warning("Use add_publication() to add it to the database.") - return False, 0 - - if n_pubs_found_short > 0: - logger.debug( - f"Found {n_pubs_found_short} matching publications for {shorter_name}" - ) - if logger.level == 10: # debug - pub_search_table.pprint_all() - - # Try to find numbers in the reference which might be a date - dates = re.findall(r"\d+", name) - # try to find a two digit date - if len(dates) == 0: - logger.debug(f"Could not find a date in {name}") - two_digit_date = None - elif len(dates) == 1: - if len(dates[0]) == 4: - two_digit_date = dates[0][2:] - elif len(dates[0]) == 2: - two_digit_date = dates[0] - else: - logger.debug(f"Could not find a two digit date using {dates}") - two_digit_date = None - else: - logger.debug(f"Could not find a two digit date using {dates}") - two_digit_date = None - - if two_digit_date: - logger.debug(f"Trying to limit using {two_digit_date}") - n_pubs_found_short_date = 0 - pubs_found_short_date = [] - for pub in pub_search_table["reference"]: - if pub.find(two_digit_date) != -1: - n_pubs_found_short_date += 1 - pubs_found_short_date.append(pub) - if n_pubs_found_short_date == 1: - logger.debug( - f"Found {n_pubs_found_short_date} matching publications for " - f"{name} using {shorter_name} and {two_digit_date}" - ) - logger.debug(f"{pubs_found_short_date}") - return True, pubs_found_short_date[0] - else: - logger.warning( - f"Found {n_pubs_found_short_date} matching publications for " - f"{name} using {shorter_name} and {two_digit_date}" - ) - logger.warning(f"{pubs_found_short_date}") - return False, n_pubs_found_short_date - else: - return False, n_pubs_found_short - else: - return False, n_pubs_found - - return - - -def ingest_publication( - db, - doi: str = None, - bibcode: str = None, - publication: str = None, - description: str = None, - ignore_ads: bool = False, -): - """ - Adds publication to the database using DOI or ADS Bibcode, including metadata found with ADS. - - In order to auto-populate the fields, An $ADS_TOKEN environment variable must be set. - See https://ui.adsabs.harvard.edu/user/settings/token - - Parameters - ---------- - db - Database object - doi, bibcode: str - The DOI or ADS Bibcode of the reference. One of these is required input. - publication: str, optional - The publication shortname, otherwise it will be generated [optional] - Convention is the first four letters of first authors last name and two digit year (e.g., Smit21) - For last names which are less than four letters, use '_' or first name initial(s). (e.g, Xu__21 or LiYB21) - description: str, optional - Description of the paper, typically the title of the papre [optional] - ignore_ads: bool - - See Also - -------- - search_publication: Function to find publications in the database - - """ - - if not (publication or doi or bibcode): - logger.error("Publication, DOI, or Bibcode is required input") - return - - ads.config.token = os.getenv("ADS_TOKEN") - - if not ads.config.token and (not publication and (not doi or not bibcode)): - logger.error( - "An ADS_TOKEN environment variable must be set in order to auto-populate the fields.\n" - "Without an ADS_TOKEN, name and bibcode or DOI must be set explicity." - ) - return - - if ads.config.token and not ignore_ads: - use_ads = True - else: - use_ads = False - logger.debug(f"Use ADS set to {use_ads}") - - if bibcode: - if "arXiv" in bibcode: - arxiv_id = bibcode - bibcode = None - else: - arxiv_id = None - else: - arxiv_id = None - - name_add, bibcode_add, doi_add = "", "", "" - # Search ADS uing a provided arxiv id - if arxiv_id and use_ads: - arxiv_matches = ads.SearchQuery( - q=arxiv_id, fl=["id", "bibcode", "title", "first_author", "year", "doi"] - ) - arxiv_matches_list = list(arxiv_matches) - if len(arxiv_matches_list) != 1: - logger.error("should only be one matching arxiv id") - return - - if len(arxiv_matches_list) == 1: - logger.debug(f"Publication found in ADS using arxiv id: , {arxiv_id}") - article = arxiv_matches_list[0] - logger.debug( - f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}" - ) - if not publication: # generate the name if it was not provided - name_stub = article.first_author.replace(",", "").replace(" ", "") - name_add = name_stub[0:4] + article.year[-2:] - else: - name_add = publication - description = article.title[0] - bibcode_add = article.bibcode - doi_add = article.doi[0] - - elif arxiv_id: - name_add = publication - bibcode_add = arxiv_id - doi_add = doi - - # Search ADS using a provided DOI - if doi and use_ads: - doi_matches = ads.SearchQuery( - doi=doi, fl=["id", "bibcode", "title", "first_author", "year", "doi"] - ) - doi_matches_list = list(doi_matches) - if len(doi_matches_list) != 1: - logger.error("should only be one matching DOI") - return - - if len(doi_matches_list) == 1: - logger.debug(f"Publication found in ADS using DOI: {doi}") - using = doi - article = doi_matches_list[0] - logger.debug( - f"{article.first_author}, {article.year}, {article.bibcode}, {article.title}" - ) - if not publication: # generate the name if it was not provided - name_stub = article.first_author.replace(",", "").replace(" ", "") - name_add = name_stub[0:4] + article.year[-2:] - else: - name_add = publication - description = article.title[0] - bibcode_add = article.bibcode - doi_add = article.doi[0] - elif doi: - name_add = publication - bibcode_add = bibcode - doi_add = doi - - if bibcode and use_ads: - bibcode_matches = ads.SearchQuery( - bibcode=bibcode, - fl=["id", "bibcode", "title", "first_author", "year", "doi"], - ) - bibcode_matches_list = list(bibcode_matches) - if len(bibcode_matches_list) == 0: - logger.error("not a valid bibcode:" + str(bibcode)) - logger.error("nothing added") - raise - - elif len(bibcode_matches_list) > 1: - logger.error("should only be one matching bibcode for:" + str(bibcode)) - logger.error("nothing added") - raise - - elif len(bibcode_matches_list) == 1: - logger.debug("Publication found in ADS using bibcode: " + str(bibcode)) - using = str(bibcode) - article = bibcode_matches_list[0] - logger.debug( - f"{article.first_author}, {article.year}, {article.bibcode}, {article.doi}, {article.title}" - ) - if not publication: # generate the name if it was not provided - name_stub = article.first_author.replace(",", "").replace(" ", "") - name_add = name_stub[0:4] + article.year[-2:] - else: - name_add = publication - description = article.title[0] - bibcode_add = article.bibcode - if article.doi is None: - doi_add = None - else: - doi_add = article.doi[0] - elif bibcode: - name_add = publication - bibcode_add = bibcode - doi_add = doi - - if publication and not bibcode and not doi: - name_add = publication - using = "user input" - - new_ref = [ - { - "reference": name_add, - "bibcode": bibcode_add, - "doi": doi_add, - "description": description, - } - ] - - try: - with db.engine.connect() as conn: - conn.execute(db.Publications.insert().values(new_ref)) - conn.commit() - logger.info(f"Added {name_add} to Publications table using {using}") - except sqlalchemy.exc.IntegrityError as error: - msg = ( - f"Not able to add {new_ref} to the database. " - f"It's possible that a similar publication already exists in database\n" - "Use find_publication function before adding a new record" - ) - logger.error(msg) - raise SimpleError(msg + str(error)) - - return - - -def check_internet_connection(): - # get current IP address of system - ipaddress = socket.gethostbyname(socket.gethostname()) - - # checking system IP is the same as "127.0.0.1" or not. - if ipaddress == "127.0.0.1": # no internet - return False, ipaddress - else: - return True, ipaddress diff --git a/scripts/tutorials/generate_database.py b/scripts/tutorials/generate_database.py index d48453bae..f059a0db8 100644 --- a/scripts/tutorials/generate_database.py +++ b/scripts/tutorials/generate_database.py @@ -5,15 +5,24 @@ import sys import os from astrodbkit2.astrodb import create_database, Database + sys.path.append(os.getcwd()) # hack to be able to discover simple -from simple.schema import * +from schema.schema import * # Location of source data -DB_PATH = 'data' -DB_NAME = 'SIMPLE.db' +DB_PATH = "data" +DB_NAME = "SIMPLE.db" # Used to overwrite AstrodbKit2 reference tables defaults -REFERENCE_TABLES = ['Publications', 'Telescopes', 'Instruments', 'Modes', 'PhotometryFilters', 'Versions', 'Parameters'] +REFERENCE_TABLES = [ + "Publications", + "Telescopes", + "Instruments", + "Modes", + "PhotometryFilters", + "Versions", + "Parameters", +] def load_postgres(connection_string): @@ -44,14 +53,18 @@ def load_sqlite(): # If the schema has not changed, this part can be skipped if os.path.exists(DB_NAME): os.remove(DB_NAME) - connection_string = 'sqlite:///' + DB_NAME + connection_string = "sqlite:///" + DB_NAME # Use in-memory database for initial load (addresses issues with IO bottlenecks) - try: - db = Database('sqlite://', reference_tables=REFERENCE_TABLES) # creates and connects to a temporary in-memory database - db.load_database(DB_PATH) # loads the data from the data files into the database + try: + db = Database( + "sqlite://", reference_tables=REFERENCE_TABLES + ) # creates and connects to a temporary in-memory database + db.load_database( + DB_PATH + ) # loads the data from the data files into the database db.dump_sqlite(DB_NAME) # dump in-memory database to file - print('In-memory database created and saved to file.') + print("In-memory database created and saved to file.") db.session.close() except RuntimeError: # use in-file database @@ -69,20 +82,25 @@ def load_database(connection_string): db = Database(connection_string, reference_tables=REFERENCE_TABLES) db.load_database(DB_PATH, verbose=False) - print('New database generated.') + print("New database generated.") # Close all connections db.session.close() db.engine.dispose() -if __name__ == '__main__': - - parser = argparse.ArgumentParser(description='Generate the SIMPLE database') - parser.add_argument('architecture', choices=['sqlite', 'postgres'], - help='Database architecture to use.') - parser.add_argument('connection_string', nargs='?', - help='Connection string to use for non-sqlite databases.') +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Generate the SIMPLE database") + parser.add_argument( + "architecture", + choices=["sqlite", "postgres"], + help="Database architecture to use.", + ) + parser.add_argument( + "connection_string", + nargs="?", + help="Connection string to use for non-sqlite databases.", + ) args = parser.parse_args() @@ -90,10 +108,10 @@ def load_database(connection_string): if args.connection_string is not None: connection_string = args.connection_string else: - connection_string = os.getenv('SIMPLE_DATABASE_URL', default='') + connection_string = os.getenv("SIMPLE_DATABASE_URL", default="") # Run the loader for the specified DB architecture - if args.architecture == 'postgres': + if args.architecture == "postgres": load_postgres(connection_string) - elif args.architecture == 'sqlite': + elif args.architecture == "sqlite": load_sqlite() diff --git a/scripts/tutorials/single_object_example.py b/scripts/tutorials/single_object_example.py index 1c1961096..1c11fee02 100644 --- a/scripts/tutorials/single_object_example.py +++ b/scripts/tutorials/single_object_example.py @@ -3,92 +3,111 @@ from astrodbkit2.astrodb import Database, create_database # Establish connection to database -connection_string = 'postgresql://localhost/SIMPLE' # Postgres -connection_string = 'sqlite:///SIMPLE.db' # SQLite +connection_string = "postgresql://localhost/SIMPLE" # Postgres +connection_string = "sqlite:///SIMPLE.sqlite" # SQLite db = Database(connection_string) # If brand new database, run the following -# NOTE: Some databases, like Postgres, will need an empty database created first before running this -from simple.schema import * +# NOTE: Some databases, like Postgres, will need an empty database +# created first before running this +from schema.schema import * + create_database(connection_string) # Adding information for 2MASS J13571237+1428398 # Add references -publications_data = [{'name': 'Schm10', - 'bibcode': '2010AJ....139.1808S', - 'doi': '10.1088/0004-6256/139/5/1808', - 'description': 'Colors and Kinematics of L Dwarfs From the Sloan Digital Sky Survey'}, - {'name': 'Cutr12', - 'bibcode': '2012yCat.2311....0C', - 'doi': None, - 'description': 'WISE All-Sky Data Release'}] +publications_data = [ + { + "name": "Schm10", + "bibcode": "2010AJ....139.1808S", + "doi": "10.1088/0004-6256/139/5/1808", + "description": "Colors and Kinematics of L Dwarfs From the " + "Sloan Digital Sky Survey", + }, + { + "name": "Cutr12", + "bibcode": "2012yCat.2311....0C", + "doi": None, + "description": "WISE All-Sky Data Release", + }, +] with db.engine.connect() as conn: conn.execute(db.Publications.insert().values(publications_data)) conn.commit() # Add telescope with db.engine.connect() as conn: - conn.execute(db.Telescopes.insert().values([{'name': 'WISE'}])) + conn.execute(db.Telescopes.insert().values([{"name": "WISE"}])) conn.commit() # Add source -sources_data = [{'ra': 209.301675, 'dec': 14.477722, - 'source': '2MASS J13571237+1428398', - 'reference': 'Schm10', - 'shortname': '1357+1428'}] +sources_data = [ + { + "ra": 209.301675, + "dec": 14.477722, + "source": "2MASS J13571237+1428398", + "reference": "Schm10", + "shortname": "1357+1428", + } +] with db.engine.connect() as conn: conn.execute(db.Sources.insert().values(sources_data)) conn.commit() # Additional names -names_data = [{'source': '2MASS J13571237+1428398', - 'other_name': 'SDSS J135712.40+142839.8'}, - {'source': '2MASS J13571237+1428398', - 'other_name': '2MASS J13571237+1428398'}, - ] +names_data = [ + {"source": "2MASS J13571237+1428398", "other_name": "SDSS J135712.40+142839.8"}, + {"source": "2MASS J13571237+1428398", "other_name": "2MASS J13571237+1428398"}, +] with db.engine.connect() as conn: conn.execute(db.Names.insert().values(names_data)) conn.commit() # Add Photometry -phot_data = [{'source': '2MASS J13571237+1428398', - 'band': 'WISE_W1', - 'magnitude': 13.348, - 'magnitude_error': 0.025, - 'telescope': 'WISE', - 'reference': 'Cutr12' - }, - {'source': '2MASS J13571237+1428398', - 'band': 'WISE_W2', - 'magnitude': 12.990, - 'magnitude_error': 0.028, - 'telescope': 'WISE', - 'reference': 'Cutr12' - }, - {'source': '2MASS J13571237+1428398', - 'band': 'WISE_W3', - 'magnitude': 12.476, - 'magnitude_error': 0.279, - 'telescope': 'WISE', - 'reference': 'Cutr12' - }, - {'source': '2MASS J13571237+1428398', - 'band': 'WISE_W4', - 'magnitude': 9.560, - 'magnitude_error': None, - 'telescope': 'WISE', - 'reference': 'Cutr12' - }] +phot_data = [ + { + "source": "2MASS J13571237+1428398", + "band": "WISE_W1", + "magnitude": 13.348, + "magnitude_error": 0.025, + "telescope": "WISE", + "reference": "Cutr12", + }, + { + "source": "2MASS J13571237+1428398", + "band": "WISE_W2", + "magnitude": 12.990, + "magnitude_error": 0.028, + "telescope": "WISE", + "reference": "Cutr12", + }, + { + "source": "2MASS J13571237+1428398", + "band": "WISE_W3", + "magnitude": 12.476, + "magnitude_error": 0.279, + "telescope": "WISE", + "reference": "Cutr12", + }, + { + "source": "2MASS J13571237+1428398", + "band": "WISE_W4", + "magnitude": 9.560, + "magnitude_error": None, + "telescope": "WISE", + "reference": "Cutr12", + }, +] with db.engine.connect() as conn: conn.execute(db.Photometry.insert().values(phot_data)) conn.commit() # Checking object -_ = db.inventory('2MASS J13571237+1428398', pretty_print=True) +_ = db.inventory("2MASS J13571237+1428398", pretty_print=True) # Save single object -db.save_json('2MASS J13571237+1428398', 'data') +db.save_json("2MASS J13571237+1428398", "data") # Save entire database to directory 'data' -db.save_database('data') +db.save_database("data") diff --git a/tests/scheduled_checks.py b/tests/scheduled_checks.py index 7088906ad..16ab36a1b 100644 --- a/tests/scheduled_checks.py +++ b/tests/scheduled_checks.py @@ -4,14 +4,14 @@ import requests from tqdm import tqdm from astrodbkit2.astrodb import create_database, Database -from scripts.ingests.utils import check_internet_connection +from astrodb_scripts import check_internet_connection sys.path.append(".") -from simple.schema import * +from schema.schema import * from . import REFERENCE_TABLES -DB_NAME = "temp.db" +DB_NAME = "temp.sqlite" DB_PATH = "data" @@ -62,8 +62,8 @@ def test_spectra_urls(db): if status_code != 200 and status_code != 301: broken_urls.append(spectrum_url) codes.append(status_code) - + # Display broken spectra regardless if it's the number we expect or not print(f"found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}") - - assert (4 <= len(broken_urls) <= 4) + + assert 4 <= len(broken_urls) <= 4 diff --git a/tests/test_data.py b/tests/test_data.py index e594c074e..4001f6a69 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -6,13 +6,13 @@ from astrodbkit2.astrodb import create_database, Database from sqlalchemy import except_, select, and_ -sys.path.append('.') -from simple.schema import * +sys.path.append(".") +from schema.schema import * from . import REFERENCE_TABLES -DB_NAME = 'temp.db' -DB_PATH = 'data' +DB_NAME = "temp.sqlite" +DB_PATH = "data" # Load the database for use in individual tests @@ -23,20 +23,26 @@ def db(): if os.path.exists(DB_NAME): os.remove(DB_NAME) - connection_string = 'sqlite:///' + DB_NAME + connection_string = "sqlite:///" + DB_NAME create_database(connection_string) assert os.path.exists(DB_NAME) # Connect to the new database and confirm it has the Sources table db = Database(connection_string, reference_tables=REFERENCE_TABLES) assert db - assert 'source' in [c.name for c in db.Sources.columns] + assert "source" in [c.name for c in db.Sources.columns] # Load data into an in-memory sqlite database first, for performance - temp_db = Database('sqlite://', reference_tables=REFERENCE_TABLES) # creates and connects to a temporary in-memory database - temp_db.load_database(DB_PATH, verbose=False) # loads the data from the data files into the database + temp_db = Database( + "sqlite://", reference_tables=REFERENCE_TABLES + ) # creates and connects to a temporary in-memory database + temp_db.load_database( + DB_PATH, verbose=False + ) # loads the data from the data files into the database temp_db.dump_sqlite(DB_NAME) # dump in-memory database to file - db = Database('sqlite:///' + DB_NAME, reference_tables=REFERENCE_TABLES) # replace database object with new file version + db = Database( + "sqlite:///" + DB_NAME, reference_tables=REFERENCE_TABLES + ) # replace database object with new file version return db @@ -45,80 +51,80 @@ def db(): # ----------------------------------------------------------------------------------------- def reference_verifier(t, name, bibcode, doi): # Utility function to verify reference values in a table - ind = t['reference'] == name - assert t[ind]['bibcode'][0] == bibcode, f'{name} did not match bibcode' - assert t[ind]['doi'][0] == doi, f'{name} did not match doi' + ind = t["reference"] == name + assert t[ind]["bibcode"][0] == bibcode, f"{name} did not match bibcode" + assert t[ind]["doi"][0] == doi, f"{name} did not match doi" def test_discovery_references(db): - ''' + """ Values found with this SQL query: SELECT reference, count(*) FROM Sources GROUP BY reference ORDER By 2 DESC - ''' + """ - ref = 'Schm10.1808' + ref = "Schm10.1808" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 208, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 208, f"found {len(t)} discovery reference entries for {ref}" - ref = 'West08' + ref = "West08" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 192, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 192, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Reid08.1290' + ref = "Reid08.1290" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 206, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 206, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Cruz03' + ref = "Cruz03" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 165, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 165, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Maro15' + ref = "Maro15" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 113, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 113, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Best15' + ref = "Best15" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 101, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 101, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Kirk11' + ref = "Kirk11" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 100, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 100, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Mace13.6' + ref = "Mace13.6" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 93, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 93, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Burn13' + ref = "Burn13" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 69, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 69, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Gagn15.33' + ref = "Gagn15.33" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 68, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 68, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Chiu06' + ref = "Chiu06" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 62, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 62, f"found {len(t)} discovery reference entries for {ref}" - ref = 'DayJ13' + ref = "DayJ13" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 61, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 61, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Kirk10' + ref = "Kirk10" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 56, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 56, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Cruz07' + ref = "Cruz07" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 91, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 91, f"found {len(t)} discovery reference entries for {ref}" - ref = 'Roth' + ref = "Roth" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 83, f'found {len(t)} discovery reference entries for {ref}' + assert len(t) == 83, f"found {len(t)} discovery reference entries for {ref}" def test_proper_motion_refs(db): @@ -133,103 +139,114 @@ def test_proper_motion_refs(db): proper_motion_mearsurements = db.query(ProperMotions.reference, func.count(ProperMotions.reference)).\ group_by(ProperMotions.reference).order_by(func.count(ProperMotions.reference).desc()).limit(20).all() """ - ref = 'GaiaEDR3' + ref = "GaiaEDR3" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 1133, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 1133, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'GaiaDR2' + ref = "GaiaDR2" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 1076, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 1076, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Best20.257' + ref = "Best20.257" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 348, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 348, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Gagn15.73' + ref = "Gagn15.73" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 325, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 325, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Fahe09' + ref = "Fahe09" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 216, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 216, f"found {len(t)} proper motion reference entries for {ref}" # Kirk19 tested below. - ref = 'Best15' + ref = "Best15" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 120, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 120, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Burn13' + ref = "Burn13" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 97, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 97, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Dahn17' + ref = "Dahn17" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 79, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 79, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Jame08' + ref = "Jame08" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 73, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 73, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'vanL07' + ref = "vanL07" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 68, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 68, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Smar18' + ref = "Smar18" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 68, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 68, f"found {len(t)} proper motion reference entries for {ref}" - ref = 'Schm10.1808' + ref = "Schm10.1808" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 44, f'found {len(t)} proper motion reference entries for {ref}' + assert len(t) == 44, f"found {len(t)} proper motion reference entries for {ref}" def test_parallax_refs(db): # Test total odopted measuruments t = db.query(db.Parallaxes).filter(db.Parallaxes.c.adopted == 1).astropy() - assert len(t) == 1444, f'found {len(t)} adopted parallax measuruments.' + assert len(t) == 1444, f"found {len(t)} adopted parallax measuruments." - ref = 'GaiaDR3' + ref = "GaiaDR3" t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy() - assert len(t) == 1, f'found {len(t)} parallax reference entries for {ref}' + assert len(t) == 1, f"found {len(t)} parallax reference entries for {ref}" - ref = 'GaiaDR2' + ref = "GaiaDR2" t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy() - assert len(t) == 1076, f'found {len(t)} parallax reference entries for {ref}' + assert len(t) == 1076, f"found {len(t)} parallax reference entries for {ref}" - t = db.query(db.Parallaxes).filter(and_(db.Parallaxes.c.reference == ref, - db.Parallaxes.c.adopted == 1)).astropy() - assert len(t) == 36, f'found {len(t)} adopted parallax reference entries for {ref}' + t = ( + db.query(db.Parallaxes) + .filter(and_(db.Parallaxes.c.reference == ref, db.Parallaxes.c.adopted == 1)) + .astropy() + ) + assert len(t) == 36, f"found {len(t)} adopted parallax reference entries for {ref}" - ref = 'GaiaEDR3' + ref = "GaiaEDR3" t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy() - assert len(t) == 1133, f'found {len(t)} parallax reference entries for {ref}' - - t = db.query(db.Parallaxes).filter(and_(db.Parallaxes.c.reference == ref, - db.Parallaxes.c.adopted == 1)).astropy() - assert len(t) == 1104, f'found {len(t)} adopted parallax reference entries for {ref}' - - -@pytest.mark.parametrize('band, value', [ - ('GAIA2.G', 1267), - ('GAIA2.Grp', 1107), - ('GAIA3.G', 1256), - ('GAIA3.Grp', 1261), - ('WISE.W1', 461), - ('WISE.W2', 461), - ('WISE.W3', 457), - ('WISE.W4', 450), - ('2MASS.J', 1802), - ('2MASS.H', 1791), - ('2MASS.Ks', 1762), - ('GPI.Y', 1), - ('NIRI.Y', 21), - ('UFTI.Y', 13), - ('Wircam.Y', 29), - ('WFCAM.Y', 854), - ('VisAO.Ys', 1), - ('VISTA.Y', 59), -]) + assert len(t) == 1133, f"found {len(t)} parallax reference entries for {ref}" + + t = ( + db.query(db.Parallaxes) + .filter(and_(db.Parallaxes.c.reference == ref, db.Parallaxes.c.adopted == 1)) + .astropy() + ) + assert ( + len(t) == 1104 + ), f"found {len(t)} adopted parallax reference entries for {ref}" + + +@pytest.mark.parametrize( + "band, value", + [ + ("GAIA2.G", 1267), + ("GAIA2.Grp", 1107), + ("GAIA3.G", 1256), + ("GAIA3.Grp", 1261), + ("WISE.W1", 461), + ("WISE.W2", 461), + ("WISE.W3", 457), + ("WISE.W4", 450), + ("2MASS.J", 1802), + ("2MASS.H", 1791), + ("2MASS.Ks", 1762), + ("GPI.Y", 1), + ("NIRI.Y", 21), + ("UFTI.Y", 13), + ("Wircam.Y", 29), + ("WFCAM.Y", 854), + ("VisAO.Ys", 1), + ("VISTA.Y", 59), + ], +) def test_photometry_bands(db, band, value): # To refresh these counts: # from sqlalchemy import func @@ -238,171 +255,242 @@ def test_photometry_bands(db, band, value): # astropy() t = db.query(db.Photometry).filter(db.Photometry.c.band == band).astropy() - assert len(t) == value, f'found {len(t)} photometry measurements for {band}' + assert len(t) == value, f"found {len(t)} photometry measurements for {band}" def test_missions(db): # If 2MASS designation in Names, 2MASS photometry should exist - stm = except_(select(db.Names.c.source).where(db.Names.c.other_name.like("2MASS J%")), - select(db.Photometry.c.source).where(db.Photometry.c.band.like("2MASS%"))) + stm = except_( + select(db.Names.c.source).where(db.Names.c.other_name.like("2MASS J%")), + select(db.Photometry.c.source).where(db.Photometry.c.band.like("2MASS%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 256, f'found {len(s)} sources with 2MASS designation that have no 2MASS photometry' + assert ( + len(s) == 256 + ), f"found {len(s)} sources with 2MASS designation that have no 2MASS photometry" # If 2MASS photometry, 2MASS designation should be in Names - stm = except_(select(db.Photometry.c.source).where(db.Photometry.c.band.like("2MASS%")), - select(db.Names.c.source).where(db.Names.c.other_name.like("2MASS J%"))) + stm = except_( + select(db.Photometry.c.source).where(db.Photometry.c.band.like("2MASS%")), + select(db.Names.c.source).where(db.Names.c.other_name.like("2MASS J%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 2, f'found {len(s)} sources with 2MASS photometry that have no 2MASS designation ' + assert ( + len(s) == 2 + ), f"found {len(s)} sources with 2MASS photometry that have no 2MASS designation " # If Gaia designation in Names, Gaia photometry and astrometry should exist - stm = except_(select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia%")), - select(db.Photometry.c.source).where(db.Photometry.c.band.like("GAIA%"))) + stm = except_( + select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia%")), + select(db.Photometry.c.source).where(db.Photometry.c.band.like("GAIA%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 0, f'found {len(s)} sources with Gaia designation that have no GAIA photometry' + assert ( + len(s) == 0 + ), f"found {len(s)} sources with Gaia designation that have no GAIA photometry" # If Gaia photometry, Gaia designation should be in Names - stm = except_(select(db.Photometry.c.source).where(db.Photometry.c.band.like("GAIA%")), - select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia%"))) + stm = except_( + select(db.Photometry.c.source).where(db.Photometry.c.band.like("GAIA%")), + select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 0, f'found {len(s)} sources with Gaia photometry and no Gaia designation in Names' + assert ( + len(s) == 0 + ), f"found {len(s)} sources with Gaia photometry and no Gaia designation in Names" # If Wise designation in Names, Wise phot should exist - stm = except_(select(db.Names.c.source).where(db.Names.c.other_name.like("WISE%")), - select(db.Photometry.c.source).where(db.Photometry.c.band.like("WISE%"))) + stm = except_( + select(db.Names.c.source).where(db.Names.c.other_name.like("WISE%")), + select(db.Photometry.c.source).where(db.Photometry.c.band.like("WISE%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 480, f'found {len(s)} sources with WISE designation that have no WISE photometry' + assert ( + len(s) == 480 + ), f"found {len(s)} sources with WISE designation that have no WISE photometry" # If Wise photometry, Wise designation should be in Names - stm = except_(select(db.Photometry.c.source).where(db.Photometry.c.band.like("WISE%")), - select(db.Names.c.source).where(db.Names.c.other_name.like("WISE%"))) + stm = except_( + select(db.Photometry.c.source).where(db.Photometry.c.band.like("WISE%")), + select(db.Names.c.source).where(db.Names.c.other_name.like("WISE%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 389, f'found {len(s)} sources with WISE photometry and no Wise designation in Names' + assert ( + len(s) == 389 + ), f"found {len(s)} sources with WISE photometry and no Wise designation in Names" # If Gaia EDR3 pm, Gaia EDR3 designation should be in Names - stm = except_(select(db.ProperMotions.c.source).where(db.ProperMotions.c.reference.like("GaiaEDR3%")), - select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia EDR3%"))) + stm = except_( + select(db.ProperMotions.c.source).where( + db.ProperMotions.c.reference.like("GaiaEDR3%") + ), + select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia EDR3%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 0, f'found {len(s)} sources with Gaia EDR3 proper motion and no Gaia EDR3 designation in Names' + assert ( + len(s) == 0 + ), f"found {len(s)} sources with Gaia EDR3 proper motion and no Gaia EDR3 designation in Names" # If Gaia EDR3 parallax, Gaia EDR3 designation should be in Names - stm = except_(select(db.Parallaxes.c.source).where(db.Parallaxes.c.reference.like("GaiaEDR3%")), - select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia EDR3%"))) + stm = except_( + select(db.Parallaxes.c.source).where( + db.Parallaxes.c.reference.like("GaiaEDR3%") + ), + select(db.Names.c.source).where(db.Names.c.other_name.like("Gaia EDR3%")), + ) s = db.session.scalars(stm).all() - assert len(s) == 0, f'found {len(s)} sources with Gaia EDR3 parallax and no Gaia EDR3 designation in Names' + assert ( + len(s) == 0 + ), f"found {len(s)} sources with Gaia EDR3 parallax and no Gaia EDR3 designation in Names" def test_spectra(db): - regime = 'optical' + regime = "optical" t = db.query(db.Spectra).filter(db.Spectra.c.regime == regime).astropy() - assert len(t) == 719, f'found {len(t)} spectra in the {regime} regime' + assert len(t) == 719, f"found {len(t)} spectra in the {regime} regime" - regime = 'em.IR.NIR' + regime = "em.IR.NIR" t = db.query(db.Spectra).filter(db.Spectra.c.regime == regime).astropy() - assert len(t) == 118, f'found {len(t)} spectra in the {regime} regime' + assert len(t) == 118, f"found {len(t)} spectra in the {regime} regime" - regime = 'em.opt' + regime = "em.opt" t = db.query(db.Spectra).filter(db.Spectra.c.regime == regime).astropy() - assert len(t) == 21, f'found {len(t)} spectra in the {regime} regime' + assert len(t) == 21, f"found {len(t)} spectra in the {regime} regime" - regime = 'nir' + regime = "nir" t = db.query(db.Spectra).filter(db.Spectra.c.regime == regime).astropy() - assert len(t) == 459, f'found {len(t)} spectra in the {regime} regime' + assert len(t) == 459, f"found {len(t)} spectra in the {regime} regime" - regime = 'mir' + regime = "mir" t = db.query(db.Spectra).filter(db.Spectra.c.regime == regime).astropy() - assert len(t) == 204, f'found {len(t)} spectra in the {regime} regime' + assert len(t) == 204, f"found {len(t)} spectra in the {regime} regime" - telescope = 'IRTF' + telescope = "IRTF" t = db.query(db.Spectra).filter(db.Spectra.c.telescope == telescope).astropy() - assert len(t) == 436, f'found {len(t)} spectra from {telescope}' - - telescope = 'HST' - instrument = 'WFC3' - t = db.query(db.Spectra).filter( - and_(db.Spectra.c.telescope == telescope, db.Spectra.c.instrument == instrument)).astropy() - assert len(t) == 77, f'found {len(t)} spectra from {telescope}/{instrument}' - - ref = 'Reid08.1290' + assert len(t) == 436, f"found {len(t)} spectra from {telescope}" + + telescope = "HST" + instrument = "WFC3" + t = ( + db.query(db.Spectra) + .filter( + and_( + db.Spectra.c.telescope == telescope, + db.Spectra.c.instrument == instrument, + ) + ) + .astropy() + ) + assert len(t) == 77, f"found {len(t)} spectra from {telescope}/{instrument}" + + ref = "Reid08.1290" t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 280, f'found {len(t)} spectra from {ref}' + assert len(t) == 280, f"found {len(t)} spectra from {ref}" - ref = 'Cruz03' + ref = "Cruz03" t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 191, f'found {len(t)} spectra from {ref}' + assert len(t) == 191, f"found {len(t)} spectra from {ref}" - ref = 'Cruz18' + ref = "Cruz18" t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 186, f'found {len(t)} spectra from {ref}' + assert len(t) == 186, f"found {len(t)} spectra from {ref}" - ref = 'Cruz07' + ref = "Cruz07" t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 158, f'found {len(t)} spectra from {ref}' + assert len(t) == 158, f"found {len(t)} spectra from {ref}" - ref = 'Bard14' + ref = "Bard14" t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 57, f'found {len(t)} spectra from {ref}' + assert len(t) == 57, f"found {len(t)} spectra from {ref}" - ref = 'Burg10.1142' + ref = "Burg10.1142" t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 46, f'found {len(t)} spectra from {ref}' + assert len(t) == 46, f"found {len(t)} spectra from {ref}" - ref = 'Manj20' + ref = "Manj20" t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 20, f'found {len(t)} spectra from {ref}' + assert len(t) == 20, f"found {len(t)} spectra from {ref}" def test_spectral_types(db): # Test to verify existing counts of spectral types grouped by regime - regime = 'optical' + regime = "optical" t = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.regime == regime).astropy() - assert len(t) == 1494, f'found {len(t)} spectral types in the {regime} regime' + assert len(t) == 1494, f"found {len(t)} spectral types in the {regime} regime" - regime = 'optical_UCD' + regime = "optical_UCD" t = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.regime == regime).astropy() - assert len(t) == 0, f'found {len(t)} spectral types in the {regime} regime' + assert len(t) == 0, f"found {len(t)} spectral types in the {regime} regime" - regime = 'nir' + regime = "nir" t = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.regime == regime).astropy() - assert len(t) == 381, f'found {len(t)} spectral types in the {regime} regime' + assert len(t) == 381, f"found {len(t)} spectral types in the {regime} regime" - regime = 'nir_UCD' + regime = "nir_UCD" t = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.regime == regime).astropy() - assert len(t) == 1977, f'found {len(t)} spectral types in the {regime} regime' + assert len(t) == 1977, f"found {len(t)} spectral types in the {regime} regime" - regime = 'mir' + regime = "mir" t = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.regime == regime).astropy() - assert len(t) == 0, f'found {len(t)} spectral types in the {regime} regime' + assert len(t) == 0, f"found {len(t)} spectral types in the {regime} regime" - regime = 'mir_UCD' + regime = "mir_UCD" t = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.regime == regime).astropy() - assert len(t) == 0, f'found {len(t)} spectral types in the {regime} regime' + assert len(t) == 0, f"found {len(t)} spectral types in the {regime} regime" - regime = 'unknown' + regime = "unknown" t = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.regime == regime).astropy() - assert len(t) == 10, f'found {len(t)} spectral types in the {regime} regime' + assert len(t) == 10, f"found {len(t)} spectral types in the {regime} regime" # Test number MLTY dwarfs - m_dwarfs = db.query(db.SpectralTypes).filter( - and_(db.SpectralTypes.c.spectral_type_code >= 60, - db.SpectralTypes.c.spectral_type_code < 70)).astropy() - assert len(m_dwarfs) == 843, f'found {len(t)} M spectral types' - - l_dwarfs = db.query(db.SpectralTypes).filter( - and_(db.SpectralTypes.c.spectral_type_code >= 70, - db.SpectralTypes.c.spectral_type_code < 80)).astropy() - assert len(l_dwarfs) == 1963, f'found {len(l_dwarfs)} L spectral types' - - t_dwarfs = db.query(db.SpectralTypes).filter( - and_(db.SpectralTypes.c.spectral_type_code >= 80, - db.SpectralTypes.c.spectral_type_code < 90)).astropy() - assert len(t_dwarfs) == 999, f'found {len(t_dwarfs)} T spectral types' - - y_dwarfs = db.query(db.SpectralTypes).filter( - and_(db.SpectralTypes.c.spectral_type_code >= 90)).astropy() - assert len(y_dwarfs) == 79, f'found {len(y_dwarfs)} Y spectral types' + m_dwarfs = ( + db.query(db.SpectralTypes) + .filter( + and_( + db.SpectralTypes.c.spectral_type_code >= 60, + db.SpectralTypes.c.spectral_type_code < 70, + ) + ) + .astropy() + ) + assert len(m_dwarfs) == 843, f"found {len(t)} M spectral types" + + l_dwarfs = ( + db.query(db.SpectralTypes) + .filter( + and_( + db.SpectralTypes.c.spectral_type_code >= 70, + db.SpectralTypes.c.spectral_type_code < 80, + ) + ) + .astropy() + ) + assert len(l_dwarfs) == 1963, f"found {len(l_dwarfs)} L spectral types" + + t_dwarfs = ( + db.query(db.SpectralTypes) + .filter( + and_( + db.SpectralTypes.c.spectral_type_code >= 80, + db.SpectralTypes.c.spectral_type_code < 90, + ) + ) + .astropy() + ) + assert len(t_dwarfs) == 999, f"found {len(t_dwarfs)} T spectral types" + + y_dwarfs = ( + db.query(db.SpectralTypes) + .filter(and_(db.SpectralTypes.c.spectral_type_code >= 90)) + .astropy() + ) + assert len(y_dwarfs) == 79, f"found {len(y_dwarfs)} Y spectral types" n_spectral_types = db.query(db.SpectralTypes).count() - assert len(m_dwarfs) + len(l_dwarfs) + len(t_dwarfs) + len(y_dwarfs) == n_spectral_types + assert ( + len(m_dwarfs) + len(l_dwarfs) + len(t_dwarfs) + len(y_dwarfs) + == n_spectral_types + ) # Individual ingest tests @@ -421,31 +509,57 @@ def test_Manj19_data(db): Manja_ingest_spectra19.py """ - pub = 'Manj19' + pub = "Manj19" # Check DOI and Bibcode values are correctly set for new publications added - manj19_pub = db.query(db.Publications).filter(db.Publications.c.reference == pub).astropy() - reference_verifier(manj19_pub, 'Manj19', '2019AJ....157..101M', '10.3847/1538-3881/aaf88f') + manj19_pub = ( + db.query(db.Publications).filter(db.Publications.c.reference == pub).astropy() + ) + reference_verifier( + manj19_pub, "Manj19", "2019AJ....157..101M", "10.3847/1538-3881/aaf88f" + ) # Test total spectral types added - n_Manj19_types = db.query(db.SpectralTypes).filter(db.SpectralTypes.c.reference == pub).count() - assert n_Manj19_types == 40, f'found {n_Manj19_types} sources for {pub}' + n_Manj19_types = ( + db.query(db.SpectralTypes).filter(db.SpectralTypes.c.reference == pub).count() + ) + assert n_Manj19_types == 40, f"found {n_Manj19_types} sources for {pub}" # Test number of L types added - n_Manj19_Ltypes = db.query(db.SpectralTypes).filter(and_(db.SpectralTypes.c.spectral_type_code >= 70, - db.SpectralTypes.c.spectral_type_code < 80, - db.SpectralTypes.c.reference == pub)).count() - assert n_Manj19_Ltypes == 19, f'found {n_Manj19_Ltypes} L type dwarfs for {pub}' + n_Manj19_Ltypes = ( + db.query(db.SpectralTypes) + .filter( + and_( + db.SpectralTypes.c.spectral_type_code >= 70, + db.SpectralTypes.c.spectral_type_code < 80, + db.SpectralTypes.c.reference == pub, + ) + ) + .count() + ) + assert n_Manj19_Ltypes == 19, f"found {n_Manj19_Ltypes} L type dwarfs for {pub}" # Test number of T types added - n_Manj19_Ttypes = db.query(db.SpectralTypes).filter(and_(db.SpectralTypes.c.spectral_type_code >= 80, - db.SpectralTypes.c.spectral_type_code < 90, - db.SpectralTypes.c.reference == pub)).count() - assert n_Manj19_Ttypes == 21, f'found {n_Manj19_Ttypes} T type dwarfs for {pub}' + n_Manj19_Ttypes = ( + db.query(db.SpectralTypes) + .filter( + and_( + db.SpectralTypes.c.spectral_type_code >= 80, + db.SpectralTypes.c.spectral_type_code < 90, + db.SpectralTypes.c.reference == pub, + ) + ) + .count() + ) + assert n_Manj19_Ttypes == 21, f"found {n_Manj19_Ttypes} T type dwarfs for {pub}" # Test spectra added - n_Manj19_spectra = db.query(db.Spectra).filter(db.Spectra.c.reference == pub).astropy() - assert len(n_Manj19_spectra) == 77, f'found {len(n_Manj19_spectra)} spectra from {pub}' + n_Manj19_spectra = ( + db.query(db.Spectra).filter(db.Spectra.c.reference == pub).astropy() + ) + assert ( + len(n_Manj19_spectra) == 77 + ), f"found {len(n_Manj19_spectra)} spectra from {pub}" def test_Kirk19_ingest(db): @@ -467,140 +581,251 @@ def test_Kirk19_ingest(db): # ----------------------------------------------------------------------------------------- # Test refereces added - ref_list = ['Tinn18', 'Pinf14.1931', 'Mace13.6', 'Kirk12', 'Cush11.50', 'Kirk13', - 'Schn15', 'Luhm14.18', 'Tinn14', 'Tinn12', 'Cush14', 'Kirk19'] - t = db.query(db.Publications).filter(db.Publications.c.reference.in_(ref_list)).astropy() + ref_list = [ + "Tinn18", + "Pinf14.1931", + "Mace13.6", + "Kirk12", + "Cush11.50", + "Kirk13", + "Schn15", + "Luhm14.18", + "Tinn14", + "Tinn12", + "Cush14", + "Kirk19", + ] + t = ( + db.query(db.Publications) + .filter(db.Publications.c.reference.in_(ref_list)) + .astropy() + ) if len(ref_list) != len(t): - missing_ref = list(set(ref_list) - set(t['name'])) - assert len(ref_list) == len(t), f'Missing references: {missing_ref}' + missing_ref = list(set(ref_list) - set(t["name"])) + assert len(ref_list) == len(t), f"Missing references: {missing_ref}" # Check DOI and Bibcode values are correctly set for new references added - reference_verifier(t, 'Kirk19', '2019ApJS..240...19K', '10.3847/1538-4365/aaf6af') - reference_verifier(t, 'Pinf14.1931', '2014MNRAS.444.1931P', '10.1093/mnras/stu1540') - reference_verifier(t, 'Tinn18', '2018ApJS..236...28T', '10.3847/1538-4365/aabad3') + reference_verifier(t, "Kirk19", "2019ApJS..240...19K", "10.3847/1538-4365/aaf6af") + reference_verifier(t, "Pinf14.1931", "2014MNRAS.444.1931P", "10.1093/mnras/stu1540") + reference_verifier(t, "Tinn18", "2018ApJS..236...28T", "10.3847/1538-4365/aabad3") # Data tests # ----------------------------------------------------------------------------------------- # Test sources added - ref = 'Pinf14.1931' + ref = "Pinf14.1931" t = db.query(db.Sources).filter(db.Sources.c.reference == ref).astropy() - assert len(t) == 1, f'found {len(t)} sources for {ref}' + assert len(t) == 1, f"found {len(t)} sources for {ref}" # Test spectral types added # Test parallaxes - ref = 'Kirk19' + ref = "Kirk19" t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy() - assert len(t) == 23, f'found {len(t)} parallax entries for {ref}' + assert len(t) == 23, f"found {len(t)} parallax entries for {ref}" # Test proper motions added - ref = 'Kirk19' + ref = "Kirk19" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 182, f'found {len(t)} proper motion entries for {ref}' + assert len(t) == 182, f"found {len(t)} proper motion entries for {ref}" # Test photometry added - telescope = 'Spitzer' - ref = 'Kirk19' - t = db.query(db.Photometry).filter(and_( - db.Photometry.c.telescope == telescope, - db.Photometry.c.reference == ref)).astropy() - assert len(t) == 290, f'found {len(t)} photometry entries for {telescope}' - - ref = 'Kirk19' + telescope = "Spitzer" + ref = "Kirk19" + t = ( + db.query(db.Photometry) + .filter( + and_( + db.Photometry.c.telescope == telescope, db.Photometry.c.reference == ref + ) + ) + .astropy() + ) + assert len(t) == 290, f"found {len(t)} photometry entries for {telescope}" + + ref = "Kirk19" t = db.query(db.Photometry).filter(db.Photometry.c.reference == ref).astropy() - assert len(t) == 290, f'found {len(t)} photometry entries for {ref}' + assert len(t) == 290, f"found {len(t)} photometry entries for {ref}" - ref = 'Schn15' + ref = "Schn15" t = db.query(db.Photometry).filter(db.Photometry.c.reference == ref).astropy() - assert len(t) == 28, f'found {len(t)} photometry entries for {ref}' + assert len(t) == 28, f"found {len(t)} photometry entries for {ref}" # Test parallaxes added for ATLAS - ref = 'Mart18' + ref = "Mart18" t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy() - assert len(t) == 15, f'found {len(t)} parallax entries for {ref}' + assert len(t) == 15, f"found {len(t)} parallax entries for {ref}" def test_Best2020_ingest(db): # Test for Best20.257 proper motions added - ref = 'Best20.257' + ref = "Best20.257" t = db.query(db.ProperMotions).filter(db.ProperMotions.c.reference == ref).astropy() - assert len(t) == 348, f'found {len(t)} proper motion entries for {ref}' + assert len(t) == 348, f"found {len(t)} proper motion entries for {ref}" # Test for Best20.257 parallaxes added - ref = 'Best20.257' + ref = "Best20.257" t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy() - assert len(t) == 348, f'found {len(t)} parallax entries for {ref}' + assert len(t) == 348, f"found {len(t)} parallax entries for {ref}" # Test for number of Best20.257 parallaxes that are adopted - t = db.query(db.Parallaxes).filter(and_(db.Parallaxes.c.reference == ref, - db.Parallaxes.c.adopted == 1)).astropy() - assert len(t) == 255, f'found {len(t)} adopted parallax entries for {ref}' + t = ( + db.query(db.Parallaxes) + .filter(and_(db.Parallaxes.c.reference == ref, db.Parallaxes.c.adopted == 1)) + .astropy() + ) + assert len(t) == 255, f"found {len(t)} adopted parallax entries for {ref}" def test_suar22_ingest(db): - ref_list = ['Suar22'] - ref = 'Suar22' - - t = db.query(db.Publications).filter(db.Publications.c.reference.in_(ref_list)).astropy() + ref_list = ["Suar22"] + ref = "Suar22" + + t = ( + db.query(db.Publications) + .filter(db.Publications.c.reference.in_(ref_list)) + .astropy() + ) if len(ref_list) != len(t): - missing_ref = list(set(ref_list) - set(t['name'])) - assert len(ref_list) == len(t), f'Missing references: {missing_ref}' + missing_ref = list(set(ref_list) - set(t["name"])) + assert len(ref_list) == len(t), f"Missing references: {missing_ref}" # Check DOI and Bibcode values are correctly set for new references added - reference_verifier(t, 'Suar22', '2022MNRAS.513.5701S', '10.1093/mnras/stac1205') + reference_verifier(t, "Suar22", "2022MNRAS.513.5701S", "10.1093/mnras/stac1205") # Test for Suar22 spectra added t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() - assert len(t) == 112, f'found {len(t)} spectra entries for {ref}' + assert len(t) == 112, f"found {len(t)} spectra entries for {ref}" def test_modeledparameters(db): # Test to verify existing counts of modeled parameters - ref = 'Fili15' - t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.reference == ref).astropy() - assert len(t) == 696, f'found {len(t)} modeled parameters with {ref} reference' - - #Test to verify log g counts - param = 'log g' - t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.parameter == param).astropy() - assert len(t) == 176, f'found {len(t)} modeled parameters with {param} parameter' - - #Test to verify metallicity counts - param = 'metallicity' - t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.parameter == param).astropy() - assert len(t) == 2, f'found {len(t)} modeled parameters with {param} parameter' - - #Test to verify radius counts - param = 'radius' - t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.parameter == param).astropy() - assert len(t) == 175, f'found {len(t)} modeled parameters with {param} parameter' - - #Test to verify mass counts - param = 'mass' - t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.parameter == param).astropy() - assert len(t) == 176, f'found {len(t)} modeled parameters with {param} parameter' - - #Test to verify T eff counts - param = 'T eff' - t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.parameter == param).astropy() - assert len(t) == 176, f'found {len(t)} modeled parameters with {param} parameter' - - #Test to verify Lodi22 reference counts - ref = 'Lodi22' - t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.reference == ref).astropy() - assert len(t) == 5, f'found {len(t)} modeled parameters with {ref} reference' + ref = "Fili15" + t = ( + db.query(db.ModeledParameters) + .filter(db.ModeledParameters.c.reference == ref) + .astropy() + ) + assert len(t) == 696, f"found {len(t)} modeled parameters with {ref} reference" + + # Test to verify log g counts + param = "log g" + t = ( + db.query(db.ModeledParameters) + .filter(db.ModeledParameters.c.parameter == param) + .astropy() + ) + assert len(t) == 176, f"found {len(t)} modeled parameters with {param} parameter" + + # Test to verify metallicity counts + param = "metallicity" + t = ( + db.query(db.ModeledParameters) + .filter(db.ModeledParameters.c.parameter == param) + .astropy() + ) + assert len(t) == 2, f"found {len(t)} modeled parameters with {param} parameter" + + # Test to verify radius counts + param = "radius" + t = ( + db.query(db.ModeledParameters) + .filter(db.ModeledParameters.c.parameter == param) + .astropy() + ) + assert len(t) == 175, f"found {len(t)} modeled parameters with {param} parameter" + + # Test to verify mass counts + param = "mass" + t = ( + db.query(db.ModeledParameters) + .filter(db.ModeledParameters.c.parameter == param) + .astropy() + ) + assert len(t) == 176, f"found {len(t)} modeled parameters with {param} parameter" + + # Test to verify T eff counts + param = "T eff" + t = ( + db.query(db.ModeledParameters) + .filter(db.ModeledParameters.c.parameter == param) + .astropy() + ) + assert len(t) == 176, f"found {len(t)} modeled parameters with {param} parameter" + + # Test to verify Lodi22 reference counts + ref = "Lodi22" + t = ( + db.query(db.ModeledParameters) + .filter(db.ModeledParameters.c.reference == ref) + .astropy() + ) + assert len(t) == 5, f"found {len(t)} modeled parameters with {ref} reference" def test_photometrymko_y(db): # Test for Y photometry entries added for references - bands_list = ['Wircam.Y', 'WFCAM.Y', 'NIRI.Y', 'VISTA.Y', 'GPI.Y', 'VisAO.Ys', 'UFTI.Y'] - ref_list = ['Albe11', 'Burn08', 'Burn09', 'Burn10.1885', 'Burn13', 'Burn14', 'Card15', 'Deac11.6319', 'Deac12.100', - 'Deac14.119', 'Deac17.1126', 'Delo08.961', 'Delo12', 'Dupu12.19', 'Dupu15.102', 'Dupu19 ', 'Edge16', - 'Garc17.162', 'Gauz12 ', 'Kell16', 'Lawr07', 'Lawr12', 'Legg13', 'Legg15', 'Legg16', 'Liu_12', - 'Liu_13.20', 'Liu_16', 'Lodi07.372', 'Lodi12.53', 'Lodi13.2474', 'Luca10', 'Male14', 'McMa13', - 'Minn17', 'Naud14', 'Pena11', 'Pena12', 'Pinf08', 'Smit18', 'Warr07.1400'] - - t = db.query(db.Photometry).filter(and_(db.Photometry.c.band.in_(bands_list), - db.Photometry.c.reference.in_(ref_list))).astropy() - assert len(t) == 969, f'found {len(t)} Y photometry entries' + bands_list = [ + "Wircam.Y", + "WFCAM.Y", + "NIRI.Y", + "VISTA.Y", + "GPI.Y", + "VisAO.Ys", + "UFTI.Y", + ] + ref_list = [ + "Albe11", + "Burn08", + "Burn09", + "Burn10.1885", + "Burn13", + "Burn14", + "Card15", + "Deac11.6319", + "Deac12.100", + "Deac14.119", + "Deac17.1126", + "Delo08.961", + "Delo12", + "Dupu12.19", + "Dupu15.102", + "Dupu19 ", + "Edge16", + "Garc17.162", + "Gauz12 ", + "Kell16", + "Lawr07", + "Lawr12", + "Legg13", + "Legg15", + "Legg16", + "Liu_12", + "Liu_13.20", + "Liu_16", + "Lodi07.372", + "Lodi12.53", + "Lodi13.2474", + "Luca10", + "Male14", + "McMa13", + "Minn17", + "Naud14", + "Pena11", + "Pena12", + "Pinf08", + "Smit18", + "Warr07.1400", + ] + + t = ( + db.query(db.Photometry) + .filter( + and_( + db.Photometry.c.band.in_(bands_list), + db.Photometry.c.reference.in_(ref_list), + ) + ) + .astropy() + ) + assert len(t) == 969, f"found {len(t)} Y photometry entries" diff --git a/tests/test_integrity.py b/tests/test_integrity.py index 2ebf7154d..b6ba87042 100644 --- a/tests/test_integrity.py +++ b/tests/test_integrity.py @@ -3,7 +3,7 @@ import pytest from . import REFERENCE_TABLES from sqlalchemy import func, and_ # , select, except_ -from simple.schema import * +from schema.schema import * from astrodbkit2.astrodb import create_database, Database, or_ from astropy.table import unique from astropy import units as u @@ -11,7 +11,7 @@ from astrodbkit2.utils import _name_formatter -DB_NAME = "temp.db" +DB_NAME = "temp.sqlite" DB_PATH = "data" diff --git a/tests/test_utils.py b/tests/test_utils.py index 92a96a53b..dced87653 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,33 +1,27 @@ import pytest -import math import os from astrodbkit2.astrodb import create_database, Database from astropy.table import Table from sqlalchemy import and_ -from scripts.ingests.utils import ( - SimpleError, - find_publication, - ingest_publication, +import logging +from astrodb_scripts.utils import ( + AstroDBError, ) -from scripts.ingests.ingest_utils import ( +from scripts.ingests.simple_utils import ( convert_spt_string_to_code, ingest_companion_relationships, - ingest_source, - ingest_sources, ingest_parallaxes, ingest_spectral_types, - ingest_instrument, ingest_proper_motions, ) -from simple.schema import * -import logging +from schema.schema import * logger = logging.getLogger("SIMPLE") logger.setLevel(logging.DEBUG) -DB_NAME = "temp.db" +DB_NAME = "simple_temp.sqlite" DB_PATH = "data" @@ -124,110 +118,6 @@ def test_setup_db(db): return db -@pytest.mark.filterwarnings( - "ignore::UserWarning" -) # suppress astroquery SIMBAD warnings -def test_ingest_sources(db): - # TODO: Test adding an alt name - source_data1 = Table( - [ - { - "source": "Apple", - "ra": 10.0673755, - "dec": 17.352889, - "reference": "Ref 1", - }, - { - "source": "Orange", - "ra": 12.0673755, - "dec": -15.352889, - "reference": "Ref 2", - }, - { - "source": "Banana", - "ra": 119.0673755, - "dec": -28.352889, - "reference": "Ref 1", - }, - ] - ) - - ingest_sources( - db, - source_data1["source"], - ras=source_data1["ra"], - decs=source_data1["dec"], - references=source_data1["reference"], - raise_error=True, - ) - assert db.query(db.Sources).filter(db.Sources.c.source == "Apple").count() == 1 - assert db.query(db.Sources).filter(db.Sources.c.source == "Orange").count() == 1 - assert db.query(db.Sources).filter(db.Sources.c.source == "Banana").count() == 1 - - -@pytest.mark.filterwarnings( - "ignore::UserWarning" -) # suppress astroquery SIMBAD warnings -def test_ingest_source(db): - ingest_source(db, "Barnard Star", reference="Ref 2", raise_error=True) - - Barnard_star = ( - db.query(db.Sources).filter(db.Sources.c.source == "Barnard Star").astropy() - ) - assert len(Barnard_star) == 1 - assert math.isclose(Barnard_star["ra"][0], 269.452, abs_tol=0.001) - assert math.isclose(Barnard_star["dec"][0], 4.6933, abs_tol=0.001) - - source_data8 = { - "source": "Fake 8", - "ra": 9.06799, - "dec": 18.352889, - "reference": "Ref 4", - } - with pytest.raises(SimpleError) as error_message: - ingest_source( - db, - source_data8["source"], - ra=source_data8["ra"], - dec=source_data8["dec"], - reference=source_data8["reference"], - raise_error=True, - ) - assert "not in Publications table" in str(error_message.value) - - source_data5 = { - "source": "Fake 5", - "ra": 9.06799, - "dec": 18.352889, - "reference": "", - } - with pytest.raises(SimpleError) as error_message: - ingest_source( - db, - source_data5["source"], - ra=source_data5["ra"], - dec=source_data5["dec"], - reference=source_data5["reference"], - raise_error=True, - ) - assert "blank" in str(error_message.value) - - with pytest.raises(SimpleError) as error_message: - ingest_source(db, "NotinSimbad", reference="Ref 1", raise_error=True) - assert "Coordinates are needed" in str(error_message.value) - - with pytest.raises(SimpleError) as error_message: - ingest_source( - db, - "Fake 1", - ra=11.0673755, - dec=18.352889, - reference="Ref 1", - raise_error=True, - ) - assert "already exists" in str(error_message.value) - - def test_convert_spt_string_to_code(): # Test conversion of spectral types into numeric values assert convert_spt_string_to_code(["M5.6"]) == [65.6] @@ -354,7 +244,7 @@ def test_ingest_spectral_types(db): assert results["spectral_type_string"][0] == "Y2pec" assert results["spectral_type_code"][0] == [92] # testing for publication error - with pytest.raises(SimpleError) as error_message: + with pytest.raises(AstroDBError) as error_message: ingest_spectral_types( db, data3["source"], @@ -367,123 +257,6 @@ def test_ingest_spectral_types(db): ) -def test_find_publication(db): - assert not find_publication(db)[0] # False - assert find_publication(db, name="Ref 1")[0] # True - assert find_publication(db, name="Ref 1", doi="10.1093/mnras/staa1522")[0] # True - doi_search = find_publication(db, doi="10.1093/mnras/staa1522") - assert doi_search[0] # True - assert doi_search[1] == "Ref 1" - bibcode_search = find_publication(db, bibcode="2020MNRAS.496.1922B") - assert bibcode_search[0] # True - assert bibcode_search[1] == "Ref 1" - multiple_matches = find_publication(db, name="Ref") - assert not multiple_matches[0] # False, multiple matches - assert multiple_matches[1] == 2 # multiple matches - assert not find_publication(db, name="Ref 2", doi="10.1093/mnras/staa1522")[ - 0 - ] # False - assert not find_publication(db, name="Ref 2", bibcode="2020MNRAS.496.1922B")[ - 0 - ] # False - assert find_publication(db, name="Burningham_2008") == (1, "Burn08") - - -def test_ingest_publication(db): - # should fail if trying to add a duplicate record - with pytest.raises(SimpleError) as error_message: - ingest_publication(db, publication="Ref 1", bibcode="2020MNRAS.496.1922B") - assert " similar publication already exists" in str(error_message.value) - # TODO - Mock environment where ADS_TOKEN is not set. #117 - - -def test_ingest_instrument(db): - # TESTS WHICH SHOULD WORK - - # test adding just telescope - ingest_instrument(db, telescope="test") - telescope_db = ( - db.query(db.Telescopes).filter(db.Telescopes.c.telescope == "test").table() - ) - assert len(telescope_db) == 1 - assert telescope_db["telescope"][0] == "test" - - # No longer supported just adding an instrument without a mode - # test adding telescope and instrument - # tel_test = 'test2' - # inst_test = 'test3' - # ingest_instrument(db, telescope=tel_test, instrument=inst_test) - # telescope_db = db.query(db.Telescopes). - # filter(db.Telescopes.c.telescope == tel_test).table() - # instrument_db = db.query(db.Instruments). - # filter(db.Instruments.c.instrument == inst_test).table() - # assert len(telescope_db) == 1 - # assert telescope_db['telescope'][0] == tel_test - # assert len(instrument_db) == 1 - # assert instrument_db['instrument'][0] == inst_test - - # test adding new telescope, instrument, and mode - tel_test = "test4" - inst_test = "test5" - mode_test = "test6" - ingest_instrument(db, telescope=tel_test, instrument=inst_test, mode=mode_test) - telescope_db = ( - db.query(db.Telescopes).filter(db.Telescopes.c.telescope == tel_test).table() - ) - instrument_db = ( - db.query(db.Instruments) - .filter( - and_( - db.Instruments.c.mode == mode_test, - db.Instruments.c.instrument == inst_test, - db.Instruments.c.telescope == tel_test, - ) - ) - .table() - ) - assert len(telescope_db) == 1, "Missing telescope insert" - assert telescope_db["telescope"][0] == tel_test - assert len(instrument_db) == 1 - assert instrument_db["instrument"][0] == inst_test - assert instrument_db["mode"][0] == mode_test - - # test adding common mode name for new telescope, instrument - tel_test = "test4" - inst_test = "test5" - mode_test = "Prism" - print(db.query(db.Telescopes).table()) - print(db.query(db.Instruments).table()) - ingest_instrument(db, telescope=tel_test, instrument=inst_test, mode=mode_test) - mode_db = ( - db.query(db.Instruments) - .filter( - and_( - db.Instruments.c.mode == mode_test, - db.Instruments.c.instrument == inst_test, - db.Instruments.c.telescope == tel_test, - ) - ) - .table() - ) - assert len(mode_db) == 1 - assert mode_db["mode"][0] == mode_test - - # TESTS WHICH SHOULD FAIL - # test with no variables provided - with pytest.raises(SimpleError) as error_message: - ingest_instrument(db) - assert "Telescope, Instrument, and Mode must be provided" in str( - error_message.value - ) - - # test with mode but no instrument or telescope - with pytest.raises(SimpleError) as error_message: - ingest_instrument(db, mode="test") - assert "Telescope, Instrument, and Mode must be provided" in str( - error_message.value - ) - - # TODO: test for ingest_photometry # TODO: test for ingest_spectra @@ -492,24 +265,24 @@ def test_ingest_instrument(db): def test_companion_relationships(db): # testing companion ingest # trying no companion - with pytest.raises(SimpleError) as error_message: + with pytest.raises(AstroDBError) as error_message: ingest_companion_relationships(db, "Fake 1", None, "Sibling") assert "Make sure all require parameters are provided." in str(error_message.value) # trying companion == source - with pytest.raises(SimpleError) as error_message: + with pytest.raises(AstroDBError) as error_message: ingest_companion_relationships(db, "Fake 1", "Fake 1", "Sibling") assert "Source cannot be the same as companion name" in str(error_message.value) # trying negative separation - with pytest.raises(SimpleError) as error_message: + with pytest.raises(AstroDBError) as error_message: ingest_companion_relationships( db, "Fake 1", "Bad Companion", "Sibling", projected_separation_arcsec=-5 ) assert "cannot be negative" in str(error_message.value) # trying negative separation error - with pytest.raises(SimpleError) as error_message: + with pytest.raises(AstroDBError) as error_message: ingest_companion_relationships( db, "Fake 1", "Bad Companion", "Sibling", projected_separation_error=-5 )