Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ingest parallaxes from Ultracool sheet #528

Merged
merged 11 commits into from
Jul 8, 2024
106 changes: 106 additions & 0 deletions scripts/ingests/ultracool_sheet/Ingest_Parallax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
from astrodb_utils import (
load_astrodb,
find_source_in_db,
AstroDBError,
)
import sys

sys.path.append(".")
import logging
from astropy.io import ascii
from simple.schema import Photometry
from simple.schema import REFERENCE_TABLES
from math import isnan
import sqlalchemy.exc
from simple.utils.astrometry import ingest_parallax
from scripts.ingests.ultracool_sheet.references import uc_ref_to_simple_ref

# Reuse the shared "AstroDB" logger and make INFO-level messages visible.
logger = logging.getLogger("AstroDB")
logger.setLevel(logging.INFO)

# Run switches: DB_SAVE gates the final save_database() call at the end of
# the script; RECREATE_DB is forwarded to load_astrodb as `recreatedb`.
DB_SAVE = False
RECREATE_DB = True

db = load_astrodb(
    "SIMPLE.sqlite",
    recreatedb=RECREATE_DB,
    reference_tables=REFERENCE_TABLES,
)

# Location of the Ultracool sheet, exported by Google Sheets as CSV.
doc_id = "1i98ft8g5mzPp2DNno0kcz4B9nzMxdpyz5UquAVhz-U8"
sheet_id = "361525788"
link = f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"

# Reader options: row 0 is the header, data starts at row 1, and format
# guessing / the fast reader are both disabled.
csv_read_options = {
    "format": "csv",
    "data_start": 1,
    "header_start": 0,
    "guess": False,
    "fast_reader": False,
    "delimiter": ",",
}

# Read the CSV data into an astropy table.
uc_sheet_table = ascii.read(link, **csv_read_options)

# Outcome counters for the summary printed after the loop.
no_sources = 0
multiple_sources = 0
ingested = 0
already_exists = 0

# Ingest loop: one parallax measurement per Ultracool sheet row.
for source in uc_sheet_table:
    if isnan(source["plx_lit"]):  # skip if no data
        continue

    uc_sheet_name = source["name"]
    matches = find_source_in_db(
        db,
        uc_sheet_name,
        ra=source["ra_j2000_formula"],
        dec=source["dec_j2000_formula"],
    )

    # Only an unambiguous single match is ingested; everything else is counted.
    if len(matches) == 0:
        no_sources += 1
        continue
    if len(matches) > 1:
        multiple_sources += 1
        continue

    # 1 Match found. INGEST!
    simple_source = matches[0]
    logger.info(f"Match found for {uc_sheet_name}: {simple_source}")

    try:
        # The sheet stores one or more references separated by ";".
        # The first is used as the measurement reference.
        references = source["ref_plx_lit"].split(";")
        if references[0] == "Harr15":  # weird reference in UC sheet.
            reference = "Harr15"
        else:
            reference = uc_ref_to_simple_ref(db, references[0])

        # Record every additional reference in the comment, not just the
        # second one, so no citation from the sheet is silently dropped.
        comment = None
        if len(references) > 1:
            other_references = ", ".join(
                uc_ref_to_simple_ref(db, extra) for extra in references[1:]
            )
            comment = f"other references: {other_references}"

        ingest_parallax(
            db,
            simple_source,
            source["plx_lit"],
            source["plxerr_lit"],
            reference,
            comment,
        )
        ingested += 1
    except AstroDBError as e:
        msg = "ingest failed with error: " + str(e)
        # A pre-existing measurement with the same reference is expected and
        # only counted; any other ingest failure aborts the script.
        if "Duplicate measurement exists" not in str(e):
            logger.warning(msg)
            raise AstroDBError(msg) from e
        already_exists += 1


# 1108 data points in UC sheet in total
print(f"ingested:{ingested}")  # 1013 ingested
print(f"already exists:{already_exists}")  # skipped 6 due to preexisting data
print(f"no sources:{no_sources}")  # skipped 86 due to 0 matches
print(f"multiple sources:{multiple_sources}")  # skipped 2 due to multiple matches
print(f"total:{ingested+already_exists+no_sources+multiple_sources}")  # 1108

if DB_SAVE:
    db.save_database(directory="data/")
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
logger = logging.getLogger("AstroDB")
logger.setLevel(logging.INFO)

DB_SAVE = True
DB_SAVE = False
RECREATE_DB = True
db = load_astrodb(
"SIMPLE.sqlite", recreatedb=RECREATE_DB, reference_tables=REFERENCE_TABLES
Expand Down
130 changes: 130 additions & 0 deletions simple/utils/astrometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Optional, Union
from sqlalchemy import and_
import sqlalchemy.exc
from simple.schema import Parallaxes
from astropy.units import Quantity
from astropy.table import Table
from astrodbkit2.astrodb import Database
Expand Down Expand Up @@ -181,6 +182,135 @@ def ingest_parallaxes(db, sources, plxs, plx_errs, plx_refs, comments=None):
return


def ingest_parallax(
    db,
    source: Optional[str] = None,
    parallax: Optional[float] = None,
    plx_error: Optional[float] = None,
    reference: Optional[str] = None,
    comment: Optional[str] = None,
    raise_error: bool = True,
):
    """
    Ingest a single parallax measurement and maintain the adopted flag.

    The new measurement is adopted when the source has no parallax yet, or
    when an adopted measurement exists and the new uncertainty is smaller
    than every uncertainty already stored for the source; in the latter case
    the previously adopted row is un-adopted.

    Parameters
    ----------
    db: astrodbkit2.astrodb.Database
        Database object
    source: str
        source name
    parallax: float
        parallax corresponding to the source
    plx_error: float
        parallax uncertainty
    reference: str
        reference for the parallax data
    comment: str
        comments
    raise_error: bool
        If True (default), raise AstroDBError when the insert violates a
        database integrity constraint; if False, only log a warning.
        Duplicate measurements always raise, regardless of this flag.

    Raises
    ------
    AstroDBError
        If a measurement with the same reference already exists for the
        source, or (when ``raise_error`` is True) if the insert fails with
        an integrity error.
    """
    # Search for existing parallax data and determine if this is the best
    # If no previous measurement exists, set the new one to the Adopted measurement
    adopted = None
    source_plx_data: Table = (
        db.query(db.Parallaxes).filter(db.Parallaxes.c.source == source).table()
    )

    if source_plx_data is None or len(source_plx_data) == 0:
        # if there's no other measurements in the database,
        # set new data Adopted = True
        adopted = True
        logger.debug("No other measurement")
    else:  # Parallax data already exists
        # check for duplicate measurement
        dupe_ind = source_plx_data["reference"] == reference
        if sum(dupe_ind):
            logger.debug(f"Duplicate measurement\n, {source_plx_data[dupe_ind]}")
            msg = "Duplicate measurement exists with same reference"
            raise AstroDBError(msg)

        logger.debug("!!! Another parallax measurement exists,")
        if logger.level == 10:  # 10 == logging.DEBUG
            source_plx_data.pprint_all()

        # check for previous adopted measurement and find new adopted
        # NOTE(review): when measurements exist but none is adopted, the new
        # row keeps adopted=None, as before -- confirm this is intended.
        adopted_ind = source_plx_data["adopted"] == 1
        if sum(adopted_ind):
            old_adopted = source_plx_data[adopted_ind]
            # if errors of new data are less than other measurements,
            # set Adopted = True. A missing uncertainty can never win, and
            # must not reach the comparison (None < float raises TypeError).
            adopted = plx_error is not None and plx_error < min(
                source_plx_data["parallax_error"]
            )
            if adopted:
                _unset_adopted_parallax(
                    db, old_adopted["source"][0], old_adopted["reference"][0]
                )
        logger.debug(f"The new measurement's adopted flag is:, {adopted}")

    # Construct data to be added
    parallax_data = {
        "source": source,
        "parallax": parallax,
        "parallax_error": plx_error,
        "reference": reference,
        "adopted": adopted,
        "comments": comment,
    }

    try:
        plx_obj = Parallaxes(**parallax_data)
        with db.session as session:
            session.add(plx_obj)
            session.commit()
        logger.info(f" Parallax added to database: {parallax_data}\n")
    except sqlalchemy.exc.IntegrityError as e:
        # Keep the database's own error text so callers can still inspect
        # it, and prepend a hint about the most likely causes.
        msg = (
            "The source may not exist in Sources table.\n"
            "The parallax reference may not exist in Publications table. "
            "Add it with add_publication function. \n"
            f"{e}"
        )
        if raise_error:
            raise AstroDBError(msg) from e
        logger.warning(msg)


def _unset_adopted_parallax(db, source, reference):
    """Set adopted=False on the Parallaxes row matching (source, reference)."""
    row_filter = and_(
        db.Parallaxes.c.source == source,
        db.Parallaxes.c.reference == reference,
    )
    with db.engine.connect() as conn:
        conn.execute(db.Parallaxes.update().where(row_filter).values(adopted=False))
        conn.commit()

    logger.debug("Old adopted measurement unset")
    if logger.level == 10:  # 10 == logging.DEBUG
        # Re-read the row only when debugging, to show the flag was cleared.
        db.query(db.Parallaxes).filter(row_filter).table().pprint_all()


# PROPER MOTIONS
def ingest_proper_motions(
db, sources, pm_ras, pm_ra_errs, pm_decs, pm_dec_errs, pm_references
Expand Down
42 changes: 34 additions & 8 deletions tests/test_astrometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from astropy.table import Table
from astrodb_utils import AstroDBError
from simple.utils.astrometry import (
ingest_parallaxes,
ingest_parallax,
ingest_proper_motions,
ingest_radial_velocity,
)
Expand All @@ -15,6 +15,7 @@ def t_plx():
[
{"source": "Fake 1", "plx": 113.0, "plx_err": 0.3, "plx_ref": "Ref 1"},
{"source": "Fake 2", "plx": 145.0, "plx_err": 0.5, "plx_ref": "Ref 1"},
{"source": "Fake 1", "plx": 113.0, "plx_err": 0.2, "plx_ref": "Ref 2"},
{"source": "Fake 3", "plx": 155.0, "plx_err": 0.6, "plx_ref": "Ref 2"},
]
)
Expand Down Expand Up @@ -68,25 +69,50 @@ def t_rv():

def test_ingest_parallaxes(temp_db, t_plx):
    """Ingest the fixture rows one by one and check values and adopted flags.

    "Fake 1" appears twice in the fixture: first with "Ref 1" (error 0.3),
    then with "Ref 2" (error 0.2). The later, more precise measurement must
    take over the adopted flag.
    """
    # Test ingest of parallax data
    for row in t_plx:
        ingest_parallax(
            temp_db,
            row["source"],
            row["plx"],
            row["plx_err"],
            row["plx_ref"],
        )

    # Rows are selected by source name rather than by position because the
    # queries carry no ORDER BY.
    ref1_results = (
        temp_db.query(temp_db.Parallaxes)
        .filter(temp_db.Parallaxes.c.reference == "Ref 1")
        .table()
    )
    assert len(ref1_results) == 2
    fake1_ref1 = ref1_results[ref1_results["source"] == "Fake 1"]
    assert len(fake1_ref1) == 1
    assert not fake1_ref1["adopted"][0]  # superseded by the Ref 2 measurement

    ref2_results = (
        temp_db.query(temp_db.Parallaxes)
        .filter(temp_db.Parallaxes.c.reference == "Ref 2")
        .table()
    )
    assert len(ref2_results) == 2
    fake3_ref2 = ref2_results[ref2_results["source"] == "Fake 3"]
    assert len(fake3_ref2) == 1
    assert fake3_ref2["parallax"][0] == 155
    assert fake3_ref2["parallax_error"][0] == 0.6
    fake1_ref2 = ref2_results[ref2_results["source"] == "Fake 1"]
    assert len(fake1_ref2) == 1
    assert fake1_ref2["adopted"][0]  # smaller error, so it becomes adopted


def test_parallax_exceptions(temp_db):
    """Check the AstroDBError messages raised by ingest_parallax."""
    # Each (source, reference) pair is expected to fail a foreign-key check.
    foreign_key_cases = [
        ("bad source", "Ref 1"),
        ("Fake 1", "bad ref"),
    ]
    for source_name, reference_name in foreign_key_cases:
        with pytest.raises(AstroDBError) as excinfo:
            ingest_parallax(temp_db, source_name, 1, 1, reference_name)
        assert "FOREIGN KEY constraint failed" in str(excinfo.value)

    # Ingesting the same source/reference pair twice must be rejected the
    # second time.
    duplicate_args = (temp_db, "Fake 2", 1, 1, "Ref 2")
    ingest_parallax(*duplicate_args)
    with pytest.raises(AstroDBError) as excinfo:
        ingest_parallax(*duplicate_args)
    expected_message = "Duplicate measurement exists with same reference"
    assert expected_message in str(excinfo.value)


def test_ingest_proper_motions(temp_db, t_pm):
Expand Down