Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ingest parallaxes from Ultracool sheet #528

Merged
merged 11 commits into from
Jul 8, 2024
101 changes: 101 additions & 0 deletions scripts/ingests/ultracool_sheet/Ingest_Parallax.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
from astrodb_utils import (
load_astrodb,
find_source_in_db,
AstroDBError,
)
import sys

sys.path.append(".")
import logging
from astropy.io import ascii
from simple.schema import Photometry
from simple.schema import REFERENCE_TABLES
from math import isnan
import sqlalchemy.exc
from simple.utils.astrometry import ingest_parallax
from scripts.ingests.ultracool_sheet.references import uc_ref_to_simple_ref

# Send this script's output through the shared "AstroDB" logger, at DEBUG
# verbosity so every per-source decision made during the ingest is visible.
logger = logging.getLogger("AstroDB")
logger.setLevel(logging.DEBUG)

# Run switches: RECREATE_DB is forwarded to load_astrodb below; DB_SAVE
# controls whether the database is written out at the end of the script.
RECREATE_DB = True
DB_SAVE = False

db = load_astrodb(
    "SIMPLE.sqlite",
    reference_tables=REFERENCE_TABLES,
    recreatedb=RECREATE_DB,
)


# Location of the Ultracool sheet: Google Sheets document id plus the id of
# the individual tab, combined into a CSV export URL.
doc_id = "1i98ft8g5mzPp2DNno0kcz4B9nzMxdpyz5UquAVhz-U8"
sheet_id = "361525788"
link = (
    f"https://docs.google.com/spreadsheets/d/{doc_id}/export?format=csv&gid={sheet_id}"
)

# Parse the exported CSV into an astropy table: the header is on the first
# line and the data starts on the line after it; format guessing and the
# fast reader are both switched off.
uc_sheet_table = ascii.read(
    link,
    format="csv",
    delimiter=",",
    header_start=0,
    data_start=1,
    guess=False,
    fast_reader=False,
)

# Tallies of how each sheet row that carries a parallax was handled.
no_sources = 0
multiple_sources = 0
ingested = 0
already_exists = 0

# Ingest loop: one sheet row at a time.
for source in uc_sheet_table:
    if isnan(source["plx_lit"]):  # skip if no data
        continue

    uc_sheet_name = source["name"]
    match = find_source_in_db(
        db,
        uc_sheet_name,
        ra=source["ra_j2000_formula"],
        dec=source["dec_j2000_formula"],
    )

    # Only an unambiguous (exactly one) match is ingested.
    if len(match) == 0:
        no_sources += 1
        continue
    if len(match) > 1:
        multiple_sources += 1
        continue

    # 1 Match found. INGEST!
    simple_source = match[0]
    logger.info(f"Match found for {uc_sheet_name}: {simple_source}")

    try:
        # The sheet separates multiple references with ";". The first one is
        # stored as the measurement's reference; every remaining one is kept
        # in the comment so that none is silently dropped.
        references = source["ref_plx_lit"].split(";")
        comment = None
        if len(references) > 1:
            other_references = ", ".join(
                str(uc_ref_to_simple_ref(db, ref)) for ref in references[1:]
            )
            comment = f"other references: {other_references}"
        ingest_parallax(
            db,
            simple_source,
            source["plx_lit"],
            source["plxerr_lit"],
            uc_ref_to_simple_ref(db, references[0]),
            comment,
        )
        ingested += 1
    except AstroDBError as e:
        msg = "ingest failed with error: " + str(e)
        # A duplicate measurement is expected and only counted; any other
        # ingest error aborts the run.
        if "Duplicate measurement exists" not in str(e):
            logger.warning(msg)
            raise AstroDBError(msg) from e
        already_exists += 1


# Report how the rows were handled. The numbers in the comments are the
# counts recorded by the script's author (1108 data points in UC sheet in
# total).
outcome_counts = (
    ("ingested", ingested),  # 899 ingested
    ("already exists", already_exists),  # skipped 463 due to preexisting data
    ("no sources", no_sources),  # skipped 129 due to 0 matches
    ("multiple sources", multiple_sources),  # skipped 0 due to multiple matches
)
for outcome_label, outcome_count in outcome_counts:
    logger.info(f"{outcome_label}:{outcome_count}")
logger.info(f"total:{sum(count for _, count in outcome_counts)}")  # 1491

# Write the database out only when saving was explicitly switched on.
if DB_SAVE:
    db.save_database(directory="data/")
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
logger = logging.getLogger("AstroDB")
logger.setLevel(logging.INFO)

DB_SAVE = True
DB_SAVE = False
RECREATE_DB = True
db = load_astrodb(
"SIMPLE.sqlite", recreatedb=RECREATE_DB, reference_tables=REFERENCE_TABLES
Expand Down
98 changes: 98 additions & 0 deletions simple/utils/astrometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Optional, Union
from sqlalchemy import and_
import sqlalchemy.exc
from simple.schema import Parallaxes
from astropy.units import Quantity
from astropy.table import Table
from astrodbkit2.astrodb import Database
Expand Down Expand Up @@ -181,6 +182,103 @@ def ingest_parallaxes(db, sources, plxs, plx_errs, plx_refs, comments=None):
return


def ingest_parallax(
    db,
    source: str = None,
    parallax: float = None,
    plx_error: float = None,
    reference: str = None,
    comment: str = None,
):
    """
    Ingest a single parallax measurement into the Parallaxes table.

    The ``adopted`` flag of the new row is decided from the measurements
    already stored for ``source``:

    * no existing rows: the new measurement is adopted;
    * an adopted row exists and ``plx_error`` is smaller than every stored
      ``parallax_error``: the new measurement is adopted and the previously
      adopted row is updated to ``adopted=False``;
    * an adopted row exists but the new error is not the smallest: the new
      measurement is stored with ``adopted=False``;
    * rows exist but none of them is adopted: ``adopted`` is left as ``None``.

    Parameters
    ----------
    db
        Database object providing ``query``, ``Parallaxes``, ``engine`` and
        ``session``.
    source : str
        Name of the source the measurement belongs to.
    parallax : float
        Parallax value to store.
    plx_error : float
        Uncertainty of the parallax; compared against the stored
        ``parallax_error`` values to choose the adopted measurement.
    reference : str
        Reference of the measurement.
    comment : str
        Optional free-text comment stored in the ``comments`` column.

    Raises
    ------
    AstroDBError
        If a parallax with the same ``reference`` already exists for
        ``source``.
    """
    # Search for existing parallax data and determine if this is the best.
    adopted = None
    source_plx_data: Table = (
        db.query(db.Parallaxes).filter(db.Parallaxes.c.source == source).table()
    )

    if source_plx_data is None or len(source_plx_data) == 0:
        # No other measurement in the database: the new one is adopted.
        adopted = True
        logger.debug("No other measurement")
    else:
        # Parallax data already exists; refuse an exact duplicate first.
        dupe_ind = source_plx_data["reference"] == reference
        if sum(dupe_ind):
            logger.debug(f"Duplicate measurement\n, {source_plx_data[dupe_ind]}")
            msg = "Duplicate measurement exists with same reference"
            raise AstroDBError(msg)

        logger.debug("!!! Another parallax measurement exists,")
        if logger.level == 10:  # 10 is the numeric value of logging.DEBUG
            source_plx_data.pprint_all()

        # Check for a previously adopted measurement and pick the new adopted.
        adopted_ind = source_plx_data["adopted"] == 1
        if sum(adopted_ind):
            old_adopted = source_plx_data[adopted_ind]
            # The new measurement is adopted only if its error is smaller
            # than that of every measurement already stored.
            if plx_error < min(source_plx_data["parallax_error"]):
                adopted = True

                # Unset the old adopted row, identified by (source, reference).
                old_adopted_filter = and_(
                    db.Parallaxes.c.source == old_adopted["source"][0],
                    db.Parallaxes.c.reference == old_adopted["reference"][0],
                )
                with db.engine.connect() as conn:
                    conn.execute(
                        db.Parallaxes.update()
                        .where(old_adopted_filter)
                        .values(adopted=False)
                    )
                    conn.commit()

                logger.debug("Old adopted measurement unset")
                if logger.level == 10:
                    # Re-read the row so the changed flag can be inspected.
                    old_adopted_data = (
                        db.query(db.Parallaxes).filter(old_adopted_filter).table()
                    )
                    old_adopted_data.pprint_all()
            else:
                adopted = False
        logger.debug(f"The new measurement's adopted flag is:, {adopted}")

    # Construct data to be added
    parallax_data = {
        "source": source,
        "parallax": parallax,
        "parallax_error": plx_error,
        "reference": reference,
        "adopted": adopted,
        "comments": comment,
    }

    plx_obj = Parallaxes(**parallax_data)
    with db.session as session:
        session.add(plx_obj)
        session.commit()
    logger.info(f" Parallax added to database: {parallax_data}\n")
Exu-112 marked this conversation as resolved.
Show resolved Hide resolved


# PROPER MOTIONS
def ingest_proper_motions(
db, sources, pm_ras, pm_ra_errs, pm_decs, pm_dec_errs, pm_references
Expand Down
25 changes: 17 additions & 8 deletions tests/test_astrometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from astropy.table import Table
from astrodb_utils import AstroDBError
from simple.utils.astrometry import (
ingest_parallaxes,
ingest_parallax,
ingest_proper_motions,
ingest_radial_velocity,
)
Expand All @@ -15,6 +15,7 @@ def t_plx():
[
{"source": "Fake 1", "plx": 113.0, "plx_err": 0.3, "plx_ref": "Ref 1"},
{"source": "Fake 2", "plx": 145.0, "plx_err": 0.5, "plx_ref": "Ref 1"},
{"source": "Fake 1", "plx": 113.0, "plx_err": 0.2, "plx_ref": "Ref 2"},
{"source": "Fake 3", "plx": 155.0, "plx_err": 0.6, "plx_ref": "Ref 2"},
]
)
Expand Down Expand Up @@ -68,25 +69,33 @@ def t_rv():

def test_ingest_parallaxes(temp_db, t_plx):
    """Ingest the fixture rows one by one and check values and adopted flags."""
    # Test ingest of parallax data, one measurement per call.
    for row in t_plx:
        ingest_parallax(
            temp_db,
            row["source"],
            row["plx"],
            row["plx_err"],
            row["plx_ref"],
        )

    # Rows are looked up by source name below because the query does not
    # specify an ordering, so positional indexing would be fragile.
    results = (
        temp_db.query(temp_db.Parallaxes)
        .filter(temp_db.Parallaxes.c.reference == "Ref 1")
        .table()
    )
    assert len(results) == 2
    ref_1_rows = {row["source"]: row for row in results}
    # "Fake 1" has a second measurement ("Ref 2") with a smaller error, so its
    # "Ref 1" measurement must no longer be the adopted one.
    assert "Fake 1" in ref_1_rows
    assert not ref_1_rows["Fake 1"]["adopted"]

    results = (
        temp_db.query(temp_db.Parallaxes)
        .filter(temp_db.Parallaxes.c.reference == "Ref 2")
        .table()
    )
    assert len(results) == 2
    ref_2_rows = {row["source"]: row for row in results}
    assert "Fake 3" in ref_2_rows
    assert ref_2_rows["Fake 3"]["parallax"] == 155
    assert ref_2_rows["Fake 3"]["parallax_error"] == 0.6
    # The lower-error "Ref 2" measurement of "Fake 1" is the adopted one.
    assert "Fake 1" in ref_2_rows
    assert ref_2_rows["Fake 1"]["adopted"]


def test_ingest_proper_motions(temp_db, t_pm):
Expand Down