From a0af57735948e324541fc1fa117ccf946537f73e Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 19 Sep 2023 17:08:57 -0400 Subject: [PATCH 1/7] add test for spectrum urls --- tests/test_data.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/tests/test_data.py b/tests/test_data.py index 8a91be945..993d13fc1 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -3,12 +3,14 @@ import os import pytest import sys +import requests sys.path.append('.') from simple.schema import * from astrodbkit2.astrodb import create_database, Database from sqlalchemy import except_, select, and_ from . import REFERENCE_TABLES +from scripts.ingests.utils import check_internet_connection DB_NAME = 'temp.db' DB_PATH = 'data' @@ -366,7 +368,24 @@ def test_spectra(db): t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() assert len(t) == 20, f'found {len(t)} spectra from {ref}' - +@pytest.mark.xfail() +def test_spectra_urls(db): + spectra_urls = db.query(db.Spectra.c.spectrum).astropy() + broken_urls = [] + codes = [] + internet = check_internet_connection() + if internet: + for spectrum_url in spectra_urls['spectrum']: + request_response = requests.head(spectrum_url) + status_code = request_response.status_code + # The website is up if the status code is 200 + # cuny academic commons links give 301 status code + if status_code != 200 and status_code != 301: + broken_urls.append(spectrum_url) + codes.append(status_code) + assert len(broken_urls) == 0, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}' + + def test_spectral_types(db): # Test to verify existing counts of spectral types grouped by regime regime = 'optical' From ab9e2aa94efad16d06c64b892fa6d8fc7a8732f2 Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 26 Sep 2023 16:39:45 -0400 Subject: [PATCH 2/7] create scheduled_checks for URLs --- tests/scheduled_checks.py | 57 +++++++++++++++++++++++++++++++++++++++ tests/test_data.py | 20 ++------------ 2 files changed, 59 insertions(+), 18 deletions(-) create mode 100644 tests/scheduled_checks.py diff --git a/tests/scheduled_checks.py b/tests/scheduled_checks.py new file mode 100644 index 000000000..61f1af4a3 --- /dev/null +++ b/tests/scheduled_checks.py @@ -0,0 +1,57 @@ +import os +import pytest +import sys +import requests + +sys.path.append('.') +from simple.schema import * +from astrodbkit2.astrodb import create_database, Database +from sqlalchemy import except_, select, and_ +from . import REFERENCE_TABLES +from scripts.ingests.utils import check_internet_connection + +DB_NAME = 'temp.db' +DB_PATH = 'data' + + +# Load the database for use in individual tests +@pytest.fixture(scope="module") +def db(): + # Create a fresh temporary database and assert it exists + # Because we've imported simple.schema, we will be using that schema for the database + + if os.path.exists(DB_NAME): + os.remove(DB_NAME) + connection_string = 'sqlite:///' + DB_NAME + create_database(connection_string) + assert os.path.exists(DB_NAME) + + # Connect to the new database and confirm it has the Sources table + db = Database(connection_string, reference_tables=REFERENCE_TABLES) + assert db + assert 'source' in [c.name for c in db.Sources.columns] + + # Load data into an in-memory sqlite database first, for performance + temp_db = Database('sqlite://', reference_tables=REFERENCE_TABLES) # creates and connects to a temporary in-memory database + temp_db.load_database(DB_PATH, verbose=False) # loads the data from the data files into the database + temp_db.dump_sqlite(DB_NAME) # dump in-memory database to file + db = Database('sqlite:///' + DB_NAME, reference_tables=REFERENCE_TABLES) # replace database object with new file version + + return db + + +def test_spectra_urls(db): + spectra_urls = db.query(db.Spectra.c.spectrum).astropy() + broken_urls = [] + codes = [] + internet = check_internet_connection() + if internet: + for spectrum_url in spectra_urls['spectrum']: + request_response = requests.head(spectrum_url) + status_code = request_response.status_code + # The website is up if the status code is 200 + # cuny academic commons links give 301 status code + if status_code != 200 and status_code != 301: + broken_urls.append(spectrum_url) + codes.append(status_code) + assert len(broken_urls) == 149, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}' diff --git a/tests/test_data.py b/tests/test_data.py index 993d13fc1..bf23d993f 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -368,24 +368,7 @@ def test_spectra(db): t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() assert len(t) == 20, f'found {len(t)} spectra from {ref}' -@pytest.mark.xfail() -def test_spectra_urls(db): - spectra_urls = db.query(db.Spectra.c.spectrum).astropy() - broken_urls = [] - codes = [] - internet = check_internet_connection() - if internet: - for spectrum_url in spectra_urls['spectrum']: - request_response = requests.head(spectrum_url) - status_code = request_response.status_code - # The website is up if the status code is 200 - # cuny academic commons links give 301 status code - if status_code != 200 and status_code != 301: - broken_urls.append(spectrum_url) - codes.append(status_code) - assert len(broken_urls) == 0, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}' - - + def test_spectral_types(db): # Test to verify existing counts of spectral types grouped by regime regime = 'optical' @@ -439,6 +422,7 @@ def test_spectral_types(db): n_spectral_types = db.query(db.SpectralTypes).count() assert len(m_dwarfs) + len(l_dwarfs) + len(t_dwarfs) + len(y_dwarfs) == n_spectral_types + # Individual ingest tests # ----------------------------------------------------------------------------------------- def test_Manj19_data(db): From 442328bd95f2953f81a31b302edd4f804dcec195 Mon Sep 17 00:00:00 2001 From: kelle Date: Tue, 26 Sep 2023 17:15:08 -0400 Subject: [PATCH 3/7] fix IRS URLs script --- scripts/updates/fix_IRS_spectra_links.py | 26 ++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 scripts/updates/fix_IRS_spectra_links.py diff --git a/scripts/updates/fix_IRS_spectra_links.py b/scripts/updates/fix_IRS_spectra_links.py new file mode 100644 index 000000000..2dec3be48 --- /dev/null +++ b/scripts/updates/fix_IRS_spectra_links.py @@ -0,0 +1,26 @@ +from scripts.ingests.utils import load_simpledb +from astropy.table import Table + + +SAVE_DB = True # save the data files in addition to modifying the .db file +RECREATE_DB = True # recreates the .db file from the data files + +# LOAD THE DATABASE +db = load_simpledb('SIMPLE.db', recreatedb=RECREATE_DB) + + +# link to live google sheet +link = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQG5cGkI2aHPHD4b6ZZPTU4jjQMirU_z-yhl5ElI3p6nCIufIL64crC-yFalF58OauWHxmYvEKR_isY/pub?gid=0&single=true&output=csv' +columns = ['source', 'original_spectrum', 'fixed_spectrum'] +spectra_link_table = Table.read(link, format='ascii', data_start=2, names=columns, guess=False, fast_reader=False, delimiter=',') + +for row in spectra_link_table: + # t = db.query(db.Spectra).filter(db.Spectra.c.original_spectrum == row['original_spectrum']).astropy() + # print(t['spectrum']) + with db.engine.connect() as conn: + conn.execute(db.Spectra.update().where(db.Spectra.c.original_spectrum == row['original_spectrum']).values(spectrum=row['fixed_spectrum'])) + conn.commit() + +# WRITE THE JSON FILES +if SAVE_DB: + db.save_database(directory='data/') \ No newline at end of file From ef1b484458e246cdbc87f57a983254e5ab0a12a8 Mon Sep 17 00:00:00 2001 From: kelle Date: Fri, 13 Oct 2023 16:36:29 -0400 Subject: [PATCH 4/7] polish of the test scripts. --- tests/scheduled_checks.py | 39 ++++++++++++++++++++++++--------------- tests/test_data.py | 12 ++++++------ 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/tests/scheduled_checks.py b/tests/scheduled_checks.py index 61f1af4a3..2e2927889 100644 --- a/tests/scheduled_checks.py +++ b/tests/scheduled_checks.py @@ -2,16 +2,16 @@ import pytest import sys import requests +from astrodbkit2.astrodb import create_database, Database +from scripts.ingests.utils import check_internet_connection -sys.path.append('.') +sys.path.append(".") from simple.schema import * -from astrodbkit2.astrodb import create_database, Database -from sqlalchemy import except_, select, and_ from . import REFERENCE_TABLES -from scripts.ingests.utils import check_internet_connection -DB_NAME = 'temp.db' -DB_PATH = 'data' + +DB_NAME = "temp.db" +DB_PATH = "data" # Load the database for use in individual tests @@ -22,20 +22,27 @@ def db(): if os.path.exists(DB_NAME): os.remove(DB_NAME) - connection_string = 'sqlite:///' + DB_NAME + connection_string = "sqlite:///" + DB_NAME create_database(connection_string) assert os.path.exists(DB_NAME) # Connect to the new database and confirm it has the Sources table db = Database(connection_string, reference_tables=REFERENCE_TABLES) assert db - assert 'source' in [c.name for c in db.Sources.columns] + assert "source" in [c.name for c in db.Sources.columns] # Load data into an in-memory sqlite database first, for performance - temp_db = Database('sqlite://', reference_tables=REFERENCE_TABLES) # creates and connects to a temporary in-memory database - temp_db.load_database(DB_PATH, verbose=False) # loads the data from the data files into the database - temp_db.dump_sqlite(DB_NAME) # dump in-memory database to file - db = Database('sqlite:///' + DB_NAME, reference_tables=REFERENCE_TABLES) # replace database object with new file version + + # create and connects to a temporary in-memory database + temp_db = Database("sqlite://", reference_tables=REFERENCE_TABLES) + + # load the data from the data files into the database + temp_db.load_database(DB_PATH, verbose=False) + + # dump in-memory database to file + temp_db.dump_sqlite(DB_NAME) + # replace database object with new file version + db = Database("sqlite:///" + DB_NAME, reference_tables=REFERENCE_TABLES) return db @@ -46,12 +53,14 @@ def test_spectra_urls(db): codes = [] internet = check_internet_connection() if internet: - for spectrum_url in spectra_urls['spectrum']: + for spectrum_url in spectra_urls["spectrum"]: request_response = requests.head(spectrum_url) - status_code = request_response.status_code + status_code = request_response.status_code # The website is up if the status code is 200 # cuny academic commons links give 301 status code if status_code != 200 and status_code != 301: broken_urls.append(spectrum_url) codes.append(status_code) - assert len(broken_urls) == 149, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}' + assert ( + len(broken_urls) == 149 + ), f"found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}" diff --git a/tests/test_data.py b/tests/test_data.py index bf23d993f..360490723 100644 --- a/tests/test_data.py +++ b/tests/test_data.py @@ -3,14 +3,13 @@ import os import pytest import sys -import requests +from astrodbkit2.astrodb import create_database, Database +from sqlalchemy import except_, select, and_ sys.path.append('.') from simple.schema import * -from astrodbkit2.astrodb import create_database, Database -from sqlalchemy import except_, select, and_ from . import REFERENCE_TABLES -from scripts.ingests.utils import check_internet_connection + DB_NAME = 'temp.db' DB_PATH = 'data' @@ -508,7 +507,7 @@ def test_Kirk19_ingest(db): # Test spectral types added - # Test parallaxes + # Test parallaxes ref = 'Kirk19' t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy() assert len(t) == 23, f'found {len(t)} parallax entries for {ref}' @@ -572,8 +571,9 @@ def test_suar22_ingest(db): t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy() assert len(t) == 112, f'found {len(t)} spectra entries for {ref}' + def test_modeledparameters(db): # Test to verify existing counts of modeled parameters ref = 'Fili15' t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.reference == ref).astropy() - assert len(t) == 696, f'found {len(t)} modeled parameters with {ref} reference' \ No newline at end of file + assert len(t) == 696, f'found {len(t)} modeled parameters with {ref} reference' From af1e7583a24d7f91646f4565983fe478dd8c19c9 Mon Sep 17 00:00:00 2001 From: kelle Date: Fri, 13 Oct 2023 16:50:49 -0400 Subject: [PATCH 5/7] add scheduled test to run once per month --- .github/workflows/scheduled-tests.yml | 30 +++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/scheduled-tests.yml diff --git a/.github/workflows/scheduled-tests.yml b/.github/workflows/scheduled-tests.yml new file mode 100644 index 000000000..6d30d5809 --- /dev/null +++ b/.github/workflows/scheduled-tests.yml @@ -0,0 +1,30 @@ +# This workflow will install Python dependencies, run tests and lint with a single version of Python +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: Database Integration Tests + +on: + schedule: + - cron: '30 1 1 * *' +jobs: + build: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + + - name: Set up Python 3.10 + uses: actions/setup-python@v3 + with: + python-version: '3.10' + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install pytest ads + pip install astrodbkit2 + + - name: Test with pytest + run: | + pytest -s tests/scheduled_tests.py From 3bb985309a5aa1e095d2173a461da75657dc2ad7 Mon Sep 17 00:00:00 2001 From: kelle Date: Fri, 13 Oct 2023 17:07:23 -0400 Subject: [PATCH 6/7] Revert "fix IRS URLs script" This reverts commit 442328bd95f2953f81a31b302edd4f804dcec195. --- scripts/updates/fix_IRS_spectra_links.py | 26 ------------------------ 1 file changed, 26 deletions(-) delete mode 100644 scripts/updates/fix_IRS_spectra_links.py diff --git a/scripts/updates/fix_IRS_spectra_links.py b/scripts/updates/fix_IRS_spectra_links.py deleted file mode 100644 index 2dec3be48..000000000 --- a/scripts/updates/fix_IRS_spectra_links.py +++ /dev/null @@ -1,26 +0,0 @@ -from scripts.ingests.utils import load_simpledb -from astropy.table import Table - - -SAVE_DB = True # save the data files in addition to modifying the .db file -RECREATE_DB = True # recreates the .db file from the data files - -# LOAD THE DATABASE -db = load_simpledb('SIMPLE.db', recreatedb=RECREATE_DB) - - -# link to live google sheet -link = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQG5cGkI2aHPHD4b6ZZPTU4jjQMirU_z-yhl5ElI3p6nCIufIL64crC-yFalF58OauWHxmYvEKR_isY/pub?gid=0&single=true&output=csv' -columns = ['source', 'original_spectrum', 'fixed_spectrum'] -spectra_link_table = Table.read(link, format='ascii', data_start=2, names=columns, guess=False, fast_reader=False, delimiter=',') - -for row in spectra_link_table: - # t = db.query(db.Spectra).filter(db.Spectra.c.original_spectrum == row['original_spectrum']).astropy() - # print(t['spectrum']) - with db.engine.connect() as conn: - conn.execute(db.Spectra.update().where(db.Spectra.c.original_spectrum == row['original_spectrum']).values(spectrum=row['fixed_spectrum'])) - conn.commit() - -# WRITE THE JSON FILES -if SAVE_DB: - db.save_database(directory='data/') \ No newline at end of file From 90f3c0fcf6fd6a353f63dfc95f2ed9896dfbb7a0 Mon Sep 17 00:00:00 2001 From: Kelle Cruz Date: Tue, 17 Oct 2023 16:23:08 -0400 Subject: [PATCH 7/7] change name scheduled-tests.yml --- .github/workflows/scheduled-tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/scheduled-tests.yml b/.github/workflows/scheduled-tests.yml index 6d30d5809..db51bb06e 100644 --- a/.github/workflows/scheduled-tests.yml +++ b/.github/workflows/scheduled-tests.yml @@ -1,7 +1,7 @@ # This workflow will install Python dependencies, run tests and lint with a single version of Python # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: Database Integration Tests +name: Scheduled Checks on: schedule: