From a0af57735948e324541fc1fa117ccf946537f73e Mon Sep 17 00:00:00 2001
From: kelle <kellecruz@gmail.com>
Date: Tue, 19 Sep 2023 17:08:57 -0400
Subject: [PATCH 1/7] add test for spectrum urls

---
 tests/test_data.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tests/test_data.py b/tests/test_data.py
index 8a91be945..993d13fc1 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -3,12 +3,14 @@
 import os
 import pytest
 import sys
+import requests
 
 sys.path.append('.')
 from simple.schema import *
 from astrodbkit2.astrodb import create_database, Database
 from sqlalchemy import except_, select, and_
 from . import REFERENCE_TABLES
+from scripts.ingests.utils import check_internet_connection
 
 DB_NAME = 'temp.db'
 DB_PATH = 'data'
@@ -366,7 +368,24 @@ def test_spectra(db):
     t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy()
     assert len(t) == 20, f'found {len(t)} spectra from {ref}'
 
-
+@pytest.mark.xfail()
+def test_spectra_urls(db):
+    spectra_urls = db.query(db.Spectra.c.spectrum).astropy()
+    broken_urls = []
+    codes = []
+    internet = check_internet_connection()
+    if internet:
+        for spectrum_url in spectra_urls['spectrum']:
+            request_response = requests.head(spectrum_url)
+            status_code = request_response.status_code  
+            # The website is up if the status code is 200
+            # cuny academic commons links give 301 status code
+            if status_code != 200 and status_code != 301:
+                broken_urls.append(spectrum_url)
+                codes.append(status_code)
+    assert len(broken_urls) == 0, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}'
+
+    
 def test_spectral_types(db):
     # Test to verify existing counts of spectral types grouped by regime
     regime = 'optical'

From ab9e2aa94efad16d06c64b892fa6d8fc7a8732f2 Mon Sep 17 00:00:00 2001
From: kelle <kellecruz@gmail.com>
Date: Tue, 26 Sep 2023 16:39:45 -0400
Subject: [PATCH 2/7] create scheduled_checks for URLs

---
 tests/scheduled_checks.py | 57 +++++++++++++++++++++++++++++++++++++++
 tests/test_data.py        | 20 ++------------
 2 files changed, 59 insertions(+), 18 deletions(-)
 create mode 100644 tests/scheduled_checks.py

diff --git a/tests/scheduled_checks.py b/tests/scheduled_checks.py
new file mode 100644
index 000000000..61f1af4a3
--- /dev/null
+++ b/tests/scheduled_checks.py
@@ -0,0 +1,57 @@
+import os
+import pytest
+import sys
+import requests
+
+sys.path.append('.')
+from simple.schema import *
+from astrodbkit2.astrodb import create_database, Database
+from sqlalchemy import except_, select, and_
+from . import REFERENCE_TABLES
+from scripts.ingests.utils import check_internet_connection
+
+DB_NAME = 'temp.db'
+DB_PATH = 'data'
+
+
+# Load the database for use in individual tests
+@pytest.fixture(scope="module")
+def db():
+    # Create a fresh temporary database and assert it exists
+    # Because we've imported simple.schema, we will be using that schema for the database
+
+    if os.path.exists(DB_NAME):
+        os.remove(DB_NAME)
+    connection_string = 'sqlite:///' + DB_NAME
+    create_database(connection_string)
+    assert os.path.exists(DB_NAME)
+
+    # Connect to the new database and confirm it has the Sources table
+    db = Database(connection_string, reference_tables=REFERENCE_TABLES)
+    assert db
+    assert 'source' in [c.name for c in db.Sources.columns]
+
+    # Load data into an in-memory sqlite database first, for performance
+    temp_db = Database('sqlite://', reference_tables=REFERENCE_TABLES)  # creates and connects to a temporary in-memory database
+    temp_db.load_database(DB_PATH, verbose=False)  # loads the data from the data files into the database
+    temp_db.dump_sqlite(DB_NAME)  # dump in-memory database to file
+    db = Database('sqlite:///' + DB_NAME, reference_tables=REFERENCE_TABLES)  # replace database object with new file version
+
+    return db
+
+
+def test_spectra_urls(db):
+    spectra_urls = db.query(db.Spectra.c.spectrum).astropy()
+    broken_urls = []
+    codes = []
+    internet = check_internet_connection()
+    if internet:
+        for spectrum_url in spectra_urls['spectrum']:
+            request_response = requests.head(spectrum_url)
+            status_code = request_response.status_code  
+            # The website is up if the status code is 200
+            # cuny academic commons links give 301 status code
+            if status_code != 200 and status_code != 301:
+                broken_urls.append(spectrum_url)
+                codes.append(status_code)
+    assert len(broken_urls) == 149, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}'
diff --git a/tests/test_data.py b/tests/test_data.py
index 993d13fc1..bf23d993f 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -368,24 +368,7 @@ def test_spectra(db):
     t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy()
     assert len(t) == 20, f'found {len(t)} spectra from {ref}'
 
-@pytest.mark.xfail()
-def test_spectra_urls(db):
-    spectra_urls = db.query(db.Spectra.c.spectrum).astropy()
-    broken_urls = []
-    codes = []
-    internet = check_internet_connection()
-    if internet:
-        for spectrum_url in spectra_urls['spectrum']:
-            request_response = requests.head(spectrum_url)
-            status_code = request_response.status_code  
-            # The website is up if the status code is 200
-            # cuny academic commons links give 301 status code
-            if status_code != 200 and status_code != 301:
-                broken_urls.append(spectrum_url)
-                codes.append(status_code)
-    assert len(broken_urls) == 0, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}'
-
-    
+
 def test_spectral_types(db):
     # Test to verify existing counts of spectral types grouped by regime
     regime = 'optical'
@@ -439,6 +422,7 @@ def test_spectral_types(db):
     n_spectral_types = db.query(db.SpectralTypes).count()
     assert len(m_dwarfs) + len(l_dwarfs) + len(t_dwarfs) + len(y_dwarfs) == n_spectral_types
 
+
 # Individual ingest tests
 # -----------------------------------------------------------------------------------------
 def test_Manj19_data(db):

From 442328bd95f2953f81a31b302edd4f804dcec195 Mon Sep 17 00:00:00 2001
From: kelle <kellecruz@gmail.com>
Date: Tue, 26 Sep 2023 17:15:08 -0400
Subject: [PATCH 3/7] fix IRS URLs script

---
 scripts/updates/fix_IRS_spectra_links.py | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)
 create mode 100644 scripts/updates/fix_IRS_spectra_links.py

diff --git a/scripts/updates/fix_IRS_spectra_links.py b/scripts/updates/fix_IRS_spectra_links.py
new file mode 100644
index 000000000..2dec3be48
--- /dev/null
+++ b/scripts/updates/fix_IRS_spectra_links.py
@@ -0,0 +1,26 @@
+from scripts.ingests.utils import load_simpledb
+from astropy.table import Table
+
+
+SAVE_DB = True  # save the data files in addition to modifying the .db file
+RECREATE_DB = True  # recreates the .db file from the data files
+
+# LOAD THE DATABASE
+db = load_simpledb('SIMPLE.db', recreatedb=RECREATE_DB)
+
+
+#  link to live google sheet
+link = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQG5cGkI2aHPHD4b6ZZPTU4jjQMirU_z-yhl5ElI3p6nCIufIL64crC-yFalF58OauWHxmYvEKR_isY/pub?gid=0&single=true&output=csv'
+columns = ['source', 'original_spectrum', 'fixed_spectrum']
+spectra_link_table = Table.read(link, format='ascii', data_start=2, names=columns, guess=False, fast_reader=False, delimiter=',')
+
+for row in spectra_link_table:
+    # t = db.query(db.Spectra).filter(db.Spectra.c.original_spectrum == row['original_spectrum']).astropy()
+    # print(t['spectrum'])
+    with db.engine.connect() as conn:
+        conn.execute(db.Spectra.update().where(db.Spectra.c.original_spectrum == row['original_spectrum']).values(spectrum=row['fixed_spectrum']))
+        conn.commit()
+
+# WRITE THE JSON FILES
+if SAVE_DB:
+    db.save_database(directory='data/')
\ No newline at end of file

From ef1b484458e246cdbc87f57a983254e5ab0a12a8 Mon Sep 17 00:00:00 2001
From: kelle <kellecruz@gmail.com>
Date: Fri, 13 Oct 2023 16:36:29 -0400
Subject: [PATCH 4/7] polish of the test scripts.

---
 tests/scheduled_checks.py | 39 ++++++++++++++++++++++++---------------
 tests/test_data.py        | 12 ++++++------
 2 files changed, 30 insertions(+), 21 deletions(-)

diff --git a/tests/scheduled_checks.py b/tests/scheduled_checks.py
index 61f1af4a3..2e2927889 100644
--- a/tests/scheduled_checks.py
+++ b/tests/scheduled_checks.py
@@ -2,16 +2,16 @@
 import pytest
 import sys
 import requests
+from astrodbkit2.astrodb import create_database, Database
+from scripts.ingests.utils import check_internet_connection
 
-sys.path.append('.')
+sys.path.append(".")
 from simple.schema import *
-from astrodbkit2.astrodb import create_database, Database
-from sqlalchemy import except_, select, and_
 from . import REFERENCE_TABLES
-from scripts.ingests.utils import check_internet_connection
 
-DB_NAME = 'temp.db'
-DB_PATH = 'data'
+
+DB_NAME = "temp.db"
+DB_PATH = "data"
 
 
 # Load the database for use in individual tests
@@ -22,20 +22,27 @@ def db():
 
     if os.path.exists(DB_NAME):
         os.remove(DB_NAME)
-    connection_string = 'sqlite:///' + DB_NAME
+    connection_string = "sqlite:///" + DB_NAME
     create_database(connection_string)
     assert os.path.exists(DB_NAME)
 
     # Connect to the new database and confirm it has the Sources table
     db = Database(connection_string, reference_tables=REFERENCE_TABLES)
     assert db
-    assert 'source' in [c.name for c in db.Sources.columns]
+    assert "source" in [c.name for c in db.Sources.columns]
 
     # Load data into an in-memory sqlite database first, for performance
-    temp_db = Database('sqlite://', reference_tables=REFERENCE_TABLES)  # creates and connects to a temporary in-memory database
-    temp_db.load_database(DB_PATH, verbose=False)  # loads the data from the data files into the database
-    temp_db.dump_sqlite(DB_NAME)  # dump in-memory database to file
-    db = Database('sqlite:///' + DB_NAME, reference_tables=REFERENCE_TABLES)  # replace database object with new file version
+
+    # create and connect to a temporary in-memory database
+    temp_db = Database("sqlite://", reference_tables=REFERENCE_TABLES)
+
+    # load the data from the data files into the database
+    temp_db.load_database(DB_PATH, verbose=False)
+
+    # dump in-memory database to file
+    temp_db.dump_sqlite(DB_NAME)
+    # replace database object with new file version
+    db = Database("sqlite:///" + DB_NAME, reference_tables=REFERENCE_TABLES)
 
     return db
 
@@ -46,12 +53,14 @@ def test_spectra_urls(db):
     codes = []
     internet = check_internet_connection()
     if internet:
-        for spectrum_url in spectra_urls['spectrum']:
+        for spectrum_url in spectra_urls["spectrum"]:
             request_response = requests.head(spectrum_url)
-            status_code = request_response.status_code  
+            status_code = request_response.status_code
             # The website is up if the status code is 200
             # cuny academic commons links give 301 status code
             if status_code != 200 and status_code != 301:
                 broken_urls.append(spectrum_url)
                 codes.append(status_code)
-    assert len(broken_urls) == 149, f'found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}'
+    assert (
+        len(broken_urls) == 149
+    ), f"found {len(broken_urls)} broken spectra urls: {broken_urls}, {codes}"
diff --git a/tests/test_data.py b/tests/test_data.py
index bf23d993f..360490723 100644
--- a/tests/test_data.py
+++ b/tests/test_data.py
@@ -3,14 +3,13 @@
 import os
 import pytest
 import sys
-import requests
+from astrodbkit2.astrodb import create_database, Database
+from sqlalchemy import except_, select, and_
 
 sys.path.append('.')
 from simple.schema import *
-from astrodbkit2.astrodb import create_database, Database
-from sqlalchemy import except_, select, and_
 from . import REFERENCE_TABLES
-from scripts.ingests.utils import check_internet_connection
+
 
 DB_NAME = 'temp.db'
 DB_PATH = 'data'
@@ -508,7 +507,7 @@ def test_Kirk19_ingest(db):
 
     # Test spectral types added
 
-    # Test parallaxes 
+    # Test parallaxes
     ref = 'Kirk19'
     t = db.query(db.Parallaxes).filter(db.Parallaxes.c.reference == ref).astropy()
     assert len(t) == 23, f'found {len(t)} parallax entries for {ref}'
@@ -572,8 +571,9 @@ def test_suar22_ingest(db):
     t = db.query(db.Spectra).filter(db.Spectra.c.reference == ref).astropy()
     assert len(t) == 112, f'found {len(t)} spectra entries for {ref}'
 
+
 def test_modeledparameters(db):
     # Test to verify existing counts of modeled parameters
     ref = 'Fili15'
     t = db.query(db.ModeledParameters).filter(db.ModeledParameters.c.reference == ref).astropy()
-    assert len(t) == 696, f'found {len(t)} modeled parameters with {ref} reference'
\ No newline at end of file
+    assert len(t) == 696, f'found {len(t)} modeled parameters with {ref} reference'

From af1e7583a24d7f91646f4565983fe478dd8c19c9 Mon Sep 17 00:00:00 2001
From: kelle <kellecruz@gmail.com>
Date: Fri, 13 Oct 2023 16:50:49 -0400
Subject: [PATCH 5/7] add scheduled test to run once per month

---
 .github/workflows/scheduled-tests.yml | 30 +++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)
 create mode 100644 .github/workflows/scheduled-tests.yml

diff --git a/.github/workflows/scheduled-tests.yml b/.github/workflows/scheduled-tests.yml
new file mode 100644
index 000000000..6d30d5809
--- /dev/null
+++ b/.github/workflows/scheduled-tests.yml
@@ -0,0 +1,30 @@
+# This workflow will install Python dependencies, run tests and lint with a single version of Python
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Database Integration Tests
+
+on:
+  schedule:
+    - cron: '30 1 1 * *'
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v3
+
+    - name: Set up Python 3.10
+      uses: actions/setup-python@v3
+      with:
+        python-version: '3.10'
+
+    - name: Install dependencies
+      run: |
+        python -m pip install --upgrade pip
+        pip install pytest ads
+        pip install astrodbkit2
+
+    - name: Test with pytest  # runs the scheduled URL checks in tests/scheduled_checks.py
+      run: |
+        pytest -s tests/scheduled_checks.py

From 3bb985309a5aa1e095d2173a461da75657dc2ad7 Mon Sep 17 00:00:00 2001
From: kelle <kellecruz@gmail.com>
Date: Fri, 13 Oct 2023 17:07:23 -0400
Subject: [PATCH 6/7] Revert "fix IRS URLs script"

This reverts commit 442328bd95f2953f81a31b302edd4f804dcec195.
---
 scripts/updates/fix_IRS_spectra_links.py | 26 ------------------------
 1 file changed, 26 deletions(-)
 delete mode 100644 scripts/updates/fix_IRS_spectra_links.py

diff --git a/scripts/updates/fix_IRS_spectra_links.py b/scripts/updates/fix_IRS_spectra_links.py
deleted file mode 100644
index 2dec3be48..000000000
--- a/scripts/updates/fix_IRS_spectra_links.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from scripts.ingests.utils import load_simpledb
-from astropy.table import Table
-
-
-SAVE_DB = True  # save the data files in addition to modifying the .db file
-RECREATE_DB = True  # recreates the .db file from the data files
-
-# LOAD THE DATABASE
-db = load_simpledb('SIMPLE.db', recreatedb=RECREATE_DB)
-
-
-#  link to live google sheet
-link = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vQG5cGkI2aHPHD4b6ZZPTU4jjQMirU_z-yhl5ElI3p6nCIufIL64crC-yFalF58OauWHxmYvEKR_isY/pub?gid=0&single=true&output=csv'
-columns = ['source', 'original_spectrum', 'fixed_spectrum']
-spectra_link_table = Table.read(link, format='ascii', data_start=2, names=columns, guess=False, fast_reader=False, delimiter=',')
-
-for row in spectra_link_table:
-    # t = db.query(db.Spectra).filter(db.Spectra.c.original_spectrum == row['original_spectrum']).astropy()
-    # print(t['spectrum'])
-    with db.engine.connect() as conn:
-        conn.execute(db.Spectra.update().where(db.Spectra.c.original_spectrum == row['original_spectrum']).values(spectrum=row['fixed_spectrum']))
-        conn.commit()
-
-# WRITE THE JSON FILES
-if SAVE_DB:
-    db.save_database(directory='data/')
\ No newline at end of file

From 90f3c0fcf6fd6a353f63dfc95f2ed9896dfbb7a0 Mon Sep 17 00:00:00 2001
From: Kelle Cruz <kellecruz@gmail.com>
Date: Tue, 17 Oct 2023 16:23:08 -0400
Subject: [PATCH 7/7] change name scheduled-tests.yml

---
 .github/workflows/scheduled-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/scheduled-tests.yml b/.github/workflows/scheduled-tests.yml
index 6d30d5809..db51bb06e 100644
--- a/.github/workflows/scheduled-tests.yml
+++ b/.github/workflows/scheduled-tests.yml
@@ -1,7 +1,7 @@
 # This workflow will install Python dependencies, run tests and lint with a single version of Python
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
-name: Database Integration Tests
+name: Scheduled Checks
 
 on:
   schedule: