From 0d2a1f10726d87c3ae99f82c28039e83705c5b58 Mon Sep 17 00:00:00 2001
From: leej3 <johnleenimh@gmail.com>
Date: Fri, 6 Sep 2024 11:55:26 +0100
Subject: [PATCH] start improving and fixing tests

---
 tests/test_api.py              | 76 ++++++++++++++++++++++++++++++++++
 tests/test_schema_helpers.py   | 15 ++-----
 tests/test_schema_integrity.py | 18 ++++++--
 3 files changed, 93 insertions(+), 16 deletions(-)
 create mode 100644 tests/test_api.py

diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 00000000..fdf31685
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,76 @@
+import datetime
+
+import pytest
+from motor.motor_asyncio import AsyncIOMotorClient
+from odmantic import AIOEngine
+
+from osm.schemas import Client, Component, Invocation, Work
+
+
+@pytest.fixture(scope="function")
+def test_engine():
+    """Yield an AIOEngine bound to ``test_db`` on mongodb://localhost:27017."""
+    mongo = AsyncIOMotorClient("mongodb://localhost:27017")
+    engine = AIOEngine(client=mongo, database="test_db")
+    yield engine
+    engine.database.drop_collection("invocation")  # NOTE(review): not awaited
+    mongo.close()
+
+
+@pytest.mark.asyncio
+async def test_engine_save(test_engine):
+    """Round-trip an Invocation through the engine and compare every field.
+
+    The document is saved with ``test_engine.save`` and read back once with
+    ``find_one`` by id; each field set on the sample must equal the value on
+    the copy returned by the database.
+    """
+    # Create a sample invocation object
+    sample_invocation = Invocation(
+        work=Work(filename="test_file"),
+        client=Client(compute_context_id=1),
+        osm_version="1.0.0",
+        user_comment="Test comment",
+        components=[Component(name="TestComponent", version="1.0")],
+        funder=["Test Funder"],
+        data_tags=["test_tag"],
+        # Microseconds zeroed, presumably so the stored value compares equal.
+        created_at=datetime.datetime.now(datetime.UTC).replace(microsecond=0),
+    )
+
+    # Save the sample invocation to the database
+    await test_engine.save(sample_invocation)
+
+    # Retrieve the saved invocation to verify it was saved correctly
+    saved_invocation = await test_engine.find_one(
+        Invocation, Invocation.id == sample_invocation.id
+    )
+    assert saved_invocation is not None, "Saved invocation not found"
+    assert saved_invocation.id == sample_invocation.id, "IDs do not match"
+    assert (
+        saved_invocation.work.filename == sample_invocation.work.filename
+    ), "Work data does not match"
+    assert (
+        saved_invocation.client.compute_context_id
+        == sample_invocation.client.compute_context_id
+    ), "Client data does not match"
+    assert (
+        saved_invocation.osm_version == sample_invocation.osm_version
+    ), "OSM version does not match"
+    assert (
+        saved_invocation.user_comment == sample_invocation.user_comment
+    ), "User comment does not match"
+    assert (
+        saved_invocation.components[0].name == sample_invocation.components[0].name
+    ), "Component name does not match"
+    assert (
+        saved_invocation.components[0].version
+        == sample_invocation.components[0].version
+    ), "Component version does not match"
+    assert saved_invocation.funder == sample_invocation.funder, "Funder does not match"
+    assert (
+        saved_invocation.data_tags == sample_invocation.data_tags
+    ), "Data tags do not match"
+    assert (
+        saved_invocation.created_at == sample_invocation.created_at
+    ), "Created at does not match"
diff --git a/tests/test_schema_helpers.py b/tests/test_schema_helpers.py
index f9239e6c..864e4810 100644
--- a/tests/test_schema_helpers.py
+++ b/tests/test_schema_helpers.py
@@ -7,14 +7,7 @@
 def test_transform_data():
     # Create mock data for testing
     data = {
-        "is_open_code": [False],
-        "is_open_data": [True],
         "pmid": [27458207],
-        "open_code_statements": [None],
-        "open_data_category": ["data availability statement"],
-        "open_data_statements": [
-            "deposited data https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE12345"
-        ],
         "funder": [["National Institutes of Health"]],  # Funder as a list of strings
     }
 
@@ -23,7 +16,9 @@ def test_transform_data():
 
     # Use the existing get_table_with_schema function to create the PyArrow Table
     funder_field = pa.field("funder", pa.list_(pa.string()), nullable=True)
-    table = osh.get_table_with_schema(df, [funder_field])
+    table = osh.get_table_with_schema(
+        df, schema_name="Work", other_fields=[funder_field]
+    )
 
     # Mock parameters for the transform_data function
     kwargs = {
@@ -58,7 +53,3 @@ def test_transform_data():
     assert result["user_comment"] == "Bulk upload of NIH-IRP data"
     assert result["components"][0]["name"] == "Sciencebeam parser/RTransparent"
     assert result["components"][0]["version"] == "x.x.x"
-
-
-# Run the test
-test_transform_data()
diff --git a/tests/test_schema_integrity.py b/tests/test_schema_integrity.py
index b5dac7de..5987e6f1 100644
--- a/tests/test_schema_integrity.py
+++ b/tests/test_schema_integrity.py
@@ -38,18 +38,28 @@ def test_initial_data_integrity(data):
     assert data["is_open_data"].dtype == "boolean", "is_open_data should be bool"
 
 
-@pytest.mark.xfail(reason="not sure this is desired behaviour")
 def test_handling_missing_values(data):
     invocations = []
     for _, x in data.iterrows():
-        invocations.append(schema_helpers.get_invocation(x))
+        invocations.append(schema_helpers.get_invocation(x, metrics_schema=Work))
 
     # Ensure that missing values are correctly handled
     assert len(invocations) == 5, "All rows should be processed into Work objects"
     assert (
-        invocations[2].metrics.pmid is None
+        invocations[2]["metrics"]["pmid"] is None
     ), "pmid should be None for the third object"
-    assert False
+    # The remaining metrics of that same row are checked the same way; the
+    # mapping is bound once so each assertion names only the field under test.
+    # (Index 2 is the third row yielded by data.iterrows().)
+    metrics = invocations[2]["metrics"]
+    assert metrics["year"] is None, "year should be None for the third object"
+    assert metrics["score"] is None, "score should be None for the third object"
+    assert (
+        metrics["is_open_code"] is None
+    ), "is_open_code should be None for the third object"
+    assert (
+        metrics["is_open_data"] is None
+    ), "is_open_data should be None for the third object"
 
 
 @pytest.mark.xfail(