From 0d2a1f10726d87c3ae99f82c28039e83705c5b58 Mon Sep 17 00:00:00 2001
From: leej3
Date: Fri, 6 Sep 2024 11:55:26 +0100
Subject: [PATCH] start improving and fixing tests

---
Review notes (commentary only; the patch payload below is unchanged):

  * tests/test_api.py, test_engine_save: the same find_one lookup and the
    same two assertions ("Saved invocation not found", "IDs do not match")
    appear twice in a row. Only the second copy is followed by the
    field-by-field comparisons, so the first copy is redundant.
  * tests/test_api.py, test_engine fixture: the teardown calls
    engine.database.drop_collection("invocation") from a synchronous
    fixture without awaiting the result, immediately before client.close().
    NOTE(review): Motor operations are asynchronous, so the drop presumably
    races with the close -- confirm the collection is really removed.
  * tests/test_api.py, test_engine fixture: the database name is obtained
    via client["test_db"].name and then passed to AIOEngine.
    NOTE(review): presumably this is just the string "test_db" -- confirm.
  * tests/test_api.py: the fixture connects to mongodb://localhost:27017,
    so the test needs a MongoDB server reachable on that address.
  * tests/test_api.py: created_at uses datetime.UTC.
    NOTE(review): presumably this needs Python 3.11 or newer -- confirm
    against the interpreter versions the project supports.
  * tests/test_schema_integrity.py: the hunk passes metrics_schema=Work,
    but no import of Work is added by this patch.
    NOTE(review): presumably Work is already imported in that file -- confirm.
  * tests/test_schema_integrity.py: the xfail marker and the trailing
    "assert False" are removed from test_handling_missing_values, so the
    test is now expected to pass.

 tests/test_api.py              | 76 ++++++++++++++++++++++++++++++++++
 tests/test_schema_helpers.py   | 15 ++-----
 tests/test_schema_integrity.py | 18 ++++++--
 3 files changed, 93 insertions(+), 16 deletions(-)
 create mode 100644 tests/test_api.py

diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 00000000..fdf31685
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,76 @@
+import datetime
+
+import pytest
+from motor.motor_asyncio import AsyncIOMotorClient
+from odmantic import AIOEngine
+
+from osm.schemas import Client, Component, Invocation, Work
+
+
+@pytest.fixture(scope="function")
+def test_engine():
+    client = AsyncIOMotorClient("mongodb://localhost:27017")
+    test_db = client["test_db"].name
+    engine = AIOEngine(client=client, database=test_db)
+    yield engine
+    engine.database.drop_collection("invocation")
+    client.close()
+
+
+@pytest.mark.asyncio
+async def test_engine_save(test_engine):
+    # Create a sample invocation object
+    sample_invocation = Invocation(
+        work=Work(filename="test_file"),
+        client=Client(compute_context_id=1),
+        osm_version="1.0.0",
+        user_comment="Test comment",
+        components=[Component(name="TestComponent", version="1.0")],
+        funder=["Test Funder"],
+        data_tags=["test_tag"],
+        created_at=datetime.datetime.now(datetime.UTC).replace(microsecond=0),
+    )
+
+    # Save the sample invocation to the database
+    await test_engine.save(sample_invocation)
+
+    # Retrieve the saved invocation to verify it was saved correctly
+    saved_invocation = await test_engine.find_one(
+        Invocation, Invocation.id == sample_invocation.id
+    )
+    assert saved_invocation is not None, "Saved invocation not found"
+    assert saved_invocation.id == sample_invocation.id, "IDs do not match"
+
+    # Retrieve the saved invocation to verify it was saved correctly
+    saved_invocation = await test_engine.find_one(
+        Invocation, Invocation.id == sample_invocation.id
+    )
+    assert saved_invocation is not None, "Saved invocation not found"
+    assert saved_invocation.id == sample_invocation.id, "IDs do not match"
+    assert (
+        saved_invocation.work.filename == sample_invocation.work.filename
+    ), "Work data does not match"
+    assert (
+        saved_invocation.client.compute_context_id
+        == sample_invocation.client.compute_context_id
+    ), "Client data does not match"
+    assert (
+        saved_invocation.osm_version == sample_invocation.osm_version
+    ), "OSM version does not match"
+    assert (
+        saved_invocation.user_comment == sample_invocation.user_comment
+    ), "User comment does not match"
+    assert (
+        saved_invocation.components[0].name == sample_invocation.components[0].name
+    ), "Component name does not match"
+    assert (
+        saved_invocation.components[0].version
+        == sample_invocation.components[0].version
+    ), "Component version does not match"
+    assert saved_invocation.funder == sample_invocation.funder, "Funder does not match"
+    assert (
+        saved_invocation.data_tags == sample_invocation.data_tags
+    ), "Data tags do not match"
+    assert (
+        saved_invocation.created_at == sample_invocation.created_at
+    ), "Created at does not match"
diff --git a/tests/test_schema_helpers.py b/tests/test_schema_helpers.py
index f9239e6c..864e4810 100644
--- a/tests/test_schema_helpers.py
+++ b/tests/test_schema_helpers.py
@@ -7,14 +7,7 @@
 def test_transform_data():
     # Create mock data for testing
     data = {
-        "is_open_code": [False],
-        "is_open_data": [True],
         "pmid": [27458207],
-        "open_code_statements": [None],
-        "open_data_category": ["data availability statement"],
-        "open_data_statements": [
-            "deposited data https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE12345"
-        ],
         "funder": [["National Institutes of Health"]],  # Funder as a list of strings
     }
 
@@ -23,7 +16,9 @@ def test_transform_data():
 
     # Use the existing get_table_with_schema function to create the PyArrow Table
     funder_field = pa.field("funder", pa.list_(pa.string()), nullable=True)
-    table = osh.get_table_with_schema(df, [funder_field])
+    table = osh.get_table_with_schema(
+        df, schema_name="Work", other_fields=[funder_field]
+    )
 
     # Mock parameters for the transform_data function
     kwargs = {
@@ -58,7 +53,3 @@ def test_transform_data():
     assert result["user_comment"] == "Bulk upload of NIH-IRP data"
     assert result["components"][0]["name"] == "Sciencebeam parser/RTransparent"
     assert result["components"][0]["version"] == "x.x.x"
-
-
-# Run the test
-test_transform_data()
diff --git a/tests/test_schema_integrity.py b/tests/test_schema_integrity.py
index b5dac7de..5987e6f1 100644
--- a/tests/test_schema_integrity.py
+++ b/tests/test_schema_integrity.py
@@ -38,18 +38,28 @@ def test_initial_data_integrity(data):
     assert data["is_open_data"].dtype == "boolean", "is_open_data should be bool"
 
 
-@pytest.mark.xfail(reason="not sure this is desired behaviour")
 def test_handling_missing_values(data):
     invocations = []
     for _, x in data.iterrows():
-        invocations.append(schema_helpers.get_invocation(x))
+        invocations.append(schema_helpers.get_invocation(x, metrics_schema=Work))
 
     # Ensure that missing values are correctly handled
     assert len(invocations) == 5, "All rows should be processed into Work objects"
     assert (
-        invocations[2].metrics.pmid is None
+        invocations[2]["metrics"]["pmid"] is None
     ), "pmid should be None for the third object"
-    assert False
+    assert (
+        invocations[2]["metrics"]["year"] is None
+    ), "year should be None for the third object"
+    assert (
+        invocations[2]["metrics"]["score"] is None
+    ), "score should be None for the third object"
+    assert (
+        invocations[2]["metrics"]["is_open_code"] is None
+    ), "is_open_code should be None for the third object"
+    assert (
+        invocations[2]["metrics"]["is_open_data"] is None
+    ), "is_open_data should be None for the third object"
 
 
 @pytest.mark.xfail(