diff --git a/tests/contacts_tests/test_contacts.py b/tests/contacts_tests/test_contacts.py index 49eb5f8..a4b8be0 100644 --- a/tests/contacts_tests/test_contacts.py +++ b/tests/contacts_tests/test_contacts.py @@ -7,7 +7,7 @@ import numpy as np import dask.dataframe as dd -from spoc import contacts, fragments +from spoc import contacts # pytlint: disable=unused-import from ..fixtures.symmetry import ( @@ -16,16 +16,16 @@ ) -@pytest.fixture -def triplet_expander(): - """expander for triplets""" - return fragments.FragmentExpander(number_fragments=3, contains_metadata=False) +# @pytest.fixture +# def triplet_expander(): +# """expander for triplets""" +# return fragments.FragmentExpander(number_fragments=3, contains_metadata=False) -@pytest.fixture -def triplet_expander_labelled(): - """expander for triplets""" - return fragments.FragmentExpander(number_fragments=3, contains_metadata=True) +# @pytest.fixture +# def triplet_expander_labelled(): +# """expander for triplets""" +# return fragments.FragmentExpander(number_fragments=3, contains_metadata=True) @pytest.fixture @@ -90,157 +90,6 @@ def unlabelled_df(): ) -@pytest.fixture -def labelled_fragments(labelled_df): - """labelled fragments""" - return fragments.Fragments(labelled_df) - - -@pytest.fixture -def labelled_fragments_dask(labelled_df): - """labelled fragments from a dask dataframe""" - return fragments.Fragments(dd.from_pandas(labelled_df, npartitions=1)) - - -@pytest.fixture -def unlabelled_fragments(unlabelled_df): - """unlabelled fragments""" - return fragments.Fragments(unlabelled_df) - - -@pytest.fixture -def unlabelled_fragments_dask(unlabelled_df): - """unlabelled fragments from a dask dataframe""" - return fragments.Fragments(dd.from_pandas(unlabelled_df, npartitions=1)) - - -@pytest.mark.parametrize( - "fragments, expander", - [ - ("labelled_fragments", "triplet_expander_labelled"), - ("labelled_fragments_dask", "triplet_expander_labelled"), - ("unlabelled_fragments", "triplet_expander"), - 
("unlabelled_fragments_dask", "triplet_expander"), - ], -) -def test_expander_drops_reads_w_too_little_fragments(expander, fragments, request): - """Tests whether expander drops reads with too little fragments""" - triplet_expander = request.getfixturevalue(expander) - result = triplet_expander.expand(request.getfixturevalue(fragments)).data - if isinstance(result, dd.DataFrame): - result = result.compute() - assert len(set(result.read_name)) == 1 - assert result.read_name[0] == "dummy" - - -@pytest.mark.parametrize( - "fragments, expander", - [ - ("labelled_fragments", "triplet_expander_labelled"), - ("labelled_fragments_dask", "triplet_expander_labelled"), - ("unlabelled_fragments", "triplet_expander"), - ("unlabelled_fragments_dask", "triplet_expander"), - ], -) -def test_expander_returns_correct_number_of_contacts(expander, fragments, request): - """Tests whether expander returns correct number of contacts""" - triplet_expander = request.getfixturevalue(expander) - result = triplet_expander.expand(request.getfixturevalue(fragments)).data - assert len(result) == 4 - - -@pytest.mark.parametrize("fragments", ["labelled_fragments", "labelled_fragments_dask"]) -def test_expander_returns_correct_contacts_labelled( - triplet_expander_labelled, fragments, request -): - """Tests whether expander returns correct contacts for labelled fragments""" - df = request.getfixturevalue(fragments) - result = triplet_expander_labelled.expand(df).data - if isinstance(result, dd.DataFrame): - result = result.compute() - assert np.array_equal(result["start_1"].values, np.array([1, 1, 1, 2])) - assert np.array_equal(result["end_1"].values, np.array([4, 4, 4, 5])) - assert np.array_equal(result["start_2"].values, np.array([2, 2, 3, 3])) - assert np.array_equal(result["end_2"].values, np.array([5, 5, 6, 6])) - assert np.array_equal(result["start_3"].values, np.array([3, 4, 4, 4])) - assert np.array_equal(result["end_3"].values, np.array([6, 7, 7, 7])) - assert np.array_equal( - 
result["metadata_1"].values, - np.array(["SisterA", "SisterA", "SisterA", "SisterB"]), - ) - assert np.array_equal( - result["metadata_2"].values, - np.array(["SisterB", "SisterB", "SisterA", "SisterA"]), - ) - assert np.array_equal( - result["metadata_3"].values, - np.array(["SisterA", "SisterB", "SisterB", "SisterB"]), - ) - - -@pytest.mark.parametrize( - "fragments", ["unlabelled_fragments", "unlabelled_fragments_dask"] -) -def test_expander_returns_correct_contacts_unlabelled( - triplet_expander, fragments, request -): - """Tests whether expander returns correct contacts for unlabelled fragments""" - df = request.getfixturevalue(fragments) - result = triplet_expander.expand(df).data - if isinstance(result, dd.DataFrame): - result = result.compute() - assert np.array_equal(result["start_1"].values, np.array([1, 1, 1, 2])) - assert np.array_equal(result["end_1"].values, np.array([4, 4, 4, 5])) - assert np.array_equal(result["start_2"].values, np.array([2, 2, 3, 3])) - assert np.array_equal(result["end_2"].values, np.array([5, 5, 6, 6])) - assert np.array_equal(result["start_3"].values, np.array([3, 4, 4, 4])) - assert np.array_equal(result["end_3"].values, np.array([6, 7, 7, 7])) - assert "metadata_1" not in result.columns - - -def test_contacts_constructor_rejects_wrong_df(bad_df): - """Tests whether contacts constructor rejects wrong df""" - with pytest.raises(pa.errors.SchemaError): - contacts.Contacts(bad_df, number_fragments=3) - - -def test_merge_works_for_good_pandas_df( - triplet_expander, contact_manipulator, labelled_fragments -): - """Tests whether merge works for good pandas df""" - contacts = triplet_expander.expand(labelled_fragments) - result = contact_manipulator.merge_contacts([contacts, contacts]).data - assert result.shape[0] == 8 - assert result.shape[1] == contacts.data.shape[1] - - -def test_merge_works_for_good_dask_df( - triplet_expander, contact_manipulator, labelled_fragments -): - """Tests whether merge works for good dask df""" - cont 
= triplet_expander.expand(labelled_fragments) - contacts_dask = contacts.Contacts( - dd.from_pandas(cont.data, npartitions=1), number_fragments=3 - ) - result = contact_manipulator.merge_contacts( - [contacts_dask, contacts_dask] - ).data.compute() - assert result.shape[0] == 8 - assert result.shape[1] == cont.data.shape[1] - - -def test_merge_fails_for_pandas_dask_mixed( - triplet_expander, contact_manipulator, labelled_fragments -): - """Tests whether merge fails for pandas dask mixed""" - with pytest.raises(ValueError): - contacts_pandas = triplet_expander.expand(labelled_fragments) - contacts_dask = contacts.Contacts( - dd.from_pandas(contacts_pandas.data, npartitions=1), number_fragments=3 - ) - contact_manipulator.merge_contacts([contacts_pandas, contacts_dask]) - - def test_subset_metadata_fails_if_not_labelled(unlabelled_contacts_2d): """Tests whether subset fails if the datafrane is not laelled""" contact_manipulator = contacts.ContactManipulator() diff --git a/tests/fixtures/symmetry.py b/tests/fixtures/symmetry.py index c9bb53c..02152f5 100644 --- a/tests/fixtures/symmetry.py +++ b/tests/fixtures/symmetry.py @@ -1,4 +1,5 @@ """Fixtures for testing symmetry.py""" +# pylint: disable=redefined-outer-name import pytest import pandas as pd import dask.dataframe as dd diff --git a/tests/fragments_tests/__init__.py b/tests/fragments_tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/fragments_tests/test_fragments.py b/tests/fragments_tests/test_fragments.py new file mode 100644 index 0000000..0cbbad6 --- /dev/null +++ b/tests/fragments_tests/test_fragments.py @@ -0,0 +1,197 @@ +"""Tests for the fragments module.""" + +# pylint: disable=redefined-outer-name +import pytest +import pandas as pd +import numpy as np +import dask.dataframe as dd + +from spoc import fragments + +# pylint: disable=unused-import +from ..fixtures.symmetry import ( + unlabelled_contacts_2d, + labelled_binary_contacts_2d_sorted, +) + + +@pytest.fixture +def 
triplet_expander(): + """expander for triplets""" + return fragments.FragmentExpander(number_fragments=3, contains_metadata=False) + + +@pytest.fixture +def triplet_expander_labelled(): + """expander for triplets""" + return fragments.FragmentExpander(number_fragments=3, contains_metadata=True) + + +@pytest.fixture +def contact_manipulator(): + """manipulator for triplets""" + return contacts.ContactManipulator() + + +@pytest.fixture +def bad_df(): + """bad df for testing""" + return pd.DataFrame({"be": ["bop"]}) + + +@pytest.fixture +def labelled_df(): + """Dataframe representing a labelled fragment file""" + return pd.DataFrame( + { + "chrom": ["chr1"] * 6, + "start": [1, 2, 3, 4, 5, 6], + "end": [4, 5, 6, 7, 8, 9], + "strand": [True] * 6, + "read_name": ["dummy"] * 4 + ["dummy2"] * 2, + "read_start": [1, 2, 3, 4, 5, 6], + "read_end": [4, 5, 6, 7, 8, 9], + "read_length": [1] * 6, + "mapping_quality": [1, 2, 3, 4, 5, 6], + "align_score": [1, 2, 3, 4, 5, 6], + "align_base_qscore": [1, 2, 3, 4, 5, 6], + "pass_filter": [True] * 6, + "metadata": [ + "SisterA", + "SisterB", + "SisterA", + "SisterB", + "SisterA", + "SisterB", + ], + } + ) + + +@pytest.fixture +def unlabelled_df(): + """Dataframe representing an unlabelled fragment file""" + return pd.DataFrame( + { + "chrom": ["chr1"] * 6, + "start": [1, 2, 3, 4, 5, 6], + "end": [4, 5, 6, 7, 8, 9], + "strand": [True] * 6, + "read_name": ["dummy"] * 4 + ["dummy2"] * 2, + "read_start": [1, 2, 3, 4, 5, 6], + "read_end": [4, 5, 6, 7, 8, 9], + "read_length": [1] * 6, + "mapping_quality": [1, 2, 3, 4, 5, 6], + "align_score": [1, 2, 3, 4, 5, 6], + "align_base_qscore": [1, 2, 3, 4, 5, 6], + "pass_filter": [True] * 6, + } + ) + + +@pytest.fixture +def labelled_fragments(labelled_df): + """labelled fragments""" + return fragments.Fragments(labelled_df) + + +@pytest.fixture +def labelled_fragments_dask(labelled_df): + """labelled fragments from a dask dataframe""" + return fragments.Fragments(dd.from_pandas(labelled_df, 
npartitions=1)) + + +@pytest.fixture +def unlabelled_fragments(unlabelled_df): + """unlabelled fragments""" + return fragments.Fragments(unlabelled_df) + + +@pytest.fixture +def unlabelled_fragments_dask(unlabelled_df): + """unlabelled fragments from a dask dataframe""" + return fragments.Fragments(dd.from_pandas(unlabelled_df, npartitions=1)) + + +@pytest.mark.parametrize( + "fragments, expander", + [ + ("labelled_fragments", "triplet_expander_labelled"), + ("labelled_fragments_dask", "triplet_expander_labelled"), + ("unlabelled_fragments", "triplet_expander"), + ("unlabelled_fragments_dask", "triplet_expander"), + ], +) +def test_expander_drops_reads_w_too_little_fragments(expander, fragments, request): + """Tests whether expander drops reads with too little fragments""" + triplet_expander = request.getfixturevalue(expander) + result = triplet_expander.expand(request.getfixturevalue(fragments)).data + if isinstance(result, dd.DataFrame): + result = result.compute() + assert len(set(result.read_name)) == 1 + assert result.read_name[0] == "dummy" + + +@pytest.mark.parametrize( + "fragments, expander", + [ + ("labelled_fragments", "triplet_expander_labelled"), + ("labelled_fragments_dask", "triplet_expander_labelled"), + ("unlabelled_fragments", "triplet_expander"), + ("unlabelled_fragments_dask", "triplet_expander"), + ], +) +def test_expander_returns_correct_number_of_contacts(expander, fragments, request): + """Tests whether expander returns correct number of contacts""" + triplet_expander = request.getfixturevalue(expander) + result = triplet_expander.expand(request.getfixturevalue(fragments)).data + assert len(result) == 4 + + +@pytest.mark.parametrize("fragments", ["labelled_fragments", "labelled_fragments_dask"]) +def test_expander_returns_correct_contacts_labelled( + triplet_expander_labelled, fragments, request +): + """Tests whether expander returns correct contacts for labelled fragments""" + df = request.getfixturevalue(fragments) + result = 
triplet_expander_labelled.expand(df).data + if isinstance(result, dd.DataFrame): + result = result.compute() + assert np.array_equal(result["start_1"].values, np.array([1, 1, 1, 2])) + assert np.array_equal(result["end_1"].values, np.array([4, 4, 4, 5])) + assert np.array_equal(result["start_2"].values, np.array([2, 2, 3, 3])) + assert np.array_equal(result["end_2"].values, np.array([5, 5, 6, 6])) + assert np.array_equal(result["start_3"].values, np.array([3, 4, 4, 4])) + assert np.array_equal(result["end_3"].values, np.array([6, 7, 7, 7])) + assert np.array_equal( + result["metadata_1"].values, + np.array(["SisterA", "SisterA", "SisterA", "SisterB"]), + ) + assert np.array_equal( + result["metadata_2"].values, + np.array(["SisterB", "SisterB", "SisterA", "SisterA"]), + ) + assert np.array_equal( + result["metadata_3"].values, + np.array(["SisterA", "SisterB", "SisterB", "SisterB"]), + ) + + +@pytest.mark.parametrize( + "fragments", ["unlabelled_fragments", "unlabelled_fragments_dask"] +) +def test_expander_returns_correct_contacts_unlabelled( + triplet_expander, fragments, request +): + """Tests whether expander returns correct contacts for unlabelled fragments""" + df = request.getfixturevalue(fragments) + result = triplet_expander.expand(df).data + if isinstance(result, dd.DataFrame): + result = result.compute() + assert np.array_equal(result["start_1"].values, np.array([1, 1, 1, 2])) + assert np.array_equal(result["end_1"].values, np.array([4, 4, 4, 5])) + assert np.array_equal(result["start_2"].values, np.array([2, 2, 3, 3])) + assert np.array_equal(result["end_2"].values, np.array([5, 5, 6, 6])) + assert np.array_equal(result["start_3"].values, np.array([3, 4, 4, 4])) + assert np.array_equal(result["end_3"].values, np.array([6, 7, 7, 7])) + assert "metadata_1" not in result.columns diff --git a/tests/test_labels.py b/tests/fragments_tests/test_labels.py similarity index 100% rename from tests/test_labels.py rename to tests/fragments_tests/test_labels.py diff 
--git a/tests/io_tests/test_io_contacts.py b/tests/io_tests/test_io_contacts.py index 8babea4..856b853 100644 --- a/tests/io_tests/test_io_contacts.py +++ b/tests/io_tests/test_io_contacts.py @@ -1,4 +1,5 @@ """This file tests the io module for contacts""" +# pylint: disable=redefined-outer-name import tempfile import os import json diff --git a/tests/io_tests/test_io_pixels.py b/tests/io_tests/test_io_pixels.py index 4f962c2..164ad4d 100644 --- a/tests/io_tests/test_io_pixels.py +++ b/tests/io_tests/test_io_pixels.py @@ -1,4 +1,5 @@ """This file tests the io module for pixels""" +# pylint: disable=redefined-outer-name import tempfile import os import json