diff --git a/spoc/io.py b/spoc/io.py index 2ac3175..c9e9eb2 100644 --- a/spoc/io.py +++ b/spoc/io.py @@ -129,7 +129,7 @@ def _load_pixel_metadata(path: str): """Load metadata""" metadata_path = Path(path) / "metadata.json" if metadata_path.exists(): - with open(metadata_path, "r", encoding='UTF-8') as f: + with open(metadata_path, "r", encoding="UTF-8") as f: metadata = json.load(f) else: raise ValueError(f"Metadata file not found at {metadata_path}") @@ -192,5 +192,5 @@ def write_pixels(self, path: str, pixels: Pixels) -> None: pixels.data.to_parquet(write_path, row_group_size=1024 * 1024) # write metadata current_metadata[write_path.name] = pixels.get_global_parameters().dict() - with open(metadata_path, "w", encoding='UTF-8') as f: + with open(metadata_path, "w", encoding="UTF-8") as f: json.dump(current_metadata, f) diff --git a/spoc/pixels.py b/spoc/pixels.py index d59d77f..bd85373 100644 --- a/spoc/pixels.py +++ b/spoc/pixels.py @@ -71,7 +71,7 @@ def from_uri(uri, mode="path"): Mode can be one of pandas|dask|path, which corresponds to the type of the pixel source. 
""" # import here to avoid circular imports - #pylint: disable=import-outside-toplevel + # pylint: disable=import-outside-toplevel from spoc.io import FileManager # Define uri parameters diff --git a/spoc/snipping/__init__.py b/spoc/snipping/__init__.py deleted file mode 100644 index 9484342..0000000 --- a/spoc/snipping/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -"""Snipping module of spoc that provides Snipper class and snipping strategies""" -from spoc.snipping.snipper import Snipper diff --git a/spoc/snipping/snipper.py b/spoc/snipping/snipper.py deleted file mode 100644 index 31a7cd7..0000000 --- a/spoc/snipping/snipper.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Snipping class that orchestrates IO und snipping strategies to implement snipping behavior""" -from typing import List, Union, Dict -import pandas as pd -from spoc.snipping.snipping_strategies import SnippingStrategy -from spoc.pixels import PersistedPixels - - -class Snipper: - def __init__(self, strategies: List[SnippingStrategy]) -> None: - self._strategies = strategies - - def snip( - self, - pixels: Union[str, pd.DataFrame], - snip_positions: pd.DataFrame, - threads: int = 2, - ) -> List[pd.DataFrame]: - if isinstance(pixels, str): - pixels = PersistedPixels(pixels) - return [ - strategy.snip(pixels, snip_positions, threads) - for strategy in self._strategies - ] diff --git a/spoc/snipping/snipping_strategies.py b/spoc/snipping/snipping_strategies.py deleted file mode 100644 index d947686..0000000 --- a/spoc/snipping/snipping_strategies.py +++ /dev/null @@ -1,240 +0,0 @@ -"""Snipping strategies for Snipper that implement specific snipping functionality""" -from abc import ABC, abstractmethod -from functools import partial -from enum import Enum -from typing import Union -from multiprocess.pool import ThreadPool -import pandas as pd -import numpy as np -from sparse import COO -import duckdb -import bioframe -from spoc.pixels import PersistedPixels - - -class SnippingValues(Enum): - """Which values the 
snippet should consist of.""" - - ICCF = 0 - OBSEXP = 1 - - -class SnippingStrategy(ABC): - """Defines interface for snipping strategy""" - - def __init__( - self, - bin_size: int, - half_window_size: int, - snipping_value: Union[str, SnippingValues], - **kwargs, - ): - """Defines the values that should be snipped""" - if isinstance(snipping_value, str): - # check whether string refers to a snipping strategy - snipping_value = SnippingValues[snipping_value.upper()] - self._snipping_value = snipping_value - self._half_window_size = half_window_size - if snipping_value == SnippingValues.OBSEXP: - self._n_random_regions: int = kwargs.get("n_random_regions", 5000) - self._genome: str = kwargs.get("genome", "hg19") - self._bin_size = bin_size - - @staticmethod - def _get_random_coordinates(n_coordinates: int, length: int, genome: str): - """Number of coordinates will not be exactly returned, due to rounding - when distributing to chromosomes.""" - chrom_sizes = bioframe.fetch_chromsizes(genome) - chrom_fractions = chrom_sizes / chrom_sizes.sum() - # accumulate output - chrom_frames = [] - for chrom in chrom_sizes.index: - max_coord = chrom_sizes[chrom] - number_to_choose = int(chrom_fractions[chrom] * n_coordinates) - starts = np.random.randint(0, max_coord, size=number_to_choose) - ends = starts + length - chrom_frames.append( - pd.DataFrame({"chrom": chrom, "start": starts, "end": ends}) - ) - return pd.concat(chrom_frames) - - def __repr__(self) -> str: - return f"" - - @abstractmethod - def get_params(): - raise NotImplementedError - - @abstractmethod - def snip( - self, - pixels: Union[pd.DataFrame, PersistedPixels], - snip_positions: pd.DataFrame, - threads: int = 2, - override_snipping_value: bool = False, - ) -> pd.DataFrame: - """Do snipping""" - raise NotImplementedError - - -class TripletCCT1DSnippingStrategy(SnippingStrategy): - """Implements snipping of 2D-regions based on a set of 1D-regions. 
- The 1D-regions are taken from the last column of pixels, which is assumed - to contain contacts on the second sister chromatid. The first two pixels - are assumed to contain contacts form the first sister chromatid.""" - - CIS_SNIPPING_QUERY = """ - SELECT - t.position_id, - FLOOR((p.start_1 - t.pos)/{bin_size}::float) as offset_1, - FLOOR((p.start_2 - t.pos)/{bin_size}::float) as offset_2, - SUM(p.contact_count) as contacts - FROM {source_table} as p - INNER JOIN chunk as t ON t.chrom = p.chrom - and - abs(FLOOR((p.start_1 - t.pos)/{bin_size}::float))::int <= {pixel_offset} - and - abs(FLOOR((p.start_2 - t.pos)/{bin_size}::float))::int <= {pixel_offset} - and - abs(FLOOR((p.start_3 - (t.pos + {relative_offset}))/{bin_size}::float))::int <= {cis_selector_offset} - GROUP BY 1,2,3 - """ - - def __init__( - self, - bin_size: int, - half_window_size: int, - snipping_value: SnippingValues, - **kwargs, - ): - """Override constructor to add additional parameters.""" - super().__init__(bin_size, half_window_size, snipping_value, **kwargs) - self._position_slack: int = kwargs.get( - "position_slack", self._bin_size - ) # default is binsize - self._relative_offset: int = kwargs.get("relative_offset", 0) - self._expected = None - - def get_params(self) -> dict: - return { - "bin_size": self._bin_size, - "half_window_size": self._half_window_size, - "snipping_value": self._snipping_value, - "position_slack": self._position_slack, - "relative_offset": self._relative_offset, - } - - def _align_positions_to_bins(self, trans_positions: pd.DataFrame): - """Adds index and round positions to bins""" - if "pos" not in trans_positions.columns: - trans_positions = trans_positions.assign( - pos=lambda df_: (df_.start + df_.end) // 2 - ) - return trans_positions.assign( - position_id=lambda df_: range(len(df_)), - pos=lambda df_: (df_.pos // self._bin_size) * self._bin_size, - ) - - def _get_array_coordinates_from_offset(self, offset: pd.Series): - """Transform offsets to start from 0 
to be used as array index""" - return (offset + (self._half_window_size // self._bin_size)).astype(int).values - - def _reduce_snipping_frame(self, snips: pd.DataFrame): - """Takes concat result of snipping and reduces - it along the region dimension""" - output_size = 2 * (self._half_window_size // self._bin_size) + 1 - return ( - COO( - ( - snips.position_id.values, - self._get_array_coordinates_from_offset(snips.offset_1), - self._get_array_coordinates_from_offset(snips.offset_2), - ), - snips.contacts.values, - # TODO: fix shape such that if ids are missing from the end of the input, input shape will not be affected - shape=(np.max(snips.position_id) + 1, output_size, output_size), - ) - .mean(axis=0) # this reduces the result along the region id dimension - .todense() - ) - - def _get_genomic_extent(self): - output_size = 2 * (self._half_window_size // self._bin_size) + 1 - return [ - f"{i * self._bin_size//1000 - self._half_window_size//1000} kb" - for i in range(output_size) - ] - - def _create_expected_for_cis_snipping( - self, - pixels: Union[pd.DataFrame, PersistedPixels], - threads: int = 2, - ): - if self._expected is None: - random_regions = self._get_random_coordinates( - self._n_random_regions, length=100, genome=self._genome - ) - self._expected = self.snip( - pixels, - random_regions, - threads=threads, - override_snipping_value=True, - ) - return self._expected - return self._expected - - def snip( - self, - pixels: Union[pd.DataFrame, PersistedPixels], - snip_positions: pd.DataFrame, - threads: int = 2, - override_snipping_value: bool = False, - ): - """Snips cis sister windows based on supplied trans positions.""" - # dispatch - with ThreadPool(processes=threads) as pool: - result = pool.map( - partial( - self._snip_cis_windows, - pixels=pixels, - ), - np.array_split(self._align_positions_to_bins(snip_positions), threads), - ) - # reduce along positions - dense_matrix = self._reduce_snipping_frame(pd.concat(result)) - # check whether expected is 
needed - if (self._snipping_value == SnippingValues.OBSEXP) and ( - not override_snipping_value - ): - expected = self._create_expected_for_cis_snipping(pixels, threads) - output = dense_matrix / expected - else: - output = dense_matrix - genomic_extent = self._get_genomic_extent() - return pd.DataFrame(output, columns=genomic_extent, index=genomic_extent) - - def _snip_cis_windows( - self, chunk: pd.DataFrame, pixels: Union[pd.DataFrame, PersistedPixels] - ): - # convert parameters into pixel units - pixel_offset = self._half_window_size // self._bin_size - cis_selector_offset = self._position_slack // self._bin_size - # create local connection. No need to close it, is closed when reference to it goes out of scope - local_connection = duckdb.connect() - local_connection.register("chunk", chunk) - # register pixels if needed - if isinstance(pixels, PersistedPixels): - source_table = f"read_parquet('{pixels.path}')" - else: - source_table = "pixel_frame" - local_connection.register(source_table, pixels) - # do snipping - return local_connection.execute( - self.CIS_SNIPPING_QUERY.format( - source_table=source_table, - pixel_offset=pixel_offset, - cis_selector_offset=cis_selector_offset, - bin_size=self._bin_size, - relative_offset=self._relative_offset, - ) - ).df() diff --git a/tests/test_cli.py b/tests/test_cli.py index 3d96e31..94e326c 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1,11 +1,12 @@ """Tests for CLI of spoc""" +# pylint: disable=redefined-outer-name +import shutil +import os import pytest import pandas as pd from pandas.testing import assert_frame_equal import numpy as np -import shutil -import os from click.testing import CliRunner from spoc import cli, dataframe_models @@ -23,6 +24,7 @@ def _create_tmp_dir(): @pytest.fixture def good_annotated_porec_file(): + """Fixture for a good porec file with annotations""" # setup _create_tmp_dir() yield "tests/test_files/good_porec.lab.parquet" @@ -32,11 +34,13 @@ def good_annotated_porec_file(): 
@pytest.fixture def label_library_path(): + """Fixture for a label library file""" return "tests/test_files/ll1.pickle" @pytest.fixture def good_triplet_files(): + """Fixture for two triplet contact files used to merge contacts""" # setup _create_tmp_dir() yield [ @@ -49,6 +53,7 @@ @pytest.fixture def good_triplet_file_for_pixels(): + """Fixture for a good triplet file used to instantiate pixels""" # setup _create_tmp_dir() yield "tests/test_files/good_contacts3.triplets.parquet" @@ -58,6 +63,7 @@ @pytest.fixture def good_porec_file(): + """Fixture for a good fragment file""" # setup _create_tmp_dir() yield "tests/test_files/good_porec.parquet" @@ -67,6 +73,7 @@ @pytest.fixture def expected_pixels(): + """Fixture for expected pixels from binning contacts""" return pd.DataFrame( { "chrom_1": ["chr1"] * 3, @@ -144,9 +151,7 @@ def test_bin_contacts(good_triplet_file_for_pixels, expected_pixels): """happy path for binning contacts without sister sorting""" runner = CliRunner() output_path = "tmp/test_output5.parquet" - result = runner.invoke( - cli.bin_contacts, [good_triplet_file_for_pixels, output_path] - ) + runner.invoke(cli.bin_contacts, [good_triplet_file_for_pixels, output_path]) # check content of file pixels = pd.read_parquet(output_path) np.array_equal(pixels.values, expected_pixels.values) diff --git a/tests/test_contacts.py b/tests/test_contacts.py index d9e0521..4101bb5 100644 --- a/tests/test_contacts.py +++ b/tests/test_contacts.py @@ -1,10 +1,15 @@ +"""Tests for the contacts module.""" + +# pylint: disable=redefined-outer-name import pytest import pandas as pd import pandera as pa import numpy as np import dask.dataframe as dd -from spoc import contacts, dataframe_models, fragments +from spoc import contacts, fragments + +# pylint: disable=unused-import from .fixtures.symmetry import ( unlabelled_contacts_2d, labelled_binary_contacts_2d_sorted, @@ -31,11 +36,13 @@ def 
contact_manipulator(): @pytest.fixture def bad_df(): + """bad df for testing""" return pd.DataFrame({"be": ["bop"]}) @pytest.fixture def labelled_df(): + """Dataframe representing a labelled fragment file""" return pd.DataFrame( { "chrom": ["chr1"] * 6, @@ -64,6 +71,7 @@ def labelled_df(): @pytest.fixture def unlabelled_df(): + """Dataframe representing an unlabelled fragment file""" return pd.DataFrame( { "chrom": ["chr1"] * 6, @@ -84,21 +92,25 @@ def unlabelled_df(): @pytest.fixture def labelled_fragments(labelled_df): + """labelled fragments""" return fragments.Fragments(labelled_df) @pytest.fixture def labelled_fragments_dask(labelled_df): + """labelled fragments from a dask dataframe""" return fragments.Fragments(dd.from_pandas(labelled_df, npartitions=1)) @pytest.fixture def unlabelled_fragments(unlabelled_df): + """unlabelled fragments""" return fragments.Fragments(unlabelled_df) @pytest.fixture def unlabelled_fragments_dask(unlabelled_df): + """unlabelled fragments from a dask dataframe""" return fragments.Fragments(dd.from_pandas(unlabelled_df, npartitions=1)) @@ -112,6 +124,7 @@ def unlabelled_fragments_dask(unlabelled_df): ], ) def test_expander_drops_reads_w_too_little_fragments(expander, fragments, request): + """Tests whether expander drops reads with too little fragments""" triplet_expander = request.getfixturevalue(expander) result = triplet_expander.expand(request.getfixturevalue(fragments)).data if isinstance(result, dd.DataFrame): @@ -130,6 +143,7 @@ def test_expander_drops_reads_w_too_little_fragments(expander, fragments, reques ], ) def test_expander_returns_correct_number_of_contacts(expander, fragments, request): + """Tests whether expander returns correct number of contacts""" triplet_expander = request.getfixturevalue(expander) result = triplet_expander.expand(request.getfixturevalue(fragments)).data assert len(result) == 4 @@ -139,6 +153,7 @@ def test_expander_returns_correct_number_of_contacts(expander, fragments, reques def 
test_expander_returns_correct_contacts_labelled( triplet_expander_labelled, fragments, request ): + """Tests whether expander returns correct contacts for labelled fragments""" df = request.getfixturevalue(fragments) result = triplet_expander_labelled.expand(df).data if isinstance(result, dd.DataFrame): @@ -169,6 +184,7 @@ def test_expander_returns_correct_contacts_labelled( def test_expander_returns_correct_contacts_unlabelled( triplet_expander, fragments, request ): + """Tests whether expander returns correct contacts for unlabelled fragments""" df = request.getfixturevalue(fragments) result = triplet_expander.expand(df).data if isinstance(result, dd.DataFrame): @@ -183,6 +199,7 @@ def test_expander_returns_correct_contacts_unlabelled( def test_contacts_constructor_rejects_wrong_df(bad_df): + """Tests whether contacts constructor rejects wrong df""" with pytest.raises(pa.errors.SchemaError): contacts.Contacts(bad_df, number_fragments=3) @@ -190,6 +207,7 @@ def test_contacts_constructor_rejects_wrong_df(bad_df): def test_merge_works_for_good_pandas_df( triplet_expander, contact_manipulator, labelled_fragments ): + """Tests whether merge works for good pandas df""" contacts = triplet_expander.expand(labelled_fragments) result = contact_manipulator.merge_contacts([contacts, contacts]).data assert result.shape[0] == 8 @@ -199,6 +217,7 @@ def test_merge_works_for_good_pandas_df( def test_merge_works_for_good_dask_df( triplet_expander, contact_manipulator, labelled_fragments ): + """Tests whether merge works for good dask df""" cont = triplet_expander.expand(labelled_fragments) contacts_dask = contacts.Contacts( dd.from_pandas(cont.data, npartitions=1), number_fragments=3 @@ -213,6 +232,7 @@ def test_merge_works_for_good_dask_df( def test_merge_fails_for_pandas_dask_mixed( triplet_expander, contact_manipulator, labelled_fragments ): + """Tests whether merge fails for pandas dask mixed""" with pytest.raises(ValueError): contacts_pandas = 
triplet_expander.expand(labelled_fragments) contacts_dask = contacts.Contacts( @@ -222,6 +242,7 @@ def test_subset_metadata_fails_if_not_labelled(unlabelled_contacts_2d): + """Tests whether subset fails if the dataframe is not labelled""" contact_manipulator = contacts.ContactManipulator() unlab_contacts = contacts.Contacts(unlabelled_contacts_2d) with pytest.raises(AssertionError): @@ -231,6 +252,7 @@ def test_subset_metadata_fails_if_pattern_longer_than_number_fragments( labelled_binary_contacts_2d_sorted, ): + """Tests whether subset fails if the pattern is longer than number of fragments""" contact_manipulator = contacts.ContactManipulator() lab_contacts = contacts.Contacts(labelled_binary_contacts_2d_sorted) with pytest.raises(AssertionError): @@ -240,6 +262,7 @@ def test_subset_metadata_fails_if_pattern_contains_strings_not_in_metadata( labelled_binary_contacts_2d_sorted, ): + """Tests whether subset fails if the pattern contains strings not in metadata""" contact_manipulator = contacts.ContactManipulator() lab_contacts = contacts.Contacts(labelled_binary_contacts_2d_sorted) with pytest.raises(AssertionError): @@ -247,6 +270,7 @@ def test_subset_metadata_creates_correct_subset(labelled_binary_contacts_2d_sorted): + """Tests whether subset creates the correct subset""" contact_manipulator = contacts.ContactManipulator() lab_contacts = contacts.Contacts(labelled_binary_contacts_2d_sorted) result = contact_manipulator.subset_on_metadata(lab_contacts, ["A", "B"]) diff --git a/tests/test_io.py b/tests/test_io.py index 1fd87d3..409030a 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -1,17 +1,22 @@ """This file tests the io module""" -import tempfile -import pytest -from itertools import product 
-import os +# pylint: disable=redefined-outer-name import json +import os import shutil +import tempfile +from itertools import product from pathlib import Path + +import dask.dataframe as dd import pandas as pd +import pytest + from spoc.contacts import Contacts -from spoc.io import FileManager from spoc.file_parameter_models import PixelParameters +from spoc.io import FileManager from spoc.pixels import Pixels -import dask.dataframe as dd + +# pytlint: disable=unused-import from .fixtures.symmetry import unlabelled_contacts_2d CONTACT_PARAMETERS = ( @@ -35,6 +40,7 @@ def _create_tmp_dir(): @pytest.fixture def df_order_2(): + """Fixture for dataframe of pixels with order 2""" return pd.DataFrame( { "chrom": ["chr1"] * 6, @@ -47,6 +53,7 @@ def df_order_2(): @pytest.fixture def df_order_3(): + """Fixture for dataframe of pixels with order 3""" return pd.DataFrame( { "chrom": ["chr1"] * 6, @@ -60,6 +67,7 @@ def df_order_3(): @pytest.fixture def example_pixels_w_metadata(df_order_2, df_order_3): + """Fixture for example pixels with metadata""" # setup _create_tmp_dir() # create pixels directory @@ -92,7 +100,7 @@ def example_pixels_w_metadata(df_order_2, df_order_3): "test2.parquet": expected_parameters[1].dict(), "test3.parquet": expected_parameters[2].dict(), } - with open(pixels_dir + "/metadata.json", "w") as f: + with open(pixels_dir + "/metadata.json", "w", encoding="utf-8") as f: json.dump(metadata, f) yield pixels_dir, expected_parameters, paths, dataframes # teardown @@ -198,7 +206,7 @@ def test_read_pixels_as_pandas_df(example_pixels_w_metadata): """Test reading pixels metadata json file""" pixels_dir, expected_parameters, paths, dataframes = example_pixels_w_metadata # read metadata - for path, expected, df in zip(paths, expected_parameters, dataframes): + for _, expected, df in zip(paths, expected_parameters, dataframes): pixels = FileManager(use_dask=False).load_pixels( pixels_dir, expected, load_dataframe=True ) @@ -210,7 +218,7 @@ def 
test_read_pixels_as_dask_df(example_pixels_w_metadata): """Test reading pixels metadata json file""" pixels_dir, expected_parameters, paths, dataframes = example_pixels_w_metadata # read metadata - for path, expected, df in zip(paths, expected_parameters, dataframes): + for _, expected, df in zip(paths, expected_parameters, dataframes): pixels = FileManager(use_dask=True).load_pixels( pixels_dir, expected, load_dataframe=True ) @@ -232,6 +240,7 @@ def test_read_pixels_as_dask_df(example_pixels_w_metadata): ], ) def test_write_pandas_pixels_to_new_file(df, params, request): + """Test writing pixels to new file""" df = request.getfixturevalue(df) pixels = Pixels(df, **params.dict()) with tempfile.TemporaryDirectory() as tmpdirname: @@ -262,6 +271,7 @@ def test_write_pandas_pixels_to_new_file(df, params, request): ], ) def test_write_dask_pixels_to_new_file(df, params, request): + """Test writing pixels to new file""" df = request.getfixturevalue(df) dask_df = dd.from_pandas(df, npartitions=2) pixels = Pixels(dask_df, **params.dict()) @@ -312,6 +322,7 @@ def test_write_dask_pixels_to_new_file(df, params, request): ], ) def test_add_pandas_pixels_to_existing_file(df1, df2, params, request): + """Test adding pixels to existing file""" df1, df2 = request.getfixturevalue(df1), request.getfixturevalue(df2) params_1, params_2 = params pixels1 = Pixels(df1, **params_1.dict()) @@ -347,7 +358,7 @@ def test_load_pixels_from_uri_fails_without_required_parameters(df, params, requ file_name = tmpdirname + "/" + "test.parquet" FileManager().write_pixels(file_name, pixels) # try loading without required parameters - with pytest.raises(ValueError) as e: + with pytest.raises(ValueError): Pixels.from_uri(file_name) @@ -483,5 +494,5 @@ def test_load_pixels_from_uri_fails_with_ambiguous_specification(df, params, req FileManager().write_pixels(file_name, pixels) FileManager().write_pixels(file_name, pixels2) # load pixels - with pytest.raises(ValueError) as e: + with 
pytest.raises(ValueError): Pixels.from_uri(file_name + "::" + uri) diff --git a/tests/test_labels.py b/tests/test_labels.py index 9fa499a..3a8a593 100644 --- a/tests/test_labels.py +++ b/tests/test_labels.py @@ -1,4 +1,5 @@ """Tests for label functionality""" +# pylint: disable=redefined-outer-name import pytest import pandas as pd @@ -13,7 +14,7 @@ def empty_annotator(): """sets up an annotator with empty label library""" - return fragments.FragmentAnnotator(dict()) + return fragments.FragmentAnnotator({}) @pytest.fixture @@ -26,11 +27,13 @@ def annotator_with_entries(): @pytest.fixture def bad_df(): + """A dataframe with the wrong schema""" return pd.DataFrame({"be": ["bop"]}) @pytest.fixture def unlabelled_df(): + """A dataframe with the correct schema but no metadata""" return pd.DataFrame( { "chrom": pd.Series(["chr1"] * 3, dtype="category"), @@ -51,16 +54,19 @@ def unlabelled_df(): @pytest.fixture def unlabelled_fragments(unlabelled_df): + """A Fragments object with no metadata""" return fragments.Fragments(unlabelled_df) @pytest.fixture def unlabelled_fragments_dask(unlabelled_df): + """A Fragments object with no metadata with dask dataframe""" return fragments.Fragments(dd.from_pandas(unlabelled_df, npartitions=1)) @pytest.fixture def labelled_df(): + """A dataframe with the correct schema and metadata""" return pd.DataFrame( { "chrom": pd.Series(["chr1"] * 3, dtype="category"), @@ -81,16 +87,19 @@ def labelled_df(): def test_fragment_constructor_rejects_df_w_wrong_structure(bad_df): + """Test that the fragment constructor rejects a bad dataframe""" with pytest.raises(pa.errors.SchemaError): fragments.Fragments(bad_df) def test_fragments_constructor_accepts_unlabelled_fragments(unlabelled_df): + """Test that the fragment constructor accepts a good dataframe without labels""" frag = fragments.Fragments(unlabelled_df) assert not frag.contains_metadata def test_fragments_constructor_accepts_labelled_fragments(labelled_df): + """Test that the fragment 
constructor accepts a good dataframe with labels""" frag = fragments.Fragments(labelled_df) assert frag.contains_metadata @@ -99,6 +108,7 @@ def test_fragments_constructor_accepts_labelled_fragments(labelled_df): "fragments", ["unlabelled_fragments", "unlabelled_fragments_dask"] ) def test_annotator_drops_unknown_fragments(annotator_with_entries, fragments, request): + """Test that the annotator drops fragments that are not in the library""" labelled_fragments = annotator_with_entries.annotate_fragments( request.getfixturevalue(fragments) ) @@ -110,6 +120,7 @@ def test_annotator_drops_unknown_fragments(annotator_with_entries, fragments, re "fragments", ["unlabelled_fragments", "unlabelled_fragments_dask"] ) def test_annotator_produces_correct_schema(annotator_with_entries, fragments, request): + """Test that the annotator produces a dataframe with the correct schema""" labelled_fragments = annotator_with_entries.annotate_fragments( request.getfixturevalue(fragments) ) @@ -121,6 +132,7 @@ def test_annotator_produces_correct_schema(annotator_with_entries, fragments, re "fragments", ["unlabelled_fragments", "unlabelled_fragments_dask"] ) def test_annotator_calls_sisters_correctly(annotator_with_entries, fragments, request): + """Test that the annotator calls the sister correctly""" labelled_fragments = annotator_with_entries.annotate_fragments( request.getfixturevalue(fragments) ) @@ -138,6 +150,7 @@ def test_annotator_calls_sisters_correctly(annotator_with_entries, fragments, re "fragments", ["unlabelled_fragments", "unlabelled_fragments_dask"] ) def test_annotator_maintains_dataframe_type(annotator_with_entries, fragments, request): + """Test that the annotator maintains the dataframe type""" labelled_fragments = annotator_with_entries.annotate_fragments( request.getfixturevalue(fragments) ) diff --git a/tests/test_pixels.py b/tests/test_pixels.py index 542f98b..e015a70 100644 --- a/tests/test_pixels.py +++ b/tests/test_pixels.py @@ -1,3 +1,6 @@ +"""Tests for the 
pixels module""" +# pylint: disable=redefined-outer-name + import pytest import dask.dataframe as dd import pandas as pd @@ -7,25 +10,14 @@ @pytest.fixture -def chromosome_sizes(): - return pd.read_csv( - "./tests/test_files/hg19.chrom.sizes", - sep="\t", - header=None, - names=["chrom", "size"], - index_col=["chrom"], - squeeze=True, - ) - - -@pytest.fixture -def genomic_binner(chromosome_sizes): +def genomic_binner(): """genomic binner for pixels""" return pixels.GenomicBinner(bin_size=100_000) @pytest.fixture def contacts_df(): + """A dataframe containing contacts of order 3""" return pd.DataFrame( { "read_name": ["a", "b", "c", "d"], @@ -57,6 +49,7 @@ def contacts_df(): @pytest.fixture def expected_pixels(): + """A dataframe containing the expected pixels after binning""" return pd.DataFrame( { "chrom": ["chr1"] * 2, @@ -70,6 +63,8 @@ def expected_pixels(): @pytest.fixture def expected_pixels_different_chromosomes(): + """A dataframe containing the expected pixels after binning with different chromosomes + enabled""" return pd.DataFrame( { "chrom_1": ["chr1"] * 3, @@ -86,19 +81,21 @@ def expected_pixels_different_chromosomes(): def test_genomic_binner_bins_correctly_same_chromosome_pandas( genomic_binner, contacts_df, expected_pixels ): + """Test if genomic binner bins correctly with same chromosome enabled""" contacts = Contacts(contacts_df) result = genomic_binner.bin_contacts(contacts) assert np.array_equal(result.data.values, expected_pixels.values) assert result.number_fragments == 3 assert result.binsize == 100_000 - assert result.binary_labels_equal == False - assert result.symmetry_flipped == False + assert not result.binary_labels_equal + assert not result.symmetry_flipped assert result.metadata_combi is None def test_genomic_binner_bins_correctly_w_different_chromosomes_pandas( genomic_binner, contacts_df, expected_pixels_different_chromosomes ): + """Test if genomic binner bins correctly with same chromosome disabled""" contacts = 
Contacts(contacts_df) result = genomic_binner.bin_contacts(contacts, same_chromosome=False) assert np.array_equal( @@ -106,27 +103,31 @@ def test_genomic_binner_bins_correctly_w_different_chromosomes_pandas( ) assert result.number_fragments == 3 assert result.binsize == 100_000 - assert result.binary_labels_equal == False - assert result.symmetry_flipped == False + assert not result.binary_labels_equal + assert not result.symmetry_flipped assert result.metadata_combi is None def test_genomic_binner_bins_correctly_same_chromosome_dask( genomic_binner, contacts_df, expected_pixels ): + """Test if genomic binner bins correctly with same chromosome enabled + using a dask dataframe""" contacts = Contacts(dd.from_pandas(contacts_df, chunksize=1000)) result = genomic_binner.bin_contacts(contacts) assert np.array_equal(result.data.compute().values, expected_pixels.values) assert result.number_fragments == 3 assert result.binsize == 100_000 - assert result.binary_labels_equal == False - assert result.symmetry_flipped == False + assert not result.binary_labels_equal + assert not result.symmetry_flipped assert result.metadata_combi is None def test_genomic_binner_bins_correctly_w_different_chromosome_dask( genomic_binner, contacts_df, expected_pixels_different_chromosomes ): + """Test if genomic binner bins correctly with same chromosome disabled + using a dask dataframe""" contacts = Contacts(dd.from_pandas(contacts_df, chunksize=1000)) result = genomic_binner.bin_contacts(contacts, same_chromosome=False) assert np.array_equal( @@ -134,6 +135,6 @@ def test_genomic_binner_bins_correctly_w_different_chromosome_dask( ) assert result.number_fragments == 3 assert result.binsize == 100_000 - assert result.binary_labels_equal == False - assert result.symmetry_flipped == False + assert not result.binary_labels_equal + assert not result.symmetry_flipped assert result.metadata_combi is None diff --git a/tests/test_symmetry.py b/tests/test_symmetry.py index 8ad4456..495d589 100644 --- 
a/tests/test_symmetry.py +++ b/tests/test_symmetry.py @@ -1,4 +1,6 @@ """Tests for dealing with symmetry flipping for labelled and unlabelled contacts.""" +# pylint: disable=redefined-outer-name +# pylint: disable=unused-import import pytest import pandas as pd @@ -48,6 +50,7 @@ ], ) def test_unlabelled_contacts_flipped_correctly(unflipped, flipped, request): + """Test that unlabelled contacts are flipped correctly.""" unflipped, flipped = request.getfixturevalue(unflipped), request.getfixturevalue( flipped ) @@ -64,6 +67,7 @@ def test_unlabelled_contacts_flipped_correctly(unflipped, flipped, request): ], ) def test_unlabelled_contacts_flipped_correctly_dask(unflipped, flipped, request): + """Test that unlabelled contacts are flipped correctly.""" unflipped, flipped = dd.from_pandas( request.getfixturevalue(unflipped), npartitions=1 ), request.getfixturevalue(flipped) @@ -83,6 +87,7 @@ def test_unlabelled_contacts_flipped_correctly_dask(unflipped, flipped, request) ], ) def test_labelled_contacts_are_sorted_correctly(unsorted, sorted_contacts, request): + """Test that labelled contacts are sorted correctly.""" unsorted, sorted_contacts = request.getfixturevalue( unsorted ), request.getfixturevalue(sorted_contacts) @@ -102,6 +107,8 @@ def test_labelled_contacts_are_sorted_correctly(unsorted, sorted_contacts, reque def test_labelled_contacts_are_sorted_correctly_dask( unsorted, sorted_contacts, request ): + """Test that labelled contacts are sorted correctly with + underlying dask dataframe.""" unsorted, sorted_contacts = dd.from_pandas( request.getfixturevalue(unsorted), npartitions=1 ), request.getfixturevalue(sorted_contacts) @@ -123,6 +130,7 @@ def test_labelled_contacts_are_sorted_correctly_dask( ], ) def test_equate_binary_labels(unequated, equated, request): + """Test that binary labels are equated correctly.""" unequated, equated = request.getfixturevalue(unequated), request.getfixturevalue( equated ) @@ -140,6 +148,8 @@ def 
test_equate_binary_labels(unequated, equated, request): ], ) def test_equate_binary_labels_dask(unequated, equated, request): + """Test that binary labels are equated correctly with + underlying dask dataframe.""" unequated, equated = dd.from_pandas( request.getfixturevalue(unequated), npartitions=1 ), request.getfixturevalue(equated) @@ -168,6 +178,7 @@ def test_equate_binary_labels_dask(unequated, equated, request): ], ) def test_flip_labelled_contacts(unflipped, flipped, request): + """Test that labelled contacts are flipped correctly.""" unflipped, flipped = request.getfixturevalue(unflipped), request.getfixturevalue( flipped ) @@ -196,6 +207,8 @@ def test_flip_labelled_contacts(unflipped, flipped, request): ], ) def test_flip_labelled_contacts_dask(unflipped, flipped, request): + """Test that labelled contacts are flipped correctly with + underlying dask dataframe.""" unflipped, flipped = dd.from_pandas( request.getfixturevalue(unflipped), npartitions=1 ), request.getfixturevalue(flipped) @@ -232,6 +245,7 @@ def test_flip_labelled_contacts_dask(unflipped, flipped, request): ], ) def test_flip_unlabelled_contacts_different_chromosomes(unflipped, flipped, request): + """Test that unlabelled contacts are flipped correctly with different chromosomes.""" unflipped, flipped = request.getfixturevalue(unflipped), request.getfixturevalue( flipped ) diff --git a/tests/xtest_1d_snipping_strategy.py b/tests/xtest_1d_snipping_strategy.py deleted file mode 100644 index 2af9444..0000000 --- a/tests/xtest_1d_snipping_strategy.py +++ /dev/null @@ -1,543 +0,0 @@ -import os -import shutil -import pandas as pd -import numpy as np -from itertools import product -import pytest -from spoc.snipping.snipping_strategies import ( - TripletCCT1DSnippingStrategy, - SnippingValues, -) -from spoc.pixels import PersistedPixels - -# Fixtures - - -@pytest.fixture -def complete_synthetic_pixels(): - np.random.seed(42) - # region_1 - pixels_1 = [ - { - "chrom": tup[0], - "start_1": tup[1], - 
"start_2": tup[2], - "start_3": tup[3], - "contact_count": np.random.randint(0, 10), - } - for tup in product( - ["chr1"], - np.arange(900_000, 1_150_000, 50_000), - np.arange(900_000, 1_150_000, 50_000), - np.arange(900_000, 1_150_000, 50_000), - ) - ] - # region_2 - pixels_2 = [ - { - "chrom": tup[0], - "start_1": tup[1], - "start_2": tup[2], - "start_3": tup[3], - "contact_count": np.random.randint(0, 10), - } - for tup in product( - ["chr2"], - np.arange(900_000, 1_150_000, 50_000), - np.arange(900_000, 1_150_000, 50_000), - np.arange(900_000, 1_150_000, 50_000), - ) - ] - return pd.concat((pd.DataFrame(pixels_1), pd.DataFrame(pixels_2))) - - -@pytest.fixture -def incomplete_synthetic_pixels(): - np.random.seed(42) - # region 1 - pixels_1 = [ - { - "chrom": tup[0], - "start_1": tup[1], - "start_2": tup[2], - "start_3": tup[3], - "contact_count": np.random.randint(0, 10), - } - for tup in product( - ["chr1"], - np.arange(900_000, 1_000_000, 50_000), - np.arange(900_000, 1_000_000, 50_000), - np.arange(900_000, 1_000_000, 50_000), - ) - ] - # region_2 - pixels_2 = [ - { - "chrom": tup[0], - "start_1": tup[1], - "start_2": tup[2], - "start_3": tup[3], - "contact_count": np.random.randint(0, 10), - } - for tup in product( - ["chr2"], - np.arange(1_000_000, 1_150_000, 50_000), - np.arange(1_000_000, 1_150_000, 50_000), - np.arange(1_000_000, 1_150_000, 50_000), - ) - ] - return pd.concat((pd.DataFrame(pixels_1), pd.DataFrame(pixels_2))) - - -@pytest.fixture -def complete_persisted_pixels(complete_synthetic_pixels): - # setup - os.mkdir("tmp") - complete_synthetic_pixels.to_parquet("tmp/csp.parquet") - yield PersistedPixels("tmp/csp.parquet") - # teardown - shutil.rmtree("tmp") - - -@pytest.fixture -def expected_entire_single_region_complete(complete_synthetic_pixels): - return ( - complete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - ) - .query("chrom == 'chr1'") - 
.pivot_table( - index="offset_1", columns="offset_2", values="contact_count", aggfunc=np.sum - ) - .astype(float) - ) - - -@pytest.fixture -def expected_entire_single_region_incomplete(incomplete_synthetic_pixels): - return ( - incomplete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - ) - .astype( - { - "offset_1": pd.CategoricalDtype( - np.arange(-100, 150, 50) - ), # needed to set possible values - "offset_2": pd.CategoricalDtype( - np.arange(-100, 150, 50) - ), # needed to set possible values - } - ) - .query("chrom == 'chr1'") - .pivot_table( - index="offset_1", columns="offset_2", values="contact_count", aggfunc=np.sum - ) - .astype(float) - ) - - -@pytest.fixture -def expected_two_entire_regions_complete(complete_synthetic_pixels): - # region1 - first = ( - complete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - ) - .query("chrom == 'chr1'") - .groupby(["offset_1", "offset_2"]) - .contact_count.sum() - ) - # region2 - second = ( - complete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - ) - .query("chrom == 'chr2'") - .groupby(["offset_1", "offset_2"]) - .contact_count.sum() - ) - # average and return - return ( - pd.concat((first, second)) - .reset_index() - .pivot_table( - index="offset_1", - columns="offset_2", - values="contact_count", - aggfunc=np.mean, - ) - .astype(float) - ) - - -@pytest.fixture -def expected_two_entire_regions_incomplete(incomplete_synthetic_pixels): - # region1 - first = ( - incomplete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - ) - .astype( - { - "offset_1": pd.CategoricalDtype( - np.arange(-100, 150, 50) - ), # needed to set possible values - 
"offset_2": pd.CategoricalDtype( - np.arange(-100, 150, 50) - ), # needed to set possible values - } - ) - .query("chrom == 'chr1'") - .groupby(["offset_1", "offset_2"]) - .contact_count.sum() - ) - # region2 - second = ( - incomplete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - ) - .astype( - { - "offset_1": pd.CategoricalDtype( - np.arange(-100, 150, 50) - ), # needed to set possible values - "offset_2": pd.CategoricalDtype( - np.arange(-100, 150, 50) - ), # needed to set possible values - } - ) - .query("chrom == 'chr2'") - .groupby(["offset_1", "offset_2"]) - .contact_count.sum() - ) - # average and return - return ( - pd.concat((first, second)) - .reset_index() - .pivot_table( - index="offset_1", - columns="offset_2", - values="contact_count", - aggfunc=np.mean, - ) - .astype(float) - ) - - -@pytest.fixture -def expected_one_region_center_selection(complete_synthetic_pixels): - return ( - complete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - offset_3=lambda df_: (df_.start_3 - 1_000_000) // 1000, - ) - .query("chrom == 'chr1' and offset_3 == 0") - .pivot_table( - index="offset_1", columns="offset_2", values="contact_count", aggfunc=np.sum - ) - .astype(float) - ) - - -@pytest.fixture -def expected_one_region_50k_offset_selection(complete_synthetic_pixels): - return ( - complete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - offset_3=lambda df_: (df_.start_3 - 1_000_000) // 1000, - ) - .query("chrom == 'chr1' and offset_3 == 50") - .pivot_table( - index="offset_1", columns="offset_2", values="contact_count", aggfunc=np.sum - ) - .astype(float) - ) - - -@pytest.fixture -def expected_two_regions_center_selection(complete_synthetic_pixels): - # region1 - first = ( - 
complete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - offset_3=lambda df_: (df_.start_3 - 1_000_000) // 1000, - ) - .query("chrom == 'chr1' and offset_3 == 0") - .groupby(["offset_1", "offset_2"]) - .contact_count.sum() - ) - # region2 - second = ( - complete_synthetic_pixels.assign( - offset_1=lambda df_: (df_.start_1 - 1_000_000) // 1000, - offset_2=lambda df_: (df_.start_2 - 1_000_000) // 1000, - offset_3=lambda df_: (df_.start_3 - 1_000_000) // 1000, - ) - .query("chrom == 'chr2' and offset_3 == 0") - .groupby(["offset_1", "offset_2"]) - .contact_count.sum() - ) - # average and return - return ( - pd.concat((first, second)) - .reset_index() - .pivot_table( - index="offset_1", - columns="offset_2", - values="contact_count", - aggfunc=np.mean, - ) - .astype(float) - ) - - -@pytest.fixture -def single_region(): - return pd.DataFrame( - { - "chrom": ["chr1"], - "start": [1_000_000], - "end": [1_000_000], - }, - index=[0], - ) - - -@pytest.fixture -def two_regions(): - return pd.DataFrame( - { - "chrom": ["chr1", "chr2"], - "start": [1_000_000, 1_000_000], - "end": [1_000_000, 1_000_000], - } - ) - - -@pytest.fixture -def standard_snipping_strategy(): - return TripletCCT1DSnippingStrategy( - bin_size=50_000, - half_window_size=100_000, - snipping_value=SnippingValues.ICCF, - position_slack=1_000_000, - ) - - -@pytest.fixture -def standard_snipping_strategy_from_string(): - return TripletCCT1DSnippingStrategy( - bin_size=50_000, - half_window_size=100_000, - snipping_value="iccf", - position_slack=1_000_000, - ) - - -@pytest.fixture -def center_snipping_strategy(): - return TripletCCT1DSnippingStrategy( - bin_size=50_000, - half_window_size=100_000, - snipping_value=SnippingValues.ICCF, - position_slack=0, - ) - - -@pytest.fixture -def center_snipping_strategy_w_offset(): - return TripletCCT1DSnippingStrategy( - bin_size=50_000, - half_window_size=100_000, - 
snipping_value=SnippingValues.ICCF, - position_slack=0, - relative_offset=50_000, - ) - - -@pytest.fixture -def standard_snipping_strategy_obs_exp(single_region): - return TripletCCT1DSnippingStrategy( - bin_size=50_000, - half_window_size=100_000, - snipping_value=SnippingValues.OBSEXP, - position_slack=1_000_000, - n_random_regions=100, - genome="hg19", - ) - - -# Tests - -## ICCF - - -def test_entire_region_with_complete_pixels( - complete_synthetic_pixels, - single_region, - expected_entire_single_region_complete, - standard_snipping_strategy, -): - """Test whether snipping of an entire region produces correct results when pixels are supplied as dataframe.""" - # do the snipping - result = standard_snipping_strategy.snip(complete_synthetic_pixels, single_region) - # check result - np.testing.assert_array_almost_equal( - result.values, expected_entire_single_region_complete.values - ) - - -def test_entire_region_with_complete_pixels_strategy_from_string( - complete_synthetic_pixels, - single_region, - expected_entire_single_region_complete, - standard_snipping_strategy_from_string, -): - """Test whether snipping of an entire region produces correct results when pixels are supplied as dataframe - and strategy is instantiated with a string""" - # do the snipping - result = standard_snipping_strategy_from_string.snip( - complete_synthetic_pixels, single_region - ) - # check result - np.testing.assert_array_almost_equal( - result.values, expected_entire_single_region_complete.values - ) - - -def test_entire_region_with_complete_pixels_from_file( - complete_persisted_pixels, - single_region, - expected_entire_single_region_complete, - standard_snipping_strategy, -): - """Test whether snipping of an entire region produces correct results when pixels are supplied as file.""" - # do the snipping - result = standard_snipping_strategy.snip(complete_persisted_pixels, single_region) - # check result - np.testing.assert_array_almost_equal( - result.values, 
expected_entire_single_region_complete.values - ) - - -def test_two_regions_with_complete_pixels( - complete_synthetic_pixels, - two_regions, - expected_two_entire_regions_complete, - standard_snipping_strategy, -): - """Test whether snipping of two regions produces correct results when pixels are supplied as dataframe.""" - # do the snipping - result = standard_snipping_strategy.snip(complete_synthetic_pixels, two_regions) - # check result - np.testing.assert_array_almost_equal( - result.values, expected_two_entire_regions_complete.values - ) - - -def test_entire_region_with_incomplete_pixels( - incomplete_synthetic_pixels, - single_region, - expected_entire_single_region_incomplete, - standard_snipping_strategy, -): - """Test whether snipping of an entire region with incomplete pixels (not dense) produces correct results.""" - # do the snipping - result = standard_snipping_strategy.snip(incomplete_synthetic_pixels, single_region) - - # check result - np.testing.assert_array_almost_equal( - result.values, expected_entire_single_region_incomplete.values - ) - - -def test_two_regions_with_incomplete_pixels( - incomplete_synthetic_pixels, - two_regions, - expected_two_entire_regions_incomplete, - standard_snipping_strategy, -): - """Test whether snipping of two entire regions produces correct result.""" - # do the snipping - result = standard_snipping_strategy.snip(incomplete_synthetic_pixels, two_regions) - # check result - np.testing.assert_array_almost_equal( - result.values, expected_two_entire_regions_incomplete.values - ) - - -def test_one_region_center_selection( - complete_synthetic_pixels, - single_region, - expected_one_region_center_selection, - center_snipping_strategy, -): - """Tests whether snipping of complete region with center selection produces correct results""" - # do the snipping - result = center_snipping_strategy.snip(complete_synthetic_pixels, single_region) - # check result - np.testing.assert_array_almost_equal( - result.values, 
expected_one_region_center_selection.values - ) - - -def test_two_regions_center_selection( - complete_synthetic_pixels, - two_regions, - expected_two_regions_center_selection, - center_snipping_strategy, -): - """Tests whether snipping of two complete regions with center selection produces correct results""" - # do the snipping - result = center_snipping_strategy.snip(complete_synthetic_pixels, two_regions) - # check result - np.testing.assert_array_almost_equal( - result.values, expected_two_regions_center_selection.values - ) - - -def test_relative_offset_works( - complete_synthetic_pixels, - single_region, - expected_one_region_50k_offset_selection, - center_snipping_strategy_w_offset, -): - # do the snipping - result = center_snipping_strategy_w_offset.snip( - complete_synthetic_pixels, single_region - ) - # check result - np.testing.assert_array_almost_equal( - result.values, expected_one_region_50k_offset_selection.values - ) - - -## Obs/Exp - - -def test_one_region_center_selection_obs_exp( - complete_synthetic_pixels, single_region, standard_snipping_strategy_obs_exp, mocker -): - """Tests whether snipping of complete region with center selection produces correct results""" - # patch random coordinates to return same coordinates - mocker.patch.object( - TripletCCT1DSnippingStrategy, - "_get_random_coordinates", - return_value=single_region, - ) - # do the snipping - result = standard_snipping_strategy_obs_exp.snip( - complete_synthetic_pixels, single_region - ) - # check result -> since observed and expected is the same, should be ones everywhere - np.testing.assert_array_almost_equal(result.values, np.ones(result.values.shape))