Skip to content

Commit

Permalink
update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
JeanMainguy committed Aug 29, 2024
1 parent f3abada commit 14bc194
Show file tree
Hide file tree
Showing 5 changed files with 68 additions and 26 deletions.
24 changes: 23 additions & 1 deletion binette/io_manager.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import pyfastx
from typing import Iterable, List, Dict, Tuple
from typing import Iterable, List, Dict, Tuple, Set
import csv

from binette.bin_manager import Bin
Expand Down Expand Up @@ -195,3 +195,25 @@ def check_resume_file(faa_file: Path, diamond_result_file: Path) -> None:
raise FileNotFoundError(error_msg)


def write_original_bin_metrics(bin_set_name_to_bins: Dict[str, Set[Bin]], original_bin_report_dir: Path):
    """
    Dump the metrics of every original input bin set as TSV files.

    The report directory is created when missing (parents included). The items of
    ``bin_set_name_to_bins`` are processed in sorted order and each bin set is handed to
    ``write_bin_info`` together with its own output path, named
    ``input_bins_<rank>.<set name>.tsv``, where ``<rank>`` is the 1-based position in
    that order and every ``/`` of the set name is replaced by ``_``.

    :param bin_set_name_to_bins: Mapping from a bin set name (str) to the set of Bin
                                 objects belonging to that bin set.
    :param original_bin_report_dir: Directory (Path) that receives one TSV file per bin set.
    """
    original_bin_report_dir.mkdir(parents=True, exist_ok=True)

    for rank, (set_name, bins) in enumerate(sorted(bin_set_name_to_bins.items()), start=1):
        # A "/" in the set name would otherwise act as a path separator in the file name.
        safe_set_name = set_name.replace('/', '_')
        bins_metric_file = original_bin_report_dir / f"input_bins_{rank}.{safe_set_name}.tsv"

        logging.debug(f"Writing metrics for bin set '{set_name}' to file: {bins_metric_file}")
        write_bin_info(bins, bins_metric_file)

    logging.debug("Completed writing all original input bin metrics.")
22 changes: 4 additions & 18 deletions binette/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,23 +345,6 @@ def log_selected_bin_info(selected_bins: List[bin_manager.Bin], hq_min_completen
thresholds = f"(completeness >= {hq_min_completeness} and contamination <= {hq_max_conta})"
logging.info(f"{hq_bins}/{len(selected_bins)} selected bins have a high quality {thresholds}.")

def write_original_bin_metrics(bin_set_name_to_bins:Dict[str, Set[bin_manager.Bin]], original_bin_report_dir:Path):
    """
    Write the metrics of each original input bin set to its own TSV file.

    The target directory is created if needed, then the bin sets are visited in
    sorted order and each one is written with ``io.write_bin_info`` to
    ``input_bins_<rank>.<set name>.tsv`` (1-based rank, ``/`` in the set name
    replaced by ``_``).

    :param bin_set_name_to_bins: Mapping from bin set name to the set of bins of that set.
    :param original_bin_report_dir: Directory where the per-set TSV files are written.
    """
    logging.info(f"Writing original input bins metrics in {original_bin_report_dir}")

    original_bin_report_dir.mkdir(parents=True, exist_ok=True)

    ordered_bin_sets = sorted(bin_set_name_to_bins.items())
    for rank, (set_name, bins) in enumerate(ordered_bin_sets, start=1):
        file_name = f"input_bins_{rank}.{set_name.replace('/', '_')}.tsv"
        bins_metric_file = original_bin_report_dir / file_name

        logging.info(f"Writing bin_set {set_name} input bins metrics in {bins_metric_file}")
        io.write_bin_info(bins, bins_metric_file)



def main():
"Orchestrate the execution of the program"
Expand Down Expand Up @@ -416,7 +399,10 @@ def main():
logging.info("Add size and assess quality of input bins")
bin_quality.add_bin_metrics(original_bins, contig_metadat, args.contamination_weight, args.threads)

write_original_bin_metrics(bin_set_name_to_bins, original_bin_report_dir)


logging.info(f"Writting original input bin metrics to directory: {original_bin_report_dir}")
io.write_original_bin_metrics(bin_set_name_to_bins, original_bin_report_dir)


logging.info("Create intermediate bins:")
Expand Down
9 changes: 5 additions & 4 deletions tests/bin_manager_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import networkx as nx

import logging
from pathlib import Path

def test_get_all_possible_combinations():
input_list = ["2", "3", "4"]
Expand Down Expand Up @@ -524,14 +525,14 @@ def create_temp_bin_directories(tmpdir, create_temp_bin_files):
bin2 = bin_dir2.join("binA.fasta")
bin2.write(">contig3\nTTAG\n>contig4\nCGAT\n>contig5\nCGGC")

return {"set1": str(bin_dir1), "set2": str(bin_dir2)}
return {"set1": Path(bin_dir1), "set2": Path(bin_dir2)}


def test_get_bins_from_directory(create_temp_bin_files):
bin_dir = create_temp_bin_files
set_name = "TestSet"

bins = bin_manager.get_bins_from_directory(str(bin_dir), set_name, fasta_extensions={'.fasta'})
bins = bin_manager.get_bins_from_directory(Path(bin_dir), set_name, fasta_extensions={'.fasta'})

assert len(bins) == 2 # Ensure that the correct number of Bin objects is returned

Expand All @@ -546,15 +547,15 @@ def test_get_bins_from_directory(create_temp_bin_files):
assert bins[0].name in ["bin2.fasta", "bin1.fasta"]

def test_get_bins_from_directory_no_files(tmpdir):
bin_dir = str(tmpdir.mkdir("empty_bins"))
bin_dir = Path(tmpdir.mkdir("empty_bins"))
set_name = "EmptySet"

bins = bin_manager.get_bins_from_directory(bin_dir, set_name, fasta_extensions={'.fasta'})

assert len(bins) == 0 # Ensure that no Bin objects are returned for an empty directory

def test_get_bins_from_directory_no_wrong_extensions(create_temp_bin_files):
bin_dir = create_temp_bin_files
bin_dir = Path(create_temp_bin_files)
set_name = "TestSet"

bins = bin_manager.get_bins_from_directory(bin_dir, set_name, fasta_extensions={'.fna'})
Expand Down
36 changes: 33 additions & 3 deletions tests/io_manager_test.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import pytest
from binette import io_manager
from pathlib import Path

from unittest.mock import patch




class Bin:
def __init__(self, bin_id, origin, name, completeness, contamination, score, length, N50, contigs):
self.id = bin_id
self.origin = origin
self.origin = {origin}
self.name = name
self.completeness = completeness
self.contamination = contamination
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_infer_bin_name_from_single_path():
# Check if the output matches the expected dictionary
assert result == expected_result


def test_infer_bin_name_from_bin_table_inputs():
# Mock input data
input_bins = [
Expand Down Expand Up @@ -283,3 +283,33 @@ def test_check_resume_file_missing_diamond(temp_files, caplog):
io_manager.check_resume_file(Path(faa_file), Path("nonexistent_diamond_result.txt"))
assert "Protein file" not in caplog.text
assert "Diamond result file" in caplog.text


@patch('binette.io_manager.write_bin_info')
def test_write_original_bin_metrics(mock_write_bin_info, bin1, bin2, tmp_path):
    """
    Check that `write_original_bin_metrics` creates the report directory and
    delegates one `write_bin_info` call per bin set with the expected file path.

    `write_bin_info` is patched, so no TSV file is actually written here: only the
    directory creation and the arguments of the delegated calls are verified.
    """
    # The directory does not exist beforehand, so the function has to create it.
    temp_directory = tmp_path / "test_output"

    mock_bins = {"set1": {bin1},
                 "set2": {bin2}}

    # Call the function with mock data
    io_manager.write_original_bin_metrics(mock_bins, temp_directory)

    # Check that the output directory was created (and is a directory, not a file)
    assert temp_directory.is_dir(), f"Expected temp_directory {temp_directory} was not created."

    # Paths that should be handed to `write_bin_info`, one per bin set in sorted order
    expected_files = [
        temp_directory / "input_bins_1.set1.tsv",
        temp_directory / "input_bins_2.set2.tsv"
    ]

    # Check if `write_bin_info` was called correctly
    assert mock_write_bin_info.call_count == 2, "write_bin_info should be called once for each bin set."

    # Verify the specific calls to `write_bin_info`
    mock_write_bin_info.assert_any_call(mock_bins['set1'], expected_files[0])
    mock_write_bin_info.assert_any_call(mock_bins['set2'], expected_files[1])
3 changes: 3 additions & 0 deletions tests/main_binette_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,7 @@ def test_main(monkeypatch):
patch('binette.bin_quality.add_bin_metrics') as mock_add_bin_metrics, \
patch('binette.main.log_selected_bin_info') as mock_log_selected_bin_info, \
patch('binette.contig_manager.make_contig_index') as mock_make_contig_index, \
patch('binette.io_manager.write_original_bin_metrics') as mock_write_original_bin_metrics, \
patch('binette.main.select_bins_and_write_them') as mock_select_bins_and_write_them:

# Set return values for mocked functions if needed
Expand All @@ -395,5 +396,7 @@ def test_main(monkeypatch):

mock_log_selected_bin_info.assert_called_once()
mock_select_bins_and_write_them.assert_called_once()
mock_write_original_bin_metrics.assert_called_once()

assert mock_apply_contig_index.call_count == 3
assert mock_add_bin_metrics.call_count == 2

0 comments on commit 14bc194

Please sign in to comment.