def write_original_bin_metrics(bin_set_name_to_bins: Dict[str, Set[Bin]], original_bin_report_dir: Path):
    """
    Write one metrics TSV file per original input bin set.

    The report directory is created when missing (parents included). Bin sets are
    handled in sorted order of their names; each set is handed to ``write_bin_info``
    together with a target file named ``input_bins_<rank>.<set name>.tsv``, where
    ``<rank>`` is the 1-based position of the set in that order and every ``/`` in
    the set name is replaced by ``_``.

    :param bin_set_name_to_bins: Mapping from bin set name (str) to the set of Bin
                                 objects belonging to that set.
    :param original_bin_report_dir: Directory (Path) receiving one TSV file per bin set.
    """
    original_bin_report_dir.mkdir(parents=True, exist_ok=True)

    for rank, set_name in enumerate(sorted(bin_set_name_to_bins), start=1):
        # A '/' left in the name would be treated as a path separator when joined below.
        flat_set_name = set_name.replace('/', '_')
        bins_metric_file = original_bin_report_dir / f"input_bins_{rank}.{flat_set_name}.tsv"

        logging.debug(f"Writing metrics for bin set '{set_name}' to file: {bins_metric_file}")
        write_bin_info(bin_set_name_to_bins[set_name], bins_metric_file)

    logging.debug("Completed writing all original input bin metrics.")
bins:") diff --git a/tests/bin_manager_test.py b/tests/bin_manager_test.py index f08d6a1..f939b53 100644 --- a/tests/bin_manager_test.py +++ b/tests/bin_manager_test.py @@ -9,6 +9,7 @@ import networkx as nx import logging +from pathlib import Path def test_get_all_possible_combinations(): input_list = ["2", "3", "4"] @@ -524,14 +525,14 @@ def create_temp_bin_directories(tmpdir, create_temp_bin_files): bin2 = bin_dir2.join("binA.fasta") bin2.write(">contig3\nTTAG\n>contig4\nCGAT\n>contig5\nCGGC") - return {"set1": str(bin_dir1), "set2": str(bin_dir2)} + return {"set1": Path(bin_dir1), "set2": Path(bin_dir2)} def test_get_bins_from_directory(create_temp_bin_files): bin_dir = create_temp_bin_files set_name = "TestSet" - bins = bin_manager.get_bins_from_directory(str(bin_dir), set_name, fasta_extensions={'.fasta'}) + bins = bin_manager.get_bins_from_directory(Path(bin_dir), set_name, fasta_extensions={'.fasta'}) assert len(bins) == 2 # Ensure that the correct number of Bin objects is returned @@ -546,7 +547,7 @@ def test_get_bins_from_directory(create_temp_bin_files): assert bins[0].name in ["bin2.fasta", "bin1.fasta"] def test_get_bins_from_directory_no_files(tmpdir): - bin_dir = str(tmpdir.mkdir("empty_bins")) + bin_dir = Path(tmpdir.mkdir("empty_bins")) set_name = "EmptySet" bins = bin_manager.get_bins_from_directory(bin_dir, set_name, fasta_extensions={'.fasta'}) @@ -554,7 +555,7 @@ def test_get_bins_from_directory_no_files(tmpdir): assert len(bins) == 0 # Ensure that no Bin objects are returned for an empty directory def test_get_bins_from_directory_no_wrong_extensions(create_temp_bin_files): - bin_dir = create_temp_bin_files + bin_dir = Path(create_temp_bin_files) set_name = "TestSet" bins = bin_manager.get_bins_from_directory(bin_dir, set_name, fasta_extensions={'.fna'}) diff --git a/tests/io_manager_test.py b/tests/io_manager_test.py index aa1856c..3a44368 100644 --- a/tests/io_manager_test.py +++ b/tests/io_manager_test.py @@ -1,7 +1,7 @@ import pytest from 
@patch('binette.io_manager.write_bin_info')
def test_write_original_bin_metrics(mock_write_bin_info, bin1, bin2, tmp_path):
    """
    Check that `write_original_bin_metrics` creates the report directory and calls
    `write_bin_info` once per bin set with the expected rank-prefixed file path.
    """
    temp_directory = tmp_path / "test_output"

    mock_bins = {"set1": {bin1}, "set2": {bin2}}

    # Call the function with mock data
    io_manager.write_original_bin_metrics(mock_bins, temp_directory)

    # The directory is created by the function itself, independently of the patched writer.
    assert temp_directory.exists(), "Output directory should be created."

    # `write_bin_info` is patched, so no TSV file is actually written; only the
    # target paths handed to it can be verified.
    expected_files = [
        temp_directory / "input_bins_1.set1.tsv",
        temp_directory / "input_bins_2.set2.tsv",
    ]

    # Check if `write_bin_info` was called correctly
    assert mock_write_bin_info.call_count == 2, "write_bin_info should be called once for each bin set."

    # Verify the specific calls to `write_bin_info`
    mock_write_bin_info.assert_any_call(mock_bins['set1'], expected_files[0])
    mock_write_bin_info.assert_any_call(mock_bins['set2'], expected_files[1])