From 25144570229ef27a5637d7152401c840e943df42 Mon Sep 17 00:00:00 2001 From: Jean Mainguy Date: Wed, 10 Jul 2024 15:18:01 +0200 Subject: [PATCH 01/36] Run CI on macOS runner as well --- .github/workflows/binette_ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/binette_ci.yml b/.github/workflows/binette_ci.yml index 6972dee..98bf1b8 100644 --- a/.github/workflows/binette_ci.yml +++ b/.github/workflows/binette_ci.yml @@ -13,7 +13,7 @@ on: jobs: build: - runs-on: ubuntu-latest + os: ['ubuntu-latest', 'macos-13'] defaults: run: shell: bash -el {0} From 9e3098e8851657e8d5a46f479b289bc65d6f4119 Mon Sep 17 00:00:00 2001 From: Jean Mainguy Date: Wed, 10 Jul 2024 15:20:41 +0200 Subject: [PATCH 02/36] fix improper typo --- .github/workflows/binette_ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/binette_ci.yml b/.github/workflows/binette_ci.yml index 98bf1b8..6485dc6 100644 --- a/.github/workflows/binette_ci.yml +++ b/.github/workflows/binette_ci.yml @@ -12,14 +12,14 @@ on: jobs: build: - - os: ['ubuntu-latest', 'macos-13'] defaults: run: shell: bash -el {0} + runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: + os: ['ubuntu-latest', 'macos-13'] python-version: [3.8] #["3.8", "3.9", "3.10"] steps: From 8b4f80f3459ac6369188d3310b11c22a16f6ea5b Mon Sep 17 00:00:00 2001 From: Jean Mainguy Date: Wed, 10 Jul 2024 15:30:11 +0200 Subject: [PATCH 03/36] Update setup-miniconda --- .github/workflows/binette_ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/binette_ci.yml b/.github/workflows/binette_ci.yml index 6485dc6..ac98620 100644 --- a/.github/workflows/binette_ci.yml +++ b/.github/workflows/binette_ci.yml @@ -24,10 +24,10 @@ jobs: steps: # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 # Install requirements - - uses: 
conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3 with: mamba-version: "*" python-version: ${{ matrix.python-version }} From 7fe9dd7fc33e160a8f2a9809913a7995ca14b0f2 Mon Sep 17 00:00:00 2001 From: Jean Mainguy Date: Mon, 22 Jul 2024 10:18:28 +0200 Subject: [PATCH 04/36] improve clarity as suggested in the JOSS review see : https://github.com/openjournals/joss-reviews/issues/6782#issuecomment-2187193217 --- paper/paper.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/paper/paper.md b/paper/paper.md index b88ec81..3213cde 100644 --- a/paper/paper.md +++ b/paper/paper.md @@ -41,7 +41,7 @@ Binette is a Python reimplementation and enhanced version of the bin refinement ![**Overview of Binette Steps**. **(A) Intermediate Bin Creation Example**: Bins are represented as square shapes, each containing colored lines representing the contigs they contain. Creation of intermediate bins involves the initial bins sharing at least one contig. Set operations are applied to the contigs within the bins to generate these intermediate bins. **(B) Binette Workflow Overview**: Input bins serve as the basis for generating intermediate bins. Each bin undergoes a scoring process utilizing quality metrics provided by CheckM2. Subsequently, the bins are sorted based on their scores, and a selection process is executed to retain non-redundant bins.\label{fig:overview}](./binette_overview.pdf) -Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins (\autoref{fig:overview}.B). The ability to score bins is based on CheckM2 rather than CheckM1 as in the metaWRAP pipeline. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. 
This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that uses Cython to provide bindings to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. +Bin completeness and contamination are assessed using CheckM2 [@chklovski2023checkm2]. Bins are scored using the following scoring function: $completeness - weight * contamination$, with the default weight set to 2. These scored bins are then sorted, facilitating the selection of a final new set of non-redundant bins (\autoref{fig:overview}.B). The ability to score bins is based on CheckM2 rather than CheckM1, which is what the metaWRAP pipeline uses. CheckM2 uses a novel approach to evaluate bin quality based on machine learning techniques. This approach improves speed and also provides better results than CheckM1. Binette initiates CheckM2 processing by running its initial steps once for all contigs within the input bins. These initial steps involve gene prediction using Prodigal and alignment against the CheckM2 database using Diamond [@buchfink2015diamond]. Binette uses Pyrodigal [@larralde2022pyrodigal], a Python module that uses Cython to provide bindings to Prodigal [@hyatt2010prodigal]. The intermediate Checkm2 results are then used to assess the quality of individual bins, eliminating redundant calculations and speeding up the refinement process. 
Binette serves as the bin refinement tool within the [metagWGS](https://forgemia.inra.fr/genotoul-bioinfo/metagwgs) metagenomic analysis pipeline [@metagWGS_inprep], providing a robust and faster alternative to the bin refinement module of the metaWRAP pipeline as well as other similar bin refinement tools. From 56bcd7f25b89ee3febe443e175af41bb752be618 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 28 Aug 2024 17:49:55 +0200 Subject: [PATCH 05/36] fix type pylance warning --- binette/bin_manager.py | 63 ++++++++++++++++++++++++++++++++------- binette/bin_quality.py | 25 ++++++++-------- binette/cds.py | 10 +++---- binette/contig_manager.py | 6 ++-- binette/diamond.py | 6 ++-- binette/io_manager.py | 10 +++---- binette/main.py | 54 ++++++++++++++++++++++++++------- 7 files changed, 125 insertions(+), 49 deletions(-) diff --git a/binette/bin_manager.py b/binette/bin_manager.py index 06d25c7..8f4fd6c 100644 --- a/binette/bin_manager.py +++ b/binette/bin_manager.py @@ -7,12 +7,12 @@ import itertools import networkx as nx -from typing import List, Dict, Iterable, Tuple, Set +from typing import List, Dict, Iterable, Tuple, Set, Mapping class Bin: counter = 0 - def __init__(self, contigs: Iterable[str], origin: str, name: str) -> None: + def __init__(self, contigs: Iterable[str], origin: str, name: str, is_original:bool=False) -> None: """ Initialize a Bin object. @@ -35,6 +35,8 @@ def __init__(self, contigs: Iterable[str], origin: str, name: str) -> None: self.contamination = None self.score = None + self.is_original = is_original + def __eq__(self, other: 'Bin') -> bool: """ Compare the Bin object with another object for equality. @@ -163,6 +165,47 @@ def union(self, *others: 'Bin') -> 'Bin': return Bin(contigs, origin, name) + def is_complete_enough(self, min_completeness: float) -> bool: + """ + Determine if a bin is complete enough based on completeness threshold. + + :param min_completeness: The minimum completeness required for a bin. 
+ + :raises ValueError: If completeness has not been set (is None). + + :return: True if the bin meets the min_completeness threshold; False otherwise. + """ + + if self.completeness is None: + raise ValueError( + f"The bin '{self.name}' with ID '{self.id}' has not been evaluated for completeness or contamination, " + "and therefore cannot be assessed." + ) + + return self.completeness >= min_completeness + + + def is_high_quality(self, min_completeness: float, max_contamination: float) -> bool: + """ + Determine if a bin is considered high quality based on completeness and contamination thresholds. + + :param min_completeness: The minimum completeness required for a bin to be considered high quality. + :param max_contamination: The maximum allowed contamination for a bin to be considered high quality. + + :raises ValueError: If either completeness or contamination has not been set (is None). + + :return: True if the bin meets the high quality criteria; False otherwise. + """ + if self.completeness is None or self.contamination is None: + raise ValueError( + f"The bin '{self.name}' with ID '{self.id}' has not been evaluated for completeness or contamination, " + "and therefore cannot be assessed for high quality." + ) + + return self.completeness >= min_completeness and self.contamination <= max_contamination + + + def get_bins_from_directory(bin_dir: str, set_name: str, fasta_extensions: Set[str]) -> List[Bin]: """ Retrieves a list of Bin objects from a directory containing bin FASTA files. 
@@ -239,7 +282,7 @@ def get_bins_from_contig2bin_table(contig2bin_table: str, set_name: str) -> List if line.startswith("#") or line.startswith("@"): logging.debug(f"Ignoring a line from {contig2bin_table}: {line}") continue - contig_name = line.strip().split("\t")[0] + contig_name = line.strip().split()[0] bin_name = line.strip().split("\t")[1] bin_name2contigs[bin_name].add(contig_name) @@ -250,7 +293,7 @@ def get_bins_from_contig2bin_table(contig2bin_table: str, set_name: str) -> List return bins -def from_bin_sets_to_bin_graph(bin_name_to_bin_set: Dict[str, set]) -> nx.Graph: +def from_bin_sets_to_bin_graph(bin_name_to_bin_set: Mapping[str, Iterable[Bin]]) -> nx.Graph: """ Creates a bin graph from a dictionary of bin sets. @@ -272,7 +315,7 @@ def from_bin_sets_to_bin_graph(bin_name_to_bin_set: Dict[str, set]) -> nx.Graph: -def get_all_possible_combinations(clique: Iterable) -> Iterable[Tuple]: +def get_all_possible_combinations(clique: List) -> Iterable[Tuple]: """ Generates all possible combinations of elements from a given clique. @@ -366,7 +409,7 @@ def get_union_bins(G: nx.Graph, max_conta: int = 50) -> Set[Bin]: return union_bins -def select_best_bins(bins: List[Bin]) -> List[Bin]: +def select_best_bins(bins: Set[Bin]) -> List[Bin]: """ Selects the best bins from a list of bins based on their scores, N50 values, and IDs. @@ -392,7 +435,7 @@ def select_best_bins(bins: List[Bin]) -> List[Bin]: return selected_bins -def dereplicate_bin_sets(bin_sets): +def dereplicate_bin_sets(bin_sets) -> Set[Bin]: """ Dereplicates bins from different bin sets to obtain a non-redundant bin set. @@ -403,7 +446,7 @@ def dereplicate_bin_sets(bin_sets): return set().union(*bin_sets) -def get_contigs_in_bins(bins: List[Bin]) -> Set[str]: +def get_contigs_in_bins(bins: Iterable[Bin]) -> Set[str]: """ Retrieves all contigs present in the given list of bins. 
@@ -414,7 +457,7 @@ def get_contigs_in_bins(bins: List[Bin]) -> Set[str]: return set().union(*(b.contigs for b in bins)) -def rename_bin_contigs(bins: List[Bin], contig_to_index: dict): +def rename_bin_contigs(bins: Iterable[Bin], contig_to_index: dict): """ Renames the contigs in the bins based on the provided mapping. @@ -425,7 +468,7 @@ def rename_bin_contigs(bins: List[Bin], contig_to_index: dict): b.contigs = {contig_to_index[contig] for contig in b.contigs} b.hash = hash(str(sorted(b.contigs))) -def create_intermediate_bins(bin_set_name_to_bins: Dict[str, Set[Bin]]) -> Set[Bin]: +def create_intermediate_bins(bin_set_name_to_bins: Mapping[str, Iterable[Bin]]) -> Set[Bin]: """ Creates intermediate bins from a dictionary of bin sets. diff --git a/binette/bin_quality.py b/binette/bin_quality.py index b9303cf..4722188 100644 --- a/binette/bin_quality.py +++ b/binette/bin_quality.py @@ -1,10 +1,9 @@ #!/usr/bin/env python3 -import concurrent.futures as cf import logging import os from collections import Counter from itertools import islice -from typing import Dict, Iterable, List, Tuple, Iterator +from typing import Dict, Iterable, Optional, Tuple, Iterator, Set import numpy as np import pandas as pd @@ -17,7 +16,7 @@ from checkm2 import keggData, modelPostprocessing, modelProcessing from binette.bin_manager import Bin -def get_bins_metadata_df(bins: List, contig_to_cds_count: Dict[str, int], contig_to_aa_counter: Dict[str, Counter], contig_to_aa_length: Dict[str, int]) -> pd.DataFrame: +def get_bins_metadata_df(bins: Iterable[Bin], contig_to_cds_count: Dict[str, int], contig_to_aa_counter: Dict[str, Counter], contig_to_aa_length: Dict[str, int]) -> pd.DataFrame: """ Generate a DataFrame containing metadata for a list of bins. 
@@ -56,7 +55,7 @@ def get_bins_metadata_df(bins: List, contig_to_cds_count: Dict[str, int], contig metadata_df = metadata_df.set_index("Name", drop=False) return metadata_df -def get_diamond_feature_per_bin_df(bins: List, contig_to_kegg_counter: Dict[str, Counter]) -> Tuple[pd.DataFrame, int]: +def get_diamond_feature_per_bin_df(bins: Iterable[Bin], contig_to_kegg_counter: Dict[str, Counter]) -> Tuple[pd.DataFrame, int]: """ Generate a DataFrame containing Diamond feature counts per bin and completeness information for pathways, categories, and modules. @@ -135,11 +134,11 @@ def add_bin_size_and_N50(bins: Iterable[Bin], contig_to_size: Dict[str,int]): bin_obj.add_N50(n50) -def add_bin_metrics(bins: List, contig_info: Dict, contamination_weight: float, threads: int = 1): +def add_bin_metrics(bins: Set[Bin], contig_info: Dict, contamination_weight: float, threads: int = 1): """ - Add metrics to a list of bins. + Add metrics to a Set of bins. - :param bins: List of bin objects. + :param bins: Set of bin objects. :param contig_info: Dictionary containing contig information. :param contamination_weight: Weight for contamination assessment. :param threads: Number of threads for parallel processing (default is 1). 
@@ -183,13 +182,13 @@ def chunks(iterable: Iterable, size: int) -> Iterator[Tuple]: return iter(lambda: tuple(islice(it, size)), ()) -def assess_bins_quality_by_chunk(bins: List, +def assess_bins_quality_by_chunk(bins: Iterable[Bin], contig_to_kegg_counter: Dict, contig_to_cds_count: Dict, contig_to_aa_counter: Dict, contig_to_aa_length: Dict, contamination_weight: float, - postProcessor:modelPostprocessing.modelProcessor = None, + postProcessor:Optional[modelPostprocessing.modelProcessor] = None, threads: int = 1, chunk_size: int = 2500): """ @@ -223,13 +222,13 @@ def assess_bins_quality_by_chunk(bins: List, ) def assess_bins_quality( - bins: List, + bins: Iterable[Bin], contig_to_kegg_counter: Dict, contig_to_cds_count: Dict, contig_to_aa_counter: Dict, contig_to_aa_length: Dict, contamination_weight: float, - postProcessor: modelPostprocessing.modelProcessor = None, + postProcessor: Optional[modelPostprocessing.modelProcessor] = None, threads: int = 1,): """ Assess the quality of bins. 
@@ -284,7 +283,7 @@ def assess_bins_quality( final_results["Contamination"] = np.round(final_cont, 2) for bin_obj in bins: - completeness = final_results.loc[bin_obj.id, "Completeness"] - contamination = final_results.loc[bin_obj.id, "Contamination"] + completeness = final_results.at[bin_obj.id, "Completeness"] + contamination = final_results.at[bin_obj.id, "Contamination"] bin_obj.add_quality(completeness, contamination, contamination_weight) diff --git a/binette/cds.py b/binette/cds.py index cebb76e..c665b70 100644 --- a/binette/cds.py +++ b/binette/cds.py @@ -3,7 +3,7 @@ import multiprocessing.pool import logging from collections import Counter, defaultdict -from typing import Dict, List, Iterator, Tuple +from typing import Dict, List, Iterator, Tuple, Any import pyfastx import pyrodigal @@ -52,13 +52,13 @@ def predict(contigs_iterator: Iterator, outfaa: str, threads: int =1) -> Dict[st return contig_to_genes -def predict_genes(find_genes, seq): +def predict_genes(find_genes, seq) -> Tuple[str, pyrodigal.Genes]: return (seq.name, find_genes(seq.seq) ) -def write_faa(outfaa: str, contig_to_genes: Dict[str, List[str]]) -> None: +def write_faa(outfaa: str, contig_to_genes: List[Tuple[str, pyrodigal.Genes]]) -> None: """ Write predicted protein sequences to a FASTA file. @@ -71,7 +71,7 @@ def write_faa(outfaa: str, contig_to_genes: Dict[str, List[str]]) -> None: for contig_id, genes in contig_to_genes: genes.write_translations(fl, contig_id) -def parse_faa_file(faa_file: str) -> Dict[str, List]: +def parse_faa_file(faa_file: str) -> Dict[str, List[str]]: """ Parse a FASTA file containing protein sequences and organize them by contig. 
@@ -115,7 +115,7 @@ def get_contig_cds_metadata_flat(contig_to_genes: Dict[str, List[str]]) -> Tuple return contig_to_cds_count, contig_to_aa_counter, contig_to_aa_length -def get_contig_cds_metadata(contig_to_genes: Dict[str, List[str]], threads: int) -> Tuple[Dict[str, int], Dict[str, Counter], Dict[str, int]]: +def get_contig_cds_metadata(contig_to_genes: Dict[int, Any | List[Any]], threads: int) -> Dict[str, Dict]: """ Calculate metadata for contigs in parallel, including CDS count, amino acid composition, and total amino acid length. diff --git a/binette/contig_manager.py b/binette/contig_manager.py index c6f4f65..4b43733 100644 --- a/binette/contig_manager.py +++ b/binette/contig_manager.py @@ -1,5 +1,5 @@ import pyfastx -from typing import Dict, Tuple +from typing import Dict, Iterable, Tuple, Set, Any, Union def parse_fasta_file(fasta_file: str) -> pyfastx.Fasta: @@ -14,7 +14,7 @@ def parse_fasta_file(fasta_file: str) -> pyfastx.Fasta: return fa -def make_contig_index(contigs: list) -> Tuple[Dict[str, int], Dict[int, str]]: +def make_contig_index(contigs: Set[str]) -> Tuple[Dict[str, int], Dict[int, str]]: """ Create an index mapping for contigs. @@ -27,7 +27,7 @@ def make_contig_index(contigs: list) -> Tuple[Dict[str, int], Dict[int, str]]: return contig_to_index, index_to_contig -def apply_contig_index(contig_to_index: Dict[str, int], contig_to_info: Dict[str, str]) -> Dict[int, str]: +def apply_contig_index(contig_to_index: Dict[str, int], contig_to_info: Dict[str, Any]) -> Dict[int, Union[Any,Iterable[Any]]]: """ Apply the contig index mapping to the contig info dictionary. 
diff --git a/binette/diamond.py b/binette/diamond.py index b5ee166..500f090 100644 --- a/binette/diamond.py +++ b/binette/diamond.py @@ -27,11 +27,11 @@ def get_checkm2_db() -> str: reg_result = re.search("INFO: (/.*.dmnd)", checkm2_database_raw.stderr) - try: - db_path = reg_result.group(1) - except AttributeError: + if reg_result is None: logging.error(f"Something went wrong when retrieving checkm2 db path:\n{checkm2_database_raw.stderr}") sys.exit(1) + else: + db_path = reg_result.group(1) return db_path diff --git a/binette/io_manager.py b/binette/io_manager.py index 467bcc3..bb6c38d 100644 --- a/binette/io_manager.py +++ b/binette/io_manager.py @@ -1,7 +1,7 @@ import logging import os import pyfastx -from typing import List, Dict +from typing import Iterable, List, Dict import csv from binette.bin_manager import Bin @@ -14,7 +14,7 @@ def infer_bin_name_from_bin_inputs(input_bins: List[str]) -> Dict[str, str]: :param input_bins: List of input bin directories. :return: Dictionary mapping inferred bin names to their corresponding directories. """ - logging.debug(f"Inferring bin names from input bins:") + logging.debug("Inferring bin names from input bins:") commonprefix_len = len(os.path.commonprefix(input_bins)) reversed_strings = [s[::-1] for s in input_bins] @@ -30,7 +30,7 @@ def infer_bin_name_from_bin_inputs(input_bins: List[str]) -> Dict[str, str]: return bin_name_to_bin_dir -def write_bin_info(bins: List[Bin], output: str, add_contigs: bool = False): +def write_bin_info(bins: Iterable[Bin], output: str, add_contigs: bool = False): """ Write bin information to a TSV file. 
@@ -86,8 +86,8 @@ def write_bins_fasta(selected_bins: List[Bin], contigs_fasta: str, outdir: str): outfl.write("\n".join(sequences) + "\n") -def check_contig_consistency(contigs_from_assembly: List[str], - contigs_from_elsewhere: List[str], +def check_contig_consistency(contigs_from_assembly: Iterable[str], + contigs_from_elsewhere: Iterable[str], assembly_file: str, elsewhere_file: str ): """ diff --git a/binette/main.py b/binette/main.py index d4eb9f8..9051fdb 100755 --- a/binette/main.py +++ b/binette/main.py @@ -16,7 +16,7 @@ import binette from binette import contig_manager, cds, diamond, bin_quality, bin_manager, io_manager as io -from typing import List, Dict, Set, Tuple +from typing import List, Dict, Optional, Set, Tuple, Union, Sequence, Any def init_logging(verbose, debug): @@ -39,12 +39,19 @@ def init_logging(verbose, debug): f'command line: {" ".join(sys.argv)}', ) + class UniqueStore(Action): """ Custom argparse action to ensure an argument is provided only once. """ - def __call__(self, parser: ArgumentParser, namespace: Namespace, values: str, option_string: str = None) -> None: + def __call__( + self, + parser: ArgumentParser, + namespace: Namespace, + values: Union[str, Sequence[Any], None], + option_string: Optional[str] = None + ) -> None: """ Ensures the argument is only used once. Raises an error if the argument appears multiple times. 
@@ -61,6 +68,7 @@ def __call__(self, parser: ArgumentParser, namespace: Namespace, values: str, op setattr(namespace, self.dest, values) + def parse_arguments(args): """Parse script arguments.""" @@ -149,7 +157,10 @@ def parse_arguments(args): args = parser.parse_args(args) return args -def parse_input_files(bin_dirs: List[str], contig2bin_tables: List[str], contigs_fasta: str, fasta_extensions:Set[str] = {".fasta", ".fna", ".fa"}) -> Tuple[Dict[str, List], List, Dict[str, List], Dict[str, int]]: +def parse_input_files(bin_dirs: List[str], + contig2bin_tables: List[str], + contigs_fasta: str, + fasta_extensions:Set[str] = {".fasta", ".fna", ".fa"}) -> Tuple[Dict[str, List[bin_manager.Bin]], Set[bin_manager.Bin], Set[str], Dict[str, int]]: """ Parses input files to retrieve information related to bins and contigs. @@ -195,9 +206,9 @@ def parse_input_files(bin_dirs: List[str], contig2bin_tables: List[str], contigs return bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length -def manage_protein_alignement(faa_file: str, contigs_fasta: str, contig_to_length: Dict[str, List], - contigs_in_bins: Dict[str, List], diamond_result_file: str, - checkm2_db: str, threads: int, resume: bool, low_mem: bool) -> Tuple[Dict[str, int], Dict[str, int]]: +def manage_protein_alignement(faa_file: str, contigs_fasta: str, contig_to_length: Dict[str, int], + contigs_in_bins: Set[str], diamond_result_file: str, + checkm2_db: str, threads: int, resume: bool, low_mem: bool) -> Tuple[Dict[str, int], Dict[str, List[str]]]: """ Predicts or reuses proteins prediction and runs diamond on them. 
@@ -285,7 +296,7 @@ def select_bins_and_write_them(all_bins: Set[bin_manager.Bin], contigs_fasta: st logging.info(f"Bin Selection: {len(selected_bins)} selected bins") logging.info(f"Filtering bins: only bins with completeness >= {min_completeness} are kept") - selected_bins = [b for b in selected_bins if b.completeness >= min_completeness] + selected_bins = [b for b in selected_bins if b.is_complete_enough(min_completeness)] logging.info(f"Filtering bins: {len(selected_bins)} selected bins") @@ -317,11 +328,11 @@ def log_selected_bin_info(selected_bins: List[bin_manager.Bin], hq_min_completen # Log completeness and contamination in debug log logging.debug("High quality bins:") for sb in selected_bins: - if sb.completeness >= hq_min_completeness and sb.contamination <= hq_max_conta: + if sb.is_high_quality(min_completeness=hq_min_completeness, max_contamination=hq_max_conta): logging.debug(f"> {sb} completeness={sb.completeness}, contamination={sb.contamination}") # Count high-quality bins and single-contig high-quality bins - hq_bins = len([sb for sb in selected_bins if sb.completeness >= hq_min_completeness and sb.contamination <= hq_max_conta]) + hq_bins = len([sb for sb in selected_bins if sb.is_high_quality(min_completeness=hq_min_completeness, max_contamination=hq_max_conta)]) # Log information about high-quality bins thresholds = f"(completeness >= {hq_min_completeness} and contamination <= {hq_max_conta})" @@ -348,7 +359,7 @@ def main(): # Output files # final_bin_report = os.path.join(args.outdir, "final_bins_quality_reports.tsv") - + original_bin_report = os.path.join(args.outdir, "original_bins_quality_reports.tsv") if args.resume: io.check_resume_file(faa_file, diamond_result_file) @@ -381,6 +392,29 @@ def main(): logging.info("Add size and assess quality of input bins") bin_quality.add_bin_metrics(original_bins, contig_metadat, args.contamination_weight, args.threads) + + # for bin_set, bins in bin_set_name_to_bins.items(): + # print(bin_set) + # 
bin_set_name = bin_set.replace("/", "_") + # original_bin_report = os.path.join(args.outdir, f"{bin_set_name}_bins_quality_reports.tsv") + # bins_with_metric = [] + # for bin_obj in bins: + # print(bin_obj.id, bin_obj.score, bin_obj.N50) + # if bin_obj.score is None: + + # matching_bins = [bin_with_metric for bin_with_metric in original_bins if bin_obj == bin_with_metric] + # assert len(matching_bins) == 1, len(matching_bins) + # bins_with_metric.append(matching_bins[0]) + # print("HAS NOT USE MATCHING BIN IN ORIGINAL SET",matching_bins[0].id, matching_bins[0].score, matching_bins[0].N50) + + # else: + # print("has score") + # print(bin_obj.id, bin_obj.score, bin_obj.N50) + # bins_with_metric.append(bin_obj) + + + # io.write_bin_info(bins_with_metric, original_bin_report) + logging.info("Create intermediate bins:") new_bins = bin_manager.create_intermediate_bins(bin_set_name_to_bins) From 79bbbe7e4e9808ecfdf226beb93d7c5f26c3311b Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 28 Aug 2024 18:25:53 +0200 Subject: [PATCH 06/36] silence import warning --- binette/bin_quality.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/binette/bin_quality.py b/binette/bin_quality.py index 4722188..5a46ab2 100644 --- a/binette/bin_quality.py +++ b/binette/bin_quality.py @@ -7,14 +7,16 @@ import numpy as np import pandas as pd +from binette.bin_manager import Bin + # Suppress unnecessary TensorFlow warnings os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" logging.getLogger("tensorflow").setLevel(logging.FATAL) -from checkm2 import keggData, modelPostprocessing, modelProcessing -from binette.bin_manager import Bin +from checkm2 import keggData, modelPostprocessing, modelProcessing # noqa: E402 + def get_bins_metadata_df(bins: Iterable[Bin], contig_to_cds_count: Dict[str, int], contig_to_aa_counter: Dict[str, Counter], contig_to_aa_length: Dict[str, int]) -> pd.DataFrame: """ From 31263bdf5ab945841cbe50a57ababd813c4ccc5e Mon Sep 17 00:00:00 2001 From: 
JeanMainguy Date: Wed, 28 Aug 2024 18:29:09 +0200 Subject: [PATCH 07/36] use union instead of pipe for compatibility reason --- binette/cds.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/binette/cds.py b/binette/cds.py index c665b70..8845abe 100644 --- a/binette/cds.py +++ b/binette/cds.py @@ -3,7 +3,7 @@ import multiprocessing.pool import logging from collections import Counter, defaultdict -from typing import Dict, List, Iterator, Tuple, Any +from typing import Dict, List, Iterator, Tuple, Any, Union import pyfastx import pyrodigal @@ -33,9 +33,9 @@ def predict(contigs_iterator: Iterator, outfaa: str, threads: int =1) -> Dict[st """ try: # for version >=3 of pyrodigal - orf_finder = pyrodigal.GeneFinder(meta="meta") + orf_finder = pyrodigal.GeneFinder(meta="meta") # type: ignore except AttributeError: - orf_finder = pyrodigal.OrfFinder(meta="meta") + orf_finder = pyrodigal.OrfFinder(meta="meta") # type: ignore logging.info(f"Predicting cds sequences with Pyrodigal using {threads} threads.") @@ -115,7 +115,7 @@ def get_contig_cds_metadata_flat(contig_to_genes: Dict[str, List[str]]) -> Tuple return contig_to_cds_count, contig_to_aa_counter, contig_to_aa_length -def get_contig_cds_metadata(contig_to_genes: Dict[int, Any | List[Any]], threads: int) -> Dict[str, Dict]: +def get_contig_cds_metadata(contig_to_genes: Dict[int, Union[Any, List[Any]]], threads: int) -> Dict[str, Dict]: """ Calculate metadata for contigs in parallel, including CDS count, amino acid composition, and total amino acid length. 
From 55f7f5592c5a76ec8c3062a0b2a691f9eb4c763f Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 28 Aug 2024 20:58:53 +0200 Subject: [PATCH 08/36] add some warning if contig are duplicated in an input set and use id in bin name --- binette/bin_manager.py | 175 ++++++++++++++++++++++++++++++-------- binette/main.py | 8 +- tests/bin_manager_test.py | 97 +++++++++++++++++---- 3 files changed, 225 insertions(+), 55 deletions(-) diff --git a/binette/bin_manager.py b/binette/bin_manager.py index 8f4fd6c..457423b 100644 --- a/binette/bin_manager.py +++ b/binette/bin_manager.py @@ -1,5 +1,4 @@ import logging -import os from collections import defaultdict from pathlib import Path @@ -22,7 +21,7 @@ def __init__(self, contigs: Iterable[str], origin: str, name: str, is_original:b """ Bin.counter += 1 - self.origin = origin + self.origin = {origin} self.name = name self.id = Bin.counter self.contigs = set(contigs) @@ -37,7 +36,7 @@ def __init__(self, contigs: Iterable[str], origin: str, name: str, is_original:b self.is_original = is_original - def __eq__(self, other: 'Bin') -> bool: + def __eq__(self, other) -> bool: """ Compare the Bin object with another object for equality. @@ -60,7 +59,7 @@ def __str__(self) -> str: :return: The string representation of the Bin object. """ - return f"{self.origin}_{self.id} ({len(self.contigs)} contigs)" + return f"Bin {self.id} from {';'.join(self.origin)} ({len(self.contigs)} contigs)" def overlaps_with(self, other: 'Bin') -> Set[str]: """ @@ -71,18 +70,18 @@ def overlaps_with(self, other: 'Bin') -> Set[str]: """ return self.contigs & other.contigs - def __and__(self, other: 'Bin') -> 'Bin': - """ - Perform a logical AND operation between this bin and another bin. + # def __and__(self, other: 'Bin') -> 'Bin': + # """ + # Perform a logical AND operation between this bin and another bin. - :param other: The other Bin object. - :return: A new Bin object representing the intersection of the bins. 
- """ - contigs = self.contigs & other.contigs - name = f"{self.name} & {other.name}" - origin = f"{self.origin} & {other.origin}" + # :param other: The other Bin object. + # :return: A new Bin object representing the intersection of the bins. + # """ + # contigs = self.contigs & other.contigs + # name = f"{self.name} & {other.name}" + # origin = "intersection" - return Bin(contigs, origin, name) + # return Bin(contigs, origin, name) def add_length(self, length: int) -> None: @@ -131,7 +130,7 @@ def intersection(self, *others: 'Bin') -> 'Bin': """ other_contigs = (o.contigs for o in others) contigs = self.contigs.intersection(*other_contigs) - name = f"{self.name} & {' & '.join([other.name for other in others])}" + name = f"{self.id} & {' & '.join([str(other.id) for other in others])}" origin = "intersec" return Bin(contigs, origin, name) @@ -145,7 +144,7 @@ def difference(self, *others: 'Bin') -> 'Bin': """ other_contigs = (o.contigs for o in others) contigs = self.contigs.difference(*other_contigs) - name = f"{self.name} - {' - '.join([other.name for other in others])}" + name = f"{self.id} - {' - '.join([str(other.id) for other in others])}" origin = "diff" return Bin(contigs, origin, name) @@ -159,7 +158,7 @@ def union(self, *others: 'Bin') -> 'Bin': """ other_contigs = (o.contigs for o in others) contigs = self.contigs.union(*other_contigs) - name = f"{self.name} | {' | '.join([other.name for other in others])}" + name = f"{self.id} | {' | '.join([str(other.id) for other in others])}" origin = "union" return Bin(contigs, origin, name) @@ -234,7 +233,7 @@ def get_bins_from_directory(bin_dir: str, set_name: str, fasta_extensions: Set[s -def parse_bin_directories(bin_name_to_bin_dir: Dict[str, str], fasta_extensions:Set[str]) -> Dict[str, list]: +def parse_bin_directories(bin_name_to_bin_dir: Dict[str, str], fasta_extensions:Set[str]) -> Dict[str, Set[Bin]]: """ Parses multiple bin directories and returns a dictionary mapping bin names to a list of Bin objects. 
@@ -243,28 +242,58 @@ def parse_bin_directories(bin_name_to_bin_dir: Dict[str, str], fasta_extensions: :return: A dictionary mapping bin names to a list of Bin objects created from the bin directories. """ - bin_name_to_bins = {} + bin_set_name_to_bins = {} for name, bin_dir in bin_name_to_bin_dir.items(): - bin_name_to_bins[name] = get_bins_from_directory(bin_dir, name, fasta_extensions) + bins = get_bins_from_directory(bin_dir, name, fasta_extensions) + set_of_bins = set(bins) + + # Calculate the number of duplicates + num_duplicates = len(bins) - len(set_of_bins) + + if num_duplicates > 0: + logging.warning( + f'{num_duplicates} bins with identical contig compositions detected in bin set "{name}". ' + 'These bins were merged to ensure uniqueness.' + ) + + # Store the unique set of bins + bin_set_name_to_bins[name] = set_of_bins - return bin_name_to_bins + return bin_set_name_to_bins -def parse_contig2bin_tables(bin_name_to_bin_tables: Dict[str, str]) -> Dict[str, list]: +def parse_contig2bin_tables(bin_name_to_bin_tables: Dict[str, str]) -> Dict[str, Set['Bin']]: """ - Parses multiple contig-to-bin tables and returns a dictionary mapping bin names to a list of Bin objects. + Parses multiple contig-to-bin tables and returns a dictionary mapping bin names to a set of unique Bin objects. - :param bin_name_to_bin_tables: A dictionary mapping bin names to their respective contig-to-bin tables. + Logs a warning if duplicate bins are detected within a bin set. - :return: A dictionary mapping bin names to a list of Bin objects created from the contig-to-bin tables. + :param bin_name_to_bin_tables: A dictionary where keys are bin set names and values are file paths or identifiers + for contig-to-bin tables. Each table is parsed to extract Bin objects. + + :return: A dictionary where keys are bin set names and values are sets of Bin objects. Duplicates are removed based + on contig composition. 
""" - bin_name_to_bins = {} + bin_set_name_to_bins = {} for name, contig2bin_table in bin_name_to_bin_tables.items(): - bin_name_to_bins[name] = get_bins_from_contig2bin_table(contig2bin_table, name) + bins = get_bins_from_contig2bin_table(contig2bin_table, name) + set_of_bins = set(bins) + + # Calculate the number of duplicates + num_duplicates = len(bins) - len(set_of_bins) + + if num_duplicates > 0: + logging.warning( + f'{num_duplicates*2} bins with identical contig compositions detected in bin set "{name}". ' + 'These bins were merged to ensure uniqueness.' + ) - return bin_name_to_bins + # Store the unique set of bins + bin_set_name_to_bins[name] = set_of_bins + + return bin_set_name_to_bins def get_bins_from_contig2bin_table(contig2bin_table: str, set_name: str) -> List[Bin]: @@ -434,27 +463,101 @@ def select_best_bins(bins: Set[Bin]) -> List[Bin]: logging.info(f"Selected {len(selected_bins)} bins") return selected_bins +def group_identical_bins(bins:Iterable[Bin]) -> List[List[Bin]]: + """ + Group identical bins together + + :param bins: list of bins + + return List of list of identical bins + """ + binhash_to_bins = defaultdict(list) + + # Collect bins by their hash values + for bin_obj in bins: + binhash_to_bins[bin_obj.hash].append(bin_obj) + + return list(binhash_to_bins.values()) -def dereplicate_bin_sets(bin_sets) -> Set[Bin]: + +def dereplicate_bin_sets(bin_sets: Iterable[Set['Bin']]) -> Set['Bin']: + """ + Consolidate bins from multiple bin sets into a single set of non-redundant bins. + + Bins with the same hash are considered duplicates. For each group of duplicates, + the origins are merged, and only one representative bin is kept. + + :param bin_sets: An iterable of sets, where each set contains `Bin` objects. These sets are merged + into a single set of unique bins by consolidating bins with the same hash. + + :return: A set of `Bin` objects with duplicates removed. 
Each `Bin` in the resulting set has + merged origins from the bins it was consolidated with. """ - Dereplicates bins from different bin sets to obtain a non-redundant bin set. + all_bins = (bin_obj for bins in bin_sets for bin_obj in bins) + list_of_identical_bins = group_identical_bins(all_bins) - :param bin_sets: A list of bin sets. + dereplicated_bins = set() - :return: A set of non-redundant bins. + # Merge bins with the same hash + for identical_bins in list_of_identical_bins: + # Select the first bin as the representative + selected_bin = identical_bins[0] + for bin_obj in identical_bins[1:]: + # Merge origins of all bins with the same hash + selected_bin.origin |= bin_obj.origin + + # Add the representative bin to the result set + dereplicated_bins.add(selected_bin) + + return dereplicated_bins + +def get_contigs_in_bin_sets(bin_set_name_to_bins: Dict[str, Set[Bin]]) -> Set[str]: """ - return set().union(*bin_sets) + Processes bin sets to check for duplicated contigs and logs detailed information about each bin set. + + :param bin_set_name_to_bins: A dictionary where keys are bin set names and values are sets of Bin objects. + + :return: A set of contig names found in bin sets + """ + # To track all unique contigs across bin sets + all_contigs_in_bins = set() + + for bin_set_name, bins in bin_set_name_to_bins.items(): + list_contigs_in_bin_sets = get_contigs_in_bins(bins) + + # Count duplicates + contig_counts = {contig: list_contigs_in_bin_sets.count(contig) for contig in list_contigs_in_bin_sets} + duplicated_contigs = {contig: count for contig, count in contig_counts.items() if count > 1} + + if duplicated_contigs: + logging.warning( + f"Bin set '{bin_set_name}' contains {len(duplicated_contigs)} duplicated contigs. 
" + "Details: " + ", ".join(f"{contig} (found {count} times)" for contig, count in duplicated_contigs.items()) + ) + + # Unique contigs in current bin set + unique_contigs_in_bin_set = set(list_contigs_in_bin_sets) + + # Update global contig tracker + all_contigs_in_bins |= unique_contigs_in_bin_set + + # Log summary for the current bin set + logging.debug( + f"Bin set '{bin_set_name}': {len(bins)} bins, {len(unique_contigs_in_bin_set)} unique contigs." + ) + + return all_contigs_in_bins -def get_contigs_in_bins(bins: Iterable[Bin]) -> Set[str]: +def get_contigs_in_bins(bins: Iterable[Bin]) -> List[str]: """ Retrieves all contigs present in the given list of bins. :param bins: A list of Bin objects. - :return: A set of contigs present in the bins. + :return: A list of contigs present in the bins. """ - return set().union(*(b.contigs for b in bins)) + return [contig for b in bins for contig in b.contigs] def rename_bin_contigs(bins: Iterable[Bin], contig_to_index: dict): diff --git a/binette/main.py b/binette/main.py index 9051fdb..055a74d 100755 --- a/binette/main.py +++ b/binette/main.py @@ -160,7 +160,7 @@ def parse_arguments(args): def parse_input_files(bin_dirs: List[str], contig2bin_tables: List[str], contigs_fasta: str, - fasta_extensions:Set[str] = {".fasta", ".fna", ".fa"}) -> Tuple[Dict[str, List[bin_manager.Bin]], Set[bin_manager.Bin], Set[str], Dict[str, int]]: + fasta_extensions:Set[str] = {".fasta", ".fna", ".fa"}) -> Tuple[Dict[str, Set[bin_manager.Bin]], Set[bin_manager.Bin], Set[str], Dict[str, int]]: """ Parses input files to retrieve information related to bins and contigs. 
@@ -185,12 +185,12 @@ def parse_input_files(bin_dirs: List[str], bin_name_to_bin_table = io.infer_bin_name_from_bin_inputs(contig2bin_tables) bin_set_name_to_bins = bin_manager.parse_contig2bin_tables(bin_name_to_bin_table) - logging.info(f"{len(bin_set_name_to_bins)} bin sets processed:") + logging.info(f"Processing {len(bin_set_name_to_bins)} bin sets.") for bin_set_id, bins in bin_set_name_to_bins.items(): logging.info(f" {bin_set_id} - {len(bins)} bins") + contigs_in_bins = bin_manager.get_contigs_in_bin_sets(bin_set_name_to_bins) original_bins = bin_manager.dereplicate_bin_sets(bin_set_name_to_bins.values()) - contigs_in_bins = bin_manager.get_contigs_in_bins(original_bins) logging.info(f"Parsing contig fasta file: {contigs_fasta}") contigs_object = contig_manager.parse_fasta_file(contigs_fasta) @@ -405,7 +405,7 @@ def main(): # matching_bins = [bin_with_metric for bin_with_metric in original_bins if bin_obj == bin_with_metric] # assert len(matching_bins) == 1, len(matching_bins) # bins_with_metric.append(matching_bins[0]) - # print("HAS NOT USE MATCHING BIN IN ORIGINAL SET",matching_bins[0].id, matching_bins[0].score, matching_bins[0].N50) + # print("HAS NOT USE MATCHING BIN IN ORIGINAL SET", matching_bins[0].id, matching_bins[0].score, matching_bins[0].N50) # else: # print("has score") diff --git a/tests/bin_manager_test.py b/tests/bin_manager_test.py index 4b57f92..f5c5829 100644 --- a/tests/bin_manager_test.py +++ b/tests/bin_manager_test.py @@ -8,6 +8,8 @@ from binette import bin_manager import networkx as nx +import logging + def test_get_all_possible_combinations(): input_list = ["2", "3", "4"] expected_list = [("2", "3"), ("2", "4"), ("3", "4"), ("2", "3", "4")] @@ -21,6 +23,7 @@ def example_bin_set1(): bin2 = bin_manager.Bin(contigs={"3", "4"}, origin="test1", name="bin2") bin3 = bin_manager.Bin(contigs={"5"}, origin="test1", name="bin2") return {bin1, bin2, bin3} + @pytest.fixture def example_bin_set2(): bin1 = bin_manager.Bin(contigs={"1", "2", 
"3"}, origin="test2", name="binA") @@ -96,13 +99,13 @@ def test_add_quality(): -def test_two_bin_intersection(): - bin1 = bin_manager.Bin(contigs={"1", "2", "e", "987"}, origin="test1", name="bin1") - bin2 = bin_manager.Bin(contigs={"1", "e", "2", "33"}, origin="test2", name="binA") +# def test_two_bin_intersection(): +# bin1 = bin_manager.Bin(contigs={"1", "2", "e", "987"}, origin="test1", name="bin1") +# bin2 = bin_manager.Bin(contigs={"1", "e", "2", "33"}, origin="test2", name="binA") - bin_intersection = bin1 & bin2 +# bin_intersection = bin1 & bin2 - assert bin_intersection == bin_manager.Bin({"1", "2", "e"}, "", "") +# assert bin_intersection == bin_manager.Bin({"1", "2", "e"}, "", "") def test_multiple_bins_intersection(): @@ -159,7 +162,7 @@ def test_bin_union2(): # Check the result expected_contigs = {'contig1', 'contig2', 'contig3', 'contig4', 'contig5'} expected_name = 'bin1 | bin2 | bin3' - expected_origin = 'union' + expected_origin = {'union'} assert union_bin.contigs == expected_contigs assert union_bin.name == expected_name @@ -308,7 +311,7 @@ def test_get_contigs_in_bins(): contigs = bin_manager.get_contigs_in_bins(bin_set) - assert contigs == {"c1", "c2", "c3", "c4", "c18"} + assert set(contigs) == {"c1", "c2", "c3", "c4", "c18"} def test_dereplicate_bin_sets(): @@ -467,12 +470,32 @@ def test_parse_contig2bin_tables(tmp_path): for name, expected in expected_bins.items(): assert name in result_bin_dict assert len(result_bin_dict[name]) == len(expected) - for result_bin, expected_bin in zip(result_bin_dict[name], expected): - assert result_bin.contigs == expected_bin.contigs - assert result_bin.name == expected_bin.name - assert result_bin.origin == expected_bin.origin + for result_bin in result_bin_dict[name]: + assert result_bin in expected + + +def test_parse_contig2bin_tables_with_duplicated_bins(tmp_path, caplog): + # Create temporary contig-to-bin tables for testing + test_tables = { + "set1": [ + "# Sample contig-to-bin table for bin1", + 
"contig1\tbin1", + "contig2\tbin1", + "contig3\tbin2", + "contig3\tbin3", + ] + } + # Create temporary files for contig-to-bin tables + for name, content in test_tables.items(): + table_path = tmp_path / f"test_{name}_contig2bin_table.txt" + table_path.write_text("\n".join(content)) + # Call the function to parse contig-to-bin tables + bin_manager.parse_contig2bin_tables({name: str(tmp_path / f"test_{name}_contig2bin_table.txt") for name in test_tables}) + expected_log_message = ('2 bins with identical contig compositions detected in bin set "set1". ' + 'These bins were merged to ensure uniqueness.') + assert expected_log_message in caplog.text @pytest.fixture @@ -518,8 +541,8 @@ def test_get_bins_from_directory(create_temp_bin_files): assert isinstance(bins[1], bin_manager.Bin) assert bins[1].contigs in [{"contig1", "contig2"}, {"contig3", "contig4"}] assert bins[0].contigs in [{"contig1", "contig2"}, {"contig3", "contig4"}] - assert bins[0].origin == set_name - assert bins[1].origin == set_name + assert bins[0].origin == {set_name} + assert bins[1].origin == {set_name} assert bins[1].name in ["bin2.fasta", "bin1.fasta"] assert bins[0].name in ["bin2.fasta", "bin1.fasta"] @@ -551,10 +574,54 @@ def test_parse_bin_directories(create_temp_bin_directories): assert len(bins) == 2 # Ensure that the correct number of bin directories is parsed # Check if the Bin objects are created with the correct contigs, set name, and bin names - assert isinstance(bins["set1"][0], bin_manager.Bin) - assert isinstance(bins["set2"][0], bin_manager.Bin) + assert isinstance(list(bins["set1"])[0], bin_manager.Bin) + assert isinstance(list(bins["set2"])[0], bin_manager.Bin) assert len(bins["set2"]) == 1 assert len(bins["set1"]) == 2 +def test_get_contigs_in_bin_sets(example_bin_set1, example_bin_set2, caplog): + """ + Test the get_contigs_in_bin_sets function for correct behavior. + + :param mock_bins: The mock_bins fixture providing test bin data. 
+ :param caplog: The pytest caplog fixture to capture logging output. + """ + + bin_set_name_to_bins = {"set1":example_bin_set1, + "set2":example_bin_set2} + + # Test the function with valid data + with caplog.at_level(logging.WARNING): + result = bin_manager.get_contigs_in_bin_sets(bin_set_name_to_bins) + + # Expected unique contigs + expected_contigs = {"1", "2", "3", "4", "5"} + + # Check if the result matches expected contigs + assert result == expected_contigs, "The returned set of contigs is incorrect." + +def test_get_contigs_in_bin_sets_with_duplicated_warning(example_bin_set1, caplog): + + bin1 = bin_manager.Bin(contigs={"contig1", "2"}, origin="test1", name="bin1") + bin2 = bin_manager.Bin(contigs={"contig1"}, origin="test1", name="binA") + + bin_set_name_to_bins = { + "set1":example_bin_set1, + "set_dup":{bin1, bin2}, + } + + # Test the function with valid data + with caplog.at_level(logging.WARNING): + result = bin_manager.get_contigs_in_bin_sets(bin_set_name_to_bins) + + # Expected unique contigs + expected_contigs = {"1", "2", "3", "4", "5", "contig1"} + + # Check if the result matches expected contigs + assert result == expected_contigs, "The returned set of contigs is incorrect." + + # Check for expected warnings about duplicate contigs + duplicate_warning = "Bin set 'set_dup' contains 1 duplicated contigs. Details: contig1 (found 2 times)" + assert duplicate_warning in caplog.text, "The warning for duplicate contigs was not logged correctly." 
From bff2899189841ba5e0324949b47d24dbcb4214f9 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 28 Aug 2024 21:04:58 +0200 Subject: [PATCH 09/36] update test with new bin name --- tests/bin_manager_test.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/bin_manager_test.py b/tests/bin_manager_test.py index f5c5829..f08d6a1 100644 --- a/tests/bin_manager_test.py +++ b/tests/bin_manager_test.py @@ -144,10 +144,11 @@ def test_bin_union(): bin1 = bin_manager.Bin(contigs={"13", "21"}, origin="test1", name="bin1") bin2 = bin_manager.Bin(contigs={"1", "e", "2", "33"}, origin="test2", name="binA") - union_bin = bin_manager.Bin(contigs={"13", "21", "1", "e", "2", "33"}, origin="", name="") + expected_union_bin = bin_manager.Bin(contigs={"13", "21", "1", "e", "2", "33"}, origin="", name="") + union_bin = bin1.union(bin2) - assert bin1.union(bin2) == union_bin - assert bin1.union(bin2).name == "bin1 | binA" + assert union_bin == expected_union_bin + assert union_bin.name == f"{bin1.id} | {bin2.id}" def test_bin_union2(): @@ -161,11 +162,9 @@ def test_bin_union2(): # Check the result expected_contigs = {'contig1', 'contig2', 'contig3', 'contig4', 'contig5'} - expected_name = 'bin1 | bin2 | bin3' expected_origin = {'union'} assert union_bin.contigs == expected_contigs - assert union_bin.name == expected_name assert union_bin.origin == expected_origin @@ -179,7 +178,7 @@ def test_bin_difference(): assert bin1.difference(bin2, bin3) == diff_bin1_23 assert bin1.difference(bin2) == diff_bin1_2 - assert bin1.difference(bin2, bin3).name == "bin1 - bin2 - bin3" + assert bin1.difference(bin2, bin3).name == f"{bin1.id} - {bin2.id} - {bin3.id}" def test_bin_intersection(): @@ -192,7 +191,7 @@ def test_bin_intersection(): assert bin1.intersection(bin2, bin3) == inter_bin123 assert bin1.intersection(bin2) == iner_bin1_2 - assert bin1.intersection(bin2, bin3).name == "bin1 & bin2 & bin3" + assert bin1.intersection(bin2, bin3).name == f"{bin1.id} & 
{bin2.id} & {bin3.id}" def test_select_best_bins_simple(): From 99fafaa517173f45ec7b5c305a6a78a5956ab1c0 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 29 Aug 2024 00:45:09 +0200 Subject: [PATCH 10/36] improve fct to infer bin set name --- binette/io_manager.py | 105 +++++++++++++++++++++++++------- tests/io_manager_test.py | 128 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 203 insertions(+), 30 deletions(-) diff --git a/binette/io_manager.py b/binette/io_manager.py index bb6c38d..0a268e5 100644 --- a/binette/io_manager.py +++ b/binette/io_manager.py @@ -1,36 +1,97 @@ import logging -import os import pyfastx -from typing import Iterable, List, Dict +from typing import Iterable, List, Dict, Tuple import csv from binette.bin_manager import Bin +from pathlib import Path -def infer_bin_name_from_bin_inputs(input_bins: List[str]) -> Dict[str, str]: +def get_paths_common_prefix_suffix(paths: List[Path]) -> Tuple[List[str], List[str], List[str]]: """ - Infer bin names from a list of bin input directories. + Determine the common prefix parts, suffix parts, and common extensions of the last part of a list of pathlib.Path objects. - :param input_bins: List of input bin directories. - :return: Dictionary mapping inferred bin names to their corresponding directories. + :param paths: List of pathlib.Path objects. + :return: A tuple containing three lists: + - The common prefix parts. + - The common suffix parts. + - The common extensions of the last part of the paths. 
""" - logging.debug("Inferring bin names from input bins:") + # Extract parts for all paths + parts = [list(path.parts) for path in paths] + + # Find the common prefix + if not parts: + return [], [], [] + + # Initialize common prefix and suffix lists + common_prefix = list(parts[0]) + common_suffix = list(parts[0]) + # Determine common prefix + for part_tuple in parts[1:]: + common_prefix_length = min(len(common_prefix), len(part_tuple)) + common_prefix = [common_prefix[i] for i in range(common_prefix_length) if common_prefix[:i+1] == part_tuple[:i+1]] + if not common_prefix: + break + + # Determine common suffix + for part_tuple in parts[1:]: + common_suffix_length = min(len(common_suffix), len(part_tuple)) + common_suffix = [common_suffix[-i] for i in range(1, common_suffix_length + 1) if common_suffix[-i:] == part_tuple[-i:]] + if not common_suffix: + break + if len(parts) > 1: + common_suffix.reverse() + + # Determine common extensions of the last part of the paths + if len(paths) == 1: + common_extensions = paths[0].suffixes + else: + common_extensions = list(paths[0].suffixes) + for path in paths[1:]: + common_extension_length = min(len(common_extensions), len(path.suffixes)) + common_extensions = [common_extensions[i] for i in range(common_extension_length) if common_extensions[i] == path.suffixes[i]] + if not common_extensions: + break + + return common_prefix, common_suffix, common_extensions + +def infer_bin_set_names_from_input_paths(input_bins: List[Path]) -> Dict[str, Path]: + """ + Infer bin set names from a list of bin input directories or files. + + :param input_bins: List of input bin directories or files. + :return: Dictionary mapping inferred bin names to their corresponding directories or files. 
+ """ + bin_name_to_bin_dir = {} + + common_prefix, common_suffix, common_extensions = get_paths_common_prefix_suffix(input_bins) + print(common_prefix, common_suffix, common_extensions ) + for path in input_bins: + + specific_parts = path.parts[len(common_prefix):len(path.parts)-len(common_suffix)] + + if not common_suffix and common_extensions: + last_specific_part = specific_parts[-1].split('.')[:-len(common_extensions)] + specific_parts = list(specific_parts[:-1]) + last_specific_part + - commonprefix_len = len(os.path.commonprefix(input_bins)) - reversed_strings = [s[::-1] for s in input_bins] - commonsufix_len = len(os.path.commonprefix(reversed_strings)) + bin_set_name = '/'.join(specific_parts) + if bin_set_name == "": + bin_set_name = path.as_posix() - bin_name_to_bin_dir = {d[commonprefix_len: len(d) - commonsufix_len]: d for d in input_bins} + bin_name_to_bin_dir[bin_set_name] = path - logging.debug(f"Input bins: {' '.join(input_bins)}") - logging.debug(f"Common prefix to remove: {os.path.commonprefix(reversed_strings)[::-1]}") - logging.debug(f"Common suffix to remove: {os.path.commonprefix(input_bins)}") + logging.debug(f"Input bins: {' '.join([path.as_posix() for path in input_bins])}") + logging.debug(f"Common prefix to remove: {common_prefix}") + logging.debug(f"Common suffix to remove: {common_suffix}") + logging.debug(f"Common extension to remove: {common_suffix}") logging.debug(f"bin_name_to_bin_dir: {bin_name_to_bin_dir}") return bin_name_to_bin_dir -def write_bin_info(bins: Iterable[Bin], output: str, add_contigs: bool = False): +def write_bin_info(bins: Iterable[Bin], output: Path, add_contigs: bool = False): """ Write bin information to a TSV file. 
@@ -67,7 +128,7 @@ def write_bin_info(bins: Iterable[Bin], output: str, add_contigs: bool = False): writer.writerows(bin_infos) -def write_bins_fasta(selected_bins: List[Bin], contigs_fasta: str, outdir: str): +def write_bins_fasta(selected_bins: List[Bin], contigs_fasta: Path, outdir: Path): """ Write selected bins' contigs to separate FASTA files. @@ -76,10 +137,10 @@ def write_bins_fasta(selected_bins: List[Bin], contigs_fasta: str, outdir: str): :param outdir: Output directory to save the individual bin FASTA files. """ - fa = pyfastx.Fasta(contigs_fasta, build_index=True) + fa = pyfastx.Fasta(contigs_fasta.as_posix(), build_index=True) for sbin in selected_bins: - outfile = os.path.join(outdir, f"bin_{sbin.id}.fa") + outfile = outdir / f"bin_{sbin.id}.fa" with open(outfile, "w") as outfl: sequences = (f">{c}\n{fa[c]}" for c in sbin.contigs) @@ -111,7 +172,7 @@ def check_contig_consistency(contigs_from_assembly: Iterable[str], assert are_contigs_consistent, message -def check_resume_file(faa_file: str, diamond_result_file: str) -> None: +def check_resume_file(faa_file: Path, diamond_result_file: Path) -> None: """ Check the existence of files required for resuming the process. @@ -120,15 +181,15 @@ def check_resume_file(faa_file: str, diamond_result_file: str) -> None: :raises FileNotFoundError: If the required files don't exist for resuming. """ - if os.path.isfile(faa_file) and os.path.isfile(diamond_result_file): + if faa_file.exists() and diamond_result_file.exists(): return - if not os.path.isfile(faa_file): + if not faa_file.exists(): error_msg = f"Protein file '{faa_file}' does not exist. Resuming is not possible." logging.error(error_msg) raise FileNotFoundError(error_msg) - if not os.path.isfile(diamond_result_file): + if not diamond_result_file.exists(): error_msg = f"Diamond result file '{diamond_result_file}' does not exist. Resuming is not possible." 
logging.error(error_msg) raise FileNotFoundError(error_msg) diff --git a/tests/io_manager_test.py b/tests/io_manager_test.py index 01909c2..aa1856c 100644 --- a/tests/io_manager_test.py +++ b/tests/io_manager_test.py @@ -36,18 +36,130 @@ def test_infer_bin_name_from_bin_inputs(): ] # Call the function - result = io_manager.infer_bin_name_from_bin_inputs(input_bins) + result = io_manager.infer_bin_set_names_from_input_paths(list(map(Path, input_bins))) # Define the expected output expected_result = { - '1': '/path/to/bin1', - '2': '/path/to/bin2', - '3': '/path/to/bin3' + 'bin1': Path('/path/to/bin1'), + 'bin2': Path('/path/to/bin2'), + 'bin3': Path('/path/to/bin3') } # Check if the output matches the expected dictionary assert result == expected_result +def test_infer_bin_name_from_single_path(): + # Mock input data + input_bins = [ + '/path/to/bin1', + ] + + # Call the function + result = io_manager.infer_bin_set_names_from_input_paths(list(map(Path, input_bins))) + + # Define the expected output + expected_result = { + '/path/to/bin1': Path('/path/to/bin1'), + } + + # Check if the output matches the expected dictionary + assert result == expected_result + + +def test_infer_bin_name_from_bin_table_inputs(): + # Mock input data + input_bins = [ + '/path/to/bin1.tsv', + '/path/to/bin2.tsv', + '/path/to/bin3.tsv' + ] + + # Call the function + result = io_manager.infer_bin_set_names_from_input_paths(list(map(Path, input_bins))) + + # Define the expected output + expected_result = { + 'bin1': Path('/path/to/bin1.tsv'), + 'bin2': Path('/path/to/bin2.tsv'), + 'bin3': Path('/path/to/bin3.tsv') + } + + # Check if the output matches the expected dictionary + assert result == expected_result + + +def test_infer_bin_name_from_bin_table_with_different_ext(): + # Mock input data + input_bins = [ + '/path/to/bin1.tsv', + '/path/to/bin2.tsv', + '/path/to/bin3.txt' + ] + + # Call the function + result = io_manager.infer_bin_set_names_from_input_paths(list(map(Path, input_bins))) + 
+ # Define the expected output + expected_result = { + 'bin1.tsv': Path('/path/to/bin1.tsv'), + 'bin2.tsv': Path('/path/to/bin2.tsv'), + 'bin3.txt': Path('/path/to/bin3.txt') + } + + # Check if the output matches the expected dictionary + assert result == expected_result + +def test_infer_bin_name_from_bin_table_with_different_dir(): + # Mock input data + input_bins = [ + '/path/to/bins', + '/path2/result_bins', + '/path2/result/bins', + ] + + # Call the function + result = io_manager.infer_bin_set_names_from_input_paths(list(map(Path, input_bins))) + + # Define the expected output + expected_result = { + 'path/to/bins' : Path('/path/to/bins'), + 'path2/result_bins': Path('/path2/result_bins'), + 'path2/result/bins': Path('/path2/result/bins'), + } + + # Check if the output matches the expected dictionary + assert result == expected_result + +def test_get_paths_common_prefix_suffix(): + # Test case 1: No paths provided + assert io_manager.get_paths_common_prefix_suffix([]) == ([], [], []) + + # # Test case 2: Single path + assert io_manager.get_paths_common_prefix_suffix([Path('/home/user/project')]) == (['/', 'home', 'user', 'project'], ['/', 'home', 'user', 'project'], []) + + # Test case 3: Multiple paths with common prefix and suffix + paths = [Path('/home/user/project/src'), Path('/home/user/project/docs'), Path('/home/user/project/tests')] + assert io_manager.get_paths_common_prefix_suffix(paths) == (['/', 'home', 'user', 'project'], [], []) + + # Test case 4: Multiple paths with no common prefix or suffix + paths = [Path('/var/log/syslog'), Path('/usr/local/bin/python'), Path('/etc/nginx/nginx.conf')] + assert io_manager.get_paths_common_prefix_suffix(paths) == (['/'], [], []) + + # Test case 5: Multiple paths with common suffix + paths = [Path('/home/user/docs/report.txt'), Path('/home/admin/docs/report.txt')] + assert io_manager.get_paths_common_prefix_suffix(paths) == (['/', 'home'], ['docs', 'report.txt'], ['.txt']) + + # Test case 6: Paths with a deeper 
common prefix and suffix + paths = [Path('/data/project_a/results/output.txt'), Path('/data/project_b/results/output.txt')] + assert io_manager.get_paths_common_prefix_suffix(paths) == (['/', 'data'], ['results', 'output.txt'], ['.txt']) + + # Test case 7: Paths with only the root as common prefix and different suffix + paths = [Path('/project_a/output.txt'), Path('/project_b/output.txt')] + assert io_manager.get_paths_common_prefix_suffix(paths) == (['/'], ['output.txt'], ['.txt']) + + # Test case 8: Paths with only the root as common prefix and different suffix + paths = [Path('/project_a/output.txt'), Path('/project_a/output.tsv')] + assert io_manager.get_paths_common_prefix_suffix(paths) == (['/', 'project_a'], [], []) def test_write_bin_info(tmp_path, bin1, bin2): # Mock input data @@ -103,7 +215,7 @@ def test_write_bins_fasta(tmp_path, bin1, bin2): outdir.mkdir() # Call the function - io_manager.write_bins_fasta(selected_bins, str(contigs_fasta), str(outdir)) + io_manager.write_bins_fasta(selected_bins, contigs_fasta, outdir) # Check if the files were created and their content matches the expected output assert (outdir / "bin_1.fa").exists() @@ -152,7 +264,7 @@ def temp_files(tmp_path): def test_check_resume_file_exists(temp_files, caplog): # Test when both files exist faa_file, diamond_result_file = temp_files - io_manager.check_resume_file(faa_file, diamond_result_file) + io_manager.check_resume_file(Path(faa_file), Path(diamond_result_file)) assert "Protein file" not in caplog.text assert "Diamond result file" not in caplog.text @@ -160,7 +272,7 @@ def test_check_resume_file_missing_faa(temp_files, caplog): # Test when faa_file is missing _, diamond_result_file = temp_files with pytest.raises(FileNotFoundError): - io_manager.check_resume_file("nonexistent.faa", diamond_result_file) + io_manager.check_resume_file(Path("nonexistent.faa"), Path(diamond_result_file)) assert "Protein file" in caplog.text assert "Diamond result file" not in caplog.text @@ -168,6 
+280,6 @@ def test_check_resume_file_missing_diamond(temp_files, caplog): # Test when diamond_result_file is missing faa_file, _ = temp_files with pytest.raises(FileNotFoundError): - io_manager.check_resume_file(faa_file, "nonexistent_diamond_result.txt") + io_manager.check_resume_file(Path(faa_file), Path("nonexistent_diamond_result.txt")) assert "Protein file" not in caplog.text assert "Diamond result file" in caplog.text From b7ee3f77517e85e362280a20ade957419d711a6e Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 29 Aug 2024 10:12:45 +0200 Subject: [PATCH 11/36] apply pathlib in tests --- tests/main_binette_test.py | 58 ++++++++++++++++++++------------------ 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/tests/main_binette_test.py b/tests/main_binette_test.py index d4ceba9..f2129db 100644 --- a/tests/main_binette_test.py +++ b/tests/main_binette_test.py @@ -11,7 +11,7 @@ from collections import Counter from tests.bin_manager_test import create_temp_bin_directories, create_temp_bin_files from argparse import ArgumentParser - +from pathlib import Path @pytest.fixture def bins(): @@ -62,7 +62,7 @@ def test_select_bins_and_write_them(tmp_path, tmpdir, bins): # Run the function with test data selected_bins = select_bins_and_write_them( - set(bins), str(contigs_fasta), final_bin_report, min_completeness=60, index_to_contig=index_to_contig, outdir=str(outdir), debug=True + set(bins), contigs_fasta, Path(final_bin_report), min_completeness=60, index_to_contig=index_to_contig, outdir=outdir, debug=True ) # Assertions to check the function output or file existence @@ -104,11 +104,11 @@ def test_manage_protein_alignement_resume(tmp_path): # Run the function with test data contig_to_kegg_counter, contig_to_genes = manage_protein_alignement( - faa_file=str(faa_file), - contigs_fasta="contigs_fasta", + faa_file=Path(faa_file), + contigs_fasta=Path("contigs_fasta"), contig_to_length=contig_to_length, - contigs_in_bins={}, - 
diamond_result_file="diamond_result_file", + contigs_in_bins=set(), + diamond_result_file=Path("diamond_result_file"), checkm2_db=None, threads=1, resume=True, @@ -149,11 +149,11 @@ def test_manage_protein_alignement_not_resume(tmpdir, tmp_path): # Call the function contig_to_kegg_counter, contig_to_genes = manage_protein_alignement( - faa_file=str(faa_file), - contigs_fasta=contigs_fasta, + faa_file=Path(faa_file), + contigs_fasta=Path(contigs_fasta), contig_to_length=contig_to_length, - contigs_in_bins={}, - diamond_result_file=diamond_result_file, + contigs_in_bins=set(), + diamond_result_file=Path(diamond_result_file), checkm2_db=None, threads=1, resume=True, @@ -180,7 +180,7 @@ def test_parse_input_files_with_contig2bin_tables(tmp_path): fasta_file.write_text(fasta_file_content) # Call the function and capture the return values - bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length = parse_input_files(None, [str(bin_set1), str(bin_set2)], str(fasta_file)) + bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length = parse_input_files(None, [bin_set1, bin_set2], fasta_file) # # Perform assertions on the returned values @@ -190,7 +190,7 @@ def test_parse_input_files_with_contig2bin_tables(tmp_path): assert isinstance(contig_to_length, dict) - assert set(bin_set_name_to_bins) == {'1', "2"} + assert set(bin_set_name_to_bins) == {'bin_set1', "bin_set2"} assert len(original_bins) == 4 assert contigs_in_bins == {"contig1","contig2", "contig3","contig4"} assert len(contig_to_length) == 4 @@ -206,12 +206,12 @@ def test_parse_input_files_with_contig2bin_tables_with_unknown_contig(tmp_path): fasta_file.write_text(fasta_file_content) with pytest.raises(ValueError): - parse_input_files(None, [str(bin_set3)], str(fasta_file)) + parse_input_files(None, [bin_set3], fasta_file) def test_parse_input_files_bin_dirs(create_temp_bin_directories, tmp_path): - bin_dirs = list(create_temp_bin_directories.values()) + bin_dirs = [Path(d) for d in 
create_temp_bin_directories.values()] contig2bin_tables = [] @@ -224,7 +224,7 @@ def test_parse_input_files_bin_dirs(create_temp_bin_directories, tmp_path): fasta_file.write_text(fasta_file_content) # Call the function and capture the return values - bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length = parse_input_files(bin_dirs, contig2bin_tables, str(fasta_file)) + bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length = parse_input_files(bin_dirs, contig2bin_tables, fasta_file) # # Perform assertions on the returned values assert isinstance(bin_set_name_to_bins, dict) @@ -233,7 +233,7 @@ def test_parse_input_files_bin_dirs(create_temp_bin_directories, tmp_path): assert isinstance(contig_to_length, dict) - assert set(bin_set_name_to_bins) == {'1', "2"} + assert set(bin_set_name_to_bins) == {'set1', 'set2'} assert len(original_bins) == 3 assert contigs_in_bins == {"contig1","contig2", "contig3","contig4","contig5",} assert len(contig_to_length) == 5 @@ -257,16 +257,16 @@ def test_argument_used_multiple_times(): def test_parse_arguments_required_arguments(): # Test when only required arguments are provided args = parse_arguments(["-d", "folder1", "folder2", "-c", "contigs.fasta"]) - assert args.bin_dirs == ["folder1", "folder2"] - assert args.contigs == "contigs.fasta" + assert args.bin_dirs == [Path("folder1"), Path("folder2")] + assert args.contigs == Path("contigs.fasta") def test_parse_arguments_optional_arguments(): # Test when required and optional arguments are provided args = parse_arguments(["-d", "folder1", "folder2", "-c", "contigs.fasta", "--threads", "4", "--outdir", "output"]) - assert args.bin_dirs == ["folder1", "folder2"] - assert args.contigs == "contigs.fasta" + assert args.bin_dirs == [Path("folder1"), Path("folder2")] + assert args.contigs == Path("contigs.fasta") assert args.threads == 4 - assert args.outdir == "output" + assert args.outdir == Path("output") def test_parse_arguments_invalid_arguments(): # 
Test when invalid arguments are provided @@ -294,14 +294,16 @@ def test_init_logging_command_line(caplog): # @patch('diamond.run') -def test_manage_protein_alignment_no_resume(): +def test_manage_protein_alignment_no_resume(tmp_path): # Set up the input parameters - faa_file = "test.faa" - contigs_fasta = "test.fasta" + faa_file = Path("test.faa") + contigs_fasta = Path("test.fasta") contig_to_length = {"contig1": [1000]} contigs_in_bins = {"bin1": ["contig1"]} - diamond_result_file = "test_diamond_result.txt" - checkm2_db = "checkm2_db" + diamond_result_file = Path("test_diamond_result.txt") + checkm2_db = tmp_path / "checkm2_db" + with open(checkm2_db, "w"): + pass threads = 4 resume = False low_mem = False @@ -324,11 +326,11 @@ def test_manage_protein_alignment_no_resume(): ) # Assertions to check if functions were called - mock_parse_fasta_file.assert_called_once_with(contigs_fasta) + mock_parse_fasta_file.assert_called_once_with(contigs_fasta.as_posix()) mock_predict.assert_called_once() mock_diamond_get_contig_to_kegg_id.assert_called_once() mock_diamond_run.assert_called_once_with( - faa_file, diamond_result_file, "checkm2_db", f"{os.path.splitext(diamond_result_file)[0]}.log", threads, low_mem=low_mem + faa_file.as_posix(), diamond_result_file.as_posix(), checkm2_db.as_posix(), f"{os.path.splitext(diamond_result_file.as_posix())[0]}.log", threads, low_mem=low_mem ) def test_main_resume_when_not_possible(monkeypatch): From f3abada350662016b6d477389a9ef06e3d5a6677 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 29 Aug 2024 10:13:49 +0200 Subject: [PATCH 12/36] output original bin metric files --- binette/bin_manager.py | 10 ++-- binette/io_manager.py | 2 +- binette/main.py | 118 +++++++++++++++++++++-------------------- 3 files changed, 67 insertions(+), 63 deletions(-) diff --git a/binette/bin_manager.py b/binette/bin_manager.py index 457423b..6891ac5 100644 --- a/binette/bin_manager.py +++ b/binette/bin_manager.py @@ -205,7 +205,7 @@ def 
is_high_quality(self, min_completeness: float, max_contamination: float) -> -def get_bins_from_directory(bin_dir: str, set_name: str, fasta_extensions: Set[str]) -> List[Bin]: +def get_bins_from_directory(bin_dir: Path, set_name: str, fasta_extensions: Set[str]) -> List[Bin]: """ Retrieves a list of Bin objects from a directory containing bin FASTA files. @@ -217,7 +217,7 @@ def get_bins_from_directory(bin_dir: str, set_name: str, fasta_extensions: Set[s """ bins = [] fasta_extensions |= {f".{ext}" for ext in fasta_extensions if not ext.startswith(".")} # adding a dot in case given extension are lacking one - bin_fasta_files = (fasta_file for fasta_file in Path(bin_dir).glob("*") if set(fasta_file.suffixes) & fasta_extensions) + bin_fasta_files = (fasta_file for fasta_file in bin_dir.glob("*") if set(fasta_file.suffixes) & fasta_extensions) for bin_fasta_path in bin_fasta_files: @@ -233,7 +233,7 @@ def get_bins_from_directory(bin_dir: str, set_name: str, fasta_extensions: Set[s -def parse_bin_directories(bin_name_to_bin_dir: Dict[str, str], fasta_extensions:Set[str]) -> Dict[str, Set[Bin]]: +def parse_bin_directories(bin_name_to_bin_dir: Dict[str, Path], fasta_extensions:Set[str]) -> Dict[str, Set[Bin]]: """ Parses multiple bin directories and returns a dictionary mapping bin names to a list of Bin objects. @@ -263,7 +263,7 @@ def parse_bin_directories(bin_name_to_bin_dir: Dict[str, str], fasta_extensions: return bin_set_name_to_bins -def parse_contig2bin_tables(bin_name_to_bin_tables: Dict[str, str]) -> Dict[str, Set['Bin']]: +def parse_contig2bin_tables(bin_name_to_bin_tables: Dict[str, Path]) -> Dict[str, Set['Bin']]: """ Parses multiple contig-to-bin tables and returns a dictionary mapping bin names to a set of unique Bin objects. 
@@ -296,7 +296,7 @@ def parse_contig2bin_tables(bin_name_to_bin_tables: Dict[str, str]) -> Dict[str, return bin_set_name_to_bins -def get_bins_from_contig2bin_table(contig2bin_table: str, set_name: str) -> List[Bin]: +def get_bins_from_contig2bin_table(contig2bin_table: Path, set_name: str) -> List[Bin]: """ Retrieves a list of Bin objects from a contig-to-bin table. diff --git a/binette/io_manager.py b/binette/io_manager.py index 0a268e5..5ea3041 100644 --- a/binette/io_manager.py +++ b/binette/io_manager.py @@ -108,7 +108,7 @@ def write_bin_info(bins: Iterable[Bin], output: Path, add_contigs: bool = False) for bin_obj in sorted(bins, key=lambda x: (x.score, x.N50, -x.id), reverse=True): bin_info = [ bin_obj.id, - bin_obj.origin, + ';'.join(bin_obj.origin), bin_obj.name, bin_obj.completeness, bin_obj.contamination, diff --git a/binette/main.py b/binette/main.py index 055a74d..3044fda 100755 --- a/binette/main.py +++ b/binette/main.py @@ -17,7 +17,7 @@ import binette from binette import contig_manager, cds, diamond, bin_quality, bin_manager, io_manager as io from typing import List, Dict, Optional, Set, Tuple, Union, Sequence, Any - +from pathlib import Path def init_logging(verbose, debug): """Initialise logging.""" @@ -85,6 +85,7 @@ def parse_arguments(args): "-d", "--bin_dirs", nargs="+", + type=Path, action=UniqueStore, help="List of bin folders containing each bin in a fasta file.", ) @@ -94,11 +95,12 @@ def parse_arguments(args): "--contig2bin_tables", nargs="+", action=UniqueStore, + type=Path, help="List of contig2bin table with two columns separated\ with a tabulation: contig, bin", ) - input_group.add_argument("-c", "--contigs", required=True, help="Contigs in fasta format.") + input_group.add_argument("-c", "--contigs", required=True, type=Path, help="Contigs in fasta format.") # Other parameters category other_group = parser.add_argument_group('Other Arguments') @@ -113,7 +115,7 @@ def parse_arguments(args): other_group.add_argument("-t", "--threads", 
default=1, type=int, help="Number of threads to use.") - other_group.add_argument("-o", "--outdir", default="results", help="Output directory.") + other_group.add_argument("-o", "--outdir", default=Path("results"), type=Path, help="Output directory.") other_group.add_argument( "-w", @@ -135,8 +137,9 @@ def parse_arguments(args): other_group.add_argument( "--checkm2_db", + type=Path, help="Provide a path for the CheckM2 diamond database. " - "By default the database set via is used.", + "By default the database set via is used." ) other_group.add_argument("--low_mem", help="Use low mem mode when running diamond", action="store_true") @@ -157,9 +160,9 @@ def parse_arguments(args): args = parser.parse_args(args) return args -def parse_input_files(bin_dirs: List[str], - contig2bin_tables: List[str], - contigs_fasta: str, +def parse_input_files(bin_dirs: List[Path], + contig2bin_tables: List[Path], + contigs_fasta: Path, fasta_extensions:Set[str] = {".fasta", ".fna", ".fa"}) -> Tuple[Dict[str, Set[bin_manager.Bin]], Set[bin_manager.Bin], Set[str], Dict[str, int]]: """ Parses input files to retrieve information related to bins and contigs. 
@@ -178,11 +181,11 @@ def parse_input_files(bin_dirs: List[str], if bin_dirs: logging.info("Parsing bin directories.") - bin_name_to_bin_dir = io.infer_bin_name_from_bin_inputs(bin_dirs) + bin_name_to_bin_dir = io.infer_bin_set_names_from_input_paths(bin_dirs) bin_set_name_to_bins = bin_manager.parse_bin_directories(bin_name_to_bin_dir, fasta_extensions) else: logging.info("Parsing bin2contig files.") - bin_name_to_bin_table = io.infer_bin_name_from_bin_inputs(contig2bin_tables) + bin_name_to_bin_table = io.infer_bin_set_names_from_input_paths(contig2bin_tables) bin_set_name_to_bins = bin_manager.parse_contig2bin_tables(bin_name_to_bin_table) logging.info(f"Processing {len(bin_set_name_to_bins)} bin sets.") @@ -193,7 +196,7 @@ def parse_input_files(bin_dirs: List[str], original_bins = bin_manager.dereplicate_bin_sets(bin_set_name_to_bins.values()) logging.info(f"Parsing contig fasta file: {contigs_fasta}") - contigs_object = contig_manager.parse_fasta_file(contigs_fasta) + contigs_object = contig_manager.parse_fasta_file(contigs_fasta.as_posix()) unexpected_contigs = {contig for contig in contigs_in_bins if contig not in contigs_object} @@ -206,9 +209,9 @@ def parse_input_files(bin_dirs: List[str], return bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length -def manage_protein_alignement(faa_file: str, contigs_fasta: str, contig_to_length: Dict[str, int], - contigs_in_bins: Set[str], diamond_result_file: str, - checkm2_db: str, threads: int, resume: bool, low_mem: bool) -> Tuple[Dict[str, int], Dict[str, List[str]]]: +def manage_protein_alignement(faa_file: Path, contigs_fasta: Path, contig_to_length: Dict[str, int], + contigs_in_bins: Set[str], diamond_result_file: Path, + checkm2_db: Optional[Path], threads: int, resume: bool, low_mem: bool) -> Tuple[Dict[str, int], Dict[str, List[str]]]: """ Predicts or reuses proteins prediction and runs diamond on them. 
@@ -228,41 +231,45 @@ def manage_protein_alignement(faa_file: str, contigs_fasta: str, contig_to_lengt # Predict or reuse proteins prediction and run diamond on them if resume: logging.info(f"Parsing faa file: {faa_file}.") - contig_to_genes = cds.parse_faa_file(faa_file) - io.check_contig_consistency(contig_to_length, contig_to_genes, contigs_fasta, faa_file) + contig_to_genes = cds.parse_faa_file(faa_file.as_posix()) + io.check_contig_consistency(contig_to_length, contig_to_genes, contigs_fasta.as_posix(), faa_file.as_posix()) else: - contigs_iterator = (s for s in contig_manager.parse_fasta_file(contigs_fasta) if s.name in contigs_in_bins) - contig_to_genes = cds.predict(contigs_iterator, faa_file, threads) + contigs_iterator = (s for s in contig_manager.parse_fasta_file(contigs_fasta.as_posix()) if s.name in contigs_in_bins) + contig_to_genes = cds.predict(contigs_iterator, faa_file.as_posix(), threads) - if checkm2_db: - diamond_db_path = checkm2_db - else: + if checkm2_db is None: # get checkm2 db stored in checkm2 install diamond_db_path = diamond.get_checkm2_db() - - diamond_log = f"{os.path.splitext(diamond_result_file)[0]}.log" + elif checkm2_db.exists(): + diamond_db_path = checkm2_db.as_posix() + else: + raise FileNotFoundError(checkm2_db) + + diamond_log = diamond_result_file.parents[0] / f"{diamond_result_file.stem}.log" diamond.run( - faa_file, - diamond_result_file, + faa_file.as_posix(), + diamond_result_file.as_posix(), diamond_db_path, - diamond_log, + diamond_log.as_posix(), threads, low_mem=low_mem, ) logging.info("Parsing diamond results.") - contig_to_kegg_counter = diamond.get_contig_to_kegg_id(diamond_result_file) + contig_to_kegg_counter = diamond.get_contig_to_kegg_id(diamond_result_file.as_posix()) # Check contigs from diamond vs input assembly consistency - io.check_contig_consistency(contig_to_length, contig_to_kegg_counter, contigs_fasta, diamond_result_file) + io.check_contig_consistency(contig_to_length, contig_to_kegg_counter, 
contigs_fasta.as_posix(), diamond_result_file.as_posix()) return contig_to_kegg_counter, contig_to_genes -def select_bins_and_write_them(all_bins: Set[bin_manager.Bin], contigs_fasta: str, final_bin_report: str, min_completeness: float, - index_to_contig: dict, outdir: str, debug: bool) -> List[bin_manager.Bin]: +def select_bins_and_write_them(all_bins: Set[bin_manager.Bin], + contigs_fasta: Path, + final_bin_report: Path, min_completeness: float, + index_to_contig: dict, outdir: Path, debug: bool) -> List[bin_manager.Bin]: """ Selects and writes bins based on specific criteria. @@ -276,12 +283,12 @@ def select_bins_and_write_them(all_bins: Set[bin_manager.Bin], contigs_fasta: st :return: Selected bins that meet the completeness threshold. """ - outdir_final_bin_set = os.path.join(outdir, "final_bins") + outdir_final_bin_set = outdir / "final_bins" os.makedirs(outdir_final_bin_set, exist_ok=True) if debug: all_bins_for_debug = set(all_bins) - all_bin_compo_file = os.path.join(outdir, "all_bins_quality_reports.tsv") + all_bin_compo_file = outdir / "all_bins_quality_reports.tsv" logging.info(f"Writing all bins in {all_bin_compo_file}") @@ -338,6 +345,23 @@ def log_selected_bin_info(selected_bins: List[bin_manager.Bin], hq_min_completen thresholds = f"(completeness >= {hq_min_completeness} and contamination <= {hq_max_conta})" logging.info(f"{hq_bins}/{len(selected_bins)} selected bins have a high quality {thresholds}.") +def write_original_bin_metrics(bin_set_name_to_bins:Dict[str, Set[bin_manager.Bin]], original_bin_report_dir:Path): + """ + + """ + + logging.info(f"Writing original input bins metrics in {original_bin_report_dir}") + + + original_bin_report_dir.mkdir(parents=True, exist_ok=True) + + for i, (set_name, bins) in enumerate(sorted(bin_set_name_to_bins.items())): + bins_metric_file = original_bin_report_dir / f"input_bins_{i+1}.{set_name.replace('/', '_')}.tsv" + + logging.info(f"Writing bin_set {set_name} input bins metrics in {bins_metric_file}") + 
io.write_bin_info(bins, bins_metric_file) + + def main(): "Orchestrate the execution of the program" @@ -351,15 +375,15 @@ def main(): hq_min_completeness = 90 # Temporary files # - out_tmp_dir = os.path.join(args.outdir, "temporary_files") + out_tmp_dir:Path = args.outdir / "temporary_files" os.makedirs(out_tmp_dir, exist_ok=True) - faa_file = os.path.join(out_tmp_dir, "assembly_proteins.faa") - diamond_result_file = os.path.join(out_tmp_dir, "diamond_result.tsv") + faa_file = out_tmp_dir / "assembly_proteins.faa" + diamond_result_file = out_tmp_dir / "diamond_result.tsv" # Output files # - final_bin_report = os.path.join(args.outdir, "final_bins_quality_reports.tsv") - original_bin_report = os.path.join(args.outdir, "original_bins_quality_reports.tsv") + final_bin_report:Path = args.outdir / "final_bins_quality_reports.tsv" + original_bin_report_dir:Path = args.outdir / "input_bins_quality_reports" if args.resume: io.check_resume_file(faa_file, diamond_result_file) @@ -392,28 +416,8 @@ def main(): logging.info("Add size and assess quality of input bins") bin_quality.add_bin_metrics(original_bins, contig_metadat, args.contamination_weight, args.threads) + write_original_bin_metrics(bin_set_name_to_bins, original_bin_report_dir) - # for bin_set, bins in bin_set_name_to_bins.items(): - # print(bin_set) - # bin_set_name = bin_set.replace("/", "_") - # original_bin_report = os.path.join(args.outdir, f"{bin_set_name}_bins_quality_reports.tsv") - # bins_with_metric = [] - # for bin_obj in bins: - # print(bin_obj.id, bin_obj.score, bin_obj.N50) - # if bin_obj.score is None: - - # matching_bins = [bin_with_metric for bin_with_metric in original_bins if bin_obj == bin_with_metric] - # assert len(matching_bins) == 1, len(matching_bins) - # bins_with_metric.append(matching_bins[0]) - # print("HAS NOT USE MATCHING BIN IN ORIGINAL SET", matching_bins[0].id, matching_bins[0].score, matching_bins[0].N50) - - # else: - # print("has score") - # print(bin_obj.id, bin_obj.score, 
bin_obj.N50) - # bins_with_metric.append(bin_obj) - - - # io.write_bin_info(bins_with_metric, original_bin_report) logging.info("Create intermediate bins:") new_bins = bin_manager.create_intermediate_bins(bin_set_name_to_bins) From 14bc19481978945a7f1b80a8990d517ff073065e Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Thu, 29 Aug 2024 10:43:52 +0200 Subject: [PATCH 13/36] update tests --- binette/io_manager.py | 24 +++++++++++++++++++++++- binette/main.py | 22 ++++------------------ tests/bin_manager_test.py | 9 +++++---- tests/io_manager_test.py | 36 +++++++++++++++++++++++++++++++++--- tests/main_binette_test.py | 3 +++ 5 files changed, 68 insertions(+), 26 deletions(-) diff --git a/binette/io_manager.py b/binette/io_manager.py index 5ea3041..5899722 100644 --- a/binette/io_manager.py +++ b/binette/io_manager.py @@ -1,6 +1,6 @@ import logging import pyfastx -from typing import Iterable, List, Dict, Tuple +from typing import Iterable, List, Dict, Tuple, Set import csv from binette.bin_manager import Bin @@ -195,3 +195,25 @@ def check_resume_file(faa_file: Path, diamond_result_file: Path) -> None: raise FileNotFoundError(error_msg) +def write_original_bin_metrics(bin_set_name_to_bins: Dict[str, Set[Bin]], original_bin_report_dir: Path): + """ + Write metrics of original input bins to a specified directory. + + This function takes a dictionary mapping bin set names to sets of bins and writes + the metrics for each bin set to a TSV file in the specified directory. Each bin set + will have its own TSV file named according to its set name. + + :param bin_set_name_to_bins: A dictionary where the keys are bin set names (str) and + the values are sets of Bin objects representing bins. + :param original_bin_report_dir: The directory path (Path) where the bin metrics will be saved. 
+ """ + + original_bin_report_dir.mkdir(parents=True, exist_ok=True) + + for i, (set_name, bins) in enumerate(sorted(bin_set_name_to_bins.items())): + bins_metric_file = original_bin_report_dir / f"input_bins_{i + 1}.{set_name.replace('/', '_')}.tsv" + + logging.debug(f"Writing metrics for bin set '{set_name}' to file: {bins_metric_file}") + write_bin_info(bins, bins_metric_file) + + logging.debug("Completed writing all original input bin metrics.") diff --git a/binette/main.py b/binette/main.py index 3044fda..1c92a38 100755 --- a/binette/main.py +++ b/binette/main.py @@ -345,23 +345,6 @@ def log_selected_bin_info(selected_bins: List[bin_manager.Bin], hq_min_completen thresholds = f"(completeness >= {hq_min_completeness} and contamination <= {hq_max_conta})" logging.info(f"{hq_bins}/{len(selected_bins)} selected bins have a high quality {thresholds}.") -def write_original_bin_metrics(bin_set_name_to_bins:Dict[str, Set[bin_manager.Bin]], original_bin_report_dir:Path): - """ - - """ - - logging.info(f"Writing original input bins metrics in {original_bin_report_dir}") - - - original_bin_report_dir.mkdir(parents=True, exist_ok=True) - - for i, (set_name, bins) in enumerate(sorted(bin_set_name_to_bins.items())): - bins_metric_file = original_bin_report_dir / f"input_bins_{i+1}.{set_name.replace('/', '_')}.tsv" - - logging.info(f"Writing bin_set {set_name} input bins metrics in {bins_metric_file}") - io.write_bin_info(bins, bins_metric_file) - - def main(): "Orchestrate the execution of the program" @@ -416,7 +399,10 @@ def main(): logging.info("Add size and assess quality of input bins") bin_quality.add_bin_metrics(original_bins, contig_metadat, args.contamination_weight, args.threads) - write_original_bin_metrics(bin_set_name_to_bins, original_bin_report_dir) + + + logging.info(f"Writting original input bin metrics to directory: {original_bin_report_dir}") + io.write_original_bin_metrics(bin_set_name_to_bins, original_bin_report_dir) logging.info("Create intermediate 
bins:") diff --git a/tests/bin_manager_test.py b/tests/bin_manager_test.py index f08d6a1..f939b53 100644 --- a/tests/bin_manager_test.py +++ b/tests/bin_manager_test.py @@ -9,6 +9,7 @@ import networkx as nx import logging +from pathlib import Path def test_get_all_possible_combinations(): input_list = ["2", "3", "4"] @@ -524,14 +525,14 @@ def create_temp_bin_directories(tmpdir, create_temp_bin_files): bin2 = bin_dir2.join("binA.fasta") bin2.write(">contig3\nTTAG\n>contig4\nCGAT\n>contig5\nCGGC") - return {"set1": str(bin_dir1), "set2": str(bin_dir2)} + return {"set1": Path(bin_dir1), "set2": Path(bin_dir2)} def test_get_bins_from_directory(create_temp_bin_files): bin_dir = create_temp_bin_files set_name = "TestSet" - bins = bin_manager.get_bins_from_directory(str(bin_dir), set_name, fasta_extensions={'.fasta'}) + bins = bin_manager.get_bins_from_directory(Path(bin_dir), set_name, fasta_extensions={'.fasta'}) assert len(bins) == 2 # Ensure that the correct number of Bin objects is returned @@ -546,7 +547,7 @@ def test_get_bins_from_directory(create_temp_bin_files): assert bins[0].name in ["bin2.fasta", "bin1.fasta"] def test_get_bins_from_directory_no_files(tmpdir): - bin_dir = str(tmpdir.mkdir("empty_bins")) + bin_dir = Path(tmpdir.mkdir("empty_bins")) set_name = "EmptySet" bins = bin_manager.get_bins_from_directory(bin_dir, set_name, fasta_extensions={'.fasta'}) @@ -554,7 +555,7 @@ def test_get_bins_from_directory_no_files(tmpdir): assert len(bins) == 0 # Ensure that no Bin objects are returned for an empty directory def test_get_bins_from_directory_no_wrong_extensions(create_temp_bin_files): - bin_dir = create_temp_bin_files + bin_dir = Path(create_temp_bin_files) set_name = "TestSet" bins = bin_manager.get_bins_from_directory(bin_dir, set_name, fasta_extensions={'.fna'}) diff --git a/tests/io_manager_test.py b/tests/io_manager_test.py index aa1856c..3a44368 100644 --- a/tests/io_manager_test.py +++ b/tests/io_manager_test.py @@ -1,7 +1,7 @@ import pytest from 
binette import io_manager from pathlib import Path - +from unittest.mock import patch @@ -9,7 +9,7 @@ class Bin: def __init__(self, bin_id, origin, name, completeness, contamination, score, length, N50, contigs): self.id = bin_id - self.origin = origin + self.origin = {origin} self.name = name self.completeness = completeness self.contamination = contamination @@ -65,7 +65,7 @@ def test_infer_bin_name_from_single_path(): # Check if the output matches the expected dictionary assert result == expected_result - + def test_infer_bin_name_from_bin_table_inputs(): # Mock input data input_bins = [ @@ -283,3 +283,33 @@ def test_check_resume_file_missing_diamond(temp_files, caplog): io_manager.check_resume_file(Path(faa_file), Path("nonexistent_diamond_result.txt")) assert "Protein file" not in caplog.text assert "Diamond result file" in caplog.text + + +@patch('binette.io_manager.write_bin_info') +def test_write_original_bin_metrics(mock_write_bin_info, bin1,bin2, tmp_path): + # Test that `write_original_bin_metrics` correctly writes bin metrics to files + + temp_directory = tmp_path / "test_output" + + mock_bins = {"set1":{bin1}, + "set2":{bin2}} + # Call the function with mock data + io_manager.write_original_bin_metrics(mock_bins, temp_directory) + + # Check if the output directory was created + assert temp_directory.exists(), "Output directory should be created." + + # Check that the correct files are created + expected_files = [ + temp_directory / "input_bins_1.set1.tsv", + temp_directory / "input_bins_2.set2.tsv" + ] + + assert temp_directory.exists(), f"Expected temp_directory {temp_directory} was not created." + + # Check if `write_bin_info` was called correctly + assert mock_write_bin_info.call_count == 2, "write_bin_info should be called once for each bin set." 
+ + # Verify the specific calls to `write_bin_info` + mock_write_bin_info.assert_any_call(mock_bins['set1'], expected_files[0]) + mock_write_bin_info.assert_any_call(mock_bins['set2'], expected_files[1]) \ No newline at end of file diff --git a/tests/main_binette_test.py b/tests/main_binette_test.py index f2129db..0e20acd 100644 --- a/tests/main_binette_test.py +++ b/tests/main_binette_test.py @@ -371,6 +371,7 @@ def test_main(monkeypatch): patch('binette.bin_quality.add_bin_metrics') as mock_add_bin_metrics, \ patch('binette.main.log_selected_bin_info') as mock_log_selected_bin_info, \ patch('binette.contig_manager.make_contig_index') as mock_make_contig_index, \ + patch('binette.io_manager.write_original_bin_metrics') as mock_write_original_bin_metrics, \ patch('binette.main.select_bins_and_write_them') as mock_select_bins_and_write_them: # Set return values for mocked functions if needed @@ -395,5 +396,7 @@ def test_main(monkeypatch): mock_log_selected_bin_info.assert_called_once() mock_select_bins_and_write_them.assert_called_once() + mock_write_original_bin_metrics.assert_called_once() + assert mock_apply_contig_index.call_count == 3 assert mock_add_bin_metrics.call_count == 2 \ No newline at end of file From 956cb1773142db6bddbc0b55a1cdb0d384f7a18d Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 2024 14:56:25 +0200 Subject: [PATCH 14/36] add first draft of the tutorial --- docs/conf.py | 56 +- docs/index.md | 6 +- docs/tutorial/analyse_binette_result.ipynb | 1688 ++++++++++++++++++++ docs/tutorial/assembly.md | 43 + docs/tutorial/binette.md | 7 + docs/tutorial/binning.md | 67 + docs/tutorial/set_env_and_get_data.md | 78 + docs/tutorial/tutorial_main.md | 52 + pyproject.toml | 7 +- 9 files changed, 1996 insertions(+), 8 deletions(-) create mode 100644 docs/tutorial/analyse_binette_result.ipynb create mode 100644 docs/tutorial/assembly.md create mode 100644 docs/tutorial/binette.md create mode 100644 docs/tutorial/binning.md create mode 100644 
docs/tutorial/set_env_and_get_data.md create mode 100644 docs/tutorial/tutorial_main.md diff --git a/docs/conf.py b/docs/conf.py index b8e839a..bb2e303 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -18,28 +18,37 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration extensions = [ - "myst_parser", + # # "sphinxcontrib.jquery", "sphinx.ext.duration", "sphinx.ext.autosectionlabel", "sphinx.ext.autodoc", - 'sphinx_search.extension' + 'sphinx_search.extension', + # "myst_nb", + "myst_parser", + 'nbsphinx', + 'nbsphinx_link', + 'sphinx.ext.napoleon', + 'sphinx.ext.viewcode', + "myst_parser", + ] source_suffix = { - '.md': 'markdown' + '.md': 'markdown', } templates_path = ['_templates'] - +nb_execution_mode = "off" +nbsphinx_execute = 'never' # Prefix document path to section labels, to use: # `path/to/file:heading` instead of just `heading` autosectionlabel_prefix_document = True -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'build', "api"] @@ -55,3 +64,40 @@ + + +# Include the Plotly JavaScript in the HTML output +nbsphinx_requirejs_path = "" + +# Ensures that the `require.js` is loaded for Plotly to function correctly +nbsphinx_requirejs_options = { + 'paths': { + 'plotly': 'https://cdn.plot.ly/plotly-latest.min' + }, + 'shim': { + 'plotly': { + 'exports': 'Plotly' + } + } +} + +# Specify the default language for syntax highlighting in Sphinx +highlight_language = 'python' + +# -- Options for HTML output ------------------------------------------------- + + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] + +# Add plotly renderer options +nbsphinx_prolog = r""" +.. 
raw:: html + + +""" + + + diff --git a/docs/index.md b/docs/index.md index 45dd4a6..e7ec04b 100644 --- a/docs/index.md +++ b/docs/index.md @@ -31,14 +31,18 @@ Binette is inspired from the metaWRAP bin-refinement tool but it effectively sol - Enhanced Speed: Binette significantly improves the speed of the refinement process. It achieves this by launching the initial steps of CheckM2, such as Prodigal and Diamond runs, only once on all contigs. These intermediate results are then utilized to assess the quality of any given bin, eliminating redundant computations and accelerating the refinement process. - No Limit on Input Bin Sets: Unlike its predecessor, Binette is not constrained by the number of input bin sets. It can handle and process multiple bin sets simultaneously. + + + ```{toctree} :caption: 'Documentation' :maxdepth: 2 installation usage +tutorial/tutorial_main contributing -tests.md +tests api/api_ref ``` diff --git a/docs/tutorial/analyse_binette_result.ipynb b/docs/tutorial/analyse_binette_result.ipynb new file mode 100644 index 0000000..30247a8 --- /dev/null +++ b/docs/tutorial/analyse_binette_result.ipynb @@ -0,0 +1,1688 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "52e7f39c", + "metadata": {}, + "source": [ + "## Analyse Binette results" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "e6a1e1ee-681d-4823-b974-7027bafd2ba9", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from pathlib import Path\n", + "import plotly.express as px\n", + "import plotly.io as pio\n", + "pio.renderers.default = \"sphinx_gallery\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "34e80119-f59b-41b0-b0e5-de2d6ed0c6a3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bin_idoriginnamecompletenesscontaminationscoresizeN50contig_counttoolindex
017075diff44 - 10100.000.0599.9046726658208493binette0
139427diff36 - 699.900.2099.5027966054115198binette1
247060union58 | 3398.590.8396.93460133641016165binette2
347177union91 | 25 | 5596.100.3495.42259871811891312binette3
421248diff65 - 8 - 2891.981.7188.5617680959976250binette4
544137diff76 - 13 - 2892.632.4187.8137262545669850binette5
631703diff31 - 7 - 6181.730.8480.0516652338518248binette6
713475diff47 - 3772.892.3968.1112418295061252binette7
847926union75 | 3074.314.2665.79329394929541262binette8
946775union42 | 10262.942.7557.4412935713783419binette9
1033569diff83 - 7 - 38 - 3159.182.2454.7020425274437514binette10
1139350diff57 - 16 - 7552.161.3149.5426012825332509binette11
1239558diff78 - 6 - 4364.638.0348.57185821014301293binette12
1351082union120 | 152.335.0642.216888791446472binette13
1419689diff118 - 18 - 61 - 3148.228.2331.76178267614021265binette14
\n", + "
" + ], + "text/plain": [ + " bin_id origin name completeness contamination score \\\n", + "0 17075 diff 44 - 10 100.00 0.05 99.90 \n", + "1 39427 diff 36 - 6 99.90 0.20 99.50 \n", + "2 47060 union 58 | 33 98.59 0.83 96.93 \n", + "3 47177 union 91 | 25 | 55 96.10 0.34 95.42 \n", + "4 21248 diff 65 - 8 - 28 91.98 1.71 88.56 \n", + "5 44137 diff 76 - 13 - 28 92.63 2.41 87.81 \n", + "6 31703 diff 31 - 7 - 61 81.73 0.84 80.05 \n", + "7 13475 diff 47 - 37 72.89 2.39 68.11 \n", + "8 47926 union 75 | 30 74.31 4.26 65.79 \n", + "9 46775 union 42 | 102 62.94 2.75 57.44 \n", + "10 33569 diff 83 - 7 - 38 - 31 59.18 2.24 54.70 \n", + "11 39350 diff 57 - 16 - 75 52.16 1.31 49.54 \n", + "12 39558 diff 78 - 6 - 43 64.63 8.03 48.57 \n", + "13 51082 union 120 | 1 52.33 5.06 42.21 \n", + "14 19689 diff 118 - 18 - 61 - 31 48.22 8.23 31.76 \n", + "\n", + " size N50 contig_count tool index \n", + "0 4672665 82084 93 binette 0 \n", + "1 2796605 41151 98 binette 1 \n", + "2 4601336 41016 165 binette 2 \n", + "3 2598718 11891 312 binette 3 \n", + "4 1768095 9976 250 binette 4 \n", + "5 3726254 5669 850 binette 5 \n", + "6 1665233 8518 248 binette 6 \n", + "7 1241829 5061 252 binette 7 \n", + "8 3293949 2954 1262 binette 8 \n", + "9 1293571 3783 419 binette 9 \n", + "10 2042527 4437 514 binette 10 \n", + "11 2601282 5332 509 binette 11 \n", + "12 1858210 1430 1293 binette 12 \n", + "13 688879 1446 472 binette 13 \n", + "14 1782676 1402 1265 binette 14 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "binette_result_file = \"./binette_results/final_bins_quality_reports.tsv\"\n", + "df_binette = pd.read_csv(binette_result_file, sep='\\t')\n", + "df_binette['tool'] = \"binette\"\n", + "df_binette['index'] = df_binette.index\n", + "df_binette" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "189038d3-77a0-435a-9590-4d8b3038341e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
completenesscontaminationtool
0100.000.05binette
199.900.20binette
298.590.83binette
396.100.34binette
491.981.71binette
............
208.280.01semibin2
218.120.02semibin2
227.740.01semibin2
236.180.00semibin2
244.410.13semibin2
\n", + "

140 rows Γ— 3 columns

\n", + "
" + ], + "text/plain": [ + " completeness contamination tool\n", + "0 100.00 0.05 binette\n", + "1 99.90 0.20 binette\n", + "2 98.59 0.83 binette\n", + "3 96.10 0.34 binette\n", + "4 91.98 1.71 binette\n", + ".. ... ... ...\n", + "20 8.28 0.01 semibin2\n", + "21 8.12 0.02 semibin2\n", + "22 7.74 0.01 semibin2\n", + "23 6.18 0.00 semibin2\n", + "24 4.41 0.13 semibin2\n", + "\n", + "[140 rows x 3 columns]" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "input_bins_quality_reports_dir = Path(\"binette_results/input_bins_quality_reports/\")\n", + "\n", + "df_input_bin_list = [df_binette]\n", + "for input_bin_metric_file in input_bins_quality_reports_dir.glob(\"*tsv\"):\n", + " tool = input_bin_metric_file.name.split('.')[1].split('_')[0]\n", + " df_input = pd.read_csv(input_bin_metric_file, sep='\\t')\n", + " df_input['index'] = df_input.index\n", + " df_input['tool'] = tool\n", + " df_input_bin_list.append(df_input)\n", + "\n", + "df_bins = pd.concat(df_input_bin_list)\n", + " \n", + "set(df_bins['tool'])\n", + "df_bins[\"High quality bin\"] = (df_bins['completeness'] >= 90) & (df_bins['contamination'] <= 5)\n", + "#df_binette = pd.read_csv(binette_result_file, sep='\\t')\n", + "#df_binette\n", + "df_bins[[\"completeness\", \"contamination\", \"tool\"]]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "911d598f-a6c7-4178-aff2-6059235e7fc4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig = px.scatter(df_bins, x=\"completeness\",y=\"contamination\", color=\"High quality bin\", size=\"size\", facet_row=\"tool\")\n", + "fig.update_layout(\n", + " width=800,\n", + " height=800)\n", + " \n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "35c46beb-1ac9-4014-9672-91edcc1bf439", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df_bins['completeness - 2*contamination'] = df_bins['completeness'] - 2*df_bins['contamination']\n", + "fig = px.line(df_bins, x=\"index\",y='completeness - 2*contamination', color=\"tool\",markers=True)\n", + "fig.update_layout(\n", + " width=800,\n", + " height=500)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "af74bfb2-457c-4cf4-9c13-3ee9642be7ce", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
bin_idoriginnamecompletenesscontaminationscoresizeN50contig_counttoolindexHigh quality bincompleteness - 2*contaminationContamination ≀ 10 and<br>Completeness
017075diff44 - 10100.000.0599.9046726658208493binette0True99.90> 90%
139427diff36 - 699.900.2099.5027966054115198binette1True99.50> 90%
247060union58 | 3398.590.8396.93460133641016165binette2True96.93> 90%
347177union91 | 25 | 5596.100.3495.42259871811891312binette3True95.42> 90%
421248diff65 - 8 - 2891.981.7188.5617680959976250binette4True88.56> 90%
544137diff76 - 13 - 2892.632.4187.8137262545669850binette5True87.81> 90%
631703diff31 - 7 - 6181.730.8480.0516652338518248binette6False80.05> 70% and ≀ 90%
713475diff47 - 3772.892.3968.1112418295061252binette7False68.11> 70% and ≀ 90%
847926union75 | 3074.314.2665.79329394929541262binette8False65.79> 70% and ≀ 90%
946775union42 | 10262.942.7557.4412935713783419binette9False57.44> 50% and ≀ 70%
1033569diff83 - 7 - 38 - 3159.182.2454.7020425274437514binette10False54.70> 50% and ≀ 70%
1139350diff57 - 16 - 7552.161.3149.5426012825332509binette11False49.54> 50% and ≀ 70%
1239558diff78 - 6 - 4364.638.0348.57185821014301293binette12False48.57> 50% and ≀ 70%
1351082union120 | 152.335.0642.216888791446472binette13False42.21> 50% and ≀ 70%
0125concoct/bins9.fa100.000.3899.24303358637523131concoct0True99.24> 90%
167concoct/bins41.fa100.000.4699.08476546682084101concoct1True99.08> 90%
291concoct/bins7.fa92.760.3492.08227495112187265concoct2True92.08> 90%
376concoct/bins6.fa92.633.4285.7937519505674855concoct3True85.79> 90%
465concoct/bins62.fa87.351.8083.75191785910911259concoct4False83.75> 70% and ≀ 90%
675concoct/bins48.fa73.354.2664.83328537429501261concoct6False64.83> 70% and ≀ 90%
022maxbin2maxbin2.001.fasta99.814.8190.19461681889436133maxbin20True90.19> 90%
114maxbin2maxbin2.002.fasta93.923.5386.86287437337523195maxbin21True86.86> 90%
35maxbin2maxbin2.009.fasta62.698.1446.4124384926141604maxbin23False46.41> 50% and ≀ 70%
036metabat2metabat2.14.fa99.900.2499.4227995724115199metabat20True99.42> 90%
125metabat2metabat2.8.fa93.170.2292.73214809712225226metabat21True92.73> 90%
233metabat2metabat2.12.fa93.520.9291.68426613439217157metabat22True91.68> 90%
327metabat2metabat2.11.fa84.401.5381.34190276111352218metabat23False81.34> 70% and ≀ 90%
437metabat2metabat2.1.fa84.992.7379.5329805266876502metabat24False79.53> 70% and ≀ 90%
531metabat2metabat2.2.fa83.213.1676.8918070287852274metabat25False76.89> 70% and ≀ 90%
635metabat2metabat2.4.fa76.530.1176.3134776368208471metabat26False76.31> 70% and ≀ 90%
729metabat2metabat2.7.fa71.785.7760.2413846534937292metabat27False60.24> 70% and ≀ 90%
824metabat2metabat2.3.fa51.752.9945.7717070784929362metabat28False45.77> 50% and ≀ 70%
044semibin2/output_binsSemiBin_27.fa.gz100.000.0999.8246813698208494semibin20True99.82> 90%
153semibin2/output_binsSemiBin_33.fa.gz99.920.2899.36293767837523113semibin21True99.36> 90%
250semibin2/output_binsSemiBin_10.fa.gz93.430.1493.15212929512519216semibin22True93.15> 90%
362semibin2/output_binsSemiBin_24.fa.gz92.130.0392.07416291140395139semibin23True92.07> 90%
438semibin2/output_binsSemiBin_26.fa.gz83.092.2578.5916741568389245semibin24False78.59> 70% and ≀ 90%
549semibin2/output_binsSemiBin_32.fa.gz81.871.6678.55182007311737205semibin25False78.55> 70% and ≀ 90%
660semibin2/output_binsSemiBin_22.fa.gz80.251.6376.9927909487117450semibin26False76.99> 70% and ≀ 90%
747semibin2/output_binsSemiBin_11.fa.gz72.572.4567.6712450315061253semibin27False67.67> 70% and ≀ 90%
861semibin2/output_binsSemiBin_3.fa.gz53.341.3350.6817286904913367semibin28False50.68> 50% and ≀ 70%
957semibin2/output_binsSemiBin_12.fa.gz51.921.3149.3026094515292511semibin29False49.30> 50% and ≀ 70%
\n", + "
" + ], + "text/plain": [ + " bin_id origin name completeness \\\n", + "0 17075 diff 44 - 10 100.00 \n", + "1 39427 diff 36 - 6 99.90 \n", + "2 47060 union 58 | 33 98.59 \n", + "3 47177 union 91 | 25 | 55 96.10 \n", + "4 21248 diff 65 - 8 - 28 91.98 \n", + "5 44137 diff 76 - 13 - 28 92.63 \n", + "6 31703 diff 31 - 7 - 61 81.73 \n", + "7 13475 diff 47 - 37 72.89 \n", + "8 47926 union 75 | 30 74.31 \n", + "9 46775 union 42 | 102 62.94 \n", + "10 33569 diff 83 - 7 - 38 - 31 59.18 \n", + "11 39350 diff 57 - 16 - 75 52.16 \n", + "12 39558 diff 78 - 6 - 43 64.63 \n", + "13 51082 union 120 | 1 52.33 \n", + "0 125 concoct/bins 9.fa 100.00 \n", + "1 67 concoct/bins 41.fa 100.00 \n", + "2 91 concoct/bins 7.fa 92.76 \n", + "3 76 concoct/bins 6.fa 92.63 \n", + "4 65 concoct/bins 62.fa 87.35 \n", + "6 75 concoct/bins 48.fa 73.35 \n", + "0 22 maxbin2 maxbin2.001.fasta 99.81 \n", + "1 14 maxbin2 maxbin2.002.fasta 93.92 \n", + "3 5 maxbin2 maxbin2.009.fasta 62.69 \n", + "0 36 metabat2 metabat2.14.fa 99.90 \n", + "1 25 metabat2 metabat2.8.fa 93.17 \n", + "2 33 metabat2 metabat2.12.fa 93.52 \n", + "3 27 metabat2 metabat2.11.fa 84.40 \n", + "4 37 metabat2 metabat2.1.fa 84.99 \n", + "5 31 metabat2 metabat2.2.fa 83.21 \n", + "6 35 metabat2 metabat2.4.fa 76.53 \n", + "7 29 metabat2 metabat2.7.fa 71.78 \n", + "8 24 metabat2 metabat2.3.fa 51.75 \n", + "0 44 semibin2/output_bins SemiBin_27.fa.gz 100.00 \n", + "1 53 semibin2/output_bins SemiBin_33.fa.gz 99.92 \n", + "2 50 semibin2/output_bins SemiBin_10.fa.gz 93.43 \n", + "3 62 semibin2/output_bins SemiBin_24.fa.gz 92.13 \n", + "4 38 semibin2/output_bins SemiBin_26.fa.gz 83.09 \n", + "5 49 semibin2/output_bins SemiBin_32.fa.gz 81.87 \n", + "6 60 semibin2/output_bins SemiBin_22.fa.gz 80.25 \n", + "7 47 semibin2/output_bins SemiBin_11.fa.gz 72.57 \n", + "8 61 semibin2/output_bins SemiBin_3.fa.gz 53.34 \n", + "9 57 semibin2/output_bins SemiBin_12.fa.gz 51.92 \n", + "\n", + " contamination score size N50 contig_count tool index \\\n", + "0 0.05 
99.90 4672665 82084 93 binette 0 \n", + "1 0.20 99.50 2796605 41151 98 binette 1 \n", + "2 0.83 96.93 4601336 41016 165 binette 2 \n", + "3 0.34 95.42 2598718 11891 312 binette 3 \n", + "4 1.71 88.56 1768095 9976 250 binette 4 \n", + "5 2.41 87.81 3726254 5669 850 binette 5 \n", + "6 0.84 80.05 1665233 8518 248 binette 6 \n", + "7 2.39 68.11 1241829 5061 252 binette 7 \n", + "8 4.26 65.79 3293949 2954 1262 binette 8 \n", + "9 2.75 57.44 1293571 3783 419 binette 9 \n", + "10 2.24 54.70 2042527 4437 514 binette 10 \n", + "11 1.31 49.54 2601282 5332 509 binette 11 \n", + "12 8.03 48.57 1858210 1430 1293 binette 12 \n", + "13 5.06 42.21 688879 1446 472 binette 13 \n", + "0 0.38 99.24 3033586 37523 131 concoct 0 \n", + "1 0.46 99.08 4765466 82084 101 concoct 1 \n", + "2 0.34 92.08 2274951 12187 265 concoct 2 \n", + "3 3.42 85.79 3751950 5674 855 concoct 3 \n", + "4 1.80 83.75 1917859 10911 259 concoct 4 \n", + "6 4.26 64.83 3285374 2950 1261 concoct 6 \n", + "0 4.81 90.19 4616818 89436 133 maxbin2 0 \n", + "1 3.53 86.86 2874373 37523 195 maxbin2 1 \n", + "3 8.14 46.41 2438492 6141 604 maxbin2 3 \n", + "0 0.24 99.42 2799572 41151 99 metabat2 0 \n", + "1 0.22 92.73 2148097 12225 226 metabat2 1 \n", + "2 0.92 91.68 4266134 39217 157 metabat2 2 \n", + "3 1.53 81.34 1902761 11352 218 metabat2 3 \n", + "4 2.73 79.53 2980526 6876 502 metabat2 4 \n", + "5 3.16 76.89 1807028 7852 274 metabat2 5 \n", + "6 0.11 76.31 3477636 82084 71 metabat2 6 \n", + "7 5.77 60.24 1384653 4937 292 metabat2 7 \n", + "8 2.99 45.77 1707078 4929 362 metabat2 8 \n", + "0 0.09 99.82 4681369 82084 94 semibin2 0 \n", + "1 0.28 99.36 2937678 37523 113 semibin2 1 \n", + "2 0.14 93.15 2129295 12519 216 semibin2 2 \n", + "3 0.03 92.07 4162911 40395 139 semibin2 3 \n", + "4 2.25 78.59 1674156 8389 245 semibin2 4 \n", + "5 1.66 78.55 1820073 11737 205 semibin2 5 \n", + "6 1.63 76.99 2790948 7117 450 semibin2 6 \n", + "7 2.45 67.67 1245031 5061 253 semibin2 7 \n", + "8 1.33 50.68 1728690 4913 367 semibin2 8 
\n", + "9 1.31 49.30 2609451 5292 511 semibin2 9 \n", + "\n", + " High quality bin completeness - 2*contamination \\\n", + "0 True 99.90 \n", + "1 True 99.50 \n", + "2 True 96.93 \n", + "3 True 95.42 \n", + "4 True 88.56 \n", + "5 True 87.81 \n", + "6 False 80.05 \n", + "7 False 68.11 \n", + "8 False 65.79 \n", + "9 False 57.44 \n", + "10 False 54.70 \n", + "11 False 49.54 \n", + "12 False 48.57 \n", + "13 False 42.21 \n", + "0 True 99.24 \n", + "1 True 99.08 \n", + "2 True 92.08 \n", + "3 True 85.79 \n", + "4 False 83.75 \n", + "6 False 64.83 \n", + "0 True 90.19 \n", + "1 True 86.86 \n", + "3 False 46.41 \n", + "0 True 99.42 \n", + "1 True 92.73 \n", + "2 True 91.68 \n", + "3 False 81.34 \n", + "4 False 79.53 \n", + "5 False 76.89 \n", + "6 False 76.31 \n", + "7 False 60.24 \n", + "8 False 45.77 \n", + "0 True 99.82 \n", + "1 True 99.36 \n", + "2 True 93.15 \n", + "3 True 92.07 \n", + "4 False 78.59 \n", + "5 False 78.55 \n", + "6 False 76.99 \n", + "7 False 67.67 \n", + "8 False 50.68 \n", + "9 False 49.30 \n", + "\n", + " Contamination ≀ 10 and
Completeness \n", + "0 > 90% \n", + "1 > 90% \n", + "2 > 90% \n", + "3 > 90% \n", + "4 > 90% \n", + "5 > 90% \n", + "6 > 70% and ≀ 90% \n", + "7 > 70% and ≀ 90% \n", + "8 > 70% and ≀ 90% \n", + "9 > 50% and ≀ 70% \n", + "10 > 50% and ≀ 70% \n", + "11 > 50% and ≀ 70% \n", + "12 > 50% and ≀ 70% \n", + "13 > 50% and ≀ 70% \n", + "0 > 90% \n", + "1 > 90% \n", + "2 > 90% \n", + "3 > 90% \n", + "4 > 70% and ≀ 90% \n", + "6 > 70% and ≀ 90% \n", + "0 > 90% \n", + "1 > 90% \n", + "3 > 50% and ≀ 70% \n", + "0 > 90% \n", + "1 > 90% \n", + "2 > 90% \n", + "3 > 70% and ≀ 90% \n", + "4 > 70% and ≀ 90% \n", + "5 > 70% and ≀ 90% \n", + "6 > 70% and ≀ 90% \n", + "7 > 70% and ≀ 90% \n", + "8 > 50% and ≀ 70% \n", + "0 > 90% \n", + "1 > 90% \n", + "2 > 90% \n", + "3 > 90% \n", + "4 > 70% and ≀ 90% \n", + "5 > 70% and ≀ 90% \n", + "6 > 70% and ≀ 90% \n", + "7 > 70% and ≀ 90% \n", + "8 > 50% and ≀ 70% \n", + "9 > 50% and ≀ 70% " + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "contamination_cutoff = 10\n", + "low_contamination_filt = df_bins['contamination'] <= contamination_cutoff\n", + "high_completeness_filt = df_bins['completeness'] > 90\n", + "medium_completeness_filt = df_bins['completeness'] > 70\n", + "low_completeness_filt = df_bins['completeness'] > 50\n", + "\n", + "quality = f'Contamination ≀ {contamination_cutoff} and
Completeness'\n", + "df_bins.loc[low_contamination_filt & low_completeness_filt, quality] = '> 50% and ≀ 70%'\n", + "df_bins.loc[low_contamination_filt & medium_completeness_filt, quality] = '> 70% and ≀ 90%'\n", + "df_bins.loc[low_contamination_filt & high_completeness_filt, quality] = '> 90%'\n", + "df_bins.loc[~df_bins[quality].isna()]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "fa71ff37-9846-4826-a4bb-6c4b0069cea0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Contamination ≀ 10 and<br>Completenesstoolbin_count
0> 50% and ≀ 70%binette5
1> 50% and ≀ 70%maxbin21
2> 50% and ≀ 70%metabat21
3> 50% and ≀ 70%semibin22
4> 70% and ≀ 90%binette3
5> 70% and ≀ 90%concoct2
6> 70% and ≀ 90%metabat25
7> 70% and ≀ 90%semibin24
8> 90%binette6
9> 90%concoct4
10> 90%maxbin22
11> 90%metabat23
12> 90%semibin24
\n", + "
" + ], + "text/plain": [ + " Contamination ≀ 10 and
Completeness tool bin_count\n", + "0 > 50% and ≀ 70% binette 5\n", + "1 > 50% and ≀ 70% maxbin2 1\n", + "2 > 50% and ≀ 70% metabat2 1\n", + "3 > 50% and ≀ 70% semibin2 2\n", + "4 > 70% and ≀ 90% binette 3\n", + "5 > 70% and ≀ 90% concoct 2\n", + "6 > 70% and ≀ 90% metabat2 5\n", + "7 > 70% and ≀ 90% semibin2 4\n", + "8 > 90% binette 6\n", + "9 > 90% concoct 4\n", + "10 > 90% maxbin2 2\n", + "11 > 90% metabat2 3\n", + "12 > 90% semibin2 4" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_bins.groupby([quality, 'tool']).value_counts(ascending=True).reset_index()\n", + "\n", + "df_bins_quality_grouped = df_bins.groupby([quality, 'tool']).agg(bin_count=('bin_id', 'count')).reset_index()\n", + "df_bins_quality_grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "250def29-167e-4a3b-8194-282f602945c8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "color_discrete_map={\"> 90%\": px.colors.qualitative.Prism[4],\n", + " \"> 70% and ≀ 90%\": px.colors.qualitative.Prism[2],\n", + " \"> 50% and ≀ 70%\": px.colors.qualitative.Prism[6]}\n", + "\n", + "fig = px.bar(df_bins_quality_grouped, x='tool', y=\"bin_count\", color=quality,\n", + " barmode='stack', color_discrete_map=color_discrete_map, text=\"bin_count\",\n", + " category_orders={\"tool\":[\"binette\", \"semibin2\", \"concoct\", \"metabat2\", \"maxbin2\"]},\n", + " opacity = 0.9)#[ \"#008c8a\", px.colors.qualitative.Safe[4], '#2596be'])\n", + "\n", + "fig.update_layout(\n", + " width=800,\n", + " height=500,\n", + " legend=dict(\n", + " traceorder=\"reversed\",\n", + " ))\n", + "fig" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/tutorial/assembly.md b/docs/tutorial/assembly.md new file mode 100644 index 0000000..cd51e76 --- /dev/null +++ b/docs/tutorial/assembly.md @@ -0,0 +1,43 @@ + + +## Assemble the reads + +We will use megahit to assemble the reads + +```bash + +cd /home/jmainguy/Analysis/Binette/Binette_tutorial/ncezid-biome_datasets/exec_tutorial_jupyter +``` + +```bash + +megahit -1 coal-metagenomics/Kickstart_1.fastq.gz -2 coal-metagenomics/Kickstart_2.fastq.gz --out-dir Kickstart.megahit --out-prefix R1 --num-cpu-threads 12 + +``` + + +This take 27m49,879s + +```{note} +We can use spade as well. It performs generally better that megahit but is generally longer and consume more memory than megahit. See cami benchmark ??? 
+``` + + + +## Align the reads over the assembly + +First we need to map the reads back against the assembly to get coverage information + +```bash + +mkdir -p alignments_bwa/ + +bwa-mem2 index Kickstart.megahit/R1.contigs.fa -p Kickstart.megahit/R1.contigs.fa + +bwa-mem2 mem -t 12 Kickstart.megahit/R1.contigs.fa coal-metagenomics/Kickstart_*.fastq.gz | samtools view -@ 12 -bS - | samtools sort -@ 12 - -o alignments_bwa/Kickstart.bam + +samtools index alignments_bwa/Kickstart.bam + +``` + +This take around 12 minutes \ No newline at end of file diff --git a/docs/tutorial/binette.md b/docs/tutorial/binette.md new file mode 100644 index 0000000..687398f --- /dev/null +++ b/docs/tutorial/binette.md @@ -0,0 +1,7 @@ +## Run Binette + +```{code-cell} bash + +binette --bin_dirs maxbin2/ metabat2/ semibin2/output_bins/ concoct/bins/ -c Kickstart.megahit/R1.contigs.fa --verbose -t 12 -o binette_results + +``` \ No newline at end of file diff --git a/docs/tutorial/binning.md b/docs/tutorial/binning.md new file mode 100644 index 0000000..36efbed --- /dev/null +++ b/docs/tutorial/binning.md @@ -0,0 +1,67 @@ + +## Run binning tools + + +### metabat2 + +We first generate a depth file from the bam file using jgi_summarize_bam_contig_depths script from metabat2. This depth file will be used also with maxbin2. 
+```bash + +jgi_summarize_bam_contig_depths --outputDepth depth_Kickstart.txt alignments_bwa/Kickstart.bam +``` + +Now we can run metabat2: + +```bash + +metabat2 --inFile Kickstart.megahit/R1.contigs.fa --abdFile depth_Kickstart.txt --outFile metabat2/metabat2 --numThreads 12 --seed 1 + +``` + + +### maxbin2 + +We use the depth file produced by `jgi_summarize_bam_contig_depths` + +```bash + +mkdir -p maxbin2 +run_MaxBin.pl -contig Kickstart.megahit/R1.contigs.fa -abund depth_Kickstart.txt -thread 12 -out maxbin2/maxbin2 + +``` + +### concoct + +Then we can also run concoct with the folowing commands: + +```bash + +mkdir -p concoct/ + +cut_up_fasta.py Kickstart.megahit/R1.contigs.fa --chunk_size 10000 --overlap_size 0 --merge_last --bedfile concoct/contigs_10K.bed > concoct/contigs_10K.fa + +concoct_coverage_table.py concoct/contigs_10K.bed alignments_bwa/Kickstart.bam > concoct/coverage_table.tsv + +concoct --composition_file concoct/contigs_10K.fa --coverage_file concoct/coverage_table.tsv --basename concoct/bins --threads 12 + +merge_cutup_clustering.py concoct/bins_clustering_gt1000.csv > concoct/clustering_merge.csv + +mkdir -p concoct/bins + +extract_fasta_bins.py Kickstart.megahit/R1.contigs.fa concoct/clustering_merge.csv --output_path concoct/bins +``` + +### SemiBin2 + +We can launch semibin2 as well with its `single_easy_bin` command. + +```{note} +This take some time so it can be skipped. +``` + +```bash + +SemiBin2 single_easy_bin -i Kickstart.megahit/R1.contigs.fa -b alignments_bwa/Kickstart.bam -o semibin2/ -p 12 + +``` + diff --git a/docs/tutorial/set_env_and_get_data.md b/docs/tutorial/set_env_and_get_data.md new file mode 100644 index 0000000..b950a44 --- /dev/null +++ b/docs/tutorial/set_env_and_get_data.md @@ -0,0 +1,78 @@ + +## Set tutorial environment + +We will download necessary tool in a dedicated conda envrionnement. 
+ + + +Let's create a directory to run the tutorial: + + +```bash + +mamba env create -f binette_tutorial_env.yaml -n binette_tuto + +``` + + + +## Get the Data + +### Using ncezid-biome datasets tool + +I downloaded the metagenome Kickstart from the above dataset (SAMN05024035) that correspond to this sra SRR5058924 https://www.ncbi.nlm.nih.gov/Traces/study/?acc=SRR5058924&o=acc_s%3Aa + + +We will donwload the data of the Kickstart (SAMN05024035) dataset this repository that https://github.com/ncezid-biome/datasets?tab=readme-ov-file#edlb + +We had use conda as detailed here https://github.com/ncezid-biome/datasets/blob/master/INSTALL.md#conda + +Now we can download the Kickstart dataset with the folowing commands. + +We first download the coal-metagenomic table from the github repository : https://github.com/ncezid-biome/datasets/blob/master/datasets/coal-metagenomics.tsv +ANd just select the line corresponding to the Kickstart dataset. + + + + +```bash +# download the coal-metagenomic tsv file from the github repository +wget https://raw.githubusercontent.com/ncezid-biome/datasets/master/datasets/coal-metagenomics.tsv + +# select the header of the table as it is necessary for the download + +head -n7 coal-metagenomics.tsv > coal-metagenomics_Kickstart_only.tsv +grep SRR5058924 coal-metagenomics.tsv >> coal-metagenomics_Kickstart_only.tsv + +GenFSGopher.pl --numcpus 12 --compressed --outdir coal-metagenomics coal-metagenomics.tsv + +``` + +It takes around 16min to run + +You should hae the folowing structure +``` +β”œβ”€β”€ coal-metagenomics_Kickstart_only.tsv +└── data + β”œβ”€β”€ in.tsv + β”œβ”€β”€ Kickstart_1.fastq.gz + β”œβ”€β”€ Kickstart_1.fastq.sha256 + β”œβ”€β”€ Kickstart_2.fastq.gz + β”œβ”€β”€ Kickstart_2.fastq.sha256 + β”œβ”€β”€ Makefile + β”œβ”€β”€ prefetch.done + β”œβ”€β”€ sha256sum.log + β”œβ”€β”€ SRR5058924 + β”‚Β Β  └── SRR5058924.sra + └── tree.dnd + + +``` + +```{tip} +You can remove the SRA file `data/SRR5058924/SRR5058924.sra` as we do not need 
it anymore as we will exclusively use the fastq files. with `rm data/SRR5058924/SRR5058924.sra` +``` + +```{note} +You can also download the data using SRA toolkit which what the tool does in the background but add some check sum to ensure data integrity. After instaling sra toolkit (with conda for example : https://anaconda.org/bioconda/sra-tools) you can run the two commands folowing commands to retrived the data: `prefetch SRR5058924` and `fastq-dump --defline-seq '@$ac_$sn/$ri' --defline-qual '+' --split-3 -O . SRR5058924.sra` +``` \ No newline at end of file diff --git a/docs/tutorial/tutorial_main.md b/docs/tutorial/tutorial_main.md new file mode 100644 index 0000000..1d35a5a --- /dev/null +++ b/docs/tutorial/tutorial_main.md @@ -0,0 +1,52 @@ + +# Tutorial + +The goal of this tutorial is to show an example of commands on how Binette can be used on real data. We will start ou journey from metagenomics reads that we gonna download, then we will assemble these reads in contigs that we will bin with different binning tool. I finally we will use Binette to refine those bins. 
+ + + +```{toctree} +:caption: 'Tutorial' +:maxdepth: 2 + +set_env_and_get_data +assembly +binning +binette +analyse_binette_result.ipynb +``` + + + + + + diff --git a/pyproject.toml b/pyproject.toml index 8ff3ff8..06553ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,8 +44,11 @@ doc = [ "readthedocs-sphinx-search==0.3.1", "sphinx-autobuild==2021.3.14", "myst-parser==1.0.0", - "docutils==0.18.1" -] + "docutils==0.18.1", + "myst-nb", + "nbsphinx" + ] + dev = [ "pytest>=7.0.0", "pytest-cov" From a1f983c2fd8b60c6e8be6230f5b4f451ef910d7b Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 2024 15:00:04 +0200 Subject: [PATCH 15/36] adjust nbsphinx version in doc deps --- pyproject.toml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 06553ae..9bbee26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,11 +44,10 @@ doc = [ "readthedocs-sphinx-search==0.3.1", "sphinx-autobuild==2021.3.14", "myst-parser==1.0.0", - "docutils==0.18.1", - "myst-nb", - "nbsphinx" + "docutils==0.18.1", #"myst-nb", + "nbsphinx==0.9.5" ] - + dev = [ "pytest>=7.0.0", "pytest-cov" From 41ef85e7396a93b98543378c920ef275eea6d155 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 2024 15:05:56 +0200 Subject: [PATCH 16/36] manage sphinx deps --- docs/conf.py | 4 ++-- pyproject.toml | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index bb2e303..eae8582 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -28,8 +28,8 @@ "myst_parser", 'nbsphinx', 'nbsphinx_link', - 'sphinx.ext.napoleon', - 'sphinx.ext.viewcode', + # 'sphinx.ext.napoleon', + # 'sphinx.ext.viewcode', "myst_parser", ] diff --git a/pyproject.toml b/pyproject.toml index 9bbee26..51802de 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,8 @@ doc = [ "sphinx-autobuild==2021.3.14", "myst-parser==1.0.0", "docutils==0.18.1", #"myst-nb", - "nbsphinx==0.9.5" + "nbsphinx==0.9.5", + nbsphinx_link==1.3.0 ] 
dev = [ From 6e670cf61b454cf1ea7c6b57a525146cbb864dab Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 2024 15:07:08 +0200 Subject: [PATCH 17/36] add missing quote --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 51802de..6687291 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,7 @@ doc = [ "myst-parser==1.0.0", "docutils==0.18.1", #"myst-nb", "nbsphinx==0.9.5", - nbsphinx_link==1.3.0 + "nbsphinx_link==1.3.0" ] dev = [ From 863352070f51d7eca1d50e96706d6b405192bde8 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 2024 15:25:33 +0200 Subject: [PATCH 18/36] test with sphinx-book-theme --- docs/conf.py | 3 ++- pyproject.toml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index eae8582..ff33a85 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -55,7 +55,8 @@ # -- Options for HTML output ------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output -html_theme = 'sphinx_rtd_theme' #'alabaster' # +# html_theme = 'sphinx_rtd_theme' #'alabaster' # +html_theme = 'sphinx_book_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. 
They are copied after the builtin static files, diff --git a/pyproject.toml b/pyproject.toml index 6687291..6fad059 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,7 +46,8 @@ doc = [ "myst-parser==1.0.0", "docutils==0.18.1", #"myst-nb", "nbsphinx==0.9.5", - "nbsphinx_link==1.3.0" + "nbsphinx_link==1.3.0", + "sphinx-book-theme==1.0.1" ] dev = [ From ba8c70b67de9b5b7ccd1fc85010d43a33de174b3 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 2024 17:59:14 +0200 Subject: [PATCH 19/36] improve tutorial --- docs/conf.py | 17 +++++- docs/tutorial/assembly.md | 56 ++++++++++--------- docs/tutorial/binette.md | 56 +++++++++++++++++-- docs/tutorial/binning.md | 70 ++++++++++++++---------- docs/tutorial/set_env_and_get_data.md | 78 --------------------------- docs/tutorial/set_environment.md | 27 ++++++++++ docs/tutorial/tutorial_main.md | 73 +++++++++++++++++++++++-- 7 files changed, 236 insertions(+), 141 deletions(-) delete mode 100644 docs/tutorial/set_env_and_get_data.md create mode 100644 docs/tutorial/set_environment.md diff --git a/docs/conf.py b/docs/conf.py index ff33a85..37654c6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -31,9 +31,22 @@ # 'sphinx.ext.napoleon', # 'sphinx.ext.viewcode', "myst_parser", - + 'sphinxcontrib.mermaid' +] +myst_enable_extensions = [ + "amsmath", + "colon_fence", + "deflist", + "dollarmath", + "fieldlist", + "html_admonition", + "html_image", + "replacements", + "smartquotes", + "strikethrough", + "substitution", + "tasklist", ] - source_suffix = { '.md': 'markdown', diff --git a/docs/tutorial/assembly.md b/docs/tutorial/assembly.md index cd51e76..c7104ed 100644 --- a/docs/tutorial/assembly.md +++ b/docs/tutorial/assembly.md @@ -1,43 +1,49 @@ +## Assemble the Reads +We will use **MEGAHIT** to assemble the reads from our dataset. 
Run the following command: -## Assemble the reads - -We will use megahit to assemble the reads - -```bash - -cd /home/jmainguy/Analysis/Binette/Binette_tutorial/ncezid-biome_datasets/exec_tutorial_jupyter +```{code-block} bash +megahit -1 coal-metagenomics/Kickstart_1.fastq.gz \ + -2 coal-metagenomics/Kickstart_2.fastq.gz \ + --out-dir Kickstart.megahit --out-prefix R1 --num-cpu-threads 12 ``` -```bash +:::{admonition} βŒ› Expected Time +:class: note -megahit -1 coal-metagenomics/Kickstart_1.fastq.gz -2 coal-metagenomics/Kickstart_2.fastq.gz --out-dir Kickstart.megahit --out-prefix R1 --num-cpu-threads 12 - -``` +This process takes approximately 28 minutes to complete. +::: -This take 27m49,879s +```{admonition} Note +:class: note -```{note} -We can use spade as well. It performs generally better that megahit but is generally longer and consume more memory than megahit. See cami benchmark ??? +You can also use **SPAdes** for assembly. It generally performs better than MEGAHIT but takes longer and requires more memory. Refer to the CAMI benchmark for a detailed comparison. ``` +## Align the Reads Over the Assembly +To get coverage information, we first need to map the reads back to the assembly. 
-## Align the reads over the assembly - -First we need to map the reads back against the assembly to get coverage information - -```bash - +```{code-block} bash +# Create a directory for the alignments mkdir -p alignments_bwa/ +# Index the contigs file using BWA-MEM2 bwa-mem2 index Kickstart.megahit/R1.contigs.fa -p Kickstart.megahit/R1.contigs.fa -bwa-mem2 mem -t 12 Kickstart.megahit/R1.contigs.fa coal-metagenomics/Kickstart_*.fastq.gz | samtools view -@ 12 -bS - | samtools sort -@ 12 - -o alignments_bwa/Kickstart.bam - -samtools index alignments_bwa/Kickstart.bam +# Map reads back to the assembly, convert to BAM format, and sort +bwa-mem2 mem -t 12 Kickstart.megahit/R1.contigs.fa coal-metagenomics/Kickstart_*.fastq.gz | \ +samtools view -@ 12 -bS - | \ +samtools sort -@ 12 - -o alignments_bwa/Kickstart.bam +# Index the BAM file +samtools index alignments_bwa/Kickstart.bam ``` - -This take around 12 minutes \ No newline at end of file + + +:::{admonition} βŒ› Expected Time +:class: note + +This process takes approximately 12 minutes to complete. +::: diff --git a/docs/tutorial/binette.md b/docs/tutorial/binette.md index 687398f..926ce3b 100644 --- a/docs/tutorial/binette.md +++ b/docs/tutorial/binette.md @@ -1,7 +1,55 @@ -## Run Binette -```{code-cell} bash +## Run Binette -binette --bin_dirs maxbin2/ metabat2/ semibin2/output_bins/ concoct/bins/ -c Kickstart.megahit/R1.contigs.fa --verbose -t 12 -o binette_results +Binette will use the previously computed bins to refine and improve them, generating a new set of higher-quality bins. 
+ +To run Binette, use the following command: + +```bash +binette --bin_dirs maxbin2/ metabat2/ semibin2/output_bins/ concoct/bins/ \ + -c Kickstart.megahit/R1.contigs.fa \ + --verbose -t 12 -o binette_results +``` + +Once Binette completes, the `binette_results` directory should have the following structure: + +```plaintext +binette_results/ +β”œβ”€β”€ final_bins +β”‚ β”œβ”€β”€ bin_13475.fa +β”‚ β”œβ”€β”€ bin_17075.fa +β”‚ β”œβ”€β”€ bin_19689.fa +β”‚ β”œβ”€β”€ bin_21248.fa +β”‚ β”œβ”€β”€ bin_31703.fa +β”‚ β”œβ”€β”€ bin_33569.fa +β”‚ β”œβ”€β”€ bin_39350.fa +β”‚ β”œβ”€β”€ bin_39427.fa +β”‚ β”œβ”€β”€ bin_39558.fa +β”‚ β”œβ”€β”€ bin_44137.fa +β”‚ β”œβ”€β”€ bin_46775.fa +β”‚ β”œβ”€β”€ bin_47060.fa +β”‚ β”œβ”€β”€ bin_47177.fa +β”‚ β”œβ”€β”€ bin_47926.fa +β”‚ └── bin_51082.fa +β”œβ”€β”€ final_bins_quality_reports.tsv +β”œβ”€β”€ input_bins_quality_reports +β”‚ β”œβ”€β”€ input_bins_1.concoct_bins.tsv +β”‚ β”œβ”€β”€ input_bins_2.maxbin2.tsv +β”‚ β”œβ”€β”€ input_bins_3.metabat2.tsv +β”‚ └── input_bins_4.semibin2_output_bins.tsv +└── temporary_files + β”œβ”€β”€ assembly_proteins.faa + β”œβ”€β”€ diamond_result.log + └── diamond_result.tsv +``` + +### Key Output Files: + +- **`final_bins/`**: Contains the refined bins in FASTA format. +- **`final_bins_quality_reports.tsv`**: A summary report containing CheckM2 metrics for the final bin selection. +- **`input_bins_quality_reports/`**: Quality reports for each of the input bin sets from MaxBin2, MetaBAT2, CONCOCT, and SemiBin2. + +### Next Steps + +In the next section, we will use `final_bins_quality_reports.tsv` along with the reports from `binette_results/input_bins_quality_reports` to visualize Binette's bins and compare them with the initial bin sets. 
-``` \ No newline at end of file diff --git a/docs/tutorial/binning.md b/docs/tutorial/binning.md index 36efbed..8624cf6 100644 --- a/docs/tutorial/binning.md +++ b/docs/tutorial/binning.md @@ -1,49 +1,62 @@ +## Run Binning Tools -## Run binning tools - +In this section, we'll use different binning tools to group contigs of assembly. -### metabat2 +### MetaBAT2 -We first generate a depth file from the bam file using jgi_summarize_bam_contig_depths script from metabat2. This depth file will be used also with maxbin2. -```bash +First, generate a depth file from the BAM file using the `jgi_summarize_bam_contig_depths` script from MetaBAT2. This depth file will also be used for MaxBin2. -jgi_summarize_bam_contig_depths --outputDepth depth_Kickstart.txt alignments_bwa/Kickstart.bam +```bash +jgi_summarize_bam_contig_depths --outputDepth depth_Kickstart.txt alignments_bwa/Kickstart.bam ``` -Now we can run metabat2: +Now, run MetaBAT2 with the generated depth file: ```bash +metabat2 --inFile Kickstart.megahit/R1.contigs.fa --abdFile depth_Kickstart.txt --outFile metabat2/metabat2 --numThreads 12 --seed 1 +``` -metabat2 --inFile Kickstart.megahit/R1.contigs.fa --abdFile depth_Kickstart.txt --outFile metabat2/metabat2 --numThreads 12 --seed 1 +### MaxBin2 +We will use the same depth file produced by `jgi_summarize_bam_contig_depths` for MetaBAT2: + +```bash +mkdir -p maxbin2 +run_MaxBin.pl -contig Kickstart.megahit/R1.contigs.fa \ + -abund depth_Kickstart.txt -thread 12 -out maxbin2/maxbin2 ``` +### CONCOCT -### maxbin2 +To run CONCOCT, follow these steps: -We use the depth file produced by `jgi_summarize_bam_contig_depths` +1. 
**Cut up the FASTA file** into chunks for processing: ```bash +mkdir -p concoct/ -mkdir -p maxbin2 -run_MaxBin.pl -contig Kickstart.megahit/R1.contigs.fa -abund depth_Kickstart.txt -thread 12 -out maxbin2/maxbin2 - +cut_up_fasta.py Kickstart.megahit/R1.contigs.fa --chunk_size 10000 \ + --overlap_size 0 --merge_last \ + --bedfile concoct/contigs_10K.bed > concoct/contigs_10K.fa ``` -### concoct - -Then we can also run concoct with the folowing commands: +2. **Generate the coverage table** from the BAM file: ```bash +concoct_coverage_table.py concoct/contigs_10K.bed alignments_bwa/Kickstart.bam > concoct/coverage_table.tsv +``` -mkdir -p concoct/ - -cut_up_fasta.py Kickstart.megahit/R1.contigs.fa --chunk_size 10000 --overlap_size 0 --merge_last --bedfile concoct/contigs_10K.bed > concoct/contigs_10K.fa +3. **Run CONCOCT** with the composition and coverage files: -concoct_coverage_table.py concoct/contigs_10K.bed alignments_bwa/Kickstart.bam > concoct/coverage_table.tsv +```bash +concoct --composition_file concoct/contigs_10K.fa \ + --coverage_file concoct/coverage_table.tsv \ + --basename concoct/bins --threads 12 +``` -concoct --composition_file concoct/contigs_10K.fa --coverage_file concoct/coverage_table.tsv --basename concoct/bins --threads 12 +4. **Merge the clustering results** and extract bins: +```bash merge_cutup_clustering.py concoct/bins_clustering_gt1000.csv > concoct/clustering_merge.csv mkdir -p concoct/bins @@ -53,15 +66,16 @@ extract_fasta_bins.py Kickstart.megahit/R1.contigs.fa concoct/clustering_merge.c ### SemiBin2 -We can launch semibin2 as well with its `single_easy_bin` command. +You can also run SemiBin2 with its `single_easy_bin` command: + +```{admonition} ⏳ Time Note +:class: note -```{note} -This take some time so it can be skipped. +This process can take some time, so it may be skipped. 
``` ```bash - -SemiBin2 single_easy_bin -i Kickstart.megahit/R1.contigs.fa -b alignments_bwa/Kickstart.bam -o semibin2/ -p 12 - +SemiBin2 single_easy_bin -i Kickstart.megahit/R1.contigs.fa \ + -b alignments_bwa/Kickstart.bam \ + -o semibin2/ -p 12 ``` - diff --git a/docs/tutorial/set_env_and_get_data.md b/docs/tutorial/set_env_and_get_data.md deleted file mode 100644 index b950a44..0000000 --- a/docs/tutorial/set_env_and_get_data.md +++ /dev/null @@ -1,78 +0,0 @@ - -## Set tutorial environment - -We will download necessary tool in a dedicated conda envrionnement. - - - -Let's create a directory to run the tutorial: - - -```bash - -mamba env create -f binette_tutorial_env.yaml -n binette_tuto - -``` - - - -## Get the Data - -### Using ncezid-biome datasets tool - -I downloaded the metagenome Kickstart from the above dataset (SAMN05024035) that correspond to this sra SRR5058924 https://www.ncbi.nlm.nih.gov/Traces/study/?acc=SRR5058924&o=acc_s%3Aa - - -We will donwload the data of the Kickstart (SAMN05024035) dataset this repository that https://github.com/ncezid-biome/datasets?tab=readme-ov-file#edlb - -We had use conda as detailed here https://github.com/ncezid-biome/datasets/blob/master/INSTALL.md#conda - -Now we can download the Kickstart dataset with the folowing commands. - -We first download the coal-metagenomic table from the github repository : https://github.com/ncezid-biome/datasets/blob/master/datasets/coal-metagenomics.tsv -ANd just select the line corresponding to the Kickstart dataset. 
- - - - -```bash -# download the coal-metagenomic tsv file from the github repository -wget https://raw.githubusercontent.com/ncezid-biome/datasets/master/datasets/coal-metagenomics.tsv - -# select the header of the table as it is necessary for the download - -head -n7 coal-metagenomics.tsv > coal-metagenomics_Kickstart_only.tsv -grep SRR5058924 coal-metagenomics.tsv >> coal-metagenomics_Kickstart_only.tsv - -GenFSGopher.pl --numcpus 12 --compressed --outdir coal-metagenomics coal-metagenomics.tsv - -``` - -It takes around 16min to run - -You should hae the folowing structure -``` -β”œβ”€β”€ coal-metagenomics_Kickstart_only.tsv -└── data - β”œβ”€β”€ in.tsv - β”œβ”€β”€ Kickstart_1.fastq.gz - β”œβ”€β”€ Kickstart_1.fastq.sha256 - β”œβ”€β”€ Kickstart_2.fastq.gz - β”œβ”€β”€ Kickstart_2.fastq.sha256 - β”œβ”€β”€ Makefile - β”œβ”€β”€ prefetch.done - β”œβ”€β”€ sha256sum.log - β”œβ”€β”€ SRR5058924 - β”‚Β Β  └── SRR5058924.sra - └── tree.dnd - - -``` - -```{tip} -You can remove the SRA file `data/SRR5058924/SRR5058924.sra` as we do not need it anymore as we will exclusively use the fastq files. with `rm data/SRR5058924/SRR5058924.sra` -``` - -```{note} -You can also download the data using SRA toolkit which what the tool does in the background but add some check sum to ensure data integrity. After instaling sra toolkit (with conda for example : https://anaconda.org/bioconda/sra-tools) you can run the two commands folowing commands to retrived the data: `prefetch SRR5058924` and `fastq-dump --defline-seq '@$ac_$sn/$ri' --defline-qual '+' --split-3 -O . SRR5058924.sra` -``` \ No newline at end of file diff --git a/docs/tutorial/set_environment.md b/docs/tutorial/set_environment.md new file mode 100644 index 0000000..52d6092 --- /dev/null +++ b/docs/tutorial/set_environment.md @@ -0,0 +1,27 @@ +## Set Up the Tutorial Environment + +To get started, we'll download the necessary tools and set them up in a dedicated Conda environment. 
+ +### Create a Conda Environment + +First, let's create a new Conda environment specifically for this tutorial: + +```{code-block} bash +mamba env create -f binette_tutorial_env.yaml -n binette_tuto +``` + +This command will create a Conda environment named `binette_tuto` using the environment file `binette_tutorial_env.yaml`. + +### Activate the Environment + +After the environment is created, activate it by running: + +```{code-block} bash +conda activate binette_tuto +``` + +Below is the content of the `binette_tutorial_env.yaml` file: + +```{include} binette_tutorial_env.yaml +:code: yaml +``` diff --git a/docs/tutorial/tutorial_main.md b/docs/tutorial/tutorial_main.md index 1d35a5a..d21931c 100644 --- a/docs/tutorial/tutorial_main.md +++ b/docs/tutorial/tutorial_main.md @@ -1,15 +1,80 @@ # Tutorial -The goal of this tutorial is to show an example of commands on how Binette can be used on real data. We will start ou journey from metagenomics reads that we gonna download, then we will assemble these reads in contigs that we will bin with different binning tool. I finally we will use Binette to refine those bins. +In this tutorial, we'll walk through a practical example of how to use Binette with real data. We'll start by downloading metagenomics reads and then assemble these reads into contigs. Next, we'll use different binning tools to group the contigs. Finally, we'll use Binette to refine these bins and improve our results. 
+```{mermaid} +--- +title: "Tutorial Overview:" +align: center +--- + +%%{init: {'theme':'default'}}%% + +graph LR + + A[Download Metagenomics Reads] --> B + B[Assemble Reads into Contigs] --> c + subgraph Pangenome creation + a:::workflow + c:::workflow + g:::workflow + p:::workflow + a("annotate") --> c + c(cluster) --> g(graph) + g(graph) --> p(partition) + end + + + C[Bin Contigs with Binning Tools] --> D[Refine Bins with Binette] + + + + classDef panrgp fill:#4066d4 + classDef panmodule fill:#d44066 + classDef workflow fill:#d4ae40 + + +``` + +```{mermaid} + +--- +title: "Tutorial Overview:" +align: center +--- + + +graph TD + + i[Get Metagenomics Reads] --> B[Assembly & Reads alignment] + + + B --> metabat2 --> r[Binette] + B --> maxbin2 --> r + B --> concoct --> r + B --> semibin2 --> r + + subgraph Binning + metabat2:::binning + maxbin2:::binning + concoct:::binning + semibin2:::binning + end + + + classDef binning fill:#d4ae40 + + +``` ```{toctree} -:caption: 'Tutorial' -:maxdepth: 2 +:caption: 'Tutorial steps' +:maxdepth: 1 -set_env_and_get_data +set_environment +get_dataset assembly binning binette From e285579e27147a28b97fc37ec2acd91e05c7601b Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 2024 18:00:28 +0200 Subject: [PATCH 20/36] rm print --- binette/io_manager.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/binette/io_manager.py b/binette/io_manager.py index 5899722..04213e4 100644 --- a/binette/io_manager.py +++ b/binette/io_manager.py @@ -66,7 +66,7 @@ def infer_bin_set_names_from_input_paths(input_bins: List[Path]) -> Dict[str, Pa bin_name_to_bin_dir = {} common_prefix, common_suffix, common_extensions = get_paths_common_prefix_suffix(input_bins) - print(common_prefix, common_suffix, common_extensions ) + for path in input_bins: specific_parts = path.parts[len(common_prefix):len(path.parts)-len(common_suffix)] From de93964300e81baa9878aa390e10c0038f981d69 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Tue, 3 Sep 
2024 18:02:13 +0200 Subject: [PATCH 21/36] add mermaid deps --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6fad059..8f12e86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,8 @@ doc = [ "docutils==0.18.1", #"myst-nb", "nbsphinx==0.9.5", "nbsphinx_link==1.3.0", - "sphinx-book-theme==1.0.1" + "sphinx-book-theme==1.0.1", + "sphinxcontrib.mermaid" ] dev = [ From 7f2c52f06a1c04b102dcffa0877802e0c6cdd222 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 12:04:41 +0200 Subject: [PATCH 22/36] improve tutorial --- docs/conf.py | 4 +- docs/tutorial/analyse_binette_result.ipynb | 1365 ++++------------- docs/tutorial/assembly.md | 21 +- .../final_bins_quality_reports.tsv | 16 + .../input_bins_1.concoct_bins.tsv | 64 + .../input_bins_2.maxbin2.tsv | 24 + .../input_bins_3.metabat2.tsv | 15 + .../input_bins_4.semibin2_output_bins.tsv | 26 + docs/tutorial/binning.md | 44 +- docs/tutorial/tutorial_main.md | 52 +- 10 files changed, 538 insertions(+), 1093 deletions(-) create mode 100644 docs/tutorial/binette_results/final_bins_quality_reports.tsv create mode 100644 docs/tutorial/binette_results/input_bins_quality_reports/input_bins_1.concoct_bins.tsv create mode 100644 docs/tutorial/binette_results/input_bins_quality_reports/input_bins_2.maxbin2.tsv create mode 100644 docs/tutorial/binette_results/input_bins_quality_reports/input_bins_3.metabat2.tsv create mode 100644 docs/tutorial/binette_results/input_bins_quality_reports/input_bins_4.semibin2_output_bins.tsv diff --git a/docs/conf.py b/docs/conf.py index 37654c6..245781e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -61,7 +61,7 @@ # `path/to/file:heading` instead of just `heading` autosectionlabel_prefix_document = True -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'build', "api"] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'build', "api", "jupyter_execute"] @@ -69,7 +69,7 @@ # 
https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output # html_theme = 'sphinx_rtd_theme' #'alabaster' # -html_theme = 'sphinx_book_theme' +html_theme = 'sphinx_rtd_theme' #'sphinx_book_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/tutorial/analyse_binette_result.ipynb b/docs/tutorial/analyse_binette_result.ipynb index 30247a8..94d1fc8 100644 --- a/docs/tutorial/analyse_binette_result.ipynb +++ b/docs/tutorial/analyse_binette_result.ipynb @@ -2,31 +2,57 @@ "cells": [ { "cell_type": "markdown", - "id": "52e7f39c", + "id": "edcb3b82", "metadata": {}, "source": [ - "## Analyse Binette results" + "## Analyse Binette results\n", + "\n", + "Let's visualize the results from Binette and compare them to the initial bin sets used as input. " + ] + }, + { + "cell_type": "markdown", + "id": "dbe1d73b", + "metadata": {}, + "source": [ + "### Import Necessary Libraries\n", + "\n", + "First, we'll need to import the necessary libraries for our analysis and plotting:" ] }, { "cell_type": "code", "execution_count": 1, - "id": "e6a1e1ee-681d-4823-b974-7027bafd2ba9", + "id": "9e9153ef", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "from pathlib import Path\n", "import plotly.express as px\n", + "\n", + "# This is needed to properly display Plotly graphs in the documentation\n", "import plotly.io as pio\n", "pio.renderers.default = \"sphinx_gallery\"" ] }, + { + "cell_type": "markdown", + "id": "b93e8a0e", + "metadata": {}, + "source": [ + "### Load Binette Results\n", + "\n", + "Now, let's load the final Binette quality report into a Pandas DataFrame:" + ] + }, { "cell_type": "code", "execution_count": 2, - "id": "34e80119-f59b-41b0-b0e5-de2d6ed0c6a3", - "metadata": {}, + "id": "d95ad45c", + "metadata": { + "lines_to_next_cell": 0 + }, "outputs": [ { "data": { @@ -321,15 +347,25 @@ "source": [ 
"binette_result_file = \"./binette_results/final_bins_quality_reports.tsv\"\n", "df_binette = pd.read_csv(binette_result_file, sep='\\t')\n", - "df_binette['tool'] = \"binette\"\n", - "df_binette['index'] = df_binette.index\n", + "df_binette['tool'] = \"binette\" # Add a column to label the tool\n", + "df_binette['index'] = df_binette.index # Add an index column\n", "df_binette" ] }, + { + "cell_type": "markdown", + "id": "c1372a73", + "metadata": {}, + "source": [ + "### Load and Combine Input Bin Quality Reports\n", + "\n", + "Next, we will load the quality reports of the input bin sets, computed by various tools and saved by Binette. We’ll combine these into a single DataFrame and add a column to indicate high-quality bins. We define a high-quality bin as one with contamination ≀ 5% and completeness β‰₯ 90%." + ] + }, { "cell_type": "code", "execution_count": 3, - "id": "189038d3-77a0-435a-9590-4d8b3038341e", + "id": "fcb016f2", "metadata": {}, "outputs": [ { @@ -353,98 +389,134 @@ " \n", " \n", " \n", + " tool\n", " completeness\n", " contamination\n", - " tool\n", + " size\n", + " N50\n", + " contig_count\n", " \n", " \n", " \n", " \n", " 0\n", + " binette\n", " 100.00\n", " 0.05\n", - " binette\n", + " 4672665\n", + " 82084\n", + " 93\n", " \n", " \n", " 1\n", + " binette\n", " 99.90\n", " 0.20\n", - " binette\n", + " 2796605\n", + " 41151\n", + " 98\n", " \n", " \n", " 2\n", + " binette\n", " 98.59\n", " 0.83\n", - " binette\n", + " 4601336\n", + " 41016\n", + " 165\n", " \n", " \n", " 3\n", + " binette\n", " 96.10\n", " 0.34\n", - " binette\n", + " 2598718\n", + " 11891\n", + " 312\n", " \n", " \n", " 4\n", + " binette\n", " 91.98\n", " 1.71\n", - " binette\n", + " 1768095\n", + " 9976\n", + " 250\n", " \n", " \n", " ...\n", " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", " 20\n", + " semibin2\n", " 8.28\n", " 0.01\n", - " semibin2\n", + " 358822\n", + " 3296\n", + " 106\n", " \n", " \n", " 21\n", + " semibin2\n", " 8.12\n", 
" 0.02\n", - " semibin2\n", + " 353499\n", + " 3949\n", + " 90\n", " \n", " \n", " 22\n", + " semibin2\n", " 7.74\n", " 0.01\n", - " semibin2\n", + " 351540\n", + " 4284\n", + " 85\n", " \n", " \n", " 23\n", + " semibin2\n", " 6.18\n", " 0.00\n", - " semibin2\n", + " 250833\n", + " 3607\n", + " 66\n", " \n", " \n", " 24\n", + " semibin2\n", " 4.41\n", " 0.13\n", - " semibin2\n", + " 217541\n", + " 3425\n", + " 64\n", " \n", " \n", "\n", - "

140 rows Γ— 3 columns

\n", + "

140 rows Γ— 6 columns

\n", "" ], "text/plain": [ - " completeness contamination tool\n", - "0 100.00 0.05 binette\n", - "1 99.90 0.20 binette\n", - "2 98.59 0.83 binette\n", - "3 96.10 0.34 binette\n", - "4 91.98 1.71 binette\n", - ".. ... ... ...\n", - "20 8.28 0.01 semibin2\n", - "21 8.12 0.02 semibin2\n", - "22 7.74 0.01 semibin2\n", - "23 6.18 0.00 semibin2\n", - "24 4.41 0.13 semibin2\n", + " tool completeness contamination size N50 contig_count\n", + "0 binette 100.00 0.05 4672665 82084 93\n", + "1 binette 99.90 0.20 2796605 41151 98\n", + "2 binette 98.59 0.83 4601336 41016 165\n", + "3 binette 96.10 0.34 2598718 11891 312\n", + "4 binette 91.98 1.71 1768095 9976 250\n", + ".. ... ... ... ... ... ...\n", + "20 semibin2 8.28 0.01 358822 3296 106\n", + "21 semibin2 8.12 0.02 353499 3949 90\n", + "22 semibin2 7.74 0.01 351540 4284 85\n", + "23 semibin2 6.18 0.00 250833 3607 66\n", + "24 semibin2 4.41 0.13 217541 3425 64\n", "\n", - "[140 rows x 3 columns]" + "[140 rows x 6 columns]" ] }, "execution_count": 3, @@ -453,36 +525,51 @@ } ], "source": [ + "from pathlib import Path\n", + "\n", "input_bins_quality_reports_dir = Path(\"binette_results/input_bins_quality_reports/\")\n", "\n", + "# Initialize the list with Binette results\n", "df_input_bin_list = [df_binette]\n", + "\n", + "# Load each input bin quality report\n", "for input_bin_metric_file in input_bins_quality_reports_dir.glob(\"*tsv\"):\n", - " tool = input_bin_metric_file.name.split('.')[1].split('_')[0]\n", + " tool = input_bin_metric_file.name.split('.')[1].split('_')[0] # Extract tool name from file name\n", " df_input = pd.read_csv(input_bin_metric_file, sep='\\t')\n", " df_input['index'] = df_input.index\n", " df_input['tool'] = tool\n", " df_input_bin_list.append(df_input)\n", "\n", - "df_bins = pd.concat(df_input_bin_list)\n", - " \n", - "set(df_bins['tool'])\n", - "df_bins[\"High quality bin\"] = (df_bins['completeness'] >= 90) & (df_bins['contamination'] <= 5)\n", - "#df_binette = pd.read_csv(binette_result_file, 
sep='\\t')\n", - "#df_binette\n", - "df_bins[[\"completeness\", \"contamination\", \"tool\"]]\n" + "# Combine all DataFrames into one\n", + "df_bins = pd.concat(df_input_bin_list)\n", + "\n", + "# Add a column to indicate high-quality bins\n", + "df_bins[\"High quality bin\"] = (df_bins['completeness'] >= 90) & (df_bins['contamination'] <= 5)\n", + "\n", + "# Display relevant columns\n", + "df_bins[[ \"tool\", \"completeness\", \"contamination\", \"size\", \"N50\", \"contig_count\"]]\n" + ] + }, + { + "cell_type": "markdown", + "id": "80ef2544", + "metadata": {}, + "source": [ + "### Plot bin completeness and contamination\n", + "With the DataFrame containing both Binette’s final bins and the input bins, we can now create a scatter plot to visualize the results:" ] }, { "cell_type": "code", "execution_count": 4, - "id": "911d598f-a6c7-4178-aff2-6059235e7fc4", + "id": "277cb781", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "
" + "
" ] }, "metadata": {}, @@ -490,25 +577,63 @@ } ], "source": [ - "fig = px.scatter(df_bins, x=\"completeness\",y=\"contamination\", color=\"High quality bin\", size=\"size\", facet_row=\"tool\")\n", + "import plotly.express as px\n", + "\n", + "# Create a scatter plot to visualize completeness and contamination\n", + "fig = px.scatter(df_bins, \n", + " x=\"completeness\", \n", + " y=\"contamination\", \n", + " color=\"High quality bin\", \n", + " size=\"size\", \n", + " facet_row=\"tool\",\n", + " title=\"Bin Quality Comparison\",\n", + " )\n", + "\n", + "# Update layout for better visibility\n", "fig.update_layout(\n", - " width=800,\n", - " height=800)\n", - " \n", + " width=600,\n", + " height=800,\n", + " legend_title=\"High Quality Bin\",\n", + " title=\"Comparison of Bin Quality Metrics\"\n", + ")\n", + "\n", + "# Show the plot\n", "fig.show()" ] }, + { + "cell_type": "markdown", + "id": "06a14412", + "metadata": {}, + "source": [ + "We can see that binette bins are the one displaying the most high quality bins (completeness β‰₯ 90% and contamination ≀ 5%).\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "id": "323f5637", + "metadata": {}, + "source": [ + "### Comparing Binning Tools Using Bin Score Curves\n", + "\n", + "A common way to compare bin sets is by sorting the bins based on their scores and plotting them against their index.\n", + "\n", + "Here’s how we can create such a plot:" + ] + }, { "cell_type": "code", "execution_count": 5, - "id": "35c46beb-1ac9-4014-9672-91edcc1bf439", + "id": "79faaa3a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", - "
" + "
" ] }, "metadata": {}, @@ -516,18 +641,79 @@ } ], "source": [ - "df_bins['completeness - 2*contamination'] = df_bins['completeness'] - 2*df_bins['contamination']\n", - "fig = px.line(df_bins, x=\"index\",y='completeness - 2*contamination', color=\"tool\",markers=True)\n", - "fig.update_layout(\n", - " width=800,\n", - " height=500)\n", + "# Calculate the score for each bin\n", + "df_bins['completeness - 2*contamination'] = df_bins['completeness'] - 2 * df_bins['contamination']\n", + "\n", + "# Plot the score against the bin index\n", + "fig = px.line(df_bins, x=\"index\", y='completeness - 2*contamination', color=\"tool\", markers=True)\n", + "fig.update_layout(width=600, height=500)\n", "fig.show()" ] }, + { + "cell_type": "markdown", + "id": "97aee4d0", + "metadata": {}, + "source": [ + "From the plot, you might notice that Concoct has a lot of bins with lower quality scores. Let’s zoom in to get a better look:" + ] + }, { "cell_type": "code", "execution_count": 6, - "id": "af74bfb2-457c-4cf4-9c13-3ee9642be7ce", + "id": "063974f6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Adjust the plot view to zoom in\n", + "fig.update_layout(\n", + " xaxis_range=[-1, 20], # Zoom on x-axis\n", + " yaxis_range=[0, 100], # Zoom on y-axis\n", + " width=600,\n", + " height=500\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "136b17e4", + "metadata": {}, + "source": [ + "Binette line consistently appears above the other binning tools. This indicates that Binette produce higher-quality bins compared to the initial bin sets." + ] + }, + { + "cell_type": "markdown", + "id": "46f1b3d0", + "metadata": {}, + "source": [ + "### Plot Number of High-Quality Bins per Bin Set\n", + "\n", + "Let's plot the number of bins falling into different quality categories. We’ll focus on bins with a maximum of 10% contamination and classify them into three completeness categories:\n", + "\n", + "- **`> 50% and ≀ 70%`**\n", + "- **`> 70% and ≀ 90%`**\n", + "- **`> 90%`**\n", + "\n", + "First, let’s group and count the bins in each category:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "943f88b4", "metadata": {}, "outputs": [ { @@ -551,1006 +737,47 @@ " \n", " \n", " \n", - " bin_id\n", - " origin\n", - " name\n", - " completeness\n", - " contamination\n", - " score\n", - " size\n", - " N50\n", - " contig_count\n", - " tool\n", - " index\n", - " High quality bin\n", - " completeness - 2*contamination\n", " Contamination ≀ 10 and<br>Completeness\n", + " tool\n", + " bin_count\n", " \n", " \n", " \n", " \n", " 0\n", - " 17075\n", - " diff\n", - " 44 - 10\n", - " 100.00\n", - " 0.05\n", - " 99.90\n", - " 4672665\n", - " 82084\n", - " 93\n", + " > 50% and ≀ 70%\n", " binette\n", - " 0\n", - " True\n", - " 99.90\n", - " > 90%\n", + " 5\n", " \n", " \n", " 1\n", - " 39427\n", - " diff\n", - " 36 - 6\n", - " 99.90\n", - " 0.20\n", - " 99.50\n", - " 2796605\n", - " 41151\n", - " 98\n", - " binette\n", + " > 50% and ≀ 70%\n", + " maxbin2\n", " 
1\n", - " True\n", - " 99.50\n", - " > 90%\n", " \n", " \n", " 2\n", - " 47060\n", - " union\n", - " 58 | 33\n", - " 98.59\n", - " 0.83\n", - " 96.93\n", - " 4601336\n", - " 41016\n", - " 165\n", - " binette\n", - " 2\n", - " True\n", - " 96.93\n", - " > 90%\n", + " > 50% and ≀ 70%\n", + " metabat2\n", + " 1\n", " \n", " \n", " 3\n", - " 47177\n", - " union\n", - " 91 | 25 | 55\n", - " 96.10\n", - " 0.34\n", - " 95.42\n", - " 2598718\n", - " 11891\n", - " 312\n", - " binette\n", - " 3\n", - " True\n", - " 95.42\n", - " > 90%\n", + " > 50% and ≀ 70%\n", + " semibin2\n", + " 2\n", " \n", " \n", " 4\n", - " 21248\n", - " diff\n", - " 65 - 8 - 28\n", - " 91.98\n", - " 1.71\n", - " 88.56\n", - " 1768095\n", - " 9976\n", - " 250\n", + " > 70% and ≀ 90%\n", " binette\n", - " 4\n", - " True\n", - " 88.56\n", - " > 90%\n", + " 3\n", " \n", " \n", " 5\n", - " 44137\n", - " diff\n", - " 76 - 13 - 28\n", - " 92.63\n", - " 2.41\n", - " 87.81\n", - " 3726254\n", - " 5669\n", - " 850\n", - " binette\n", - " 5\n", - " True\n", - " 87.81\n", - " > 90%\n", - " \n", - " \n", - " 6\n", - " 31703\n", - " diff\n", - " 31 - 7 - 61\n", - " 81.73\n", - " 0.84\n", - " 80.05\n", - " 1665233\n", - " 8518\n", - " 248\n", - " binette\n", - " 6\n", - " False\n", - " 80.05\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 7\n", - " 13475\n", - " diff\n", - " 47 - 37\n", - " 72.89\n", - " 2.39\n", - " 68.11\n", - " 1241829\n", - " 5061\n", - " 252\n", - " binette\n", - " 7\n", - " False\n", - " 68.11\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 8\n", - " 47926\n", - " union\n", - " 75 | 30\n", - " 74.31\n", - " 4.26\n", - " 65.79\n", - " 3293949\n", - " 2954\n", - " 1262\n", - " binette\n", - " 8\n", - " False\n", - " 65.79\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 9\n", - " 46775\n", - " union\n", - " 42 | 102\n", - " 62.94\n", - " 2.75\n", - " 57.44\n", - " 1293571\n", - " 3783\n", - " 419\n", - " binette\n", - " 9\n", - " False\n", - " 57.44\n", - " > 50% and ≀ 70%\n", - " \n", - " 
\n", - " 10\n", - " 33569\n", - " diff\n", - " 83 - 7 - 38 - 31\n", - " 59.18\n", - " 2.24\n", - " 54.70\n", - " 2042527\n", - " 4437\n", - " 514\n", - " binette\n", - " 10\n", - " False\n", - " 54.70\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - " 11\n", - " 39350\n", - " diff\n", - " 57 - 16 - 75\n", - " 52.16\n", - " 1.31\n", - " 49.54\n", - " 2601282\n", - " 5332\n", - " 509\n", - " binette\n", - " 11\n", - " False\n", - " 49.54\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - " 12\n", - " 39558\n", - " diff\n", - " 78 - 6 - 43\n", - " 64.63\n", - " 8.03\n", - " 48.57\n", - " 1858210\n", - " 1430\n", - " 1293\n", - " binette\n", - " 12\n", - " False\n", - " 48.57\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - " 13\n", - " 51082\n", - " union\n", - " 120 | 1\n", - " 52.33\n", - " 5.06\n", - " 42.21\n", - " 688879\n", - " 1446\n", - " 472\n", - " binette\n", - " 13\n", - " False\n", - " 42.21\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - " 0\n", - " 125\n", - " concoct/bins\n", - " 9.fa\n", - " 100.00\n", - " 0.38\n", - " 99.24\n", - " 3033586\n", - " 37523\n", - " 131\n", - " concoct\n", - " 0\n", - " True\n", - " 99.24\n", - " > 90%\n", - " \n", - " \n", - " 1\n", - " 67\n", - " concoct/bins\n", - " 41.fa\n", - " 100.00\n", - " 0.46\n", - " 99.08\n", - " 4765466\n", - " 82084\n", - " 101\n", - " concoct\n", - " 1\n", - " True\n", - " 99.08\n", - " > 90%\n", - " \n", - " \n", - " 2\n", - " 91\n", - " concoct/bins\n", - " 7.fa\n", - " 92.76\n", - " 0.34\n", - " 92.08\n", - " 2274951\n", - " 12187\n", - " 265\n", - " concoct\n", - " 2\n", - " True\n", - " 92.08\n", - " > 90%\n", - " \n", - " \n", - " 3\n", - " 76\n", - " concoct/bins\n", - " 6.fa\n", - " 92.63\n", - " 3.42\n", - " 85.79\n", - " 3751950\n", - " 5674\n", - " 855\n", - " concoct\n", - " 3\n", - " True\n", - " 85.79\n", - " > 90%\n", - " \n", - " \n", - " 4\n", - " 65\n", - " concoct/bins\n", - " 62.fa\n", - " 87.35\n", - " 1.80\n", - " 83.75\n", - " 1917859\n", - " 10911\n", - " 259\n", - " 
concoct\n", - " 4\n", - " False\n", - " 83.75\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 6\n", - " 75\n", - " concoct/bins\n", - " 48.fa\n", - " 73.35\n", - " 4.26\n", - " 64.83\n", - " 3285374\n", - " 2950\n", - " 1261\n", - " concoct\n", - " 6\n", - " False\n", - " 64.83\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 0\n", - " 22\n", - " maxbin2\n", - " maxbin2.001.fasta\n", - " 99.81\n", - " 4.81\n", - " 90.19\n", - " 4616818\n", - " 89436\n", - " 133\n", - " maxbin2\n", - " 0\n", - " True\n", - " 90.19\n", - " > 90%\n", - " \n", - " \n", - " 1\n", - " 14\n", - " maxbin2\n", - " maxbin2.002.fasta\n", - " 93.92\n", - " 3.53\n", - " 86.86\n", - " 2874373\n", - " 37523\n", - " 195\n", - " maxbin2\n", - " 1\n", - " True\n", - " 86.86\n", - " > 90%\n", - " \n", - " \n", - " 3\n", - " 5\n", - " maxbin2\n", - " maxbin2.009.fasta\n", - " 62.69\n", - " 8.14\n", - " 46.41\n", - " 2438492\n", - " 6141\n", - " 604\n", - " maxbin2\n", - " 3\n", - " False\n", - " 46.41\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - " 0\n", - " 36\n", - " metabat2\n", - " metabat2.14.fa\n", - " 99.90\n", - " 0.24\n", - " 99.42\n", - " 2799572\n", - " 41151\n", - " 99\n", - " metabat2\n", - " 0\n", - " True\n", - " 99.42\n", - " > 90%\n", - " \n", - " \n", - " 1\n", - " 25\n", - " metabat2\n", - " metabat2.8.fa\n", - " 93.17\n", - " 0.22\n", - " 92.73\n", - " 2148097\n", - " 12225\n", - " 226\n", - " metabat2\n", - " 1\n", - " True\n", - " 92.73\n", - " > 90%\n", - " \n", - " \n", - " 2\n", - " 33\n", - " metabat2\n", - " metabat2.12.fa\n", - " 93.52\n", - " 0.92\n", - " 91.68\n", - " 4266134\n", - " 39217\n", - " 157\n", - " metabat2\n", - " 2\n", - " True\n", - " 91.68\n", - " > 90%\n", - " \n", - " \n", - " 3\n", - " 27\n", - " metabat2\n", - " metabat2.11.fa\n", - " 84.40\n", - " 1.53\n", - " 81.34\n", - " 1902761\n", - " 11352\n", - " 218\n", - " metabat2\n", - " 3\n", - " False\n", - " 81.34\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 4\n", - " 37\n", - " metabat2\n", - " 
metabat2.1.fa\n", - " 84.99\n", - " 2.73\n", - " 79.53\n", - " 2980526\n", - " 6876\n", - " 502\n", - " metabat2\n", - " 4\n", - " False\n", - " 79.53\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 5\n", - " 31\n", - " metabat2\n", - " metabat2.2.fa\n", - " 83.21\n", - " 3.16\n", - " 76.89\n", - " 1807028\n", - " 7852\n", - " 274\n", - " metabat2\n", - " 5\n", - " False\n", - " 76.89\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 6\n", - " 35\n", - " metabat2\n", - " metabat2.4.fa\n", - " 76.53\n", - " 0.11\n", - " 76.31\n", - " 3477636\n", - " 82084\n", - " 71\n", - " metabat2\n", - " 6\n", - " False\n", - " 76.31\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 7\n", - " 29\n", - " metabat2\n", - " metabat2.7.fa\n", - " 71.78\n", - " 5.77\n", - " 60.24\n", - " 1384653\n", - " 4937\n", - " 292\n", - " metabat2\n", - " 7\n", - " False\n", - " 60.24\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 8\n", - " 24\n", - " metabat2\n", - " metabat2.3.fa\n", - " 51.75\n", - " 2.99\n", - " 45.77\n", - " 1707078\n", - " 4929\n", - " 362\n", - " metabat2\n", - " 8\n", - " False\n", - " 45.77\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - " 0\n", - " 44\n", - " semibin2/output_bins\n", - " SemiBin_27.fa.gz\n", - " 100.00\n", - " 0.09\n", - " 99.82\n", - " 4681369\n", - " 82084\n", - " 94\n", - " semibin2\n", - " 0\n", - " True\n", - " 99.82\n", - " > 90%\n", - " \n", - " \n", - " 1\n", - " 53\n", - " semibin2/output_bins\n", - " SemiBin_33.fa.gz\n", - " 99.92\n", - " 0.28\n", - " 99.36\n", - " 2937678\n", - " 37523\n", - " 113\n", - " semibin2\n", - " 1\n", - " True\n", - " 99.36\n", - " > 90%\n", - " \n", - " \n", - " 2\n", - " 50\n", - " semibin2/output_bins\n", - " SemiBin_10.fa.gz\n", - " 93.43\n", - " 0.14\n", - " 93.15\n", - " 2129295\n", - " 12519\n", - " 216\n", - " semibin2\n", - " 2\n", - " True\n", - " 93.15\n", - " > 90%\n", - " \n", - " \n", - " 3\n", - " 62\n", - " semibin2/output_bins\n", - " SemiBin_24.fa.gz\n", - " 92.13\n", - " 0.03\n", - " 
92.07\n", - " 4162911\n", - " 40395\n", - " 139\n", - " semibin2\n", - " 3\n", - " True\n", - " 92.07\n", - " > 90%\n", - " \n", - " \n", - " 4\n", - " 38\n", - " semibin2/output_bins\n", - " SemiBin_26.fa.gz\n", - " 83.09\n", - " 2.25\n", - " 78.59\n", - " 1674156\n", - " 8389\n", - " 245\n", - " semibin2\n", - " 4\n", - " False\n", - " 78.59\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 5\n", - " 49\n", - " semibin2/output_bins\n", - " SemiBin_32.fa.gz\n", - " 81.87\n", - " 1.66\n", - " 78.55\n", - " 1820073\n", - " 11737\n", - " 205\n", - " semibin2\n", - " 5\n", - " False\n", - " 78.55\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 6\n", - " 60\n", - " semibin2/output_bins\n", - " SemiBin_22.fa.gz\n", - " 80.25\n", - " 1.63\n", - " 76.99\n", - " 2790948\n", - " 7117\n", - " 450\n", - " semibin2\n", - " 6\n", - " False\n", - " 76.99\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 7\n", - " 47\n", - " semibin2/output_bins\n", - " SemiBin_11.fa.gz\n", - " 72.57\n", - " 2.45\n", - " 67.67\n", - " 1245031\n", - " 5061\n", - " 253\n", - " semibin2\n", - " 7\n", - " False\n", - " 67.67\n", - " > 70% and ≀ 90%\n", - " \n", - " \n", - " 8\n", - " 61\n", - " semibin2/output_bins\n", - " SemiBin_3.fa.gz\n", - " 53.34\n", - " 1.33\n", - " 50.68\n", - " 1728690\n", - " 4913\n", - " 367\n", - " semibin2\n", - " 8\n", - " False\n", - " 50.68\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - " 9\n", - " 57\n", - " semibin2/output_bins\n", - " SemiBin_12.fa.gz\n", - " 51.92\n", - " 1.31\n", - " 49.30\n", - " 2609451\n", - " 5292\n", - " 511\n", - " semibin2\n", - " 9\n", - " False\n", - " 49.30\n", - " > 50% and ≀ 70%\n", - " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " bin_id origin name completeness \\\n", - "0 17075 diff 44 - 10 100.00 \n", - "1 39427 diff 36 - 6 99.90 \n", - "2 47060 union 58 | 33 98.59 \n", - "3 47177 union 91 | 25 | 55 96.10 \n", - "4 21248 diff 65 - 8 - 28 91.98 \n", - "5 44137 diff 76 - 13 - 28 92.63 \n", - "6 31703 diff 31 - 7 - 61 
81.73 \n", - "7 13475 diff 47 - 37 72.89 \n", - "8 47926 union 75 | 30 74.31 \n", - "9 46775 union 42 | 102 62.94 \n", - "10 33569 diff 83 - 7 - 38 - 31 59.18 \n", - "11 39350 diff 57 - 16 - 75 52.16 \n", - "12 39558 diff 78 - 6 - 43 64.63 \n", - "13 51082 union 120 | 1 52.33 \n", - "0 125 concoct/bins 9.fa 100.00 \n", - "1 67 concoct/bins 41.fa 100.00 \n", - "2 91 concoct/bins 7.fa 92.76 \n", - "3 76 concoct/bins 6.fa 92.63 \n", - "4 65 concoct/bins 62.fa 87.35 \n", - "6 75 concoct/bins 48.fa 73.35 \n", - "0 22 maxbin2 maxbin2.001.fasta 99.81 \n", - "1 14 maxbin2 maxbin2.002.fasta 93.92 \n", - "3 5 maxbin2 maxbin2.009.fasta 62.69 \n", - "0 36 metabat2 metabat2.14.fa 99.90 \n", - "1 25 metabat2 metabat2.8.fa 93.17 \n", - "2 33 metabat2 metabat2.12.fa 93.52 \n", - "3 27 metabat2 metabat2.11.fa 84.40 \n", - "4 37 metabat2 metabat2.1.fa 84.99 \n", - "5 31 metabat2 metabat2.2.fa 83.21 \n", - "6 35 metabat2 metabat2.4.fa 76.53 \n", - "7 29 metabat2 metabat2.7.fa 71.78 \n", - "8 24 metabat2 metabat2.3.fa 51.75 \n", - "0 44 semibin2/output_bins SemiBin_27.fa.gz 100.00 \n", - "1 53 semibin2/output_bins SemiBin_33.fa.gz 99.92 \n", - "2 50 semibin2/output_bins SemiBin_10.fa.gz 93.43 \n", - "3 62 semibin2/output_bins SemiBin_24.fa.gz 92.13 \n", - "4 38 semibin2/output_bins SemiBin_26.fa.gz 83.09 \n", - "5 49 semibin2/output_bins SemiBin_32.fa.gz 81.87 \n", - "6 60 semibin2/output_bins SemiBin_22.fa.gz 80.25 \n", - "7 47 semibin2/output_bins SemiBin_11.fa.gz 72.57 \n", - "8 61 semibin2/output_bins SemiBin_3.fa.gz 53.34 \n", - "9 57 semibin2/output_bins SemiBin_12.fa.gz 51.92 \n", - "\n", - " contamination score size N50 contig_count tool index \\\n", - "0 0.05 99.90 4672665 82084 93 binette 0 \n", - "1 0.20 99.50 2796605 41151 98 binette 1 \n", - "2 0.83 96.93 4601336 41016 165 binette 2 \n", - "3 0.34 95.42 2598718 11891 312 binette 3 \n", - "4 1.71 88.56 1768095 9976 250 binette 4 \n", - "5 2.41 87.81 3726254 5669 850 binette 5 \n", - "6 0.84 80.05 1665233 8518 248 binette 6 
\n", - "7 2.39 68.11 1241829 5061 252 binette 7 \n", - "8 4.26 65.79 3293949 2954 1262 binette 8 \n", - "9 2.75 57.44 1293571 3783 419 binette 9 \n", - "10 2.24 54.70 2042527 4437 514 binette 10 \n", - "11 1.31 49.54 2601282 5332 509 binette 11 \n", - "12 8.03 48.57 1858210 1430 1293 binette 12 \n", - "13 5.06 42.21 688879 1446 472 binette 13 \n", - "0 0.38 99.24 3033586 37523 131 concoct 0 \n", - "1 0.46 99.08 4765466 82084 101 concoct 1 \n", - "2 0.34 92.08 2274951 12187 265 concoct 2 \n", - "3 3.42 85.79 3751950 5674 855 concoct 3 \n", - "4 1.80 83.75 1917859 10911 259 concoct 4 \n", - "6 4.26 64.83 3285374 2950 1261 concoct 6 \n", - "0 4.81 90.19 4616818 89436 133 maxbin2 0 \n", - "1 3.53 86.86 2874373 37523 195 maxbin2 1 \n", - "3 8.14 46.41 2438492 6141 604 maxbin2 3 \n", - "0 0.24 99.42 2799572 41151 99 metabat2 0 \n", - "1 0.22 92.73 2148097 12225 226 metabat2 1 \n", - "2 0.92 91.68 4266134 39217 157 metabat2 2 \n", - "3 1.53 81.34 1902761 11352 218 metabat2 3 \n", - "4 2.73 79.53 2980526 6876 502 metabat2 4 \n", - "5 3.16 76.89 1807028 7852 274 metabat2 5 \n", - "6 0.11 76.31 3477636 82084 71 metabat2 6 \n", - "7 5.77 60.24 1384653 4937 292 metabat2 7 \n", - "8 2.99 45.77 1707078 4929 362 metabat2 8 \n", - "0 0.09 99.82 4681369 82084 94 semibin2 0 \n", - "1 0.28 99.36 2937678 37523 113 semibin2 1 \n", - "2 0.14 93.15 2129295 12519 216 semibin2 2 \n", - "3 0.03 92.07 4162911 40395 139 semibin2 3 \n", - "4 2.25 78.59 1674156 8389 245 semibin2 4 \n", - "5 1.66 78.55 1820073 11737 205 semibin2 5 \n", - "6 1.63 76.99 2790948 7117 450 semibin2 6 \n", - "7 2.45 67.67 1245031 5061 253 semibin2 7 \n", - "8 1.33 50.68 1728690 4913 367 semibin2 8 \n", - "9 1.31 49.30 2609451 5292 511 semibin2 9 \n", - "\n", - " High quality bin completeness - 2*contamination \\\n", - "0 True 99.90 \n", - "1 True 99.50 \n", - "2 True 96.93 \n", - "3 True 95.42 \n", - "4 True 88.56 \n", - "5 True 87.81 \n", - "6 False 80.05 \n", - "7 False 68.11 \n", - "8 False 65.79 \n", - "9 False 
57.44 \n", - "10 False 54.70 \n", - "11 False 49.54 \n", - "12 False 48.57 \n", - "13 False 42.21 \n", - "0 True 99.24 \n", - "1 True 99.08 \n", - "2 True 92.08 \n", - "3 True 85.79 \n", - "4 False 83.75 \n", - "6 False 64.83 \n", - "0 True 90.19 \n", - "1 True 86.86 \n", - "3 False 46.41 \n", - "0 True 99.42 \n", - "1 True 92.73 \n", - "2 True 91.68 \n", - "3 False 81.34 \n", - "4 False 79.53 \n", - "5 False 76.89 \n", - "6 False 76.31 \n", - "7 False 60.24 \n", - "8 False 45.77 \n", - "0 True 99.82 \n", - "1 True 99.36 \n", - "2 True 93.15 \n", - "3 True 92.07 \n", - "4 False 78.59 \n", - "5 False 78.55 \n", - "6 False 76.99 \n", - "7 False 67.67 \n", - "8 False 50.68 \n", - "9 False 49.30 \n", - "\n", - " Contamination ≀ 10 and
Completeness \n", - "0 > 90% \n", - "1 > 90% \n", - "2 > 90% \n", - "3 > 90% \n", - "4 > 90% \n", - "5 > 90% \n", - "6 > 70% and ≀ 90% \n", - "7 > 70% and ≀ 90% \n", - "8 > 70% and ≀ 90% \n", - "9 > 50% and ≀ 70% \n", - "10 > 50% and ≀ 70% \n", - "11 > 50% and ≀ 70% \n", - "12 > 50% and ≀ 70% \n", - "13 > 50% and ≀ 70% \n", - "0 > 90% \n", - "1 > 90% \n", - "2 > 90% \n", - "3 > 90% \n", - "4 > 70% and ≀ 90% \n", - "6 > 70% and ≀ 90% \n", - "0 > 90% \n", - "1 > 90% \n", - "3 > 50% and ≀ 70% \n", - "0 > 90% \n", - "1 > 90% \n", - "2 > 90% \n", - "3 > 70% and ≀ 90% \n", - "4 > 70% and ≀ 90% \n", - "5 > 70% and ≀ 90% \n", - "6 > 70% and ≀ 90% \n", - "7 > 70% and ≀ 90% \n", - "8 > 50% and ≀ 70% \n", - "0 > 90% \n", - "1 > 90% \n", - "2 > 90% \n", - "3 > 90% \n", - "4 > 70% and ≀ 90% \n", - "5 > 70% and ≀ 90% \n", - "6 > 70% and ≀ 90% \n", - "7 > 70% and ≀ 90% \n", - "8 > 50% and ≀ 70% \n", - "9 > 50% and ≀ 70% " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "contamination_cutoff = 10\n", - "low_contamination_filt = df_bins['contamination'] <= contamination_cutoff\n", - "high_completeness_filt = df_bins['completeness'] > 90\n", - "medium_completeness_filt = df_bins['completeness'] > 70\n", - "low_completeness_filt = df_bins['completeness'] > 50\n", - "\n", - "quality = f'Contamination ≀ {contamination_cutoff} and
Completeness'\n", - "df_bins.loc[low_contamination_filt & low_completeness_filt, quality] = '> 50% and ≀ 70%'\n", - "df_bins.loc[low_contamination_filt & medium_completeness_filt, quality] = '> 70% and ≀ 90%'\n", - "df_bins.loc[low_contamination_filt & high_completeness_filt, quality] = '> 90%'\n", - "df_bins.loc[~df_bins[quality].isna()]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "fa71ff37-9846-4826-a4bb-6c4b0069cea0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -1621,23 +848,45 @@ } ], "source": [ - "df_bins.groupby([quality, 'tool']).value_counts(ascending=True).reset_index()\n", + "# Define the contamination cutoff\n", + "contamination_cutoff = 10\n", + "\n", + "# Create filters for completeness categories\n", + "low_contamination_filt = df_bins['contamination'] <= contamination_cutoff\n", + "high_completeness_filt = df_bins['completeness'] > 90\n", + "medium_completeness_filt = df_bins['completeness'] > 70\n", + "low_completeness_filt = df_bins['completeness'] > 50\n", "\n", + "# Define quality categories\n", + "quality = f'Contamination ≀ {contamination_cutoff} and
Completeness'\n",
+    "df_bins.loc[low_contamination_filt & low_completeness_filt, quality] = '> 50% and ≀ 70%'\n",
+    "df_bins.loc[low_contamination_filt & medium_completeness_filt, quality] = '> 70% and ≀ 90%'\n",
+    "df_bins.loc[low_contamination_filt & high_completeness_filt, quality] = '> 90%'\n",
+    "\n",
+    "# Group and count bins by quality category and tool\n",
     "df_bins_quality_grouped = df_bins.groupby([quality, 'tool']).agg(bin_count=('bin_id', 'count')).reset_index()\n",
     "df_bins_quality_grouped"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "6eec391a",
+   "metadata": {},
+   "source": [
+    "Now, let’s create a bar plot to visualize the number of bins in each quality category for each bin set:"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 8,
-   "id": "250def29-167e-4a3b-8194-282f602945c8",
+   "id": "36ce51ac",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/html": [
        "
\n", - "
" + "
" ] }, "metadata": {}, @@ -1645,22 +894,42 @@ } ], "source": [ - "color_discrete_map={\"> 90%\": px.colors.qualitative.Prism[4],\n", - " \"> 70% and ≀ 90%\": px.colors.qualitative.Prism[2],\n", - " \"> 50% and ≀ 70%\": px.colors.qualitative.Prism[6]}\n", + "# Define colors for each completeness category\n", + "color_discrete_map = {\n", + " \"> 90%\": px.colors.qualitative.Prism[4],\n", + " \"> 70% and ≀ 90%\": px.colors.qualitative.Prism[2],\n", + " \"> 50% and ≀ 70%\": px.colors.qualitative.Prism[6]\n", + "}\n", "\n", - "fig = px.bar(df_bins_quality_grouped, x='tool', y=\"bin_count\", color=quality,\n", - " barmode='stack', color_discrete_map=color_discrete_map, text=\"bin_count\",\n", - " category_orders={\"tool\":[\"binette\", \"semibin2\", \"concoct\", \"metabat2\", \"maxbin2\"]},\n", - " opacity = 0.9)#[ \"#008c8a\", px.colors.qualitative.Safe[4], '#2596be'])\n", + "# Create the bar plot\n", + "fig = px.bar(\n", + " df_bins_quality_grouped, \n", + " x='tool', \n", + " y=\"bin_count\", \n", + " color=quality,\n", + " barmode='stack', \n", + " color_discrete_map=color_discrete_map, \n", + " text=\"bin_count\",\n", + " category_orders={\"tool\": [\"binette\", \"semibin2\", \"concoct\", \"metabat2\", \"maxbin2\"]},\n", + " opacity=0.9\n", + ")\n", "\n", + "# Update layout for better appearance\n", "fig.update_layout(\n", - " width=800,\n", - " height=500,\n", - " legend=dict(\n", - " traceorder=\"reversed\",\n", - " ))\n", - "fig" + " width=600,\n", + " height=500,\n", + " legend=dict(traceorder=\"reversed\")\n", + ")\n", + "\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f78d0f29", + "metadata": {}, + "source": [ + "From the plot, you can see that Binette produces more high-quality bins compared to the initial bin sets! 
πŸŽ‰" ] } ], @@ -1680,7 +949,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.8.19" } }, "nbformat": 4, diff --git a/docs/tutorial/assembly.md b/docs/tutorial/assembly.md index c7104ed..d2084bb 100644 --- a/docs/tutorial/assembly.md +++ b/docs/tutorial/assembly.md @@ -18,9 +18,28 @@ This process takes approximately 28 minutes to complete. ```{admonition} Note :class: note -You can also use **SPAdes** for assembly. It generally performs better than MEGAHIT but takes longer and requires more memory. Refer to the CAMI benchmark for a detailed comparison. +You can also use **SPAdes** for assembly. It generally performs better than MEGAHIT but takes longer and requires more memory. ``` + +```{admonition} Best Practices +:class: tip + +Here are some general tips that might help improve your assembly results, depending on your data: + +- **Read Cleaning:** If your reads have low-quality bases or adapters, consider cleaning them with a tool like `sickle`. It can boost the overall quality of your assembly. + +- **Quality Check:** Tools like `metaQUAST` are handy for checking your assembly’s quality. It’s a good way to ensure your results are solid before moving on. + +- **Assembly Filtering:** After assembling, it’s often a good idea to filter out small or low-coverage contigs. + + +These steps aren’t mandatory, and since this tutorial focuses on binning and using Binette, we’ll skip them for now. + +``` + + + ## Align the Reads Over the Assembly To get coverage information, we first need to map the reads back to the assembly. 
diff --git a/docs/tutorial/binette_results/final_bins_quality_reports.tsv b/docs/tutorial/binette_results/final_bins_quality_reports.tsv new file mode 100644 index 0000000..f575212 --- /dev/null +++ b/docs/tutorial/binette_results/final_bins_quality_reports.tsv @@ -0,0 +1,16 @@ +bin_id origin name completeness contamination score size N50 contig_count +17075 diff 44 - 10 100.0 0.05 99.9 4672665 82084 93 +39427 diff 36 - 6 99.9 0.2 99.5 2796605 41151 98 +47060 union 58 | 33 98.59 0.83 96.93 4601336 41016 165 +47177 union 91 | 25 | 55 96.1 0.34 95.41999999999999 2598718 11891 312 +21248 diff 65 - 8 - 28 91.98 1.71 88.56 1768095 9976 250 +44137 diff 76 - 13 - 28 92.63 2.41 87.81 3726254 5669 850 +31703 diff 31 - 7 - 61 81.73 0.84 80.05 1665233 8518 248 +13475 diff 47 - 37 72.89 2.39 68.11 1241829 5061 252 +47926 union 75 | 30 74.31 4.26 65.79 3293949 2954 1262 +46775 union 42 | 102 62.94 2.75 57.44 1293571 3783 419 +33569 diff 83 - 7 - 38 - 31 59.18 2.24 54.7 2042527 4437 514 +39350 diff 57 - 16 - 75 52.16 1.31 49.54 2601282 5332 509 +39558 diff 78 - 6 - 43 64.63 8.03 48.56999999999999 1858210 1430 1293 +51082 union 120 | 1 52.33 5.06 42.21 688879 1446 472 +19689 diff 118 - 18 - 61 - 31 48.22 8.23 31.759999999999998 1782676 1402 1265 diff --git a/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_1.concoct_bins.tsv b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_1.concoct_bins.tsv new file mode 100644 index 0000000..f4a995a --- /dev/null +++ b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_1.concoct_bins.tsv @@ -0,0 +1,64 @@ +bin_id origin name completeness contamination score size N50 contig_count +125 concoct/bins 9.fa 100.0 0.38 99.24 3033586 37523 131 +67 concoct/bins 41.fa 100.0 0.46 99.08 4765466 82084 101 +91 concoct/bins 7.fa 92.76 0.34 92.08 2274951 12187 265 +76 concoct/bins 6.fa 92.63 3.42 85.78999999999999 3751950 5674 855 +65 concoct/bins 62.fa 87.35 1.8 83.75 1917859 10911 259 +109 concoct/bins 
31.fa 100.0 14.25 71.5 7431952 25567 536 +75 concoct/bins 48.fa 73.35 4.26 64.83 3285374 2950 1261 +78 concoct/bins 2.fa 69.37 13.16 43.050000000000004 2385110 1471 1615 +113 concoct/bins 39.fa 46.66 3.76 39.14 809087 1511 527 +120 concoct/bins 29.fa 35.92 0.57 34.78 480789 1467 325 +86 concoct/bins 51.fa 45.9 7.36 31.18 4821129 1550 3079 +117 concoct/bins 20.fa 32.76 1.09 30.58 717576 1577 464 +118 concoct/bins 11.fa 47.24 9.31 28.62 1944164 1431 1340 +83 concoct/bins 26.fa 99.96 37.25 25.459999999999994 3984942 5809 809 +104 concoct/bins 38.fa 25.03 1.18 22.67 445168 1362 322 +119 concoct/bins 19.fa 35.1 7.86 19.380000000000003 2403536 1599 1464 +82 concoct/bins 59.fa 100.0 40.63 18.739999999999995 8287537 3951 2241 +89 concoct/bins 3.fa 87.66 34.5 18.659999999999997 6341799 1568 4092 +121 concoct/bins 30.fa 87.47 34.51 18.450000000000003 3480539 4299 901 +102 concoct/bins 0.fa 17.11 0.07 16.97 344299 1699 211 +97 concoct/bins 43.fa 12.5 0.15 12.2 345166 1384 238 +110 concoct/bins 35.fa 8.66 0.01 8.64 483789 1273 355 +108 concoct/bins 52.fa 6.65 0.0 6.65 89878 2232 40 +63 concoct/bins 27.fa 6.62 0.0 6.62 1016 1016 1 +101 concoct/bins 24.fa 6.45 0.0 6.45 3381 1107 3 +124 concoct/bins 18.fa 6.48 0.02 6.44 193358 1267 148 +106 concoct/bins 36.fa 6.43 0.0 6.43 12090 1997 7 +123 concoct/bins 14.fa 6.38 0.0 6.38 4193 3113 2 +114 concoct/bins 60.fa 6.38 0.0 6.38 8476 2499 5 +93 concoct/bins 5.fa 6.38 0.0 6.38 5082 1686 3 +87 concoct/bins 28.fa 6.38 0.0 6.38 5015 1668 3 +80 concoct/bins 13.fa 6.38 0.0 6.38 5338 1601 3 +112 concoct/bins 50.fa 6.38 0.0 6.38 13671 1587 9 +96 concoct/bins 54.fa 6.38 0.0 6.38 2727 1576 2 +107 concoct/bins 58.fa 6.38 0.0 6.38 1491 1491 1 +74 concoct/bins 45.fa 6.38 0.0 6.38 2475 1448 2 +70 concoct/bins 22.fa 6.38 0.0 6.38 1344 1344 1 +116 concoct/bins 10.fa 6.38 0.0 6.38 2524 1332 2 +98 concoct/bins 25.fa 6.38 0.0 6.38 10545 1304 8 +71 concoct/bins 32.fa 6.38 0.0 6.38 2290 1266 2 +92 concoct/bins 57.fa 6.38 0.0 6.38 4999 1246 4 +105 
concoct/bins 34.fa 6.38 0.0 6.38 1240 1240 1 +66 concoct/bins 23.fa 6.38 0.0 6.38 1236 1236 1 +72 concoct/bins 1.fa 6.38 0.0 6.38 12304 1223 10 +88 concoct/bins 53.fa 6.38 0.0 6.38 1160 1160 1 +68 concoct/bins 4.fa 6.38 0.0 6.38 6739 1136 6 +64 concoct/bins 37.fa 6.38 0.0 6.38 1123 1123 1 +94 concoct/bins 33.fa 6.38 0.0 6.38 1032 1032 1 +69 concoct/bins 17.fa 6.05 0.0 6.05 8012 1402 6 +85 concoct/bins 55.fa 5.85 0.0 5.85 117297 100818 12 +79 concoct/bins 40.fa 5.58 0.0 5.58 16429 1658 10 +99 concoct/bins 8.fa 5.35 0.0 5.35 98557 1192 80 +122 concoct/bins 61.fa 10.6 3.22 4.159999999999999 173292 1225 136 +77 concoct/bins 42.fa 3.74 0.0 3.74 122021 3383 50 +90 concoct/bins 15.fa 3.68 0.01 3.66 106174 24244 6 +111 concoct/bins 49.fa 3.6 0.02 3.56 75967 2458 39 +84 concoct/bins 46.fa 3.32 0.0 3.32 55857 1166 47 +115 concoct/bins 16.fa 3.21 0.0 3.21 36685 1138 31 +100 concoct/bins 21.fa 2.98 0.01 2.96 20489 1588 12 +95 concoct/bins 56.fa 2.73 0.0 2.73 28603 1276 21 +73 concoct/bins 47.fa 2.67 0.0 2.67 48903 2372 23 +103 concoct/bins 12.fa 2.53 0.0 2.53 41153 1182 34 +81 concoct/bins 44.fa 2.5 0.0 2.5 44603 1410 30 diff --git a/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_2.maxbin2.tsv b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_2.maxbin2.tsv new file mode 100644 index 0000000..d515b9d --- /dev/null +++ b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_2.maxbin2.tsv @@ -0,0 +1,24 @@ +bin_id origin name completeness contamination score size N50 contig_count +22 maxbin2 maxbin2.001.fasta 99.81 4.81 90.19 4616818 89436 133 +14 maxbin2 maxbin2.002.fasta 93.92 3.53 86.86 2874373 37523 195 +11 maxbin2 maxbin2.006.fasta 75.2 12.31 50.58 2634516 12326 626 +5 maxbin2 maxbin2.009.fasta 62.69 8.14 46.41 2438492 6141 604 +18 maxbin2 maxbin2.012.fasta 56.93 14.12 28.69 3473782 2619 1410 +7 maxbin2 maxbin2.011.fasta 40.68 7.03 26.619999999999997 2087133 6988 510 +23 maxbin2 maxbin2.010.fasta 45.34 9.62 
26.100000000000005 4743354 1971 2401 +3 maxbin2 maxbin2.018.fasta 80.35 27.53 25.289999999999992 5331237 4487 1756 +13 maxbin2 maxbin2.013.fasta 69.31 22.06 25.190000000000005 3958158 5259 1353 +21 maxbin2 maxbin2.007.fasta 34.6 4.79 25.020000000000003 1586278 12519 451 +6 maxbin2 maxbin2.021.fasta 42.81 9.69 23.430000000000003 1690737 2715 767 +19 maxbin2 maxbin2.020.fasta 27.99 2.4 23.189999999999998 1033153 3328 310 +20 maxbin2 maxbin2.014.fasta 26.95 2.05 22.85 1112378 1806 570 +10 maxbin2 maxbin2.008.fasta 56.41 17.23 21.949999999999996 3237421 2381 1425 +2 maxbin2 maxbin2.003.fasta 23.72 1.41 20.9 1419869 2539 575 +17 maxbin2 maxbin2.019.fasta 76.19 29.54 17.11 2765576 3328 1163 +1 maxbin2 maxbin2.023.fasta 27.26 6.52 14.220000000000002 454808 1432 314 +9 maxbin2 maxbin2.004.fasta 17.07 1.62 13.83 1180579 2361 491 +4 maxbin2 maxbin2.022.fasta 28.6 7.98 12.64 804525 1593 497 +8 maxbin2 maxbin2.005.fasta 11.11 0.04 11.03 488546 17602 45 +15 maxbin2 maxbin2.015.fasta 10.27 0.56 9.149999999999999 379048 3202 126 +16 maxbin2 maxbin2.016.fasta 4.92 0.0 4.92 103037 3558 49 +12 maxbin2 maxbin2.017.fasta 93.2 48.33 -3.4599999999999937 4710071 2372 2074 diff --git a/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_3.metabat2.tsv b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_3.metabat2.tsv new file mode 100644 index 0000000..fdc6bdd --- /dev/null +++ b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_3.metabat2.tsv @@ -0,0 +1,15 @@ +bin_id origin name completeness contamination score size N50 contig_count +36 metabat2 metabat2.14.fa 99.9 0.24 99.42 2799572 41151 99 +25 metabat2 metabat2.8.fa 93.17 0.22 92.73 2148097 12225 226 +33 metabat2 metabat2.12.fa 93.52 0.92 91.67999999999999 4266134 39217 157 +27 metabat2 metabat2.11.fa 84.4 1.53 81.34 1902761 11352 218 +37 metabat2 metabat2.1.fa 84.99 2.73 79.53 2980526 6876 502 +31 metabat2 metabat2.2.fa 83.21 3.16 76.88999999999999 1807028 7852 274 +35 metabat2 
metabat2.4.fa 76.53 0.11 76.31 3477636 82084 71 +29 metabat2 metabat2.7.fa 71.78 5.77 60.24 1384653 4937 292 +24 metabat2 metabat2.3.fa 51.75 2.99 45.769999999999996 1707078 4929 362 +30 metabat2 metabat2.13.fa 44.85 0.49 43.870000000000005 1724699 4259 415 +26 metabat2 metabat2.10.fa 44.15 1.11 41.93 982239 4743 219 +32 metabat2 metabat2.5.fa 25.31 0.03 25.25 1077467 91995 14 +28 metabat2 metabat2.9.fa 98.03 37.1 23.83 8543557 4347 1974 +34 metabat2 metabat2.6.fa 7.06 0.03 7.0 252404 64012 6 diff --git a/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_4.semibin2_output_bins.tsv b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_4.semibin2_output_bins.tsv new file mode 100644 index 0000000..c3a150b --- /dev/null +++ b/docs/tutorial/binette_results/input_bins_quality_reports/input_bins_4.semibin2_output_bins.tsv @@ -0,0 +1,26 @@ +bin_id origin name completeness contamination score size N50 contig_count +44 semibin2/output_bins SemiBin_27.fa.gz 100.0 0.09 99.82 4681369 82084 94 +53 semibin2/output_bins SemiBin_33.fa.gz 99.92 0.28 99.36 2937678 37523 113 +50 semibin2/output_bins SemiBin_10.fa.gz 93.43 0.14 93.15 2129295 12519 216 +62 semibin2/output_bins SemiBin_24.fa.gz 92.13 0.03 92.07 4162911 40395 139 +38 semibin2/output_bins SemiBin_26.fa.gz 83.09 2.25 78.59 1674156 8389 245 +49 semibin2/output_bins SemiBin_32.fa.gz 81.87 1.66 78.55000000000001 1820073 11737 205 +60 semibin2/output_bins SemiBin_22.fa.gz 80.25 1.63 76.99 2790948 7117 450 +47 semibin2/output_bins SemiBin_11.fa.gz 72.57 2.45 67.66999999999999 1245031 5061 253 +61 semibin2/output_bins SemiBin_3.fa.gz 53.34 1.33 50.68000000000001 1728690 4913 367 +57 semibin2/output_bins SemiBin_12.fa.gz 51.92 1.31 49.300000000000004 2609451 5292 511 +56 semibin2/output_bins SemiBin_17.fa.gz 47.29 0.37 46.55 1934420 4160 470 +42 semibin2/output_bins SemiBin_14.fa.gz 47.28 0.73 45.82 990463 4692 222 +51 semibin2/output_bins SemiBin_13.fa.gz 36.67 6.12 24.43 1699695 4402 395 +54 
semibin2/output_bins SemiBin_18.fa.gz 17.07 0.69 15.690000000000001 1131272 3943 277 +59 semibin2/output_bins SemiBin_15.fa.gz 14.04 1.01 12.02 884790 4349 206 +45 semibin2/output_bins SemiBin_20.fa.gz 9.95 0.01 9.93 515894 8389 67 +43 semibin2/output_bins SemiBin_5.fa.gz 9.95 0.05 9.85 513202 3891 131 +39 semibin2/output_bins SemiBin_35.fa.gz 9.45 0.0 9.45 213606 3336 63 +58 semibin2/output_bins SemiBin_84.fa.gz 8.7 0.0 8.7 358311 64012 9 +55 semibin2/output_bins SemiBin_66.fa.gz 8.66 0.19 8.28 290297 6707 44 +48 semibin2/output_bins SemiBin_52.fa.gz 8.28 0.01 8.26 358822 3296 106 +41 semibin2/output_bins SemiBin_19.fa.gz 8.12 0.02 8.08 353499 3949 90 +52 semibin2/output_bins SemiBin_6.fa.gz 7.74 0.01 7.720000000000001 351540 4284 85 +46 semibin2/output_bins SemiBin_37.fa.gz 6.18 0.0 6.18 250833 3607 66 +40 semibin2/output_bins SemiBin_80.fa.gz 4.41 0.13 4.15 217541 3425 64 diff --git a/docs/tutorial/binning.md b/docs/tutorial/binning.md index 8624cf6..bace377 100644 --- a/docs/tutorial/binning.md +++ b/docs/tutorial/binning.md @@ -1,10 +1,50 @@ + +## Align the Reads to the Assembly + +Binning tools rely on coverage information, among other criteria, to evaluate each contig. + +To obtain this coverage data, we first need to map the reads back to the assembly. + +```{code-block} bash +# Create a directory for the alignments +mkdir -p alignments_bwa/ + +# Index the contigs file using BWA-MEM2 +bwa-mem2 index Kickstart.megahit/R1.contigs.fa -p Kickstart.megahit/R1.contigs.fa + +# Map reads back to the assembly, convert to BAM format, and sort +bwa-mem2 mem -t 12 Kickstart.megahit/R1.contigs.fa coal-metagenomics/Kickstart_*.fastq.gz | \ +samtools view -@ 12 -bS - | \ +samtools sort -@ 12 - -o alignments_bwa/Kickstart.bam + +# Index the BAM file +samtools index alignments_bwa/Kickstart.bam +``` + + +:::{admonition} βŒ› Expected Time +:class: note + +This process takes approximately 12 minutes to complete. 
+::: + +```{admonition} +:class: tip + +If you have multiple samples and assemble them separately, cross-aligning the samples can significantly improve binning. Align each sample to all assemblies and use the resulting BAM files in binning. This approach gives the binning tools more coverage variation, which can be beneficial. However, keep in mind that this process can be resource-intensive, especially with many samples. + +If you did a cross-assembly with your samples, make sure to map the reads separately for each one, generating as many BAM files as you have samples, to help the binning tool. πŸš€ + +``` + + ## Run Binning Tools -In this section, we'll use different binning tools to group contigs of assembly. +Let's use different binning tools to group the contigs into bins, which we'll refine in the next section with Binette. ### MetaBAT2 -First, generate a depth file from the BAM file using the `jgi_summarize_bam_contig_depths` script from MetaBAT2. This depth file will also be used for MaxBin2. +First, generate a depth file from the BAM file using the `jgi_summarize_bam_contig_depths` script from MetaBAT2. This depth file will also be used by MaxBin2. ```bash jgi_summarize_bam_contig_depths --outputDepth depth_Kickstart.txt alignments_bwa/Kickstart.bam diff --git a/docs/tutorial/tutorial_main.md b/docs/tutorial/tutorial_main.md index d21931c..4493b3e 100644 --- a/docs/tutorial/tutorial_main.md +++ b/docs/tutorial/tutorial_main.md @@ -1,59 +1,30 @@ # Tutorial -In this tutorial, we'll walk through a practical example of how to use Binette with real data. We'll start by downloading metagenomics reads and then assemble these reads into contigs. Next, we'll use different binning tools to group the contigs. Finally, we'll use Binette to refine these bins and improve our results. +In this tutorial, we'll walk through a practical example of how to use Binette with real data. We'll start by downloading metagenomics reads and then assemble these reads into contigs. 
Next, we'll use different binning tools to group the contigs into bins. Finally, we'll use Binette to refine these bins. ```{mermaid} ---- -title: "Tutorial Overview:" -align: center ---- - -%%{init: {'theme':'default'}}%% - -graph LR - - A[Download Metagenomics Reads] --> B - B[Assemble Reads into Contigs] --> c - subgraph Pangenome creation - a:::workflow - c:::workflow - g:::workflow - p:::workflow - a("annotate") --> c - c(cluster) --> g(graph) - g(graph) --> p(partition) - end - - - C[Bin Contigs with Binning Tools] --> D[Refine Bins with Binette] - - - classDef panrgp fill:#4066d4 - classDef panmodule fill:#d44066 - classDef workflow fill:#d4ae40 - - -``` - -```{mermaid} - ---- +--- title: "Tutorial Overview:" align: center + +config: + look: handDrawn + theme: neutral --- graph TD - i[Get Metagenomics Reads] --> B[Assembly & Reads alignment] + i[metagenomics reads] --> B[assembly] - B --> metabat2 --> r[Binette] + B --> metabat2 --> r[binette] B --> maxbin2 --> r B --> concoct --> r B --> semibin2 --> r + r --> f[final bins] subgraph Binning metabat2:::binning @@ -62,8 +33,8 @@ graph TD semibin2:::binning end - - classDef binning fill:#d4ae40 + + classDef binning fill:#d4ae40 ``` @@ -79,6 +50,7 @@ assembly binning binette analyse_binette_result.ipynb +analyse_binette_result.myst ``` From 25443d416cba4ef2d66f985c8de5793de46e8caa Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 13:57:37 +0200 Subject: [PATCH 23/36] improve doc --- docs/api/api_ref.md | 1 - docs/api/binette.md | 13 +--- docs/api/modules.md | 7 -- docs/conf.py | 4 +- docs/contributing.md | 2 +- docs/tests.md | 2 +- docs/tutorial/assembly.md | 31 ++++++--- docs/tutorial/binette.md | 2 +- docs/tutorial/binette_tutorial_env.yaml | 21 ++++++ docs/tutorial/binning.md | 51 ++------------ docs/tutorial/get_dataset.md | 88 +++++++++++++++++++++++++ docs/tutorial/set_environment.md | 12 ++-- docs/tutorial/tutorial_main.md | 21 +++--- docs/usage.md | 4 +- 14 files changed, 162 
insertions(+), 97 deletions(-) delete mode 100644 docs/api/modules.md create mode 100644 docs/tutorial/binette_tutorial_env.yaml create mode 100644 docs/tutorial/get_dataset.md diff --git a/docs/api/api_ref.md b/docs/api/api_ref.md index 3de18e1..58bea10 100644 --- a/docs/api/api_ref.md +++ b/docs/api/api_ref.md @@ -3,6 +3,5 @@ ```{toctree} :maxdepth: 2 binette -indice_and_table ``` diff --git a/docs/api/binette.md b/docs/api/binette.md index bc3c754..a4e2c9a 100644 --- a/docs/api/binette.md +++ b/docs/api/binette.md @@ -20,15 +20,6 @@ :show-inheritance: ``` -## binette.binette module - -```{eval-rst} -.. automodule:: binette.binette - :members: - :undoc-members: - :show-inheritance: -``` - ## binette.cds module ```{eval-rst} @@ -65,10 +56,10 @@ :show-inheritance: ``` -## Module contents +## binette.main module ```{eval-rst} -.. automodule:: binette +.. automodule:: binette.main :members: :undoc-members: :show-inheritance: diff --git a/docs/api/modules.md b/docs/api/modules.md deleted file mode 100644 index b83d27c..0000000 --- a/docs/api/modules.md +++ /dev/null @@ -1,7 +0,0 @@ -# binette - -```{toctree} -:maxdepth: 4 - -binette -``` diff --git a/docs/conf.py b/docs/conf.py index 245781e..0cce3b6 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -24,13 +24,13 @@ "sphinx.ext.autosectionlabel", "sphinx.ext.autodoc", 'sphinx_search.extension', + 'sphinx_togglebutton', # "myst_nb", "myst_parser", 'nbsphinx', 'nbsphinx_link', # 'sphinx.ext.napoleon', # 'sphinx.ext.viewcode', - "myst_parser", 'sphinxcontrib.mermaid' ] myst_enable_extensions = [ @@ -61,7 +61,7 @@ # `path/to/file:heading` instead of just `heading` autosectionlabel_prefix_document = True -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'build', "api", "jupyter_execute"] +exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store', 'build', "jupyter_execute"] diff --git a/docs/contributing.md b/docs/contributing.md index f400a68..e9b69ee 100644 --- a/docs/contributing.md +++ b/docs/contributing.md @@ -26,7 
+26,7 @@ For minor changes like fixing typos or making small edits, create a new Pull Req - Clone your forked repository to your local machine. 2. **Get an Environment:** - Create an environment with all Binette prerequisites installed by following the installation instructions [here](./installation.md#installing-from-source-code-within-a-conda-environnement). + Create an environment with all Binette prerequisites installed by following the installation instructions [here](./installation.md#from-the-source-code-within-a-conda-environnement). 3. **Install in Editable Mode:** To enable code editing and testing of new functionality, you can install Binette in editable mode using the following command: diff --git a/docs/tests.md b/docs/tests.md index b6c0e60..ed1cdd7 100644 --- a/docs/tests.md +++ b/docs/tests.md @@ -8,7 +8,7 @@ Tests have been implemented to ensure the correctness of Binette. Unit tests have been implmented in the tests directory using pytest. -To run the test suit you would need to have install Binette from the source code. For that, you can follow installation instructions [here](./installation.md#installing-from-source-code-within-a-conda-environnement). +To run the test suit you would need to have install Binette from the source code. For that, you can follow installation instructions [here](./installation.md#from-the-source-code-within-a-conda-environnement). To install pytest in you environement you can run : diff --git a/docs/tutorial/assembly.md b/docs/tutorial/assembly.md index d2084bb..f919f00 100644 --- a/docs/tutorial/assembly.md +++ b/docs/tutorial/assembly.md @@ -10,39 +10,40 @@ megahit -1 coal-metagenomics/Kickstart_1.fastq.gz \ :::{admonition} βŒ› Expected Time :class: note +:class: dropdown This process takes approximately 28 minutes to complete. ::: -```{admonition} Note -:class: note - -You can also use **SPAdes** for assembly. It generally performs better than MEGAHIT but takes longer and requires more memory. 
-``` - -```{admonition} Best Practices +```{admonition} Assembly tips :class: tip +:class: dropdown Here are some general tips that might help improve your assembly results, depending on your data: - **Read Cleaning:** If your reads have low-quality bases or adapters, consider cleaning them with a tool like `sickle`. It can boost the overall quality of your assembly. +- **Use SPAdes rather than MEGAHIT** **SPAdes** generally performs better than MEGAHIT but takes longer and requires more memory. + - **Quality Check:** Tools like `metaQUAST` are handy for checking your assembly’s quality. It’s a good way to ensure your results are solid before moving on. - **Assembly Filtering:** After assembling, it’s often a good idea to filter out small or low-coverage contigs. -These steps aren’t mandatory, and since this tutorial focuses on binning and using Binette, we’ll skip them for now. +These steps aren’t mandatory, and since this tutorial focuses on binning refinement with Binette, we’ll skip them. ``` -## Align the Reads Over the Assembly -To get coverage information, we first need to map the reads back to the assembly. +## Align the Reads to the Assembly + +Binning tools rely on coverage information, among other criteria, to evaluate each contig. + +To obtain this coverage data, we first need to map the reads back to the assembly. ```{code-block} bash # Create a directory for the alignments @@ -63,6 +64,16 @@ samtools index alignments_bwa/Kickstart.bam :::{admonition} βŒ› Expected Time :class: note +:class: dropdown This process takes approximately 12 minutes to complete. ::: + +```{admonition} Read alignment strategy +:class: tip + +If you have multiple samples and assemble them separately, cross-aligning the samples can significantly improve binning. Align each sample to all assemblies and use the resulting BAM files in binning. This approach gives the binning tools more coverage variation, which can be beneficial. 
However, keep in mind that this process can be resource-intensive, especially with many samples. + +If you did a cross-assembly with your samples, make sure to map the reads separately for each one, generating as many BAM files as you have samples, to help the binning tool. πŸš€ + +``` \ No newline at end of file diff --git a/docs/tutorial/binette.md b/docs/tutorial/binette.md index 926ce3b..a2d7796 100644 --- a/docs/tutorial/binette.md +++ b/docs/tutorial/binette.md @@ -13,7 +13,7 @@ binette --bin_dirs maxbin2/ metabat2/ semibin2/output_bins/ concoct/bins/ \ Once Binette completes, the `binette_results` directory should have the following structure: -```plaintext +``` binette_results/ β”œβ”€β”€ final_bins β”‚ β”œβ”€β”€ bin_13475.fa diff --git a/docs/tutorial/binette_tutorial_env.yaml b/docs/tutorial/binette_tutorial_env.yaml new file mode 100644 index 0000000..72655d3 --- /dev/null +++ b/docs/tutorial/binette_tutorial_env.yaml @@ -0,0 +1,21 @@ +name: binette_tutorial +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - uscdc-datasets-sars-cov-2 # Dataset downloader to get the tutorial initial data + - fastqc # Quality control for high-throughput sequencing data + - samtools=1 # Tools for manipulating sequencing data in SAM format + - bedtools=2 # Suite of tools for genome arithmetic + - bwa-mem2=2 # Align reads to a reference genome (using BWA-MEM2) + - megahit=1 # De novo assembler for large genomes + - maxbin2=2 # Binning tool for metagenomic datasets + - metabat2=2 # Binning tool for metagenomic datasets + - semibin=2 # Binning tool for metagenomic datasets + - concoct=1 # Binning tool for metagenomic datasets + - binette=1.0.1 # Binette for binning and genome analysis + - das_tool=1 # Bin refiner to compare with Binette + - jupyter # Jupyter notebook for interactive analysis + - pandas=1 # Data manipulation and analysis + - plotly=5 # Interactive graphing \ No newline at end of file diff --git a/docs/tutorial/binning.md 
b/docs/tutorial/binning.md index bace377..cb82166 100644 --- a/docs/tutorial/binning.md +++ b/docs/tutorial/binning.md @@ -1,43 +1,3 @@ - -## Align the Reads to the Assembly - -Binning tools rely on coverage information, among other criteria, to evaluate each contig. - -To obtain this coverage data, we first need to map the reads back to the assembly. - -```{code-block} bash -# Create a directory for the alignments -mkdir -p alignments_bwa/ - -# Index the contigs file using BWA-MEM2 -bwa-mem2 index Kickstart.megahit/R1.contigs.fa -p Kickstart.megahit/R1.contigs.fa - -# Map reads back to the assembly, convert to BAM format, and sort -bwa-mem2 mem -t 12 Kickstart.megahit/R1.contigs.fa coal-metagenomics/Kickstart_*.fastq.gz | \ -samtools view -@ 12 -bS - | \ -samtools sort -@ 12 - -o alignments_bwa/Kickstart.bam - -# Index the BAM file -samtools index alignments_bwa/Kickstart.bam -``` - - -:::{admonition} βŒ› Expected Time -:class: note - -This process takes approximately 12 minutes to complete. -::: - -```{admonition} -:class: tip - -If you have multiple samples and assemble them separately, cross-aligning the samples can significantly improve binning. Align each sample to all assemblies and use the resulting BAM files in binning. This approach gives the binning tools more coverage variation, which can be beneficial. However, keep in mind that this process can be resource-intensive, especially with many samples. - -If you did a cross-assembly with your samples, make sure to map the reads separately for each one, generating as many BAM files as you have samples, to help the binning tool. πŸš€ - -``` - - ## Run Binning Tools Let's use different binning tools to group the contigs into bins, which we'll refine in the next section with Binette. 
@@ -108,14 +68,15 @@ extract_fasta_bins.py Kickstart.megahit/R1.contigs.fa concoct/clustering_merge.c You can also run SemiBin2 with its `single_easy_bin` command: -```{admonition} ⏳ Time Note -:class: note - -This process can take some time, so it may be skipped. -``` ```bash SemiBin2 single_easy_bin -i Kickstart.megahit/R1.contigs.fa \ -b alignments_bwa/Kickstart.bam \ -o semibin2/ -p 12 ``` + +```{admonition} ⏳ Time Note +:class: note + +This process can take some time. +``` diff --git a/docs/tutorial/get_dataset.md b/docs/tutorial/get_dataset.md new file mode 100644 index 0000000..3967bc3 --- /dev/null +++ b/docs/tutorial/get_dataset.md @@ -0,0 +1,88 @@ +## Obtaining Metagenomic Data for the Tutorial + +### Using the ncezid-biome Datasets Tool + +For this tutorial, we’ll use the "Kickstart" metagenome dataset from the [ncezid-biome datasets GitHub repository](https://github.com/ncezid-biome/). This dataset corresponds to sample [SAMN05024035](https://www.ncbi.nlm.nih.gov/Traces/study/?acc=SRR5058924&o=acc_s%3Aa) and SRA [SRR5058924](https://www.ncbi.nlm.nih.gov/Traces/study/?acc=SRR5058924&o=acc_s%3Aa). + + +We'll download the "Kickstart" dataset using the ncezid-biome datasets tool. You can find the tool and instructions on how to use it in their [GitHub repository](https://github.com/ncezid-biome/datasets?tab=readme-ov-file#edlb). + +The tool called `uscdc-datasets-sars-cov-2` on bioconda is part of the Conda environment created in the [previous section](./set_environment.md). + + +#### Download the Kickstart Dataset + +Once the tool is installed, you can download the "Kickstart" dataset with the following steps: + +1. **Download the coal-metagenomics table** from the GitHub repository: + + ```{code-block} bash + wget https://raw.githubusercontent.com/ncezid-biome/datasets/master/datasets/coal-metagenomics.tsv + ``` + +2. 
**Select the relevant line** corresponding to the "Kickstart" dataset (SRR5058924) by extracting the header and the specific entry: + + ```{code-block} bash + # Select the header of the table + head -n7 coal-metagenomics.tsv > coal-metagenomics_Kickstart_only.tsv + + # Append the relevant line for the Kickstart dataset + grep SRR5058924 coal-metagenomics.tsv >> coal-metagenomics_Kickstart_only.tsv + ``` + +3. **Run the dataset download** using the `GenFSGopher.pl` script: + + ```{code-block} bash + GenFSGopher.pl --numcpus 12 --compressed --outdir coal-metagenomics coal-metagenomics_Kickstart_only.tsv + ``` + + +:::{admonition} βŒ› Expected Time +:class: note + +This process takes approximately 16 minutes to complete. +::: + +#### Directory Structure + +After downloading, your directory structure should look like this: + +```{code-block} text +β”œβ”€β”€ coal-metagenomics_Kickstart_only.tsv +└── data + β”œβ”€β”€ in.tsv + β”œβ”€β”€ Kickstart_1.fastq.gz + β”œβ”€β”€ Kickstart_1.fastq.sha256 + β”œβ”€β”€ Kickstart_2.fastq.gz + β”œβ”€β”€ Kickstart_2.fastq.sha256 + β”œβ”€β”€ Makefile + β”œβ”€β”€ prefetch.done + β”œβ”€β”€ sha256sum.log + β”œβ”€β”€ SRR5058924 + β”‚Β Β  └── SRR5058924.sra + └── tree.dnd +``` + +In the next section, we will assemble the two read files to obtain an assembly of the dataset: +- `data/Kickstart_1.fastq.gz` +- `data/Kickstart_2.fastq.gz` + + +:::{admonition} 🧹 Cleaning Tip +:class: tip + +You can remove the SRA file `data/SRR5058924/SRR5058924.sra` as it is no longer needed; we will use only the FASTQ files. To remove it, run: + +```{code-block} bash +rm data/SRR5058924/SRR5058924.sra +::: + +```{note} +Alternatively, you can download the data using the SRA Toolkit, which is what the ncezid-biome tool uses in the background. +Note that the ncezid-biome tool provides additional checksum verification to ensure data integrity. 
+You can retrieve the data with the following commands after installing the SRA Toolkit (e.g., via Conda: [sra-tools on Anaconda](https://anaconda.org/bioconda/sra-tools)): +```{code-block} bash +prefetch SRR5058924 +fastq-dump --defline-seq '@$ac_$sn/$ri' --defline-qual '+' --split-3 -O . SRR5058924.sra +``` + diff --git a/docs/tutorial/set_environment.md b/docs/tutorial/set_environment.md index 52d6092..7f039fa 100644 --- a/docs/tutorial/set_environment.md +++ b/docs/tutorial/set_environment.md @@ -12,6 +12,13 @@ mamba env create -f binette_tutorial_env.yaml -n binette_tuto This command will create a Conda environment named `binette_tuto` using the environment file `binette_tutorial_env.yaml`. +Below is the content of the `binette_tutorial_env.yaml` file: + +```{include} binette_tutorial_env.yaml +:code: yaml +``` + + ### Activate the Environment After the environment is created, activate it by running: @@ -20,8 +27,3 @@ After the environment is created, activate it by running: conda activate binette_tuto ``` -Below is the content of the `binette_tutorial_env.yaml` file: - -```{include} binette_tutorial_env.yaml -:code: yaml -``` diff --git a/docs/tutorial/tutorial_main.md b/docs/tutorial/tutorial_main.md index 4493b3e..8cd8082 100644 --- a/docs/tutorial/tutorial_main.md +++ b/docs/tutorial/tutorial_main.md @@ -7,7 +7,7 @@ In this tutorial, we'll walk through a practical example of how to use Binette w --- title: "Tutorial Overview:" -align: center +align: right config: look: handDrawn @@ -17,20 +17,20 @@ config: graph TD - i[metagenomics reads] --> B[assembly] + i[Metagenomics reads] --> B[Assembly] - B --> metabat2 --> r[binette] - B --> maxbin2 --> r - B --> concoct --> r - B --> semibin2 --> r + B --> MetaBAT2 --> r[Binette] + B --> MaxBin2 --> r + B --> CONCOCT --> r + B --> SemiBin2 --> r r --> f[final bins] subgraph Binning - metabat2:::binning - maxbin2:::binning - concoct:::binning - semibin2:::binning + MetaBAT2:::binning + MaxBin2:::binning + 
CONCOCT:::binning + SemiBin2:::binning end @@ -50,7 +50,6 @@ assembly binning binette analyse_binette_result.ipynb -analyse_binette_result.myst ``` diff --git a/docs/usage.md b/docs/usage.md index 2108a50..063a51b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -11,7 +11,7 @@ For example, consider the following two `contig2bin_tables`: - `bin_set1.tsv`: - ```tsv + ``` contig_1 binA contig_8 binA contig_15 binB @@ -20,7 +20,7 @@ For example, consider the following two `contig2bin_tables`: - `bin_set2.tsv`: - ```tsv + ``` contig_1 bin.0 contig_8 bin.0 contig_15 bin.1 From 716e2e55cacb964f50f1fba19f54625b78ca3e83 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 14:19:11 +0200 Subject: [PATCH 24/36] update usage with input bin sets report new output --- docs/usage.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/usage.md b/docs/usage.md index 063a51b..9b37065 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -65,6 +65,7 @@ Binette results are stored in the `results` directory. You can specify a differe In this directory you will find: - `final_bins_quality_reports.tsv`: This is a TSV (tab-separated values) file containing quality information about the final selected bins. - `final_bins/`: This directory stores all the selected bins in fasta format. +- `input_bins_quality_reports/`: A directory storing quality reports for the input bin sets, with files following the same structure as `final_bins_quality_reports.tsv`. - `temporary_files/`: This directory contains intermediate files. If you choose to use the `--resume` option, Binette will utilize files in this directory to prevent the recomputation of time-consuming steps. 
From 3344a4548e41312eed143ed3d32d92bdf3c4e139 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 14:20:32 +0200 Subject: [PATCH 25/36] add missing sphinx_togglebutton extension --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8f12e86..3f99085 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,7 +48,8 @@ doc = [ "nbsphinx==0.9.5", "nbsphinx_link==1.3.0", "sphinx-book-theme==1.0.1", - "sphinxcontrib.mermaid" + "sphinxcontrib.mermaid", + "sphinx_togglebutton=0.3.2" ] dev = [ From eeee06e6e7678d91c7ef450863c209a4932317dc Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 14:27:16 +0200 Subject: [PATCH 26/36] fix pip format for added ext --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3f99085..2769cd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -49,7 +49,7 @@ doc = [ "nbsphinx_link==1.3.0", "sphinx-book-theme==1.0.1", "sphinxcontrib.mermaid", - "sphinx_togglebutton=0.3.2" + "sphinx_togglebutton==0.3.2" ] dev = [ From 05696e8c8faaaa211a9269f230afd854296bcbfe Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 15:04:50 +0200 Subject: [PATCH 27/36] improve doc --- docs/tutorial/assembly.md | 1 - docs/tutorial/binette.md | 8 ++++++++ docs/tutorial/binning.md | 5 +++-- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/docs/tutorial/assembly.md b/docs/tutorial/assembly.md index f919f00..60d25fa 100644 --- a/docs/tutorial/assembly.md +++ b/docs/tutorial/assembly.md @@ -19,7 +19,6 @@ This process takes approximately 28 minutes to complete. 
```{admonition} Assembly tips :class: tip -:class: dropdown Here are some general tips that might help improve your assembly results, depend diff --git a/docs/tutorial/binette.md b/docs/tutorial/binette.md index a2d7796..5f83676 100644 --- a/docs/tutorial/binette.md +++ b/docs/tutorial/binette.md @@ -11,6 +11,14 @@ binette --bin_dirs maxbin2/ metabat2/ semibin2/output_bins/ concoct/bins/ \ --verbose -t 12 -o binette_results ``` +```{admonition} βŒ› Expected Time +:class: note +:class: dropdown + +This process should take around 9 minutes to complete. +``` + + Once Binette completes, the `binette_results` directory should have the following structure: ``` diff --git a/docs/tutorial/binning.md b/docs/tutorial/binning.md index cb82166..b495db1 100644 --- a/docs/tutorial/binning.md +++ b/docs/tutorial/binning.md @@ -75,8 +75,9 @@ SemiBin2 single_easy_bin -i Kickstart.megahit/R1.contigs.fa \ -o semibin2/ -p 12 ``` -```{admonition} ⏳ Time Note +```{admonition} βŒ› Expected Time :class: note +:class: dropdown -This process can take some time. +This process takes around 1 hour to complete. 
``` From 8a31ecb840ecc3bececc6d05ee3326759616a257 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 15:05:44 +0200 Subject: [PATCH 28/36] add environement.yml for binder --- environment.yml | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 environment.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..dd03bfa --- /dev/null +++ b/environment.yml @@ -0,0 +1,8 @@ +channels: + - conda-forge + - defaults +dependencies: + - jupyter # Jupyter notebook for interactive analysis + - pandas=1 # Data manipulation and analysis + - plotly=5 # Interactive graphing + - nbgitpuller \ No newline at end of file From 46cd347cfd565d240aecd6731be33e8ac494da24 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 15:21:13 +0200 Subject: [PATCH 29/36] fix typo in assembly tuto doc --- docs/tutorial/assembly.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/assembly.md b/docs/tutorial/assembly.md index 60d25fa..fdd1003 100644 --- a/docs/tutorial/assembly.md +++ b/docs/tutorial/assembly.md @@ -24,7 +24,7 @@ Here are some general tips that might help improve your assembly results, depend - **Read Cleaning:** If your reads have low-quality bases or adapters, consider cleaning them with a tool like `sickle`. It can boost the overall quality of your assembly. -- **Use SPAdes rather than MEGAHIT** **SPAdes** generally performs better than MEGAHIT but takes longer and requires more memory. +- **Use SPAdes rather than MEGAHIT:** SPAdes generally performs better than MEGAHIT but takes longer and requires more memory. - **Quality Check:** Tools like `metaQUAST` are handy for checking your assembly’s quality. It’s a good way to ensure your results are solid before moving on. 
From 2e5b7b04de8c1011cdd6e5cd1fe7eb8e20175523 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 15:21:39 +0200 Subject: [PATCH 30/36] use proper name for env --- docs/tutorial/set_environment.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/set_environment.md b/docs/tutorial/set_environment.md index 7f039fa..5d003dc 100644 --- a/docs/tutorial/set_environment.md +++ b/docs/tutorial/set_environment.md @@ -7,7 +7,7 @@ To get started, we'll download the necessary tools and set them up in a dedicate First, let's create a new Conda environment specifically for this tutorial: ```{code-block} bash -mamba env create -f binette_tutorial_env.yaml -n binette_tuto +mamba env create -f binette_tutorial_env.yaml -n binette_tutorial ``` This command will create a Conda environment named `binette_tuto` using the environment file `binette_tutorial_env.yaml`. From 91c34528d14907d9d3ab4ec835b91ecf571fa5c0 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 16:03:11 +0200 Subject: [PATCH 31/36] improve env file --- environment.yml => binder/environment.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename environment.yml => binder/environment.yml (61%) diff --git a/environment.yml b/binder/environment.yml similarity index 61% rename from environment.yml rename to binder/environment.yml index dd03bfa..4f9610f 100644 --- a/environment.yml +++ b/binder/environment.yml @@ -1,8 +1,8 @@ +name: binder_tutorial_env channels: - conda-forge - - defaults dependencies: - jupyter # Jupyter notebook for interactive analysis - - pandas=1 # Data manipulation and analysis + - pandas # Data manipulation and analysis - plotly=5 # Interactive graphing - - nbgitpuller \ No newline at end of file + # - nbgitpuller \ No newline at end of file From b5bd70b705f77d91705f5375a5dbb9eb7a1765db Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 17:04:49 +0200 Subject: [PATCH 32/36] try binder build from requirements.txt --- 
binder/environment.yml | 8 -------- binder/requirements.txt | 3 +++ 2 files changed, 3 insertions(+), 8 deletions(-) delete mode 100644 binder/environment.yml create mode 100644 binder/requirements.txt diff --git a/binder/environment.yml b/binder/environment.yml deleted file mode 100644 index 4f9610f..0000000 --- a/binder/environment.yml +++ /dev/null @@ -1,8 +0,0 @@ -name: binder_tutorial_env -channels: - - conda-forge -dependencies: - - jupyter # Jupyter notebook for interactive analysis - - pandas # Data manipulation and analysis - - plotly=5 # Interactive graphing - # - nbgitpuller \ No newline at end of file diff --git a/binder/requirements.txt b/binder/requirements.txt new file mode 100644 index 0000000..8b0c82c --- /dev/null +++ b/binder/requirements.txt @@ -0,0 +1,3 @@ +jupyter # Jupyter notebook for interactive analysis +pandas # Data manipulation and analysis +plotly # Interactive graphing From 8719f3c1b4bb621ee502ef6c0f8870a5b3526cbc Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 17:17:18 +0200 Subject: [PATCH 33/36] improve tutorial --- docs/tutorial/get_dataset.md | 8 ++++---- docs/tutorial/tutorial_main.md | 6 +++++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/tutorial/get_dataset.md b/docs/tutorial/get_dataset.md index 3967bc3..f099b17 100644 --- a/docs/tutorial/get_dataset.md +++ b/docs/tutorial/get_dataset.md @@ -37,11 +37,11 @@ Once the tool is installed, you can download the "Kickstart" dataset with the fo ``` -:::{admonition} βŒ› Expected Time -:class: note + :::{admonition} βŒ› Expected Time + :class: note -This process takes approximately 16 minutes to complete. -::: + This process takes approximately 16 minutes to complete. 
+ ::: #### Directory Structure diff --git a/docs/tutorial/tutorial_main.md b/docs/tutorial/tutorial_main.md index 8cd8082..1fdc365 100644 --- a/docs/tutorial/tutorial_main.md +++ b/docs/tutorial/tutorial_main.md @@ -1,7 +1,11 @@ # Tutorial -In this tutorial, we'll walk through a practical example of how to use Binette with real data. We'll start by downloading metagenomics reads and then assemble these reads into contigs. Next, we'll use different binning tools to group the contigs into bins. Finally, we'll use Binette to refine these bins. +In this tutorial, we'll walk through a practical example of how to use Binette with real data. + + 1. We'll start by downloading metagenomics reads and then assemble these reads into contigs. + 2. Next, we'll use different binning tools to group the contigs into bins. + 3. Finally, we'll use Binette to refine these bins. ```{mermaid} From 8aec08230b9a1921b588ad7ce4d297cc146ecd1f Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 17:18:40 +0200 Subject: [PATCH 34/36] remvoe binder env as it does not work --- binder/requirements.txt | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 binder/requirements.txt diff --git a/binder/requirements.txt b/binder/requirements.txt deleted file mode 100644 index 8b0c82c..0000000 --- a/binder/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -jupyter # Jupyter notebook for interactive analysis -pandas # Data manipulation and analysis -plotly # Interactive graphing From 9099a553ec5faaab535c92355c3901f129b6d1a3 Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 17:41:52 +0200 Subject: [PATCH 35/36] bump to version 1.0.2 --- binette/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/binette/__init__.py b/binette/__init__.py index 6c4c011..34c1db3 100644 --- a/binette/__init__.py +++ b/binette/__init__.py @@ -1 +1 @@ -__version__ = '1.0.1' \ No newline at end of file +__version__ = '1.0.2' \ No newline at end of file From 
888923c38926ecac6a1c9d57a4fda31b295893dc Mon Sep 17 00:00:00 2001 From: JeanMainguy Date: Wed, 4 Sep 2024 17:47:53 +0200 Subject: [PATCH 36/36] update tuto env file for binette 1.0.2 --- docs/tutorial/binette_tutorial_env.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial/binette_tutorial_env.yaml b/docs/tutorial/binette_tutorial_env.yaml index 72655d3..be03147 100644 --- a/docs/tutorial/binette_tutorial_env.yaml +++ b/docs/tutorial/binette_tutorial_env.yaml @@ -14,7 +14,7 @@ dependencies: - metabat2=2 # Binning tool for metagenomic datasets - semibin=2 # Binning tool for metagenomic datasets - concoct=1 # Binning tool for metagenomic datasets - - binette=1.0.1 # Binette for binning and genome analysis + - binette=1.0.2 # Binette for binning and genome analysis - das_tool=1 # Bin refiner to compare with Binette - jupyter # Jupyter notebook for interactive analysis - pandas=1 # Data manipulation and analysis
|   | Contamination ≤ 10 and Completeness | tool     | bin_count |
|---|-------------------------------------|----------|-----------|
| 0 | > 50% and ≤ 70%                     | binette  | 5         |
| 1 | > 50% and ≤ 70%                     | maxbin2  | 1         |
| 2 | > 50% and ≤ 70%                     | metabat2 | 1         |
| 3 | > 50% and ≤ 70%                     | semibin2 | 2         |
| 4 | > 70% and ≤ 90%                     | binette  | 3         |
| 5 | > 70% and ≤ 90%                     | concoct  | 2         |
6