diff --git a/coverage-badge.svg b/coverage-badge.svg new file mode 100644 index 0000000..af2f09e --- /dev/null +++ b/coverage-badge.svg @@ -0,0 +1 @@ +coverage: 0.00%coverage0.00% \ No newline at end of file diff --git a/coverage_html.js b/coverage_html.js new file mode 100644 index 0000000..5934882 --- /dev/null +++ b/coverage_html.js @@ -0,0 +1,624 @@ +// Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 +// For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt + +// Coverage.py HTML report browser code. +/*jslint browser: true, sloppy: true, vars: true, plusplus: true, maxerr: 50, indent: 4 */ +/*global coverage: true, document, window, $ */ + +coverage = {}; + +// General helpers +function debounce(callback, wait) { + let timeoutId = null; + return function(...args) { + clearTimeout(timeoutId); + timeoutId = setTimeout(() => { + callback.apply(this, args); + }, wait); + }; +}; + +function checkVisible(element) { + const rect = element.getBoundingClientRect(); + const viewBottom = Math.max(document.documentElement.clientHeight, window.innerHeight); + const viewTop = 30; + return !(rect.bottom < viewTop || rect.top >= viewBottom); +} + +function on_click(sel, fn) { + const elt = document.querySelector(sel); + if (elt) { + elt.addEventListener("click", fn); + } +} + +// Helpers for table sorting +function getCellValue(row, column = 0) { + const cell = row.cells[column] // nosemgrep: eslint.detect-object-injection + if (cell.childElementCount == 1) { + const child = cell.firstElementChild + if (child instanceof HTMLTimeElement && child.dateTime) { + return child.dateTime + } else if (child instanceof HTMLDataElement && child.value) { + return child.value + } + } + return cell.innerText || cell.textContent; +} + +function rowComparator(rowA, rowB, column = 0) { + let valueA = getCellValue(rowA, column); + let valueB = getCellValue(rowB, column); + if (!isNaN(valueA) && !isNaN(valueB)) { + return valueA - valueB + } + return valueA.localeCompare(valueB, undefined, {numeric: true}); +} + +function sortColumn(th) { + // Get the current sorting direction of the selected header, + // clear state on other headers and then set the new sorting direction + const currentSortOrder = th.getAttribute("aria-sort"); + [...th.parentElement.cells].forEach(header => header.setAttribute("aria-sort", "none")); + if (currentSortOrder === "none") { + th.setAttribute("aria-sort", th.dataset.defaultSortOrder || "ascending"); + } else { + th.setAttribute("aria-sort", currentSortOrder === "ascending" ? "descending" : "ascending"); + } + + const column = [...th.parentElement.cells].indexOf(th) + + // Sort all rows and afterwards append them in order to move them in the DOM + Array.from(th.closest("table").querySelectorAll("tbody tr")) + .sort((rowA, rowB) => rowComparator(rowA, rowB, column) * (th.getAttribute("aria-sort") === "ascending" ? 1 : -1)) + .forEach(tr => tr.parentElement.appendChild(tr) ); +} + +// Find all the elements with data-shortcut attribute, and use them to assign a shortcut key. +coverage.assign_shortkeys = function () { + document.querySelectorAll("[data-shortcut]").forEach(element => { + document.addEventListener("keypress", event => { + if (event.target.tagName.toLowerCase() === "input") { + return; // ignore keypress from search filter + } + if (event.key === element.dataset.shortcut) { + element.click(); + } + }); + }); +}; + +// Create the events for the filter box. +coverage.wire_up_filter = function () { + // Cache elements. + const table = document.querySelector("table.index"); + const table_body_rows = table.querySelectorAll("tbody tr"); + const no_rows = document.getElementById("no_rows"); + + // Observe filter keyevents. + document.getElementById("filter").addEventListener("input", debounce(event => { + // Keep running total of each metric, first index contains number of shown rows + const totals = new Array(table.rows[0].cells.length).fill(0); + // Accumulate the percentage as fraction + totals[totals.length - 1] = { "numer": 0, "denom": 0 }; // nosemgrep: eslint.detect-object-injection + + // Hide / show elements. + table_body_rows.forEach(row => { + if (!row.cells[0].textContent.includes(event.target.value)) { + // hide + row.classList.add("hidden"); + return; + } + + // show + row.classList.remove("hidden"); + totals[0]++; + + for (let column = 1; column < totals.length; column++) { + // Accumulate dynamic totals + cell = row.cells[column] // nosemgrep: eslint.detect-object-injection + if (column === totals.length - 1) { + // Last column contains percentage + const [numer, denom] = cell.dataset.ratio.split(" "); + totals[column]["numer"] += parseInt(numer, 10); // nosemgrep: eslint.detect-object-injection + totals[column]["denom"] += parseInt(denom, 10); // nosemgrep: eslint.detect-object-injection + } else { + totals[column] += parseInt(cell.textContent, 10); // nosemgrep: eslint.detect-object-injection + } + } + }); + + // Show placeholder if no rows will be displayed. + if (!totals[0]) { + // Show placeholder, hide table. + no_rows.style.display = "block"; + table.style.display = "none"; + return; + } + + // Hide placeholder, show table. + no_rows.style.display = null; + table.style.display = null; + + const footer = table.tFoot.rows[0]; + // Calculate new dynamic sum values based on visible rows. + for (let column = 1; column < totals.length; column++) { + // Get footer cell element. + const cell = footer.cells[column]; // nosemgrep: eslint.detect-object-injection + + // Set value into dynamic footer cell element. + if (column === totals.length - 1) { + // Percentage column uses the numerator and denominator, + // and adapts to the number of decimal places. + const match = /\.([0-9]+)/.exec(cell.textContent); + const places = match ? match[1].length : 0; + const { numer, denom } = totals[column]; // nosemgrep: eslint.detect-object-injection + cell.dataset.ratio = `${numer} ${denom}`; + // Check denom to prevent NaN if filtered files contain no statements + cell.textContent = denom + ? `${(numer * 100 / denom).toFixed(places)}%` + : `${(100).toFixed(places)}%`; + } else { + cell.textContent = totals[column]; // nosemgrep: eslint.detect-object-injection + } + } + })); + + // Trigger change event on setup, to force filter on page refresh + // (filter value may still be present). + document.getElementById("filter").dispatchEvent(new Event("input")); +}; + +coverage.INDEX_SORT_STORAGE = "COVERAGE_INDEX_SORT_2"; + +// Loaded on index.html +coverage.index_ready = function () { + coverage.assign_shortkeys(); + coverage.wire_up_filter(); + document.querySelectorAll("[data-sortable] th[aria-sort]").forEach( + th => th.addEventListener("click", e => sortColumn(e.target)) + ); + + // Look for a localStorage item containing previous sort settings: + const stored_list = localStorage.getItem(coverage.INDEX_SORT_STORAGE); + + if (stored_list) { + const {column, direction} = JSON.parse(stored_list); + const th = document.querySelector("[data-sortable]").tHead.rows[0].cells[column]; // nosemgrep: eslint.detect-object-injection + th.setAttribute("aria-sort", direction === "ascending" ? "descending" : "ascending"); + th.click() + } + + // Watch for page unload events so we can save the final sort settings: + window.addEventListener("unload", function () { + const th = document.querySelector('[data-sortable] th[aria-sort="ascending"], [data-sortable] [aria-sort="descending"]'); + if (!th) { + return; + } + localStorage.setItem(coverage.INDEX_SORT_STORAGE, JSON.stringify({ + column: [...th.parentElement.cells].indexOf(th), + direction: th.getAttribute("aria-sort"), + })); + }); + + on_click(".button_prev_file", coverage.to_prev_file); + on_click(".button_next_file", coverage.to_next_file); + + on_click(".button_show_hide_help", coverage.show_hide_help); +}; + +// -- pyfile stuff -- + +coverage.LINE_FILTERS_STORAGE = "COVERAGE_LINE_FILTERS"; + +coverage.pyfile_ready = function () { + // If we're directed to a particular line number, highlight the line. + var frag = location.hash; + if (frag.length > 2 && frag[1] === "t") { + document.querySelector(frag).closest(".n").classList.add("highlight"); + coverage.set_sel(parseInt(frag.substr(2), 10)); + } else { + coverage.set_sel(0); + } + + on_click(".button_toggle_run", coverage.toggle_lines); + on_click(".button_toggle_mis", coverage.toggle_lines); + on_click(".button_toggle_exc", coverage.toggle_lines); + on_click(".button_toggle_par", coverage.toggle_lines); + + on_click(".button_next_chunk", coverage.to_next_chunk_nicely); + on_click(".button_prev_chunk", coverage.to_prev_chunk_nicely); + on_click(".button_top_of_page", coverage.to_top); + on_click(".button_first_chunk", coverage.to_first_chunk); + + on_click(".button_prev_file", coverage.to_prev_file); + on_click(".button_next_file", coverage.to_next_file); + on_click(".button_to_index", coverage.to_index); + + on_click(".button_show_hide_help", coverage.show_hide_help); + + coverage.filters = undefined; + try { + coverage.filters = localStorage.getItem(coverage.LINE_FILTERS_STORAGE); + } catch(err) {} + + if (coverage.filters) { + coverage.filters = JSON.parse(coverage.filters); + } + else { + coverage.filters = {run: false, exc: true, mis: true, par: true}; + } + + for (cls in coverage.filters) { + coverage.set_line_visibilty(cls, coverage.filters[cls]); // nosemgrep: eslint.detect-object-injection + } + + coverage.assign_shortkeys(); + coverage.init_scroll_markers(); + coverage.wire_up_sticky_header(); + + document.querySelectorAll("[id^=ctxs]").forEach( + cbox => cbox.addEventListener("click", coverage.expand_contexts) + ); + + // Rebuild scroll markers when the window height changes. + window.addEventListener("resize", coverage.build_scroll_markers); +}; + +coverage.toggle_lines = function (event) { + const btn = event.target.closest("button"); + const category = btn.value + const show = !btn.classList.contains("show_" + category); + coverage.set_line_visibilty(category, show); + coverage.build_scroll_markers(); + coverage.filters[category] = show; + try { + localStorage.setItem(coverage.LINE_FILTERS_STORAGE, JSON.stringify(coverage.filters)); + } catch(err) {} +}; + +coverage.set_line_visibilty = function (category, should_show) { + const cls = "show_" + category; + const btn = document.querySelector(".button_toggle_" + category); + if (btn) { + if (should_show) { + document.querySelectorAll("#source ." + category).forEach(e => e.classList.add(cls)); + btn.classList.add(cls); + } + else { + document.querySelectorAll("#source ." + category).forEach(e => e.classList.remove(cls)); + btn.classList.remove(cls); + } + } +}; + +// Return the nth line div. +coverage.line_elt = function (n) { + return document.getElementById("t" + n)?.closest("p"); +}; + +// Set the selection. b and e are line numbers. +coverage.set_sel = function (b, e) { + // The first line selected. + coverage.sel_begin = b; + // The next line not selected. + coverage.sel_end = (e === undefined) ? b+1 : e; +}; + +coverage.to_top = function () { + coverage.set_sel(0, 1); + coverage.scroll_window(0); +}; + +coverage.to_first_chunk = function () { + coverage.set_sel(0, 1); + coverage.to_next_chunk(); +}; + +coverage.to_prev_file = function () { + window.location = document.getElementById("prevFileLink").href; +} + +coverage.to_next_file = function () { + window.location = document.getElementById("nextFileLink").href; +} + +coverage.to_index = function () { + location.href = document.getElementById("indexLink").href; +} + +coverage.show_hide_help = function () { + const helpCheck = document.getElementById("help_panel_state") + helpCheck.checked = !helpCheck.checked; +} + +// Return a string indicating what kind of chunk this line belongs to, +// or null if not a chunk. +coverage.chunk_indicator = function (line_elt) { + const classes = line_elt?.className; + if (!classes) { + return null; + } + const match = classes.match(/\bshow_\w+\b/); + if (!match) { + return null; + } + return match[0]; +}; + +coverage.to_next_chunk = function () { + const c = coverage; + + // Find the start of the next colored chunk. + var probe = c.sel_end; + var chunk_indicator, probe_line; + while (true) { + probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + chunk_indicator = c.chunk_indicator(probe_line); + if (chunk_indicator) { + break; + } + probe++; + } + + // There's a next chunk, `probe` points to it. + var begin = probe; + + // Find the end of this chunk. + var next_indicator = chunk_indicator; + while (next_indicator === chunk_indicator) { + probe++; + probe_line = c.line_elt(probe); + next_indicator = c.chunk_indicator(probe_line); + } + c.set_sel(begin, probe); + c.show_selection(); +}; + +coverage.to_prev_chunk = function () { + const c = coverage; + + // Find the end of the prev colored chunk. + var probe = c.sel_begin-1; + var probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + var chunk_indicator = c.chunk_indicator(probe_line); + while (probe > 1 && !chunk_indicator) { + probe--; + probe_line = c.line_elt(probe); + if (!probe_line) { + return; + } + chunk_indicator = c.chunk_indicator(probe_line); + } + + // There's a prev chunk, `probe` points to its last line. + var end = probe+1; + + // Find the beginning of this chunk. + var prev_indicator = chunk_indicator; + while (prev_indicator === chunk_indicator) { + probe--; + if (probe <= 0) { + return; + } + probe_line = c.line_elt(probe); + prev_indicator = c.chunk_indicator(probe_line); + } + c.set_sel(probe+1, end); + c.show_selection(); +}; + +// Returns 0, 1, or 2: how many of the two ends of the selection are on +// the screen right now? +coverage.selection_ends_on_screen = function () { + if (coverage.sel_begin === 0) { + return 0; + } + + const begin = coverage.line_elt(coverage.sel_begin); + const end = coverage.line_elt(coverage.sel_end-1); + + return ( + (checkVisible(begin) ? 1 : 0) + + (checkVisible(end) ? 1 : 0) + ); +}; + +coverage.to_next_chunk_nicely = function () { + if (coverage.selection_ends_on_screen() === 0) { + // The selection is entirely off the screen: + // Set the top line on the screen as selection. + + // This will select the top-left of the viewport + // As this is most likely the span with the line number we take the parent + const line = document.elementFromPoint(0, 0).parentElement; + if (line.parentElement !== document.getElementById("source")) { + // The element is not a source line but the header or similar + coverage.select_line_or_chunk(1); + } else { + // We extract the line number from the id + coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); + } + } + coverage.to_next_chunk(); +}; + +coverage.to_prev_chunk_nicely = function () { + if (coverage.selection_ends_on_screen() === 0) { + // The selection is entirely off the screen: + // Set the lowest line on the screen as selection. + + // This will select the bottom-left of the viewport + // As this is most likely the span with the line number we take the parent + const line = document.elementFromPoint(document.documentElement.clientHeight-1, 0).parentElement; + if (line.parentElement !== document.getElementById("source")) { + // The element is not a source line but the header or similar + coverage.select_line_or_chunk(coverage.lines_len); + } else { + // We extract the line number from the id + coverage.select_line_or_chunk(parseInt(line.id.substring(1), 10)); + } + } + coverage.to_prev_chunk(); +}; + +// Select line number lineno, or if it is in a colored chunk, select the +// entire chunk +coverage.select_line_or_chunk = function (lineno) { + var c = coverage; + var probe_line = c.line_elt(lineno); + if (!probe_line) { + return; + } + var the_indicator = c.chunk_indicator(probe_line); + if (the_indicator) { + // The line is in a highlighted chunk. + // Search backward for the first line. + var probe = lineno; + var indicator = the_indicator; + while (probe > 0 && indicator === the_indicator) { + probe--; + probe_line = c.line_elt(probe); + if (!probe_line) { + break; + } + indicator = c.chunk_indicator(probe_line); + } + var begin = probe + 1; + + // Search forward for the last line. + probe = lineno; + indicator = the_indicator; + while (indicator === the_indicator) { + probe++; + probe_line = c.line_elt(probe); + indicator = c.chunk_indicator(probe_line); + } + + coverage.set_sel(begin, probe); + } + else { + coverage.set_sel(lineno); + } +}; + +coverage.show_selection = function () { + // Highlight the lines in the chunk + document.querySelectorAll("#source .highlight").forEach(e => e.classList.remove("highlight")); + for (let probe = coverage.sel_begin; probe < coverage.sel_end; probe++) { + coverage.line_elt(probe).querySelector(".n").classList.add("highlight"); + } + + coverage.scroll_to_selection(); +}; + +coverage.scroll_to_selection = function () { + // Scroll the page if the chunk isn't fully visible. + if (coverage.selection_ends_on_screen() < 2) { + const element = coverage.line_elt(coverage.sel_begin); + coverage.scroll_window(element.offsetTop - 60); + } +}; + +coverage.scroll_window = function (to_pos) { + window.scroll({top: to_pos, behavior: "smooth"}); +}; + +coverage.init_scroll_markers = function () { + // Init some variables + coverage.lines_len = document.querySelectorAll("#source > p").length; + + // Build html + coverage.build_scroll_markers(); +}; + +coverage.build_scroll_markers = function () { + const temp_scroll_marker = document.getElementById("scroll_marker") + if (temp_scroll_marker) temp_scroll_marker.remove(); + // Don't build markers if the window has no scroll bar. + if (document.body.scrollHeight <= window.innerHeight) { + return; + } + + const marker_scale = window.innerHeight / document.body.scrollHeight; + const line_height = Math.min(Math.max(3, window.innerHeight / coverage.lines_len), 10); + + let previous_line = -99, last_mark, last_top; + + const scroll_marker = document.createElement("div"); + scroll_marker.id = "scroll_marker"; + document.getElementById("source").querySelectorAll( + "p.show_run, p.show_mis, p.show_exc, p.show_exc, p.show_par" + ).forEach(element => { + const line_top = Math.floor(element.offsetTop * marker_scale); + const line_number = parseInt(element.querySelector(".n a").id.substr(1)); + + if (line_number === previous_line + 1) { + // If this solid missed block just make previous mark higher. + last_mark.style.height = `${line_top + line_height - last_top}px`; + } else { + // Add colored line in scroll_marker block. + last_mark = document.createElement("div"); + last_mark.id = `m${line_number}`; + last_mark.classList.add("marker"); + last_mark.style.height = `${line_height}px`; + last_mark.style.top = `${line_top}px`; + scroll_marker.append(last_mark); + last_top = line_top; + } + + previous_line = line_number; + }); + + // Append last to prevent layout calculation + document.body.append(scroll_marker); +}; + +coverage.wire_up_sticky_header = function () { + const header = document.querySelector("header"); + const header_bottom = ( + header.querySelector(".content h2").getBoundingClientRect().top - + header.getBoundingClientRect().top + ); + + function updateHeader() { + if (window.scrollY > header_bottom) { + header.classList.add("sticky"); + } else { + header.classList.remove("sticky"); + } + } + + window.addEventListener("scroll", updateHeader); + updateHeader(); +}; + +coverage.expand_contexts = function (e) { + var ctxs = e.target.parentNode.querySelector(".ctxs"); + + if (!ctxs.classList.contains("expanded")) { + var ctxs_text = ctxs.textContent; + var width = Number(ctxs_text[0]); + ctxs.textContent = ""; + for (var i = 1; i < ctxs_text.length; i += width) { + key = ctxs_text.substring(i, i + width).trim(); + ctxs.appendChild(document.createTextNode(contexts[key])); + ctxs.appendChild(document.createElement("br")); + } + ctxs.classList.add("expanded"); + } +}; + +document.addEventListener("DOMContentLoaded", () => { + if (document.body.classList.contains("indexfile")) { + coverage.index_ready(); + } else { + coverage.pyfile_ready(); + } +}); diff --git a/d_0bf1b44744b37667___init___py.html b/d_0bf1b44744b37667___init___py.html new file mode 100644 index 0000000..39439da --- /dev/null +++ b/d_0bf1b44744b37667___init___py.html @@ -0,0 +1,97 @@ + + + + + Coverage for binette/__init__.py: 100% + + + + + +
+
+

+ Coverage for binette/__init__.py: + 100% +

+ +

+ 0 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+
+ + + diff --git a/d_0bf1b44744b37667_bin_manager_py.html b/d_0bf1b44744b37667_bin_manager_py.html new file mode 100644 index 0000000..048f2a3 --- /dev/null +++ b/d_0bf1b44744b37667_bin_manager_py.html @@ -0,0 +1,544 @@ + + + + + Coverage for binette/bin_manager.py: 0% + + + + + +
+
+

+ Coverage for binette/bin_manager.py: + 0% +

+ +

+ 184 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+

1import logging 

+

2import os 

+

3from collections import defaultdict 

+

4import pyfastx 

+

5 

+

6import itertools 

+

7import networkx as nx 

+

8from typing import List, Dict, Iterable, Tuple, Set 

+

9 

+

10class Bin: 

+

11 counter = 0 

+

12 

+

13 def __init__(self, contigs: Iterable[str], origin: str, name: str) -> None: 

+

14 """ 

+

15 Initialize a Bin object. 

+

16 

+

17 :param contigs: Iterable of contig names belonging to the bin. 

+

18 :param origin: Origin/source of the bin. 

+

19 :param name: Name of the bin. 

+

20 """ 

+

21 Bin.counter += 1 

+

22 

+

23 self.origin = origin 

+

24 self.name = name 

+

25 self.id = Bin.counter 

+

26 self.contigs = set(contigs) 

+

27 self.hash = hash(str(sorted(self.contigs))) 

+

28 

+

29 self.length = None 

+

30 self.N50 = None 

+

31 

+

32 self.completeness = None 

+

33 self.contamination = None 

+

34 self.score = None 

+

35 

+

36 def __eq__(self, other: 'Bin') -> bool: 

+

37 """ 

+

38 Compare the Bin object with another object for equality. 

+

39 

+

40 :param other: The object to compare with. 

+

41 :return: True if the objects are equal, False otherwise. 

+

42 """ 

+

43 return self.contigs == other.contigs 

+

44 

+

45 def __hash__(self) -> int: 

+

46 """ 

+

47 Compute the hash value of the Bin object. 

+

48 

+

49 :return: The hash value. 

+

50 """ 

+

51 return self.hash 

+

52 

+

53 def __str__(self) -> str: 

+

54 """ 

+

55 Return a string representation of the Bin object. 

+

56 

+

57 :return: The string representation of the Bin object. 

+

58 """ 

+

59 return f"{self.origin}_{self.id} ({len(self.contigs)} contigs)" 

+

60 

+

61 def overlaps_with(self, other: 'Bin') -> Set[str]: 

+

62 """ 

+

63 Find the contigs that overlap between this bin and another bin. 

+

64 

+

65 :param other: The other Bin object. 

+

66 :return: A set of contig names that overlap between the bins. 

+

67 """ 

+

68 return self.contigs & other.contigs 

+

69 

+

70 def __and__(self, other: 'Bin') -> 'Bin': 

+

71 """ 

+

72 Perform a logical AND operation between this bin and another bin. 

+

73 

+

74 :param other: The other Bin object. 

+

75 :return: A new Bin object representing the intersection of the bins. 

+

76 """ 

+

77 contigs = self.contigs & other.contigs 

+

78 name = f"{self.name} & {other.name}" 

+

79 origin = f"{self.origin} & {other.origin}" 

+

80 

+

81 return Bin(contigs, origin, name) 

+

82 

+

83 

+

84 def add_length(self, length: int) -> None: 

+

85 """ 

+

86 Add the length attribute to the Bin object if the provided length is a positive integer. 

+

87 

+

88 :param length: The length value to add. 

+

89 :return: None 

+

90 """ 

+

91 if isinstance(length, int) and length > 0: 

+

92 self.length = length 

+

93 else: 

+

94 raise ValueError("Length should be a positive integer.") 

+

95 

+

96 def add_N50(self, n50: int) -> None: 

+

97 """ 

+

98 Add the N50 attribute to the Bin object. 

+

99 

+

100 :param n50: The N50 value to add. 

+

101 :return: None 

+

102 """ 

+

103 if isinstance(n50, int) and n50 >= 0: 

+

104 self.N50 = n50 

+

105 else: 

+

106 raise ValueError("N50 should be a positive integer.") 

+

107 

+

108 

+

109 def add_quality(self, completeness: float, contamination: float, contamination_weight: float) -> None: 

+

110 """ 

+

111 Set the quality attributes of the bin. 

+

112 

+

113 :param completeness: The completeness value. 

+

114 :param contamination: The contamination value. 

+

115 :param contamination_weight: The weight assigned to contamination in the score calculation. 

+

116 """ 

+

117 self.completeness = completeness 

+

118 self.contamination = contamination 

+

119 self.score = completeness - contamination_weight * contamination 

+

120 

+

121 def intersection(self, *others: 'Bin') -> 'Bin': 

+

122 """ 

+

123 Compute the intersection of the bin with other bins. 

+

124 

+

125 :param others: Other bins to compute the intersection with. 

+

126 :return: A new Bin representing the intersection of the bins. 

+

127 """ 

+

128 other_contigs = (o.contigs for o in others) 

+

129 contigs = self.contigs.intersection(*other_contigs) 

+

130 name = f"{self.name} & {' & '.join([other.name for other in others])}" 

+

131 origin = "intersec" 

+

132 

+

133 return Bin(contigs, origin, name) 

+

134 

+

135 def difference(self, *others: 'Bin') -> 'Bin': 

+

136 """ 

+

137 Compute the difference between the bin and other bins. 

+

138 

+

139 :param others: Other bins to compute the difference with. 

+

140 :return: A new Bin representing the difference between the bins. 

+

141 """ 

+

142 other_contigs = (o.contigs for o in others) 

+

143 contigs = self.contigs.difference(*other_contigs) 

+

144 name = f"{self.name} - {' - '.join([other.name for other in others])}" 

+

145 origin = "diff" 

+

146 

+

147 return Bin(contigs, origin, name) 

+

148 

+

149 def union(self, *others: 'Bin') -> 'Bin': 

+

150 """ 

+

151 Compute the union of the bin with other bins. 

+

152 

+

153 :param others: Other bins to compute the union with. 

+

154 :return: A new Bin representing the union of the bins. 

+

155 """ 

+

156 other_contigs = (o.contigs for o in others) 

+

157 contigs = self.contigs.union(*other_contigs) 

+

158 name = f"{self.name} | {' | '.join([other.name for other in others])}" 

+

159 origin = "union" 

+

160 

+

161 return Bin(contigs, origin, name) 

+

162 

+

163 

+

164def get_bins_from_directory(bin_dir: str, set_name: str) -> List[Bin]: 

+

165 """ 

+

166 Retrieves a list of Bin objects from a directory containing bin FASTA files. 

+

167 

+

168 :param bin_dir: The directory path containing bin FASTA files. 

+

169 :param set_name: The name of the set the bins belong to. 

+

170 

+

171 :return: A list of Bin objects created from the bin FASTA files. 

+

172 """ 

+

173 bins = [] 

+

174 

+

175 for bin_fasta_file in os.listdir(bin_dir): 

+

176 

+

177 bin_fasta_path = os.path.join(bin_dir, bin_fasta_file) 

+

178 bin_name = bin_fasta_file 

+

179 

+

180 contigs = {name for name, _ in pyfastx.Fasta(bin_fasta_path, build_index=False)} 

+

181 

+

182 bin_obj = Bin(contigs, set_name, bin_name) 

+

183 

+

184 bins.append(bin_obj) 

+

185 

+

186 return bins 

+

187 

+

188 

+

189 

+

190def parse_bin_directories(bin_name_to_bin_dir: Dict[str, str]) -> Dict[str, list]: 

+

191 """ 

+

192 Parses multiple bin directories and returns a dictionary mapping bin names to a list of Bin objects. 

+

193 

+

194 :param bin_name_to_bin_dir: A dictionary mapping bin names to their respective bin directories. 

+

195 

+

196 :return: A dictionary mapping bin names to a list of Bin objects created from the bin directories. 

+

197 """ 

+

198 bin_name_to_bins = {} 

+

199 

+

200 for name, bin_dir in bin_name_to_bin_dir.items(): 

+

201 bin_name_to_bins[name] = get_bins_from_directory(bin_dir, name) 

+

202 

+

203 return bin_name_to_bins 

+

204 

+

205 

+

206def parse_contig2bin_tables(bin_name_to_bin_tables: Dict[str, str]) -> Dict[str, list]: 

+

207 """ 

+

208 Parses multiple contig-to-bin tables and returns a dictionary mapping bin names to a list of Bin objects. 

+

209 

+

210 :param bin_name_to_bin_tables: A dictionary mapping bin names to their respective contig-to-bin tables. 

+

211 

+

212 :return: A dictionary mapping bin names to a list of Bin objects created from the contig-to-bin tables. 

+

213 """ 

+

214 bin_name_to_bins = {} 

+

215 

+

216 for name, contig2bin_table in bin_name_to_bin_tables.items(): 

+

217 bin_name_to_bins[name] = get_bins_from_contig2bin_table(contig2bin_table, name) 

+

218 

+

219 return bin_name_to_bins 

+

220 

+

221 

+

222def get_bins_from_contig2bin_table(contig2bin_table: str, set_name: str) -> List[Bin]: 

+

223 """ 

+

224 Retrieves a list of Bin objects from a contig-to-bin table. 

+

225 

+

226 :param contig2bin_table: The path to the contig-to-bin table. 

+

227 :param set_name: The name of the set the bins belong to. 

+

228 

+

229 :return: A list of Bin objects created from the contig-to-bin table. 

+

230 """ 

+

231 bin_name2contigs = defaultdict(set) 

+

232 with open(contig2bin_table) as fl: 

+

233 for line in fl: 

+

234 if line.startswith("#") or line.startswith("@"): 

+

235 logging.debug(f"Ignoring a line from {contig2bin_table}: {line}") 

+

236 continue 

+

237 contig_name = line.strip().split("\t")[0] 

+

238 bin_name = line.strip().split("\t")[1] 

+

239 bin_name2contigs[bin_name].add(contig_name) 

+

240 

+

241 bins = [] 

+

242 for bin_name, contigs in bin_name2contigs.items(): 

+

243 bin_obj = Bin(contigs, set_name, bin_name) 

+

244 bins.append(bin_obj) 

+

245 return bins 

+

246 

+

247 

+

248def from_bin_sets_to_bin_graph(bin_name_to_bin_set: Dict[str, set]) -> nx.Graph: 

+

249 """ 

+

250 Creates a bin graph from a dictionary of bin sets. 

+

251 

+

252 :param bin_name_to_bin_set: A dictionary mapping bin names to their respective bin sets. 

+

253 

+

254 :return: A networkx Graph representing the bin graph of overlapping bins. 

+

255 """ 

+

256 G = nx.Graph() 

+

257 

+

258 for set1_name, set2_name in itertools.combinations(bin_name_to_bin_set, 2): 

+

259 set1 = bin_name_to_bin_set[set1_name] 

+

260 set2 = bin_name_to_bin_set[set2_name] 

+

261 

+

262 for bin1, bin2 in itertools.product(set1, set2): 

+

263 

+

264 if bin1.overlaps_with(bin2): 

+

265 G.add_edge(bin1, bin2) 

+

266 return G 

+

267 

+

268 

+

269 

+

270def get_all_possible_combinations(clique: Iterable) -> Iterable[Tuple]: 

+

271 """ 

+

272 Generates all possible combinations of elements from a given clique. 

+

273 

+

274 :param clique: An iterable representing a clique. 

+

275 

+

276 :return: An iterable of tuples representing all possible combinations of elements from the clique. 

+

277 """ 

+

278 return (c for r in range(2, len(clique) + 1) for c in itertools.combinations(clique, r)) 

+

279 

+

280 

+

281def get_intersection_bins(G: nx.Graph) -> Set[Bin]: 

+

282 """ 

+

283 Retrieves the intersection bins from a given graph. 

+

284 

+

285 :param G: A networkx Graph representing the graph. 

+

286 

+

287 :return: A set of Bin objects representing the intersection bins. 

+

288 """ 

+

289 intersect_bins = set() 

+

290 

+

291 for clique in nx.clique.find_cliques(G): 

+

292 bins_combinations = get_all_possible_combinations(clique) 

+

293 for bins in bins_combinations: 

+

294 if max((b.completeness for b in bins)) < 20: 

+

295 logging.debug("completeness is not good enough to create a new bin on intersection") 

+

296 logging.debug(f"{[(str(b), b.completeness, b.contamination) for b in bins]}") 

+

297 continue 

+

298 

+

299 intersec_bin = bins[0].intersection(*bins[1:]) 

+

300 

+

301 if intersec_bin.contigs: 

+

302 intersect_bins.add(intersec_bin) 

+

303 

+

304 return intersect_bins 

+

305 

+

306 

+

307def get_difference_bins(G: nx.Graph) -> Set[Bin]: 

+

308 """ 

+

309 Retrieves the difference bins from a given graph. 

+

310 

+

311 :param G: A networkx Graph representing the graph. 

+

312 

+

313 :return: A set of Bin objects representing the difference bins. 

+

314 """ 

+

315 difference_bins = set() 

+

316 

+

317 for clique in nx.clique.find_cliques(G): 

+

318 

+

319 bins_combinations = get_all_possible_combinations(clique) 

+

320 for bins in bins_combinations: 

+

321 

+

322 for bin_a in bins: 

+

323 

+

324 bin_diff = bin_a.difference(*(b for b in bins if b != bin_a)) 

+

325 if bin_a.completeness < 20: 

+

326 logging.debug(f"completeness of {bin_a} is not good enough to do difference... ") 

+

327 logging.debug(f"{[(str(b), b.completeness, b.contamination) for b in bins]}") 

+

328 continue 

+

329 

+

330 if bin_diff.contigs: 

+

331 difference_bins.add(bin_diff) 

+

332 

+

333 return difference_bins 

+

334 

+

335 

+

336def get_union_bins(G: nx.Graph, max_conta: int = 50) -> Set[Bin]: 

+

337 """ 

+

338 Retrieves the union bins from a given graph. 

+

339 

+

340 :param G: A networkx Graph representing the graph of bins. 

+

341 :param max_conta: Maximum allowed contamination value for a bin to be included in the union. 

+

342 

+

343 :return: A set of Bin objects representing the union bins. 

+

344 """ 

+

345 union_bins = set() 

+

346 for clique in nx.clique.find_cliques(G): 

+

347 bins_combinations = get_all_possible_combinations(clique) 

+

348 for bins in bins_combinations: 

+

349 if max((b.contamination for b in bins)) > max_conta: 

+

350 logging.debug("Some bin are too contaminated to make a useful union bin") 

+

351 logging.debug(f"{[(str(b), b.completeness, b.contamination) for b in bins]}") 

+

352 continue 

+

353 

+

354 bins = set(bins) 

+

355 bin_a = bins.pop() 

+

356 bin_union = bin_a.union(*bins) 

+

357 

+

358 if bin_union.contigs: 

+

359 union_bins.add(bin_union) 

+

360 

+

361 return union_bins 

+

362 

+

363 

+

364def select_best_bins(bins: List[Bin]) -> List[Bin]: 

+

365 """ 

+

366 Selects the best bins from a list of bins based on their scores, N50 values, and IDs. 

+

367 

+

368 :param bins: A list of Bin objects. 

+

369 

+

370 :return: A list of selected Bin objects. 

+

371 """ 

+

372 

+

373 logging.info("Sorting bins") 

+

374 # Sort on score, N50, and ID. Smaller ID values are preferred to select original bins first. 

+

375 sorted_bins = sorted(bins, key=lambda x: (x.score, x.N50, -x.id), reverse=True) 

+

376 

+

377 logging.info("Selecting bins") 

+

378 selected_bins = [] 

+

379 for b in sorted_bins: 

+

380 if b in bins: 

+

381 overlapping_bins = {b2 for b2 in bins if b.overlaps_with(b2)} 

+

382 bins -= overlapping_bins 

+

383 

+

384 selected_bins.append(b) 

+

385 

+

386 logging.info(f"Selected {len(selected_bins)} bins") 

+

387 return selected_bins 

+

388 

+

389 

+

390def dereplicate_bin_sets(bin_sets): 

+

391 """ 

+

392 Dereplicates bins from different bin sets to obtain a non-redundant bin set. 

+

393 

+

394 :param bin_sets: A list of bin sets. 

+

395 

+

396 :return: A set of non-redundant bins. 

+

397 """ 

+

398 return set().union(*bin_sets) 

+

399 

+

400 

+

401def get_contigs_in_bins(bins: List[Bin]) -> Set[str]: 

+

402 """ 

+

403 Retrieves all contigs present in the given list of bins. 

+

404 

+

405 :param bins: A list of Bin objects. 

+

406 

+

407 :return: A set of contigs present in the bins. 

+

408 """ 

+

409 return set().union(*(b.contigs for b in bins)) 

+

410 

+

411 

+

412def rename_bin_contigs(bins: List[Bin], contig_to_index: dict): 

+

413 """ 

+

414 Renames the contigs in the bins based on the provided mapping. 

+

415 

+

416 :param bins: A list of Bin objects. 

+

417 :param contig_to_index: A dictionary mapping old contig names to new index names. 

+

418 """ 

+

419 for b in bins: 

+

420 b.contigs = {contig_to_index[contig] for contig in b.contigs} 

+

421 b.hash = hash(str(sorted(b.contigs))) 

+

422 

+

423def create_intermediate_bins(bin_set_name_to_bins: Dict[str, Set[Bin]]) -> Set[Bin]: 

+

424 """ 

+

425 Creates intermediate bins from a dictionary of bin sets. 

+

426 

+

427 :param bin_set_name_to_bins: A dictionary mapping bin set names to corresponding bins. 

+

428 

+

429 :return: A set of intermediate bins created from intersections, differences, and unions. 

+

430 """ 

+

431 logging.info("Making bin graph...") 

+

432 connected_bins_graph = from_bin_sets_to_bin_graph(bin_set_name_to_bins) 

+

433 

+

434 logging.info("Creating intersection bins...") 

+

435 intersection_bins = get_intersection_bins(connected_bins_graph) 

+

436 logging.info(f"{len(intersection_bins)} bins created on intersections.") 

+

437 

+

438 logging.info("Creating difference bins...") 

+

439 difference_bins = get_difference_bins(connected_bins_graph) 

+

440 logging.info(f"{len(difference_bins)} bins created based on symmetric difference.") 

+

441 

+

442 logging.info("Creating union bins...") 

+

443 union_bins = get_union_bins(connected_bins_graph) 

+

444 logging.info(f"{len(union_bins)} bins created on unions.") 

+

445 

+

446 return difference_bins | intersection_bins | union_bins 

+

447 

+
+ + + diff --git a/d_0bf1b44744b37667_bin_quality_py.html b/d_0bf1b44744b37667_bin_quality_py.html new file mode 100644 index 0000000..808a74c --- /dev/null +++ b/d_0bf1b44744b37667_bin_quality_py.html @@ -0,0 +1,387 @@ + + + + + Coverage for binette/bin_quality.py: 0% + + + + + +
+
+

+ Coverage for binette/bin_quality.py: + 0% +

+ +

+ 112 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+

1#!/usr/bin/env python3 

+

2import concurrent.futures as cf 

+

3import logging 

+

4import os 

+

5from collections import Counter 

+

6from itertools import islice 

+

7from typing import Dict, Iterable, List, Tuple, Iterator 

+

8 

+

9import numpy as np 

+

10import pandas as pd 

+

11 

+

12# Suppress unnecessary TensorFlow warnings 

+

13os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2" 

+

14logging.getLogger("tensorflow").setLevel(logging.FATAL) 

+

15 

+

16 

+

17from checkm2 import keggData, modelPostprocessing, modelProcessing 

+

18from binette.bin_manager import Bin 

+

19 

+

20def get_bins_metadata_df(bins: List, contig_to_cds_count: Dict[str, int], contig_to_aa_counter: Dict[str, Counter], contig_to_aa_length: Dict[str, int]) -> pd.DataFrame: 

+

21 """ 

+

22 Generate a DataFrame containing metadata for a list of bins. 

+

23 

+

24 :param bins: A list of bin objects. 

+

25 :param contig_to_cds_count: A dictionary mapping contig names to CDS counts. 

+

26 :param contig_to_aa_counter: A dictionary mapping contig names to amino acid composition (Counter object). 

+

27 :param contig_to_aa_length: A dictionary mapping contig names to total amino acid length. 

+

28 :return: A DataFrame containing bin metadata. 

+

29 """ 

+

30 

+

31 metadata_order = keggData.KeggCalculator().return_proper_order("Metadata") 

+

32 

+

33 # Get bin metadata 

+

34 bin_metadata_list = [] 

+

35 for bin_obj in bins: 

+

36 bin_metadata = { 

+

37 "Name": bin_obj.id, 

+

38 "CDS": sum((contig_to_cds_count[c] for c in bin_obj.contigs if c in contig_to_cds_count)), 

+

39 "AALength": sum((contig_to_aa_length[c] for c in bin_obj.contigs if c in contig_to_aa_length)), 

+

40 } 

+

41 

+

42 bin_aa_counter = Counter() 

+

43 for contig in bin_obj.contigs: 

+

44 if contig in contig_to_aa_counter: 

+

45 bin_aa_counter += contig_to_aa_counter[contig] 

+

46 

+

47 bin_metadata.update(dict(bin_aa_counter)) 

+

48 bin_metadata_list.append(bin_metadata) 

+

49 

+

50 metadata_df = pd.DataFrame(bin_metadata_list, columns=["Name"] + metadata_order) 

+

51 

+

52 metadata_df = metadata_df.fillna(0) 

+

53 

+

54 metadata_df = metadata_df.astype({col: int for col in metadata_order}) 

+

55 

+

56 metadata_df = metadata_df.set_index("Name", drop=False) 

+

57 return metadata_df 

+

58 

+

59def get_diamond_feature_per_bin_df(bins: List, contig_to_kegg_counter: Dict[str, Counter]) -> Tuple[pd.DataFrame, int]: 

+

60 """ 

+

61 Generate a DataFrame containing Diamond feature counts per bin and completeness information for pathways, categories, and modules. 

+

62 

+

63 :param bins: A list of bin objects. 

+

64 :param contig_to_kegg_counter: A dictionary mapping contig names to KEGG annotation counters. 

+

65 :type bins: List 

+

66 :type contig_to_kegg_counter: Dict[str, Counter] 

+

67 :return: A tuple containing the DataFrame and the number of default KEGG orthologs (KOs). 

+

68 :rtype: Tuple[pd.DataFrame, int] 

+

69 """ 

+

70 KeggCalc = keggData.KeggCalculator() 

+

71 defaultKOs = KeggCalc.return_default_values_from_category("KO_Genes") 

+

72 

+

73 bin_to_ko_counter = {} 

+

74 for bin_obj in bins: 

+

75 bin_ko_counter = Counter() 

+

76 for contig in bin_obj.contigs: 

+

77 try: 

+

78 bin_ko_counter += contig_to_kegg_counter[contig] 

+

79 except KeyError: 

+

80 # No KO annotation found in this contig 

+

81 continue 

+

82 

+

83 bin_to_ko_counter[bin_obj.id] = bin_ko_counter 

+

84 

+

85 ko_count_per_bin_df = pd.DataFrame(bin_to_ko_counter, index=defaultKOs).transpose().fillna(0) 

+

86 ko_count_per_bin_df = ko_count_per_bin_df.astype(int) 

+

87 ko_count_per_bin_df["Name"] = ko_count_per_bin_df.index 

+

88 

+

89 logging.info("Calculating completeness of pathways and modules.") 

+

90 logging.debug("Calculating pathway completeness information") 

+

91 KO_pathways = KeggCalc.calculate_KO_group("KO_Pathways", ko_count_per_bin_df.copy()) 

+

92 

+

93 logging.debug("Calculating category completeness information") 

+

94 KO_categories = KeggCalc.calculate_KO_group("KO_Categories", ko_count_per_bin_df.copy()) 

+

95 

+

96 logging.debug("Calculating module completeness information") 

+

97 KO_modules = KeggCalc.calculate_module_completeness(ko_count_per_bin_df.copy()) 

+

98 

+

99 diamond_complete_results = pd.concat([ko_count_per_bin_df, KO_pathways, KO_modules, KO_categories], axis=1) 

+

100 

+

101 return diamond_complete_results, len(defaultKOs) 

+

102 

+

103 

+

104def compute_N50(list_of_lengths) -> int: 

+

105 """ 

+

106 Calculate N50 for a sequence of numbers. 

+

107 

+

108 :param list_of_lengths: List of numbers. 

+

109 :param list_of_lengths: list 

+

110 :return: N50 value. 

+

111 """ 

+

112 list_of_lengths = sorted(list_of_lengths) 

+

113 sum_len = sum(list_of_lengths) 

+

114 

+

115 cum_length = 0 

+

116 length = 0 

+

117 for length in list_of_lengths: 

+

118 if cum_length + length >= sum_len / 2: 

+

119 return length 

+

120 cum_length += length 

+

121 return length 

+

122 

+

123def add_bin_size_and_N50(bins: Iterable[Bin], contig_to_size: Dict[str,int]): 

+

124 """ 

+

125 Add bin size and N50 to a list of bin objects. 

+

126 

+

127 :param bins: List of bin objects. 

+

128 :param contig_to_size: Dictionary mapping contig names to their sizes. 

+

129 """ 

+

130 for bin_obj in bins: 

+

131 lengths = [contig_to_size[c] for c in bin_obj.contigs] 

+

132 n50 = compute_N50(lengths) 

+

133 

+

134 bin_obj.add_length(sum(lengths)) 

+

135 bin_obj.add_N50(n50) 

+

136 

+

137 

+

138def add_bin_metrics(bins: List, contig_info: Dict, contamination_weight: float, threads: int = 1): 

+

139 """ 

+

140 Add metrics to a list of bins. 

+

141 

+

142 :param bins: List of bin objects. 

+

143 :param contig_info: Dictionary containing contig information. 

+

144 :param contamination_weight: Weight for contamination assessment. 

+

145 :param threads: Number of threads for parallel processing (default is 1). 

+

146 

+

147 :return: List of processed bin objects. 

+

148 """ 

+

149 postProcessor = modelPostprocessing.modelProcessor(threads) 

+

150 

+

151 contig_to_kegg_counter = contig_info["contig_to_kegg_counter"] 

+

152 contig_to_cds_count = contig_info["contig_to_cds_count"] 

+

153 contig_to_aa_counter = contig_info["contig_to_aa_counter"] 

+

154 contig_to_aa_length = contig_info["contig_to_aa_length"] 

+

155 contig_to_length = contig_info["contig_to_length"] 

+

156 

+

157 logging.info("Assess bin length and N50") 

+

158 

+

159 add_bin_size_and_N50(bins, contig_to_length) 

+

160 

+

161 logging.info("Assess bin quality") 

+

162 assess_bins_quality_by_chunk( 

+

163 bins, 

+

164 contig_to_kegg_counter, 

+

165 contig_to_cds_count, 

+

166 contig_to_aa_counter, 

+

167 contig_to_aa_length, 

+

168 contamination_weight, 

+

169 postProcessor, 

+

170 ) 

+

171 return bins 

+

172 

+

173 

+

174def chunks(iterable: Iterable, size: int) -> Iterator[Tuple]: 

+

175 """ 

+

176 Generate adjacent chunks of data from an iterable. 

+

177 

+

178 :param iterable: The iterable to be divided into chunks. 

+

179 :param size: The size of each chunk. 

+

180 :return: An iterator that produces tuples of elements in chunks. 

+

181 """ 

+

182 it = iter(iterable) 

+

183 return iter(lambda: tuple(islice(it, size)), ()) 

+

184 

+

185 

+

186def assess_bins_quality_by_chunk(bins: List, 

+

187 contig_to_kegg_counter: Dict, 

+

188 contig_to_cds_count: Dict, 

+

189 contig_to_aa_counter: Dict, 

+

190 contig_to_aa_length: Dict, 

+

191 contamination_weight: float, 

+

192 postProcessor:modelPostprocessing.modelProcessor = None, 

+

193 threads: int = 1, 

+

194 chunk_size: int = 2500): 

+

195 """ 

+

196 Assess the quality of bins in chunks. 

+

197 

+

198 This function assesses the quality of bins in chunks to improve processing efficiency. 

+

199 

+

200 :param bins: List of bin objects. 

+

201 :param contig_to_kegg_counter: Dictionary mapping contig names to KEGG counters. 

+

202 :param contig_to_cds_count: Dictionary mapping contig names to CDS counts. 

+

203 :param contig_to_aa_counter: Dictionary mapping contig names to amino acid counters. 

+

204 :param contig_to_aa_length: Dictionary mapping contig names to amino acid lengths. 

+

205 :param contamination_weight: Weight for contamination assessment. 

+

206 :param postProcessor: post-processor from checkm2 

+

207 :param threads: Number of threads for parallel processing (default is 1). 

+

208 :param chunk_size: The size of each chunk.  

+

209 """ 

+

210 

+

211 for i, chunk_bins_iter in enumerate(chunks(bins, chunk_size)): 

+

212 chunk_bins = set(chunk_bins_iter) 

+

213 logging.debug(f"chunk {i}: assessing quality of {len(chunk_bins)}") 

+

214 assess_bins_quality( 

+

215 bins=chunk_bins, 

+

216 contig_to_kegg_counter= contig_to_kegg_counter, 

+

217 contig_to_cds_count=contig_to_cds_count, 

+

218 contig_to_aa_counter=contig_to_aa_counter, 

+

219 contig_to_aa_length=contig_to_aa_length, 

+

220 contamination_weight=contamination_weight, 

+

221 postProcessor=postProcessor, 

+

222 threads=threads 

+

223 ) 

+

224 

+

225def assess_bins_quality( 

+

226 bins: List, 

+

227 contig_to_kegg_counter: Dict, 

+

228 contig_to_cds_count: Dict, 

+

229 contig_to_aa_counter: Dict, 

+

230 contig_to_aa_length: Dict, 

+

231 contamination_weight: float, 

+

232 postProcessor: modelPostprocessing.modelProcessor = None, 

+

233 threads: int = 1,): 

+

234 """ 

+

235 Assess the quality of bins. 

+

236 

+

237 This function assesses the quality of bins based on various criteria and assigns completeness and contamination scores.  

+

238 This code is taken from checkm2 and adjusted 

+

239 

+

240 :param bins: List of bin objects. 

+

241 :param contig_to_kegg_counter: Dictionary mapping contig names to KEGG counters. 

+

242 :param contig_to_cds_count: Dictionary mapping contig names to CDS counts. 

+

243 :param contig_to_aa_counter: Dictionary mapping contig names to amino acid counters. 

+

244 :param contig_to_aa_length: Dictionary mapping contig names to amino acid lengths. 

+

245 :param contamination_weight: Weight for contamination assessment. 

+

246 :param postProcessor: A post-processor from checkm2 

+

247 :param threads: Number of threads for parallel processing (default is 1). 

+

248 """ 

+

249 if postProcessor is None: 

+

250 postProcessor = modelPostprocessing.modelProcessor(threads) 

+

251 

+

252 metadata_df = get_bins_metadata_df(bins, contig_to_cds_count, contig_to_aa_counter, contig_to_aa_length) 

+

253 

+

254 diamond_complete_results, ko_list_length = get_diamond_feature_per_bin_df(bins, contig_to_kegg_counter) 

+

255 diamond_complete_results = diamond_complete_results.drop(columns=["Name"]) 

+

256 

+

257 feature_vectors = pd.concat([metadata_df, diamond_complete_results], axis=1) 

+

258 feature_vectors = feature_vectors.sort_values(by="Name") 

+

259 

+

260 # 4: Call general model & specific models and derive predictions""" 

+

261 modelProc = modelProcessing.modelProcessor(threads) 

+

262 

+

263 vector_array = feature_vectors.iloc[:, 1:].values.astype(np.float) 

+

264 

+

265 logging.info("Predicting completeness and contamination using the general model.") 

+

266 general_results_comp, general_results_cont = modelProc.run_prediction_general(vector_array) 

+

267 

+

268 logging.info("Predicting completeness using the specific model.") 

+

269 specific_model_vector_len = ( 

+

270 ko_list_length + len(metadata_df.columns) 

+

271 ) - 1 

+

272 

+

273 # also retrieve scaled data for CSM calculations 

+

274 specific_results_comp, scaled_features = modelProc.run_prediction_specific(vector_array, specific_model_vector_len) 

+

275 

+

276 logging.info("Using cosine similarity to reference data to select an appropriate predictor model.") 

+

277 

+

278 final_comp, final_cont, models_chosen, csm_array = postProcessor.calculate_general_specific_ratio( 

+

279 vector_array[:, 20], scaled_features, general_results_comp, general_results_cont, specific_results_comp 

+

280 ) 

+

281 

+

282 final_results = feature_vectors[["Name"]].copy() 

+

283 final_results["Completeness"] = np.round(final_comp, 2) 

+

284 final_results["Contamination"] = np.round(final_cont, 2) 

+

285 

+

286 for bin_obj in bins: 

+

287 completeness = final_results.loc[bin_obj.id, "Completeness"] 

+

288 contamination = final_results.loc[bin_obj.id, "Contamination"] 

+

289 

+

290 bin_obj.add_quality(completeness, contamination, contamination_weight) 

+
+ + + diff --git a/d_0bf1b44744b37667_binette_py.html b/d_0bf1b44744b37667_binette_py.html new file mode 100644 index 0000000..5bac69c --- /dev/null +++ b/d_0bf1b44744b37667_binette_py.html @@ -0,0 +1,455 @@ + + + + + Coverage for binette/binette.py: 0% + + + + + +
+
+

+ Coverage for binette/binette.py: + 0% +

+ +

+ 134 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+

1#!/usr/bin/env python 

+

2""" 

+

3Module : Main 

+

4Description : The main entry point for the program. 

+

5Copyright : (c) Jean Mainguy, 28 nov. 2022  

+

6License : MIT 

+

7Maintainer : Jean Mainguy 

+

8Portability : POSIX 

+

9""" 

+

10 

+

11from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter 

+

12 

+

13import sys 

+

14import logging 

+

15import os 

+

16import pkg_resources 

+

17 

+

18from binette import contig_manager, cds, diamond, bin_quality, bin_manager, io_manager as io 

+

19from typing import List, Dict, Set, Tuple 

+

20 

+

21 

+

22def init_logging(verbose, debug): 

+

23 """Initialise logging.""" 

+

24 if debug: 

+

25 level = logging.DEBUG 

+

26 elif verbose: 

+

27 level = logging.INFO 

+

28 else: 

+

29 level = logging.WARNING 

+

30 

+

31 logging.basicConfig( 

+

32 level=level, 

+

33 format="%(asctime)s %(levelname)s - %(message)s", 

+

34 datefmt="[%Y-%m-%d %H:%M:%S]", 

+

35 ) 

+

36 

+

37 logging.info("Program started") 

+

38 logging.info( 

+

39 f'command line: {" ".join(sys.argv)}', 

+

40 ) 

+

41 

+

42 

+

43def parse_arguments(args): 

+

44 """Parse script arguments.""" 

+

45 program_version = pkg_resources.get_distribution("Binette").version 

+

46 

+

47 parser = ArgumentParser( 

+

48 description=f"Binette version={program_version}", 

+

49 formatter_class=ArgumentDefaultsHelpFormatter, 

+

50 ) 

+

51 # TODO add catagory to better visualize the required and the optional args 

+

52 input_arg = parser.add_mutually_exclusive_group(required=True) 

+

53 

+

54 input_arg.add_argument( 

+

55 "-d", 

+

56 "--bin_dirs", 

+

57 nargs="+", 

+

58 help="list of bin folders containing each bin in a fasta file.", 

+

59 ) 

+

60 

+

61 input_arg.add_argument( 

+

62 "-b", 

+

63 "--contig2bin_tables", 

+

64 nargs="+", 

+

65 help="list of contig2bin table with two columns separated\ 

+

66 with a tabulation: contig, bin", 

+

67 ) 

+

68 

+

69 parser.add_argument("-c", "--contigs", required=True, help="Contigs in fasta format.") 

+

70 

+

71 parser.add_argument( 

+

72 "-m", 

+

73 "--min_completeness", 

+

74 default=10, 

+

75 type=int, 

+

76 help="Minimum completeness required for final bin selections.", 

+

77 ) 

+

78 

+

79 parser.add_argument("-t", "--threads", default=1, type=int, help="Number of threads.") 

+

80 

+

81 parser.add_argument("-o", "--outdir", default="results", help="Output directory.") 

+

82 

+

83 parser.add_argument( 

+

84 "-w", 

+

85 "--contamination_weight", 

+

86 default=5, 

+

87 type=float, 

+

88 help="Bin are scored as follow: completeness - weight * contamination. " 

+

89 "A low contamination_weight favor complete bins over low contaminated bins.", 

+

90 ) 

+

91 

+

92 parser.add_argument( 

+

93 "-e", 

+

94 "--extension", 

+

95 default="fasta", 

+

96 help="Extension of fasta files in bin folders " 

+

97 "(necessary when --bin_dirs is used).", 

+

98 ) 

+

99 

+

100 parser.add_argument( 

+

101 "--checkm2_db", 

+

102 help="Provide a path for the CheckM2 diamond database. " 

+

103 "By default the database set via <checkm2 database> is used.", 

+

104 ) 

+

105 

+

106 parser.add_argument("-v", "--verbose", help="increase output verbosity", action="store_true") 

+

107 

+

108 parser.add_argument("--debug", help="active debug mode", action="store_true") 

+

109 

+

110 parser.add_argument("--resume", help="active resume mode", action="store_true") 

+

111 

+

112 parser.add_argument("--low_mem", help="low mem mode", action="store_true") 

+

113 

+

114 parser.add_argument("--version", action="version", version=program_version) 

+

115 

+

116 args = parser.parse_args(args) 

+

117 return args 

+

118 

+

119 

+

120def parse_input_files(bin_dirs: List[str], contig2bin_tables: List[str], contigs_fasta: str) -> Tuple[Dict[str, List], List, Dict[str, List], Dict[str, int]]: 

+

121 """ 

+

122 Parses input files to retrieve information related to bins and contigs. 

+

123 

+

124 :param bin_dirs: List of paths to directories containing bin FASTA files. 

+

125 :param contig2bin_tables: List of paths to contig-to-bin tables. 

+

126 :param contigs_fasta: Path to the contigs FASTA file. 

+

127 

+

128 :return: A tuple containing: 

+

129 - Dictionary mapping bin set names to lists of bins. 

+

130 - List of original bins. 

+

131 - Dictionary mapping bins to lists of contigs. 

+

132 - Dictionary mapping contig names to their lengths. 

+

133 """ 

+

134 

+

135 if bin_dirs: 

+

136 logging.info("Parsing bin directories.") 

+

137 bin_name_to_bin_dir = io.infer_bin_name_from_bin_inputs(bin_dirs) 

+

138 bin_set_name_to_bins = bin_manager.parse_bin_directories(bin_name_to_bin_dir) 

+

139 else: 

+

140 logging.info("Parsing bin2contig files.") 

+

141 bin_name_to_bin_table = io.infer_bin_name_from_bin_inputs(contig2bin_tables) 

+

142 bin_set_name_to_bins = bin_manager.parse_contig2bin_tables(bin_name_to_bin_table) 

+

143 

+

144 logging.info(f"{len(bin_set_name_to_bins)} bin sets processed:") 

+

145 for bin_set_id, bins in bin_set_name_to_bins.items(): 

+

146 logging.info(f" {bin_set_id} - {len(bins)} bins") 

+

147 

+

148 original_bins = bin_manager.dereplicate_bin_sets(bin_set_name_to_bins.values()) 

+

149 contigs_in_bins = bin_manager.get_contigs_in_bins(original_bins) 

+

150 

+

151 logging.info(f"Parsing contig fasta file: {contigs_fasta}") 

+

152 contigs_object = contig_manager.parse_fasta_file(contigs_fasta) 

+

153 contig_to_length = {seq.name: len(seq) for seq in contigs_object if seq.name in contigs_in_bins} 

+

154 

+

155 return bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length 

+

156 

+

157 

+

158def manage_protein_alignement(faa_file: str, contigs_fasta: str, contig_to_length: Dict[str, List], 

+

159 contigs_in_bins: Dict[str, List], diamond_result_file: str, 

+

160 checkm2_db: str, threads: int, resume: bool, low_mem: bool) -> Tuple[Dict[str, int], Dict[str, int]]: 

+

161 """ 

+

162 Predicts or reuses proteins prediction and runs diamond on them. 

+

163  

+

164 :param faa_file: The path to the .faa file. 

+

165 :param contigs_fasta: The path to the contigs FASTA file. 

+

166 :param contig_to_length: Dictionary mapping contig names to their lengths. 

+

167 :param contigs_in_bins: Dictionary mapping bin names to lists of contigs. 

+

168 :param diamond_result_file: The path to the diamond result file. 

+

169 :param checkm2_db: The path to the CheckM2 database. 

+

170 :param threads: Number of threads for parallel processing. 

+

171 :param resume: Boolean indicating whether to resume the process. 

+

172 :param low_mem: Boolean indicating whether to use low memory mode. 

+

173 

+

174 :return: A tuple containing dictionaries - contig_to_kegg_counter and contig_to_genes. 

+

175 """ 

+

176 

+

177 # Predict or reuse proteins prediction and run diamond on them 

+

178 if resume: 

+

179 logging.info(f"Parsing faa file: {faa_file}.") 

+

180 contig_to_genes = cds.parse_faa_file(faa_file) 

+

181 io.check_contig_consistency(contig_to_length, contig_to_genes, contigs_fasta, faa_file) 

+

182 

+

183 else: 

+

184 contigs_iterator = (s for s in contig_manager.parse_fasta_file(contigs_fasta) if s.name in contigs_in_bins) 

+

185 contig_to_genes = cds.predict(contigs_iterator, faa_file, threads) 

+

186 

+

187 if checkm2_db: 

+

188 diamond_db_path = checkm2_db 

+

189 else: 

+

190 # get checkm2 db stored in checkm2 install 

+

191 diamond_db_path = diamond.get_checkm2_db() 

+

192 

+

193 diamond_log = f"{os.path.splitext(diamond_result_file)[0]}.log" 

+

194 

+

195 diamond.run( 

+

196 faa_file, 

+

197 diamond_result_file, 

+

198 diamond_db_path, 

+

199 diamond_log, 

+

200 threads, 

+

201 low_mem=low_mem, 

+

202 ) 

+

203 

+

204 logging.info("Parsing diamond results.") 

+

205 contig_to_kegg_counter = diamond.get_contig_to_kegg_id(diamond_result_file) 

+

206 

+

207 # Check contigs from diamond vs input assembly consistency 

+

208 io.check_contig_consistency(contig_to_length, contig_to_kegg_counter, contigs_fasta, diamond_result_file) 

+

209 

+

210 return contig_to_kegg_counter, contig_to_genes 

+

211 

+

212 

+

213def select_bins_and_write_them(all_bins: Set[bin_manager.Bin], contigs_fasta: str, final_bin_report: str, min_completeness: float, 

+

214 index_to_contig: dict, outdir: str, debug: bool) -> List[bin_manager.Bin]: 

+

215 """ 

+

216 Selects and writes bins based on specific criteria. 

+

217 

+

218 :param all_bins: Set of Bin objects. 

+

219 :param contigs_fasta: Path to the contigs FASTA file. 

+

220 :param final_bin_report: Path to write the final bin report. 

+

221 :param min_completeness: Minimum completeness threshold for bin selection. 

+

222 :param index_to_contig: Dictionary mapping indices to contig names. 

+

223 :param outdir: Output directory to save final bins and reports. 

+

224 :param debug: Debug mode flag. 

+

225 :return: Selected bins that meet the completeness threshold. 

+

226 """ 

+

227 

+

228 outdir_final_bin_set = os.path.join(outdir, "final_bins") 

+

229 os.makedirs(outdir_final_bin_set, exist_ok=True) 

+

230 

+

231 if debug: 

+

232 all_bins_for_debug = set(all_bins) 

+

233 all_bin_compo_file = os.path.join(outdir, "all_bins_quality_reports.tsv") 

+

234 

+

235 logging.info(f"Writing all bins in {all_bin_compo_file}") 

+

236 

+

237 io.write_bin_info(all_bins_for_debug, all_bin_compo_file, add_contigs=True) 

+

238 

+

239 with open(os.path.join(outdir, "index_to_contig.tsv"), 'w') as flout: 

+

240 flout.write('\n'.join((f'{i}\t{c}' for i, c in index_to_contig.items()))) 

+

241 

+

242 logging.info("Selecting best bins") 

+

243 selected_bins = bin_manager.select_best_bins(all_bins) 

+

244 

+

245 logging.info(f"Bin Selection: {len(selected_bins)} selected bins") 

+

246 

+

247 logging.info(f"Filtering bins: only bins with completeness >= {min_completeness} are kept") 

+

248 selected_bins = [b for b in selected_bins if b.completeness >= min_completeness] 

+

249 

+

250 logging.info(f"Filtering bins: {len(selected_bins)} selected bins") 

+

251 

+

252 logging.info(f"Writing selected bins in {final_bin_report}") 

+

253 

+

254 for b in selected_bins: 

+

255 b.contigs = {index_to_contig[c_index] for c_index in b.contigs} 

+

256 

+

257 io.write_bin_info(selected_bins, final_bin_report) 

+

258 

+

259 io.write_bins_fasta(selected_bins, contigs_fasta, outdir_final_bin_set) 

+

260 

+

261 return selected_bins 

+

262 

+

263 

+

264 

+

265def log_selected_bin_info(selected_bins: List[bin_manager.Bin], hq_min_completeness: float, hq_max_conta: float): 

+

266 """ 

+

267 Log information about selected bins based on quality thresholds. 

+

268 

+

269 :param selected_bins: List of Bin objects to analyze. 

+

270 :param hq_min_completeness: Minimum completeness threshold for high-quality bins. 

+

271 :param hq_max_conta: Maximum contamination threshold for high-quality bins. 

+

272 

+

273 This function logs information about selected bins that meet specified quality thresholds. 

+

274 It counts the number of high-quality bins based on completeness and contamination values. 

+

275 """ 

+

276 

+

277 # Log completeness and contamination in debug log 

+

278 logging.debug("High quality bins:") 

+

279 for sb in selected_bins: 

+

280 if sb.completeness >= hq_min_completeness and sb.contamination <= hq_max_conta: 

+

281 logging.debug(f"> {sb} completeness={sb.completeness}, contamination={sb.contamination}") 

+

282 

+

283 # Count high-quality bins and single-contig high-quality bins 

+

284 hq_bins = len([sb for sb in selected_bins if sb.completeness >= hq_min_completeness and sb.contamination <= hq_max_conta]) 

+

285 

+

286 # Log information about high-quality bins 

+

287 thresholds = f"(completeness >= {hq_min_completeness} and contamination <= {hq_max_conta})" 

+

288 logging.info(f"{hq_bins}/{len(selected_bins)} selected bins have a high quality {thresholds}.") 

+

289 

+

290 

+

291def main(): 

+

292 "Orchestrate the execution of the program" 

+

293 

+

294 args = parse_arguments(sys.argv[1:]) # sys.argv is passed in order to be able to test the function parse_arguments 

+

295 

+

296 init_logging(args.verbose, args.debug) 

+

297 

+

298 # High quality threshold used just to log number of high quality bins. 

+

299 hq_max_conta = 5 

+

300 hq_min_completeness = 90 

+

301 

+

302 # Temporary files # 

+

303 out_tmp_dir = os.path.join(args.outdir, "temporary_files") 

+

304 os.makedirs(out_tmp_dir, exist_ok=True) 

+

305 

+

306 faa_file = os.path.join(out_tmp_dir, "assembly_proteins.faa") 

+

307 diamond_result_file = os.path.join(out_tmp_dir, "diamond_result.tsv") 

+

308 

+

309 # Output files # 

+

310 final_bin_report = os.path.join(args.outdir, "final_bins_quality_reports.tsv") 

+

311 

+

312 

+

313 if args.resume: 

+

314 io.check_resume_file(faa_file, diamond_result_file) 

+

315 

+

316 bin_set_name_to_bins, original_bins, contigs_in_bins, contig_to_length = parse_input_files(args.bin_dirs, args.contig2bin_tables, args.contigs) 

+

317 

+

318 contig_to_kegg_counter, contig_to_genes = manage_protein_alignement(faa_file=faa_file, contigs_fasta=args.contigs, contig_to_length=contig_to_length, 

+

319 contigs_in_bins=contigs_in_bins, 

+

320 diamond_result_file=diamond_result_file, checkm2_db=args.checkm2_db, 

+

321 threads=args.threads, resume=args.resume, low_mem=args.low_mem) 

+

322 

+

323 # Use contig index instead of contig name to save memory 

+

324 contig_to_index, index_to_contig = contig_manager.make_contig_index(contigs_in_bins) 

+

325 

+

326 contig_to_kegg_counter = contig_manager.apply_contig_index(contig_to_index, contig_to_kegg_counter) 

+

327 contig_to_genes = contig_manager.apply_contig_index(contig_to_index, contig_to_genes) 

+

328 contig_to_length = contig_manager.apply_contig_index(contig_to_index, contig_to_length) 

+

329 

+

330 bin_manager.rename_bin_contigs(original_bins, contig_to_index) 

+

331 

+

332 

+

333 # Extract cds metadata ## 

+

334 logging.info("Compute cds metadata.") 

+

335 contig_metadat = cds.get_contig_cds_metadata(contig_to_genes, args.threads) 

+

336 

+

337 contig_metadat["contig_to_kegg_counter"] = contig_to_kegg_counter 

+

338 contig_metadat["contig_to_length"] = contig_to_length 

+

339 

+

340 

+

341 logging.info("Add size and assess quality of input bins") 

+

342 bin_quality.add_bin_metrics(original_bins, contig_metadat, args.contamination_weight, args.threads) 

+

343 

+

344 logging.info("Create intermediate bins:") 

+

345 new_bins = bin_manager.create_intermediate_bins(bin_set_name_to_bins) 

+

346 

+

347 logging.info("Assess quality for supplementary intermediate bins.") 

+

348 new_bins = bin_quality.add_bin_metrics(new_bins, contig_metadat, args.contamination_weight, args.threads) 

+

349 

+

350 

+

351 logging.info("Dereplicating input bins and new bins") 

+

352 all_bins = original_bins | new_bins 

+

353 

+

354 selected_bins = select_bins_and_write_them(all_bins, args.contigs, final_bin_report, args.min_completeness, index_to_contig, args.outdir, args.debug) 

+

355 

+

356 log_selected_bin_info(selected_bins, hq_min_completeness, hq_max_conta) 

+

357 

+

358 return 0 

+
+ + + diff --git a/d_0bf1b44744b37667_cds_py.html b/d_0bf1b44744b37667_cds_py.html new file mode 100644 index 0000000..0f48b3d --- /dev/null +++ b/d_0bf1b44744b37667_cds_py.html @@ -0,0 +1,243 @@ + + + + + Coverage for binette/cds.py: 0% + + + + + +
+
+

+ Coverage for binette/cds.py: + 0% +

+ +

+ 59 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+

1 

+

2import concurrent.futures as cf 

+

3import multiprocessing.pool 

+

4import logging 

+

5from collections import Counter, defaultdict 

+

6from typing import Dict, List, Iterator, Tuple 

+

7 

+

8import pyfastx 

+

9import pyrodigal 

+

10from tqdm import tqdm 

+

11 

+

12 

+

13def get_contig_from_cds_name(cds_name: str) -> str: 

+

14 """ 

+

15 Extract the contig name from a CDS name. 

+

16 

+

17 :param cds_name: The name of the CDS. 

+

18 :type cds_name: str 

+

19 :return: The name of the contig. 

+

20 :rtype: str 

+

21 """ 

+

22 return "_".join(cds_name.split("_")[:-1]) 

+

23 

+

24def predict(contigs_iterator: Iterator, outfaa: str, threads: int =1) -> Dict[str, List[str]]: 

+

25 """ 

+

26 Predict open reading frames with Pyrodigal. 

+

27 

+

28 :param contigs_iterator: An iterator of contig sequences. 

+

29 :param outfaa: The output file path for predicted protein sequences (in FASTA format). 

+

30 :param threads: Number of CPU threads to use (default is 1). 

+

31 

+

32 :return: A dictionary mapping contig names to predicted genes. 

+

33 """ 

+

34 try: 

+

35 # for version >=3 of pyrodigal 

+

36 orf_finder = pyrodigal.GeneFinder(meta="meta") 

+

37 except AttributeError: 

+

38 orf_finder = pyrodigal.OrfFinder(meta="meta") 

+

39 

+

40 logging.info(f"Predicting cds sequences with Pyrodigal using {threads} threads.") 

+

41 

+

42 with multiprocessing.pool.ThreadPool(processes=threads) as pool: 

+

43 contig_and_genes = pool.starmap(predict_genes, ((orf_finder.find_genes, seq) for seq in contigs_iterator)) 

+

44 

+

45 write_faa(outfaa, contig_and_genes) 

+

46 

+

47 

+

48 contig_to_genes = { 

+

49 contig_id: [gene.translate() for gene in pyrodigal_genes] 

+

50 for contig_id, pyrodigal_genes in contig_and_genes 

+

51 } 

+

52 

+

53 return contig_to_genes 

+

54 

+

55def predict_genes(find_genes, seq): 

+

56 

+

57 

+

58 return (seq.name, find_genes(seq.seq) ) 

+

59 

+

60 

+

61def write_faa(outfaa: str, contig_to_genes: Dict[str, List[str]]) -> None: 

+

62 """ 

+

63 Write predicted protein sequences to a FASTA file. 

+

64 

+

65 :param outfaa: The output file path for predicted protein sequences (in FASTA format). 

+

66 :param contig_to_genes: A dictionary mapping contig names to predicted genes. 

+

67 

+

68 """ 

+

69 logging.info("Writing predicted protein sequences.") 

+

70 with open(outfaa, "w") as fl: 

+

71 for contig_id, genes in contig_to_genes: 

+

72 genes.write_translations(fl, contig_id) 

+

73 

+

74def parse_faa_file(faa_file: str) -> Dict[str, List]: 

+

75 """ 

+

76 Parse a FASTA file containing protein sequences and organize them by contig. 

+

77 

+

78 :param faa_file: Path to the input FASTA file. 

+

79 :return: A dictionary mapping contig names to lists of protein sequences. 

+

80 """ 

+

81 contig_to_genes = defaultdict(list) 

+

82 for name, seq in pyfastx.Fastx(faa_file): 

+

83 contig = get_contig_from_cds_name(name) 

+

84 contig_to_genes[contig].append(seq) 

+

85 

+

86 return dict(contig_to_genes) 

+

87 

+

88def get_aa_composition(genes: List[str]) -> Counter: 

+

89 """ 

+

90 Calculate the amino acid composition of a list of protein sequences. 

+

91 

+

92 :param genes: A list of protein sequences. 

+

93 :return: A Counter object representing the amino acid composition. 

+

94 """ 

+

95 aa_counter = Counter() 

+

96 for gene in genes: 

+

97 aa_counter += Counter(gene) 

+

98 

+

99 return aa_counter 

+

100 

+

101def get_contig_cds_metadata_flat(contig_to_genes: Dict[str, List[str]]) -> Tuple[Dict[str, int], Dict[str, Counter], Dict[str, int]]: 

+

102 """ 

+

103 Calculate metadata for contigs, including CDS count, amino acid composition, and total amino acid length. 

+

104 

+

105 :param contig_to_genes: A dictionary mapping contig names to lists of protein sequences. 

+

106 :return: A tuple containing dictionaries for CDS count, amino acid composition, and total amino acid length. 

+

107 """ 

+

108 contig_to_cds_count = {contig: len(genes) for contig, genes in contig_to_genes.items()} 

+

109 

+

110 contig_to_aa_counter = {contig: get_aa_composition(genes) for contig, genes in tqdm(contig_to_genes.items(), unit="contig")} 

+

111 logging.info("Calculating amino acid composition.") 

+

112 

+

113 contig_to_aa_length = {contig: sum(counter.values()) for contig, counter in tqdm(contig_to_aa_counter.items(), unit="contig")} 

+

114 logging.info("Calculating total amino acid length.") 

+

115 

+

116 return contig_to_cds_count, contig_to_aa_counter, contig_to_aa_length 

+

117 

+

118def get_contig_cds_metadata(contig_to_genes: Dict[str, List[str]], threads: int) -> Tuple[Dict[str, int], Dict[str, Counter], Dict[str, int]]: 

+

119 """ 

+

120 Calculate metadata for contigs in parallel, including CDS count, amino acid composition, and total amino acid length. 

+

121 

+

122 :param contig_to_genes: A dictionary mapping contig names to lists of protein sequences. 

+

123 :param threads: Number of CPU threads to use. 

+

124 :return: A tuple containing dictionaries for CDS count, amino acid composition, and total amino acid length. 

+

125 """ 

+

126 contig_to_cds_count = {contig: len(genes) for contig, genes in contig_to_genes.items()} 

+

127 

+

128 contig_to_future = {} 

+

129 logging.info(f"Collecting contig amino acid composition using {threads} threads.") 

+

130 with cf.ProcessPoolExecutor(max_workers=threads) as tpe: 

+

131 for contig, genes in tqdm(contig_to_genes.items()): 

+

132 contig_to_future[contig] = tpe.submit(get_aa_composition, genes) 

+

133 

+

134 contig_to_aa_counter = {contig: future.result() for contig, future in tqdm(contig_to_future.items(), unit="contig")} 

+

135 logging.info("Calculating amino acid composition in parallel.") 

+

136 

+

137 contig_to_aa_length = {contig: sum(counter.values()) for contig, counter in tqdm(contig_to_aa_counter.items(), unit="contig")} 

+

138 logging.info("Calculating total amino acid length in parallel.") 

+

139 

+

140 contig_info = { 

+

141 "contig_to_cds_count": contig_to_cds_count, 

+

142 "contig_to_aa_counter": contig_to_aa_counter, 

+

143 "contig_to_aa_length": contig_to_aa_length, 

+

144 } 

+

145 

+

146 return contig_info 

+
+ + + diff --git a/d_0bf1b44744b37667_contig_manager_py.html b/d_0bf1b44744b37667_contig_manager_py.html new file mode 100644 index 0000000..7caee16 --- /dev/null +++ b/d_0bf1b44744b37667_contig_manager_py.html @@ -0,0 +1,136 @@ + + + + + Coverage for binette/contig_manager.py: 0% + + + + + +
+
+

+ Coverage for binette/contig_manager.py: + 0% +

+ +

+ 11 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+

1import pyfastx 

+

2from typing import Dict, Tuple 

+

3 

+

4 

+

5def parse_fasta_file(fasta_file: str) -> pyfastx.Fasta: 

+

6 """ 

+

7 Parse a FASTA file and return a pyfastx.Fasta object. 

+

8 

+

9 :param fasta_file: The path to the FASTA file. 

+

10 

+

11 :return: A pyfastx.Fasta object representing the parsed FASTA file. 

+

12 """ 

+

13 fa = pyfastx.Fasta(fasta_file, build_index=True) 

+

14 return fa 

+

15 

+

16 

+

17def make_contig_index(contigs: list) -> Tuple[Dict[str, int], Dict[int, str]]: 

+

18 """ 

+

19 Create an index mapping for contigs. 

+

20 

+

21 :param contigs: A list of contig names. 

+

22 

+

23 :return: A tuple containing the contig index mapping dictionaries (contig_to_index, index_to_contig). 

+

24 """ 

+

25 contig_to_index = {contig: index for index, contig in enumerate(contigs)} 

+

26 index_to_contig = {index: contig for contig, index in contig_to_index.items()} 

+

27 return contig_to_index, index_to_contig 

+

28 

+

29 

+

30def apply_contig_index(contig_to_index: Dict[str, int], contig_to_info: Dict[str, str]) -> Dict[int, str]: 

+

31 """ 

+

32 Apply the contig index mapping to the contig info dictionary. 

+

33 

+

34 :param contig_to_index: A dictionary mapping contig names to their corresponding index. 

+

35 :param contig_to_info: A dictionary mapping contig names to their associated information. 

+

36 

+

37 :return: A dictionary mapping contig indices to their associated information. 

+

38 """ 

+

39 return {contig_to_index[contig]: info for contig, info in contig_to_info.items()} 

+
+ + + diff --git a/d_0bf1b44744b37667_diamond_py.html b/d_0bf1b44744b37667_diamond_py.html new file mode 100644 index 0000000..5187535 --- /dev/null +++ b/d_0bf1b44744b37667_diamond_py.html @@ -0,0 +1,217 @@ + + + + + Coverage for binette/diamond.py: 0% + + + + + +
+
+

+ Coverage for binette/diamond.py: + 0% +

+ +

+ 47 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+

1import subprocess 

+

2import logging 

+

3import sys 

+

4import shutil 

+

5import re 

+

6import pandas as pd 

+

7from collections import Counter 

+

8 

+

9from checkm2 import keggData 

+

10 

+

11 

+

12def get_checkm2_db() -> str: 

+

13 """ 

+

14 Get the path to the CheckM2 database. 

+

15 

+

16 :return: The path to the CheckM2 database. 

+

17 """ 

+

18 if shutil.which("checkm2") is None: 

+

19 logging.error("Make sure checkm2 is on your system path.") 

+

20 sys.exit(1) 

+

21 

+

22 checkm2_database_raw = subprocess.run(["checkm2", "database", "--current"], text=True, stderr=subprocess.PIPE) 

+

23 

+

24 if checkm2_database_raw.returncode != 0: 

+

25 logging.error(f"Something went wrong with checkm2:\n=======\n{checkm2_database_raw.stderr}========") 

+

26 sys.exit(1) 

+

27 

+

28 reg_result = re.search("INFO: (/.*.dmnd)", checkm2_database_raw.stderr) 

+

29 

+

30 try: 

+

31 db_path = reg_result.group(1) 

+

32 except AttributeError: 

+

33 logging.error(f"Something went wrong when retrieving checkm2 db path:\n{checkm2_database_raw.stderr}") 

+

34 sys.exit(1) 

+

35 

+

36 return db_path 

+

37 

+

38 

+

39 

+

40def check_tool_exists(tool_name: str): 

+

41 """ 

+

42 Check if a specified tool is on the system's PATH. 

+

43 

+

44 :param tool_name: The name of the tool to check for. 

+

45 :type tool_name: str 

+

46 :raises FileNotFoundError: If the tool is not found on the system's PATH. 

+

47 """ 

+

48 if shutil.which(tool_name) is None: 

+

49 raise FileNotFoundError(f"The '{tool_name}' tool is not found on your system PATH.") 

+

50 

+

51 

+

52def run( 

+

53 faa_file: str, output: str, db: str, log: str, threads: int = 1, query_cover: int = 80, subject_cover: int = 80, 

+

54 percent_id: int = 30, evalue: float = 1e-05, low_mem: bool = False): 

+

55 """ 

+

56 Run Diamond with specified parameters. 

+

57 

+

58 :param faa_file: Path to the input protein sequence file (FASTA format). 

+

59 :param output: Path to the Diamond output file. 

+

60 :param db: Path to the Diamond database. 

+

61 :param log: Path to the log file. 

+

62 :param threads: Number of CPU threads to use (default is 1). 

+

63 :param query_cover: Minimum query coverage percentage (default is 80). 

+

64 :param subject_cover: Minimum subject coverage percentage (default is 80). 

+

65 :param percent_id: Minimum percent identity (default is 30). 

+

66 :param evalue: Maximum e-value threshold (default is 1e-05). 

+

67 :param low_mem: Use low memory mode if True (default is False). 

+

68 """ 

+

69 check_tool_exists("diamond") 

+

70 

+

71 blocksize = 0.5 if low_mem else 2 

+

72 

+

73 cmd = ( 

+

74 "diamond blastp --outfmt 6 --max-target-seqs 1 " 

+

75 f"--query {faa_file} " 

+

76 f"-o {output} " 

+

77 f"--threads {threads} " 

+

78 f"--db {db} " 

+

79 f"--query-cover {query_cover} " 

+

80 f"--subject-cover {subject_cover} " 

+

81 f"--id {percent_id} " 

+

82 f"--evalue {evalue} --block-size {blocksize} 2> {log}" 

+

83 ) 

+

84 

+

85 logging.info("Running diamond") 

+

86 logging.info(cmd) 

+

87 

+

88 run = subprocess.run(cmd, shell=True) 

+

89 

+

90 if run.returncode != 0: 

+

91 logging.error(f"An error occurred while running DIAMOND. Check log file: {log}") 

+

92 sys.exit(1) 

+

93 

+

94 logging.info("Finished Running DIAMOND") 

+

95 

+

96def get_contig_to_kegg_id(diamond_result_file: str) -> dict: 

+

97 """ 

+

98 Get a dictionary mapping contig IDs to KEGG annotations from a Diamond result file. 

+

99 

+

100 :param diamond_result_file: Path to the Diamond result file. 

+

101 :return: A dictionary mapping contig IDs to KEGG annotations. 

+

102 """ 

+

103 diamon_results_df = pd.read_csv(diamond_result_file, sep="\t", usecols=[0, 1], names=["ProteinID", "annotation"]) 

+

104 diamon_results_df[["Ref100_hit", "Kegg_annotation"]] = diamon_results_df["annotation"].str.split( 

+

105 "~", n=1, expand=True 

+

106 ) 

+

107 

+

108 KeggCalc = keggData.KeggCalculator() 

+

109 defaultKOs = KeggCalc.return_default_values_from_category("KO_Genes") 

+

110 

+

111 diamon_results_df = diamon_results_df.loc[diamon_results_df["Kegg_annotation"].isin(defaultKOs.keys())] 

+

112 diamon_results_df["contig"] = diamon_results_df["ProteinID"].str.split("_", n=-1).str[:-1].str.join("_") 

+

113 

+

114 contig_to_kegg_counter = ( 

+

115 diamon_results_df.groupby("contig").agg({"Kegg_annotation": Counter}).reset_index() 

+

116 ) 

+

117 

+

118 contig_to_kegg_counter = dict(zip(contig_to_kegg_counter["contig"], contig_to_kegg_counter["Kegg_annotation"])) 

+

119 

+

120 return contig_to_kegg_counter 

+
+ + + diff --git a/d_0bf1b44744b37667_io_manager_py.html b/d_0bf1b44744b37667_io_manager_py.html new file mode 100644 index 0000000..26ac87b --- /dev/null +++ b/d_0bf1b44744b37667_io_manager_py.html @@ -0,0 +1,233 @@ + + + + + Coverage for binette/io_manager.py: 0% + + + + + +
+
+

+ Coverage for binette/io_manager.py: + 0% +

+ +

+ 55 statements   + + + +

+

+ « prev     + ^ index     + » next +       + coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+ +
+
+
+

1import logging 

+

2import os 

+

3import pyfastx 

+

4from typing import List, Dict 

+

5import csv 

+

6 

+

7from binette.bin_manager import Bin 

+

8 

+

9 

+

10def infer_bin_name_from_bin_inputs(input_bins: List[str]) -> Dict[str, str]: 

+

11 """ 

+

12 Infer bin names from a list of bin input directories. 

+

13 

+

14 :param input_bins: List of input bin directories. 

+

15 :return: Dictionary mapping inferred bin names to their corresponding directories. 

+

16 """ 

+

17 logging.debug(f"Inferring bin names from input bins:") 

+

18 

+

19 commonprefix_len = len(os.path.commonprefix(input_bins)) 

+

20 reversed_strings = [s[::-1] for s in input_bins] 

+

21 commonsufix_len = len(os.path.commonprefix(reversed_strings)) 

+

22 

+

23 bin_name_to_bin_dir = {d[commonprefix_len: len(d) - commonsufix_len]: d for d in input_bins} 

+

24 

+

25 logging.debug(f"Input bins: {' '.join(input_bins)}") 

+

26 logging.debug(f"Common prefix to remove: {os.path.commonprefix(reversed_strings)[::-1]}") 

+

27 logging.debug(f"Common suffix to remove: {os.path.commonprefix(input_bins)}") 

+

28 logging.debug(f"bin_name_to_bin_dir: {bin_name_to_bin_dir}") 

+

29 

+

30 return bin_name_to_bin_dir 

+

31 

+

32 

+

33def write_bin_info(bins: List[Bin], output: str, add_contigs: bool = False): 

+

34 """ 

+

35 Write bin information to a TSV file. 

+

36 

+

37 :param bins: List of Bin objects. 

+

38 :param output: Output file path for writing the TSV. 

+

39 :param add_contigs: Flag indicating whether to include contig information. 

+

40 """ 

+

41 

+

42 header = ["bin_id", "origin", "name", "completeness", "contamination", "score", "size", "N50", "contig_count"] 

+

43 if add_contigs: 

+

44 header.append('contigs') 

+

45 

+

46 bin_infos = [] 

+

47 for bin_obj in sorted(bins, key=lambda x: (x.score, x.N50, -x.id), reverse=True): 

+

48 bin_info = [ 

+

49 bin_obj.id, 

+

50 bin_obj.origin, 

+

51 bin_obj.name, 

+

52 bin_obj.completeness, 

+

53 bin_obj.contamination, 

+

54 bin_obj.score, 

+

55 bin_obj.length, 

+

56 bin_obj.N50, 

+

57 len(bin_obj.contigs), 

+

58 ] 

+

59 if add_contigs: 

+

60 bin_info.append(";".join(str(c) for c in bin_obj.contigs) if add_contigs else "") 

+

61 

+

62 bin_infos.append(bin_info) 

+

63 

+

64 with open(output, "w", newline="") as fl: 

+

65 writer = csv.writer(fl, delimiter="\t") 

+

66 writer.writerow(header) 

+

67 writer.writerows(bin_infos) 

+

68 

+

69 

+

70def write_bins_fasta(selected_bins: List[Bin], contigs_fasta: str, outdir: str): 

+

71 """ 

+

72 Write selected bins' contigs to separate FASTA files. 

+

73 

+

74 :param selected_bins: List of Bin objects representing the selected bins. 

+

75 :param contigs_fasta: Path to the input FASTA file containing contig sequences. 

+

76 :param outdir: Output directory to save the individual bin FASTA files. 

+

77 """ 

+

78 

+

79 fa = pyfastx.Fasta(contigs_fasta, build_index=True) 

+

80 

+

81 for sbin in selected_bins: 

+

82 outfile = os.path.join(outdir, f"bin_{sbin.id}.fa") 

+

83 

+

84 with open(outfile, "w") as outfl: 

+

85 sequences = (f">{c}\n{fa[c]}" for c in sbin.contigs) 

+

86 outfl.write("\n".join(sequences) + "\n") 

+

87 

+

88 

+

89def check_contig_consistency(contigs_from_assembly: List[str], 

+

90 contigs_from_elsewhere: List[str], 

+

91 assembly_file: str, 

+

92 elsewhere_file: str ): 

+

93 """ 

+

94 Check the consistency of contig names between different sources. 

+

95 

+

96 :param contigs_from_assembly: List of contig names from the assembly file. 

+

97 :param contigs_from_elsewhere: List of contig names from an external source. 

+

98 :param assembly_file: Path to the assembly file. 

+

99 :param elsewhere_file: Path to the file from an external source. 

+

100 :raises AssertionError: If inconsistencies in contig names are found. 

+

101 """ 

+

102 logging.debug("check_contig_consistency.") 

+

103 are_contigs_consistent = len(set(contigs_from_elsewhere) | set(contigs_from_assembly)) <= len( 

+

104 set(contigs_from_assembly) 

+

105 ) 

+

106 

+

107 issue_countigs = len(set(contigs_from_elsewhere) - set(contigs_from_assembly)) 

+

108 

+

109 message = f"{issue_countigs} contigs found in file {elsewhere_file} \ 

+

110 were not found in assembly_file ({assembly_file})." 

+

111 assert are_contigs_consistent, message 

+

112 

+

113 

+

114def check_resume_file(faa_file: str, diamond_result_file: str) -> None: 

+

115 """ 

+

116 Check the existence of files required for resuming the process. 

+

117 

+

118 :param faa_file: Path to the protein file. 

+

119 :param diamond_result_file: Path to the Diamond result file. 

+

120 :raises FileNotFoundError: If the required files don't exist for resuming. 

+

121 """ 

+

122 

+

123 if os.path.isfile(faa_file) and os.path.isfile(diamond_result_file): 

+

124 return 

+

125 

+

126 if not os.path.isfile(faa_file): 

+

127 error_msg = f"Protein file '{faa_file}' does not exist. Resuming is not possible." 

+

128 logging.error(error_msg) 

+

129 raise FileNotFoundError(error_msg) 

+

130 

+

131 if not os.path.isfile(diamond_result_file): 

+

132 error_msg = f"Diamond result file '{diamond_result_file}' does not exist. Resuming is not possible." 

+

133 logging.error(error_msg) 

+

134 raise FileNotFoundError(error_msg) 

+

135 

+

136 

+
+ + + diff --git a/favicon_32.png b/favicon_32.png new file mode 100644 index 0000000..8649f04 Binary files /dev/null and b/favicon_32.png differ diff --git a/index.html b/index.html new file mode 100644 index 0000000..0a8c3d6 --- /dev/null +++ b/index.html @@ -0,0 +1,151 @@ + + + + + Coverage report + + + + + +
+
+

Coverage report: + 0% +

+ +
+ +
+

+ coverage.py v7.3.2, + created at 2023-12-04 22:59 +0000 +

+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Modulestatementsmissingexcludedcoverage
binette/__init__.py000100%
binette/bin_manager.py18418400%
binette/bin_quality.py11211200%
binette/binette.py13413400%
binette/cds.py595900%
binette/contig_manager.py111100%
binette/diamond.py474700%
binette/io_manager.py555500%
Total60260200%
+

+ No items found using the specified filter. +

+
+ + + diff --git a/keybd_closed.png b/keybd_closed.png new file mode 100644 index 0000000..ba119c4 Binary files /dev/null and b/keybd_closed.png differ diff --git a/keybd_open.png b/keybd_open.png new file mode 100644 index 0000000..a8bac6c Binary files /dev/null and b/keybd_open.png differ diff --git a/status.json b/status.json new file mode 100644 index 0000000..e7651d9 --- /dev/null +++ b/status.json @@ -0,0 +1 @@ +{"format":2,"version":"7.3.2","globals":"fdc2064e7b95d3d73e1d833c22b501e2","files":{"d_0bf1b44744b37667___init___py":{"hash":"8f96e0487d400bed7b4c91e861310773","index":{"nums":[0,1,0,0,0,0,0,0],"html_filename":"d_0bf1b44744b37667___init___py.html","relative_filename":"binette/__init__.py"}},"d_0bf1b44744b37667_bin_manager_py":{"hash":"cc56eba74b68935c0ef076ccc0599976","index":{"nums":[0,1,184,0,184,0,0,0],"html_filename":"d_0bf1b44744b37667_bin_manager_py.html","relative_filename":"binette/bin_manager.py"}},"d_0bf1b44744b37667_bin_quality_py":{"hash":"a39fdb4ba1ac72e49d33a76aaf0cc366","index":{"nums":[0,1,112,0,112,0,0,0],"html_filename":"d_0bf1b44744b37667_bin_quality_py.html","relative_filename":"binette/bin_quality.py"}},"d_0bf1b44744b37667_binette_py":{"hash":"af19de72e3d4d07a391c272057daec33","index":{"nums":[0,1,134,0,134,0,0,0],"html_filename":"d_0bf1b44744b37667_binette_py.html","relative_filename":"binette/binette.py"}},"d_0bf1b44744b37667_cds_py":{"hash":"af5c7c3f47a47701660554bd995bb0f3","index":{"nums":[0,1,59,0,59,0,0,0],"html_filename":"d_0bf1b44744b37667_cds_py.html","relative_filename":"binette/cds.py"}},"d_0bf1b44744b37667_contig_manager_py":{"hash":"c79a1387268f2146c8d274286ee3b2ff","index":{"nums":[0,1,11,0,11,0,0,0],"html_filename":"d_0bf1b44744b37667_contig_manager_py.html","relative_filename":"binette/contig_manager.py"}},"d_0bf1b44744b37667_diamond_py":{"hash":"2d38602a51b08302ce1a0f74e0b6880b","index":{"nums":[0,1,47,0,47,0,0,0],"html_filename":"d_0bf1b44744b37667_diamond_py.html","relative_filename":"binette/diamond.py"}},"d_0bf1b44744b37667_io_manager_py":{"hash":"eb5aac92e0166d28212dfde27c15df3c","index":{"nums":[0,1,55,0,55,0,0,0],"html_filename":"d_0bf1b44744b37667_io_manager_py.html","relative_filename":"binette/io_manager.py"}}}} \ No newline at end of file diff --git a/style.css b/style.css new file mode 100644 index 0000000..11b24c4 --- /dev/null +++ b/style.css @@ -0,0 +1,309 @@ +@charset "UTF-8"; +/* Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 */ +/* For details: https://github.com/nedbat/coveragepy/blob/master/NOTICE.txt */ +/* Don't edit this .css file. Edit the .scss file instead! */ +html, body, h1, h2, h3, p, table, td, th { margin: 0; padding: 0; border: 0; font-weight: inherit; font-style: inherit; font-size: 100%; font-family: inherit; vertical-align: baseline; } + +body { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; font-size: 1em; background: #fff; color: #000; } + +@media (prefers-color-scheme: dark) { body { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { body { color: #eee; } } + +html > body { font-size: 16px; } + +a:active, a:focus { outline: 2px dashed #007acc; } + +p { font-size: .875em; line-height: 1.4em; } + +table { border-collapse: collapse; } + +td { vertical-align: top; } + +table tr.hidden { display: none !important; } + +p#no_rows { display: none; font-size: 1.2em; } + +a.nav { text-decoration: none; color: inherit; } + +a.nav:hover { text-decoration: underline; color: inherit; } + +.hidden { display: none; } + +header { background: #f8f8f8; width: 100%; z-index: 2; border-bottom: 1px solid #ccc; } + +@media (prefers-color-scheme: dark) { header { background: black; } } + +@media (prefers-color-scheme: dark) { header { border-color: #333; } } + +header .content { padding: 1rem 3.5rem; } + +header h2 { margin-top: .5em; font-size: 1em; } + +header p.text { margin: .5em 0 -.5em; color: #666; font-style: italic; } + +@media (prefers-color-scheme: dark) { header p.text { color: #aaa; } } + +header.sticky { position: fixed; left: 0; right: 0; height: 2.5em; } + +header.sticky .text { display: none; } + +header.sticky h1, header.sticky h2 { font-size: 1em; margin-top: 0; display: inline-block; } + +header.sticky .content { padding: 0.5rem 3.5rem; } + +header.sticky .content p { font-size: 1em; } + +header.sticky ~ #source { padding-top: 6.5em; } + +main { position: relative; z-index: 1; } + +footer { margin: 1rem 3.5rem; } + +footer .content { padding: 0; color: #666; font-style: italic; } + +@media (prefers-color-scheme: dark) { footer .content { color: #aaa; } } + +#index { margin: 1rem 0 0 3.5rem; } + +h1 { font-size: 1.25em; display: inline-block; } + +#filter_container { float: right; margin: 0 2em 0 0; } + +#filter_container input { width: 10em; padding: 0.2em 0.5em; border: 2px solid #ccc; background: #fff; color: #000; } + +@media (prefers-color-scheme: dark) { #filter_container input { border-color: #444; } } + +@media (prefers-color-scheme: dark) { #filter_container input { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { #filter_container input { color: #eee; } } + +#filter_container input:focus { border-color: #007acc; } + +header button { font-family: inherit; font-size: inherit; border: 1px solid; border-radius: .2em; color: inherit; padding: .1em .5em; margin: 1px calc(.1em + 1px); cursor: pointer; border-color: #ccc; } + +@media (prefers-color-scheme: dark) { header button { border-color: #444; } } + +header button:active, header button:focus { outline: 2px dashed #007acc; } + +header button.run { background: #eeffee; } + +@media (prefers-color-scheme: dark) { header button.run { background: #373d29; } } + +header button.run.show_run { background: #dfd; border: 2px solid #00dd00; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.run.show_run { background: #373d29; } } + +header button.mis { background: #ffeeee; } + +@media (prefers-color-scheme: dark) { header button.mis { background: #4b1818; } } + +header button.mis.show_mis { background: #fdd; border: 2px solid #ff0000; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.mis.show_mis { background: #4b1818; } } + +header button.exc { background: #f7f7f7; } + +@media (prefers-color-scheme: dark) { header button.exc { background: #333; } } + +header button.exc.show_exc { background: #eee; border: 2px solid #808080; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.exc.show_exc { background: #333; } } + +header button.par { background: #ffffd5; } + +@media (prefers-color-scheme: dark) { header button.par { background: #650; } } + +header button.par.show_par { background: #ffa; border: 2px solid #bbbb00; margin: 0 .1em; } + +@media (prefers-color-scheme: dark) { header button.par.show_par { background: #650; } } + +#help_panel, #source p .annotate.long { display: none; position: absolute; z-index: 999; background: #ffffcc; border: 1px solid #888; border-radius: .2em; color: #333; padding: .25em .5em; } + +#source p .annotate.long { white-space: normal; float: right; top: 1.75em; right: 1em; height: auto; } + +#help_panel_wrapper { float: right; position: relative; } + +#keyboard_icon { margin: 5px; } + +#help_panel_state { display: none; } + +#help_panel { top: 25px; right: 0; padding: .75em; border: 1px solid #883; color: #333; } + +#help_panel .keyhelp p { margin-top: .75em; } + +#help_panel .legend { font-style: italic; margin-bottom: 1em; } + +.indexfile #help_panel { width: 25em; } + +.pyfile #help_panel { width: 18em; } + +#help_panel_state:checked ~ #help_panel { display: block; } + +kbd { border: 1px solid black; border-color: #888 #333 #333 #888; padding: .1em .35em; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-weight: bold; background: #eee; border-radius: 3px; } + +#source { padding: 1em 0 1em 3.5rem; font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; } + +#source p { position: relative; white-space: pre; } + +#source p * { box-sizing: border-box; } + +#source p .n { float: left; text-align: right; width: 3.5rem; box-sizing: border-box; margin-left: -3.5rem; padding-right: 1em; color: #999; } + +@media (prefers-color-scheme: dark) { #source p .n { color: #777; } } + +#source p .n.highlight { background: #ffdd00; } + +#source p .n a { margin-top: -4em; padding-top: 4em; text-decoration: none; color: #999; } + +@media (prefers-color-scheme: dark) { #source p .n a { color: #777; } } + +#source p .n a:hover { text-decoration: underline; color: #999; } + +@media (prefers-color-scheme: dark) { #source p .n a:hover { color: #777; } } + +#source p .t { display: inline-block; width: 100%; box-sizing: border-box; margin-left: -.5em; padding-left: 0.3em; border-left: 0.2em solid #fff; } + +@media (prefers-color-scheme: dark) { #source p .t { border-color: #1e1e1e; } } + +#source p .t:hover { background: #f2f2f2; } + +@media (prefers-color-scheme: dark) { #source p .t:hover { background: #282828; } } + +#source p .t:hover ~ .r .annotate.long { display: block; } + +#source p .t .com { color: #008000; font-style: italic; line-height: 1px; } + +@media (prefers-color-scheme: dark) { #source p .t .com { color: #6a9955; } } + +#source p .t .key { font-weight: bold; line-height: 1px; } + +#source p .t .str { color: #0451a5; } + +@media (prefers-color-scheme: dark) { #source p .t .str { color: #9cdcfe; } } + +#source p.mis .t { border-left: 0.2em solid #ff0000; } + +#source p.mis.show_mis .t { background: #fdd; } + +@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t { background: #4b1818; } } + +#source p.mis.show_mis .t:hover { background: #f2d2d2; } + +@media (prefers-color-scheme: dark) { #source p.mis.show_mis .t:hover { background: #532323; } } + +#source p.run .t { border-left: 0.2em solid #00dd00; } + +#source p.run.show_run .t { background: #dfd; } + +@media (prefers-color-scheme: dark) { #source p.run.show_run .t { background: #373d29; } } + +#source p.run.show_run .t:hover { background: #d2f2d2; } + +@media (prefers-color-scheme: dark) { #source p.run.show_run .t:hover { background: #404633; } } + +#source p.exc .t { border-left: 0.2em solid #808080; } + +#source p.exc.show_exc .t { background: #eee; } + +@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t { background: #333; } } + +#source p.exc.show_exc .t:hover { background: #e2e2e2; } + +@media (prefers-color-scheme: dark) { #source p.exc.show_exc .t:hover { background: #3c3c3c; } } + +#source p.par .t { border-left: 0.2em solid #bbbb00; } + +#source p.par.show_par .t { background: #ffa; } + +@media (prefers-color-scheme: dark) { #source p.par.show_par .t { background: #650; } } + +#source p.par.show_par .t:hover { background: #f2f2a2; } + +@media (prefers-color-scheme: dark) { #source p.par.show_par .t:hover { background: #6d5d0c; } } + +#source p .r { position: absolute; top: 0; right: 2.5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; } + +#source p .annotate { font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; color: #666; padding-right: .5em; } + +@media (prefers-color-scheme: dark) { #source p .annotate { color: #ddd; } } + +#source p .annotate.short:hover ~ .long { display: block; } + +#source p .annotate.long { width: 30em; right: 2.5em; } + +#source p input { display: none; } + +#source p input ~ .r label.ctx { cursor: pointer; border-radius: .25em; } + +#source p input ~ .r label.ctx::before { content: "▶ "; } + +#source p input ~ .r label.ctx:hover { background: #e8f4ff; color: #666; } + +@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { background: #0f3a42; } } + +@media (prefers-color-scheme: dark) { #source p input ~ .r label.ctx:hover { color: #aaa; } } + +#source p input:checked ~ .r label.ctx { background: #d0e8ff; color: #666; border-radius: .75em .75em 0 0; padding: 0 .5em; margin: -.25em 0; } + +@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { background: #056; } } + +@media (prefers-color-scheme: dark) { #source p input:checked ~ .r label.ctx { color: #aaa; } } + +#source p input:checked ~ .r label.ctx::before { content: "▼ "; } + +#source p input:checked ~ .ctxs { padding: .25em .5em; overflow-y: scroll; max-height: 10.5em; } + +#source p label.ctx { color: #999; display: inline-block; padding: 0 .5em; font-size: .8333em; } + +@media (prefers-color-scheme: dark) { #source p label.ctx { color: #777; } } + +#source p .ctxs { display: block; max-height: 0; overflow-y: hidden; transition: all .2s; padding: 0 .5em; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Ubuntu, Cantarell, "Helvetica Neue", sans-serif; white-space: nowrap; background: #d0e8ff; border-radius: .25em; margin-right: 1.75em; text-align: right; } + +@media (prefers-color-scheme: dark) { #source p .ctxs { background: #056; } } + +#index { font-family: SFMono-Regular, Menlo, Monaco, Consolas, monospace; font-size: 0.875em; } + +#index table.index { margin-left: -.5em; } + +#index td, #index th { text-align: right; width: 5em; padding: .25em .5em; border-bottom: 1px solid #eee; } + +@media (prefers-color-scheme: dark) { #index td, #index th { border-color: #333; } } + +#index td.name, #index th.name { text-align: left; width: auto; } + +#index th { font-style: italic; color: #333; cursor: pointer; } + +@media (prefers-color-scheme: dark) { #index th { color: #ddd; } } + +#index th:hover { background: #eee; } + +@media (prefers-color-scheme: dark) { #index th:hover { background: #333; } } + +#index th[aria-sort="ascending"], #index th[aria-sort="descending"] { white-space: nowrap; background: #eee; padding-left: .5em; } + +@media (prefers-color-scheme: dark) { #index th[aria-sort="ascending"], #index th[aria-sort="descending"] { background: #333; } } + +#index th[aria-sort="ascending"]::after { font-family: sans-serif; content: " ↑"; } + +#index th[aria-sort="descending"]::after { font-family: sans-serif; content: " ↓"; } + +#index td.name a { text-decoration: none; color: inherit; } + +#index tr.total td, #index tr.total_dynamic td { font-weight: bold; border-top: 1px solid #ccc; border-bottom: none; } + +#index tr.file:hover { background: #eee; } + +@media (prefers-color-scheme: dark) { #index tr.file:hover { background: #333; } } + +#index tr.file:hover td.name { text-decoration: underline; color: inherit; } + +#scroll_marker { position: fixed; z-index: 3; right: 0; top: 0; width: 16px; height: 100%; background: #fff; border-left: 1px solid #eee; will-change: transform; } + +@media (prefers-color-scheme: dark) { #scroll_marker { background: #1e1e1e; } } + +@media (prefers-color-scheme: dark) { #scroll_marker { border-color: #333; } } + +#scroll_marker .marker { background: #ccc; position: absolute; min-height: 3px; width: 100%; } + +@media (prefers-color-scheme: dark) { #scroll_marker .marker { background: #444; } }