diff --git a/.github/workflows/black_lint_test.yml b/.github/workflows/black_lint_test.yml index a0bdd5b..b7d6428 100644 --- a/.github/workflows/black_lint_test.yml +++ b/.github/workflows/black_lint_test.yml @@ -23,7 +23,7 @@ jobs: - name: Install spoc, black and pylint run: pip install . && pip install black==23.9.1 && pip install pylint==2.15.5 - name: Run black - run: black --check --extend-exclude . + run: black --check . - name: Run pylint run: pylint --fail-under=8 spoc/* tests/test_*.py - name: Run tests diff --git a/spoc/cli.py b/spoc/cli.py index 88a0d1c..72f0a1a 100644 --- a/spoc/cli.py +++ b/spoc/cli.py @@ -29,7 +29,7 @@ def expand(fragments_path, expanded_contacts_path, n_fragments): fragments_path (str): Path to the labelled fragments file. expanded_contacts_path (str): Path to the output contacts file. n_fragments (int, optional): Number of fragments per read to expand. Defaults to 3. - + """ expander = FragmentExpander(number_fragments=n_fragments) file_manager = FileManager() @@ -49,7 +49,7 @@ def annotate(fragments_path, label_library_path, labelled_fragments_path): fragments_path (str): Path to the input fragments file. label_library_path (str): Path to the label library file. labelled_fragments_path (str): Path to the output labelled fragments file. - + """ file_manager = FileManager() label_library = file_manager.load_label_library(label_library_path) @@ -71,13 +71,13 @@ def bin_contacts( same_chromosome, ): """Script for binning contacts - + Args: contact_path (str): Path to the input contact file. pixel_path (str): Path to the output pixel file. bin_size (int, optional): Size of the bins. Defaults to 10000. same_chromosome (bool, optional): Only bin contacts on the same chromosome. Defaults to False. - + """ # load data from disk file_manager = FileManager(use_dask=True) @@ -99,7 +99,7 @@ def merge(): @click.option("-o", "--output", help="output path") def merge_contacts(contact_paths, output): """Functionality to merge annotated fragments - + Args: contact_paths (tuple): Paths to the input contact files. output (str, optional): Path to the output merged contact file. diff --git a/spoc/contacts.py b/spoc/contacts.py index d7b056e..592a1ed 100644 --- a/spoc/contacts.py +++ b/spoc/contacts.py @@ -12,7 +12,7 @@ class Contacts: """N-way genomic contacts - + Args: contact_frame (DataFrame): DataFrame containing the contact data. number_fragments (int, optional): Number of fragments. Defaults to None. @@ -20,7 +20,7 @@ class Contacts: label_sorted (bool, optional): Whether the labels are sorted. Defaults to False. binary_labels_equal (bool, optional): Whether the binary labels are equal. Defaults to False. symmetry_flipped (bool, optional): Whether the symmetry is flipped. Defaults to False. - + Attributes: contains_metadata (bool): Whether the contact data contains metadata. number_fragments (int): Number of fragments. @@ -107,7 +107,7 @@ def data(self): @data.setter def data(self, contact_frame): """Sets the contact data - + Args: contact_frame (DataFrame): DataFrame containing the contact data. """ @@ -123,7 +123,7 @@ class ContactManipulator: def merge_contacts(self, merge_list: List[Contacts]) -> Contacts: """Merge contacts - + Args: merge_list (List[Contacts]): List of Contacts objects to merge. @@ -274,7 +274,7 @@ def _flip_labelled_contacts( def sort_labels(self, contacts: Contacts) -> Contacts: """Sorts labels in ascending, alphabetical order - + Args: contacts (Contacts): Contacts object to sort. @@ -367,7 +367,7 @@ def _generate_binary_label_mapping( def equate_binary_labels(self, contacts: Contacts) -> Contacts: """ Equate binary labels. - + Binary labels often only carry information about whether they happen between the same or different fragments. This method equates these labels be replacing all equivalent binary labels with @@ -423,14 +423,14 @@ def subset_on_metadata( self, contacts: Contacts, metadata_combi: List[str] ) -> Contacts: """Subset contacts based on metadata - + Args: contacts (Contacts): Contacts object to subset. metadata_combi (List[str]): List of metadata combinations to subset on. Returns: Contacts: Subsetted Contacts object. - + """ # check if metadata is present assert contacts.contains_metadata, "Contacts do not contain metadata!" @@ -462,14 +462,14 @@ def flip_symmetric_contacts( self, contacts: Contacts, sort_chromosomes: bool = False ) -> Contacts: """Flips contacts based on inherent symmetry - + Args: contacts (Contacts): Contacts object to flip symmetric contacts. sort_chromosomes (bool, optional): Whether to sort chromosomes. Defaults to False. Returns: Contacts: Contacts object with flipped symmetric contacts. - + """ if contacts.contains_metadata: if not contacts.label_sorted: diff --git a/spoc/dataframe_models.py b/spoc/dataframe_models.py index 6d851ae..4633d40 100644 --- a/spoc/dataframe_models.py +++ b/spoc/dataframe_models.py @@ -35,7 +35,7 @@ class ContactSchema: """Dynamic schema for N-way contacts - + Args: number_fragments (int, optional): Number of fragments. Defaults to 3. contains_metadata (bool, optional): Whether the contact data contains metadata. Defaults to True. @@ -103,7 +103,7 @@ def _expand_contact_fields( def validate_header(self, data_frame: DataFrame) -> None: """Validates only header, needed to validate that dask taskgraph can be built before evaluation. - + Args: data_frame (DataFrame): The DataFrame to validate. """ @@ -115,7 +115,7 @@ def validate_header(self, data_frame: DataFrame) -> None: def validate(self, data_frame: DataFrame) -> DataFrame: """Validate multiway contact dataframe - + Args: data_frame (DataFrame): The DataFrame to validate. """ @@ -125,7 +125,7 @@ def validate(self, data_frame: DataFrame) -> DataFrame: class PixelSchema: """Dynamic schema for N-way pixels - + Args: number_fragments (int, optional): Number of fragments. Defaults to 3. same_chromosome (bool, optional): Whether the fragments are on the same chromosome. Defaults to True. @@ -172,7 +172,7 @@ def _expand_contact_fields(self, expansions: Iterable = (1, 2, 3)) -> dict: def validate_header(self, data_frame: DataFrame) -> None: """Validates only header, needed to validate that dask taskgraph can be built before evaluation - + Args: data_frame (DataFrame): The DataFrame to validate. """ @@ -184,9 +184,9 @@ def validate_header(self, data_frame: DataFrame) -> None: def validate(self, data_frame: DataFrame) -> DataFrame: """Validate multiway contact dataframe - + Args: data_frame (DataFrame): The DataFrame to validate. - + """ return self._schema.validate(data_frame) diff --git a/spoc/fragments.py b/spoc/fragments.py index dba2eaf..af7ac62 100644 --- a/spoc/fragments.py +++ b/spoc/fragments.py @@ -13,7 +13,7 @@ class Fragments: """Genomic fragments that can be labelled or not. - + Args: fragment_frame (DataFrame): DataFrame containing the fragment data. """ @@ -25,7 +25,7 @@ def __init__(self, fragment_frame: DataFrame) -> None: @property def data(self) -> DataFrame: """Returns the underlying dataframe. - + Returns: DataFrame: Fragment data. """ @@ -34,7 +34,7 @@ def data(self) -> DataFrame: @property def contains_metadata(self) -> bool: """Returns whether the dataframe contains metadata. - + Returns: bool: Whether the fragment data contains metadata. """ @@ -43,7 +43,7 @@ def contains_metadata(self) -> bool: @property def is_dask(self) -> bool: """Returns whether the underlying dataframe is dask. - + Returns: bool: Whether the underlying dataframe is a dask dataframe. """ @@ -53,7 +53,7 @@ def is_dask(self) -> bool: # TODO: make generic such that label library can hold arbitrary information class FragmentAnnotator: """Responsible for annotating labels and sister identity of mapped read fragments. - + Args: label_library (Dict[str, bool]): Dictionary containing the label library. """ @@ -93,13 +93,13 @@ def annotate_fragments(self, fragments: Fragments) -> Fragments: """Takes fragment dataframe and returns a copy of it with its labelling state in a separate column with name `is_labelled`. If drop_uninformative is true, drops fragments that are not in label library. - + Args: fragments (Fragments): Fragments object containing the fragment data. Returns: Fragments: Fragments object with annotated fragment data. - + """ return Fragments( fragments.data.assign(is_labelled=self._assign_label_state) @@ -112,11 +112,11 @@ def annotate_fragments(self, fragments: Fragments) -> Fragments: class FragmentExpander: """Expands n-way fragments over sequencing reads to yield contacts. - + Args: number_fragments (int): Number of fragments. contains_metadata (bool, optional): Whether the fragment data contains metadata. Defaults to True. - + """ def __init__(self, number_fragments: int, contains_metadata: bool = True) -> None: @@ -162,7 +162,7 @@ def _expand_single_read( def expand(self, fragments: Fragments) -> Contacts: """expand contacts n-ways - + Args: fragments (Fragments): Fragments object containing the fragment data. diff --git a/spoc/io.py b/spoc/io.py index 3b07c8f..0a60203 100644 --- a/spoc/io.py +++ b/spoc/io.py @@ -20,7 +20,7 @@ class FileManager: """Is responsible for loading and writing files - + Args: use_dask (bool, optional): Whether to use Dask for reading Parquet files. Defaults to False. """ @@ -74,7 +74,7 @@ def _load_parquet_global_parameters(path: str) -> BaseModel: @staticmethod def write_label_library(path: str, data: Dict[str, bool]) -> None: """Writes label library to file - + Args: path (str): Path to write the file to. data (Dict[str, bool]): Label library data. @@ -88,7 +88,7 @@ def write_label_library(path: str, data: Dict[str, bool]) -> None: @staticmethod def load_label_library(path: str) -> Dict: """Load label library - + Args: path (str): Path to the label library file. @@ -107,7 +107,7 @@ def load_fragments(self, path: str) -> Fragments: Returns: Fragments: Fragments object containing the fragment data. - + """ data = self._parquet_reader_func(path) return Fragments(data) @@ -115,7 +115,7 @@ def load_fragments(self, path: str) -> Fragments: @staticmethod def write_fragments(path: str, fragments: Fragments) -> None: """Write annotated fragments - + Args: path (str): Path to write the file to. fragments (Fragments): Fragments object containing the fragment data. @@ -129,14 +129,14 @@ def write_fragments(path: str, fragments: Fragments) -> None: def write_multiway_contacts(self, path: str, contacts: Contacts) -> None: """Write multiway contacts - + Args: path (str): Path to write the file to. contacts (Contacts): Contacts object containing the contact data. Returns: None - + """ if contacts.is_dask: self._write_parquet_dask( @@ -151,7 +151,7 @@ def load_contacts( self, path: str, global_parameters: Optional[ContactsParameters] = None ) -> Contacts: """Load multiway contacts - + Args: path (str): Path to the contacts file. global_parameters (Optional[ContactsParameters], optional): Global parameters. Defaults to None. @@ -169,7 +169,7 @@ def load_contacts( @staticmethod def load_chromosome_sizes(path: str): """Load chromosome sizes - + Args: path (str): Path to the chromosome sizes file. @@ -200,7 +200,7 @@ def _load_pixel_metadata(path: str): @staticmethod def list_pixels(path: str): """List available pixels - + Args: path (str): Path to the pixel data. @@ -219,7 +219,7 @@ def load_pixels( """Loads specific pixels instance based on global parameters. load_dataframe specifies whether the dataframe should be loaded, or whether pixels should be instantiated based on the path alone. - + Args: path (str): Path to the pixel data. global_parameters (PixelParameters): Global parameters. @@ -227,8 +227,8 @@ def load_pixels( Returns: Pixels: Pixels object containing the pixel data. - - """ + + """ metadata = self._load_pixel_metadata(path) # find matching pixels for pixel_path, value in metadata.items(): @@ -253,14 +253,14 @@ def _get_pixel_hash_path(path: str, pixels: Pixels) -> str: def write_pixels(self, path: str, pixels: Pixels) -> None: """Write pixels - + Args: path (str): Path to write the pixel data to. pixels (Pixels): Pixels object containing the pixel data. Returns: None - + """ # check whether path exists metadata_path = Path(path) / "metadata.json" diff --git a/spoc/pixels.py b/spoc/pixels.py index 89cc687..cd61e6a 100644 --- a/spoc/pixels.py +++ b/spoc/pixels.py @@ -144,7 +144,7 @@ def from_uri(uri, mode="path"): def get_global_parameters(self): """Returns global parameters of pixels - + Returns: PixelParameters: The global parameters of the pixels. """ @@ -161,7 +161,7 @@ def get_global_parameters(self): @property def path(self) -> str: """Returns path of pixels - + Returns: str: The path of the pixels. """ @@ -170,17 +170,17 @@ def path(self) -> str: @property def data(self) -> DataFrame: """Returns pixels as dataframe - + Returns: DataFrame: The pixels as a dataframe. - + """ return self._data @property def number_fragments(self) -> int: """Returns number of fragments in pixels - + Returns: int: The number of fragments in the pixels. """ @@ -189,7 +189,7 @@ def number_fragments(self) -> int: @property def binsize(self) -> int: """Returns binsize of pixels - + Returns: int: The binsize of the pixels. """ @@ -198,7 +198,7 @@ def binsize(self) -> int: @property def binary_labels_equal(self) -> bool: """Returns whether binary labels are equal - + Returns: bool: Whether binary labels are equal. """ @@ -207,7 +207,7 @@ def binary_labels_equal(self) -> bool: @property def symmetry_flipped(self) -> bool: """Returns whether pixels are symmetry flipped - + Returns: bool: Whether pixels are symmetry flipped. """ @@ -216,7 +216,7 @@ def symmetry_flipped(self) -> bool: @property def metadata_combi(self) -> Optional[List[str]]: """Returns metadata combination of pixels - + Returns: Optional[List[str]]: The metadata combination of the pixels. """ @@ -225,11 +225,11 @@ def metadata_combi(self) -> Optional[List[str]]: @property def same_chromosome(self) -> bool: """Returns whether pixels are on same chromosome - - + + Returns: bool: Whether pixels are on same chromosome. - + """ return self._same_chromosome @@ -238,10 +238,10 @@ class GenomicBinner: """Bins higher order contacts into genomic bins of fixed size. Is capable of sorting genomic bins along columns based on sister chromatid identity - + Args: bin_size (int): The size of the genomic bins. - + """ def __init__(self, bin_size: int) -> None: @@ -283,18 +283,16 @@ def _assign_midpoints(self, contacts: dd.DataFrame) -> dd.DataFrame: axis=1, ) - def bin_contacts( - self, contacts: Contacts, same_chromosome: bool = True - ) -> Pixels: + def bin_contacts(self, contacts: Contacts, same_chromosome: bool = True) -> Pixels: """Bins genomic contacts - + Args: contacts (Contacts): The genomic contacts to bin. same_chromosome (bool, optional): Whether to only retain pixels on the same chromosome. Defaults to True. Returns: Pixels: The binned genomic pixels. - + """ self._contact_order = contacts.number_fragments contacts_w_midpoints = self._assign_midpoints(contacts.data)