Add docstrings and expand tests for IO sub-module (#65)

* add docstrings for IO/cells.py * update remaining IO docstrings * expand and refactor test_cell_io * add test for find_relevant_tiffs * fix find_relevant_tiffs test * minor updates to docstrings
brainglobe · Apr 10, 2024 · 3b632c2 · 3b632c2
1 parent 17e5ebc
commit 3b632c2
Show file tree

Hide file tree

Showing 4 changed files with 383 additions and 62 deletions.
diff --git a/brainglobe_utils/IO/cells.py b/brainglobe_utils/IO/cells.py
@@ -7,7 +7,8 @@
 
 import logging
 import os
-from typing import List, Optional
+import pathlib
+from typing import List, Optional, Union
 from xml.dom import minidom
 from xml.etree import ElementTree
 from xml.etree.ElementTree import Element as EtElement
@@ -29,6 +30,31 @@ def get_cells(
     cells_only: bool = False,
     cell_type: Optional[int] = None,
 ):
+    """
+    Read cells from a file or directory.
+
+    Parameters
+    ----------
+    cells_file_path : str
+        Path to cells file to read. Can be .xml, .yml, or a directory.
+    cells_only : bool, optional
+        Only relevant for .xml files. If True, will only read Cells with
+        type Cell.CELL i.e. exclude all Cell.ARTIFACT, Cell.UNKNOWN, and
+        Cell.NO_CELL.
+    cell_type : int, optional
+        Only relevant for directories. This determines the type to assign
+        to all read cells. Refer to the Cell documentation for
+        a full explanation of cell_type options.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the file or directory.
+
+    See Also
+    --------
+    Cell : For full description of cell_type parameter.
+    """
     if cells_file_path.endswith(".xml"):
         return get_cells_xml(cells_file_path, cells_only=cells_only)
     elif cells_file_path.endswith(".yml"):
@@ -46,6 +72,8 @@ def get_cells(
 
 
 def raise_cell_read_error(cells_file_path):
+    """Raise a NotImplementedError, with an informative message including the
+    cells file path"""
     logging.error(
         "File format of: {} is not supported or contains errors. Please "
         "supply an xml file, or a directory of files with positions in the "
@@ -61,6 +89,22 @@ def raise_cell_read_error(cells_file_path):
 
 
 def get_cells_xml(xml_file_path, cells_only=False):
+    """
+    Read cells from an xml file.
+
+    Parameters
+    ----------
+    xml_file_path : str or pathlib.Path
+        Path to xml file to read from.
+    cells_only : bool, optional
+        Whether to only read Cells with type Cell.CELL i.e. exclude all
+        Cell.ARTIFACT, Cell.UNKNOWN, and Cell.NO_CELL.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the file.
+    """
     with open(xml_file_path, "r") as xml_file:
         root = ElementTree.parse(xml_file).getroot()
         cells = []
@@ -78,6 +122,24 @@ def get_cells_xml(xml_file_path, cells_only=False):
 
 
 def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):
+    """
+    Read cells from a yml file.
+
+    Parameters
+    ----------
+    cells_file_path : str or pathlib.Path
+        Path to yml file to read from.
+    ignore_type : bool, optional
+        Whether to ignore the type of cells - all will be assigned type
+        Cell.UNKNOWN. Currently only True is supported.
+    marker : str, optional
+        Yaml key under which cells information is stored.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the file.
+    """
     if not ignore_type:
         raise NotImplementedError(
             "Parsing cell types is not yet implemented for YAML files. "
@@ -97,6 +159,29 @@ def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):
 
 
 def get_cells_dir(cells_file_path, cell_type=None):
+    """
+    Read cells from a directory. Cells will be created based on the filenames
+    of files in the directory, one cell per file.
+
+    Parameters
+    ----------
+    cells_file_path : str or pathlib.Path
+        Path to directory containing Cells. Each file in the directory will
+        create one Cell, based on its filename. This filename must contain the
+        cell's x, y and z position E.g. 'Cell_z358_y564_x4056'
+    cell_type : int or str or None, optional
+        What type to assign all cells. Refer to the Cell documentation for
+        a full explanation of cell_type options.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the directory.
+
+    See Also
+    --------
+    Cell : For full description of cell_type parameter.
+    """
     cells = []
     for file in os.listdir(cells_file_path):
         # ignore hidden files
@@ -112,6 +197,25 @@ def save_cells(
     indentation_str="  ",
     artifact_keep=True,
 ):
+    """
+    Save cells to a file.
+
+    Parameters
+    ----------
+    cells : list of Cell
+        Cells to save to file.
+    xml_file_path : str
+        File path of xml file to save cells to.
+    save_csv : bool, optional
+        If True, will save cells to a csv file (rather than xml). This will use
+        the given xml_file_path with the .xml extension replaced with .csv.
+    indentation_str : str, optional
+        String to use for indent in xml file.
+    artifact_keep : bool, optional
+        Whether to keep cells with type Cell.ARTIFACT in the xml file. If True,
+        they are kept but their type is changed to Cell.UNKNOWN. If False, they
+        are removed.
+    """
     # Assume always save xml file, and maybe save other formats
     cells_to_xml(
         cells,
@@ -128,12 +232,29 @@ def save_cells(
 def cells_to_xml(
     cells, xml_file_path, indentation_str="  ", artifact_keep=True
 ):
+    """
+    Save cells to an xml file.
+
+    Parameters
+    ----------
+    cells : list of Cell
+        Cells to save to file.
+    xml_file_path : str or pathlib.Path
+        Xml file path to write cells to.
+    indentation_str : str, optional
+        String to use for indent in xml file.
+    artifact_keep : bool, optional
+        Whether to keep cells with type Cell.ARTIFACT. If True, they
+        are kept but their type is changed to Cell.UNKNOWN. If False, they are
+        removed.
+    """
     xml_data = make_xml(cells, indentation_str, artifact_keep=artifact_keep)
     with open(xml_file_path, "w") as xml_file:
         xml_file.write(str(xml_data, "UTF-8"))
 
 
 def cells_xml_to_df(xml_file_path):
+    """Read cells from xml file and convert to dataframe"""
     cells = get_cells(xml_file_path)
     return cells_to_dataframe(cells)
 
@@ -142,12 +263,32 @@ def cells_to_dataframe(cells: List[Cell]) -> pd.DataFrame:
     return pd.DataFrame([c.to_dict() for c in cells])
 
 
-def cells_to_csv(cells, csv_file_path):
+def cells_to_csv(cells: List[Cell], csv_file_path: Union[str, pathlib.Path]):
+    """Save cells to csv file"""
     df = cells_to_dataframe(cells)
     df.to_csv(csv_file_path)
 
 
-def make_xml(cell_list, indentration_str, artifact_keep=True):
+def make_xml(cell_list, indentation_str, artifact_keep=True):
+    """
+    Convert a list of cells to xml.
+
+    Parameters
+    ----------
+    cell_list : list of Cell
+        Cells to convert to xml.
+    indentation_str : str
+        String to use for indent in xml file.
+    artifact_keep : bool, optional
+        Whether to keep cells with type Cell.ARTIFACT. If True, they
+        are kept but their type is changed to Cell.UNKNOWN. If False, they are
+        removed.
+
+    Returns
+    -------
+    bytes
+        Cell list as xml, ready to be saved into a file.
+    """
     root = EtElement("CellCounter_Marker_File")
     image_properties = EtElement("Image_Properties")
     file_name = EtElement("Image_Filename")
@@ -176,10 +317,13 @@ def make_xml(cell_list, indentration_str, artifact_keep=True):
         marker_data.append(mt)
     root.append(marker_data)
 
-    return pretty_xml(root, indentration_str)
+    return pretty_xml(root, indentation_str)
 
 
 def deal_with_artifacts(cell_dict, artifact_keep=True):
+    """Deal with cells with type Cell.ARTIFACT in a cell dict. If artifact_keep
+    is True, then convert all Cell.ARTIFACT to Cell.UNKNOWN. Otherwise, remove
+    all Cell.ARTIFACT"""
     # What do we want to do with the artifacts (if they exist)?
     # Might want to keep them for training
     if artifact_keep:
@@ -196,6 +340,8 @@ def deal_with_artifacts(cell_dict, artifact_keep=True):
 
 
 def make_type_dict(cell_list):
+    """Convert a list of Cells to a dictionary with keys of cell type and
+    values of the list of corresponding cells."""
     types = sorted(set([cell.type for cell in cell_list]))
     return {
         cell_type: [cell for cell in cell_list if cell.type == cell_type]
@@ -204,12 +350,33 @@ def make_type_dict(cell_list):
 
 
 def pretty_xml(elem, indentation_str="  "):
+    """Convert xml element to pretty version, using given indentation string"""
     ugly_xml = ElementTree.tostring(elem, "utf-8")
     md_parsed = minidom.parseString(ugly_xml)
     return md_parsed.toprettyxml(indent=indentation_str, encoding="UTF-8")
 
 
 def find_relevant_tiffs(tiffs, cell_def):
+    """
+    Find tiffs that match those read from cell_def.
+
+    Parameters
+    ----------
+    tiffs : list of str
+        List of paths to tiff files. Each tiff filename must contain the cell's
+        x, y and z position.
+    cell_def : str
+        Path to cells to read. Can be an .xml file, .yml file, or a directory.
+        If a directory is passed, each file in the directory will
+        create one Cell, based on its filename. Each filename must contain the
+        cell's x, y and z position.
+
+    Returns
+    -------
+    list of str
+        Filtered list of paths to tiff files, only including those that match
+        cells read from cell_def.
+    """
     cells = [UntypedCell(tiff) for tiff in tiffs]
     if os.path.isdir(cell_def):
         relevant_cells = set(

diff --git a/brainglobe_utils/IO/surfaces.py b/brainglobe_utils/IO/surfaces.py
@@ -2,11 +2,13 @@ def marching_cubes_to_obj(marching_cubes_out, output_file):
     """
     Saves the output of skimage.measure.marching_cubes as an .obj file
 
-    :param marching_cubes_out: tuple
-    :param output_file: str
-
+    Parameters
+    ----------
+    marching_cubes_out : tuple of np.ndarray
+        Output from skimage.measure.marching_cubes.
+    output_file : str or pathlib.Path
+        Path of .obj file to write output into.
     """
-
     verts, faces, normals, _ = marching_cubes_out
     with open(output_file, "w") as f:
         for item in verts:

diff --git a/brainglobe_utils/IO/yaml.py b/brainglobe_utils/IO/yaml.py
@@ -2,18 +2,58 @@
 
 
 def read_yaml_section(yaml_file, section):
+    """
+    Read section from yaml file.
+
+    Parameters
+    ----------
+    yaml_file : str or pathlib.Path
+        Path of .yml file to read.
+    section : str
+        Key of yaml section to read.
+
+    Returns
+    -------
+    The contents of the yaml section.
+    """
     yaml_contents = open_yaml(yaml_file)
     contents = yaml_contents[section]
     return contents
 
 
 def open_yaml(yaml_file):
+    """
+    Read the contents of a yaml file.
+
+    Parameters
+    ----------
+    yaml_file : str or pathlib.Path
+        Path of .yml file to read.
+
+    Returns
+    -------
+    dict
+        The contents of the yaml file.
+    """
     with open(yaml_file) as f:
         yaml_contents = yaml.load(f, Loader=yaml.FullLoader)
     return yaml_contents
 
 
 def save_yaml(yaml_contents, output_file, default_flow_style=False):
+    """
+    Save contents to a yaml file.
+
+    Parameters
+    ----------
+    yaml_contents : dict
+        Content to write to yaml file.
+    output_file : str or pathlib.Path
+        Path to .yml file to write to.
+    default_flow_style : bool, optional
+        If true, will use block style or flow style depending on whether there
+        are nested collections. If false, always uses block style.
+    """
     with open(output_file, "w") as outfile:
         yaml.dump(
             yaml_contents, outfile, default_flow_style=default_flow_style