Skip to content

Commit

Permalink
Add docstrings and expand tests for IO sub-module (#65)
Browse files Browse the repository at this point in the history
* add docstrings for IO/cells.py

* update remaining IO docstrings

* expand and refactor test_cell_io

* add test for find_relevant_tiffs

* fix find_relevant_tiffs test

* minor updates to docstrings
  • Loading branch information
K-Meech authored Apr 10, 2024
1 parent 17e5ebc commit 3b632c2
Show file tree
Hide file tree
Showing 4 changed files with 383 additions and 62 deletions.
175 changes: 171 additions & 4 deletions brainglobe_utils/IO/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import logging
import os
from typing import List, Optional
import pathlib
from typing import List, Optional, Union
from xml.dom import minidom
from xml.etree import ElementTree
from xml.etree.ElementTree import Element as EtElement
Expand All @@ -29,6 +30,31 @@ def get_cells(
cells_only: bool = False,
cell_type: Optional[int] = None,
):
"""
Read cells from a file or directory.
Parameters
----------
cells_file_path : str
Path to cells file to read. Can be .xml, .yml, or a directory.
cells_only : bool, optional
Only relevant for .xml files. If True, will only read Cells with
type Cell.CELL i.e. exclude all Cell.ARTIFACT, Cell.UNKNOWN, and
Cell.NO_CELL.
cell_type : int, optional
Only relevant for directories. This determines the type to assign
to all read cells. Refer to the Cell documentation for
a full explanation of cell_type options.
Returns
-------
list of Cell
A list of the cells contained in the file or directory.
See Also
--------
Cell : For full description of cell_type parameter.
"""
if cells_file_path.endswith(".xml"):
return get_cells_xml(cells_file_path, cells_only=cells_only)
elif cells_file_path.endswith(".yml"):
Expand All @@ -46,6 +72,8 @@ def get_cells(


def raise_cell_read_error(cells_file_path):
"""Raise a NotImplementedError, with an informative message including the
cells file path"""
logging.error(
"File format of: {} is not supported or contains errors. Please "
"supply an xml file, or a directory of files with positions in the "
Expand All @@ -61,6 +89,22 @@ def raise_cell_read_error(cells_file_path):


def get_cells_xml(xml_file_path, cells_only=False):
"""
Read cells from an xml file.
Parameters
----------
xml_file_path : str or pathlib.Path
Path to xml file to read from.
cells_only : bool, optional
Whether to only read Cells with type Cell.CELL i.e. exclude all
Cell.ARTIFACT, Cell.UNKNOWN, and Cell.NO_CELL.
Returns
-------
list of Cell
A list of the cells contained in the file.
"""
with open(xml_file_path, "r") as xml_file:
root = ElementTree.parse(xml_file).getroot()
cells = []
Expand All @@ -78,6 +122,24 @@ def get_cells_xml(xml_file_path, cells_only=False):


def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):
"""
Read cells from a yml file.
Parameters
----------
cells_file_path : str or pathlib.Path
Path to yml file to read from.
ignore_type : bool, optional
Whether to ignore the type of cells - all will be assigned type
Cell.UNKNOWN. Currently only True is supported.
marker : str, optional
Yaml key under which cells information is stored.
Returns
-------
list of Cell
A list of the cells contained in the file.
"""
if not ignore_type:
raise NotImplementedError(
"Parsing cell types is not yet implemented for YAML files. "
Expand All @@ -97,6 +159,29 @@ def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):


def get_cells_dir(cells_file_path, cell_type=None):
"""
Read cells from a directory. Cells will be created based on the filenames
of files in the directory, one cell per file.
Parameters
----------
cells_file_path : str or pathlib.Path
Path to directory containing Cells. Each file in the directory will
create one Cell, based on its filename. This filename must contain the
cell's x, y and z position E.g. 'Cell_z358_y564_x4056'
cell_type : int or str or None, optional
What type to assign all cells. Refer to the Cell documentation for
a full explanation of cell_type options.
Returns
-------
list of Cell
A list of the cells contained in the directory.
See Also
--------
Cell : For full description of cell_type parameter.
"""
cells = []
for file in os.listdir(cells_file_path):
# ignore hidden files
Expand All @@ -112,6 +197,25 @@ def save_cells(
indentation_str=" ",
artifact_keep=True,
):
"""
Save cells to a file.
Parameters
----------
cells : list of Cell
Cells to save to file.
xml_file_path : str
File path of xml file to save cells to.
save_csv : bool, optional
If True, will save cells to a csv file (rather than xml). This will use
the given xml_file_path with the .xml extension replaced with .csv.
indentation_str : str, optional
String to use for indent in xml file.
artifact_keep : bool, optional
Whether to keep cells with type Cell.ARTIFACT in the xml file. If True,
they are kept but their type is changed to Cell.UNKNOWN. If False, they
are removed.
"""
# Assume always save xml file, and maybe save other formats
cells_to_xml(
cells,
Expand All @@ -128,12 +232,29 @@ def save_cells(
def cells_to_xml(
cells, xml_file_path, indentation_str=" ", artifact_keep=True
):
"""
Save cells to an xml file.
Parameters
----------
cells : list of Cell
Cells to save to file.
xml_file_path : str or pathlib.Path
Xml file path to write cells to.
indentation_str : str, optional
String to use for indent in xml file.
artifact_keep : bool, optional
Whether to keep cells with type Cell.ARTIFACT. If True, they
are kept but their type is changed to Cell.UNKNOWN. If False, they are
removed.
"""
xml_data = make_xml(cells, indentation_str, artifact_keep=artifact_keep)
with open(xml_file_path, "w") as xml_file:
xml_file.write(str(xml_data, "UTF-8"))


def cells_xml_to_df(xml_file_path):
"""Read cells from xml file and convert to dataframe"""
cells = get_cells(xml_file_path)
return cells_to_dataframe(cells)

Expand All @@ -142,12 +263,32 @@ def cells_to_dataframe(cells: List[Cell]) -> pd.DataFrame:
return pd.DataFrame([c.to_dict() for c in cells])


def cells_to_csv(cells, csv_file_path):
def cells_to_csv(cells: List[Cell], csv_file_path: Union[str, pathlib.Path]):
"""Save cells to csv file"""
df = cells_to_dataframe(cells)
df.to_csv(csv_file_path)


def make_xml(cell_list, indentration_str, artifact_keep=True):
def make_xml(cell_list, indentation_str, artifact_keep=True):
"""
Convert a list of cells to xml.
Parameters
----------
cell_list : list of Cell
Cells to convert to xml.
indentation_str : str
String to use for indent in xml file.
artifact_keep : bool, optional
Whether to keep cells with type Cell.ARTIFACT. If True, they
are kept but their type is changed to Cell.UNKNOWN. If False, they are
removed.
Returns
-------
bytes
Cell list as xml, ready to be saved into a file.
"""
root = EtElement("CellCounter_Marker_File")
image_properties = EtElement("Image_Properties")
file_name = EtElement("Image_Filename")
Expand Down Expand Up @@ -176,10 +317,13 @@ def make_xml(cell_list, indentration_str, artifact_keep=True):
marker_data.append(mt)
root.append(marker_data)

return pretty_xml(root, indentration_str)
return pretty_xml(root, indentation_str)


def deal_with_artifacts(cell_dict, artifact_keep=True):
"""Deal with cells with type Cell.ARTIFACT in a cell dict. If artifact_keep
is True, then convert all Cell.ARTIFACT to Cell.UNKNOWN. Otherwise, remove
all Cell.ARTIFACT"""
# What do we want to do with the artifacts (if they exist)?
# Might want to keep them for training
if artifact_keep:
Expand All @@ -196,6 +340,8 @@ def deal_with_artifacts(cell_dict, artifact_keep=True):


def make_type_dict(cell_list):
"""Convert a list of Cells to a dictionary with keys of cell type and
values of the list of corresponding cells."""
types = sorted(set([cell.type for cell in cell_list]))
return {
cell_type: [cell for cell in cell_list if cell.type == cell_type]
Expand All @@ -204,12 +350,33 @@ def make_type_dict(cell_list):


def pretty_xml(elem, indentation_str=" "):
"""Convert xml element to pretty version, using given indentation string"""
ugly_xml = ElementTree.tostring(elem, "utf-8")
md_parsed = minidom.parseString(ugly_xml)
return md_parsed.toprettyxml(indent=indentation_str, encoding="UTF-8")


def find_relevant_tiffs(tiffs, cell_def):
"""
Find tiffs that match those read from cell_def.
Parameters
----------
tiffs : list of str
List of paths to tiff files. Each tiff filename must contain the cell's
x, y and z position.
cell_def : str
Path to cells to read. Can be an .xml file, .yml file, or a directory.
If a directory is passed, each file in the directory will
create one Cell, based on its filename. Each filename must contain the
cell's x, y and z position.
Returns
-------
list of str
Filtered list of paths to tiff files, only including those that match
cells read from cell_def.
"""
cells = [UntypedCell(tiff) for tiff in tiffs]
if os.path.isdir(cell_def):
relevant_cells = set(
Expand Down
10 changes: 6 additions & 4 deletions brainglobe_utils/IO/surfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ def marching_cubes_to_obj(marching_cubes_out, output_file):
"""
Saves the output of skimage.measure.marching_cubes as an .obj file
:param marching_cubes_out: tuple
:param output_file: str
Parameters
----------
marching_cubes_out : tuple of np.ndarray
Output from skimage.measure.marching_cubes.
output_file : str or pathlib.Path
Path of .obj file to write output into.
"""

verts, faces, normals, _ = marching_cubes_out
with open(output_file, "w") as f:
for item in verts:
Expand Down
40 changes: 40 additions & 0 deletions brainglobe_utils/IO/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,58 @@


def read_yaml_section(yaml_file, section):
"""
Read section from yaml file.
Parameters
----------
yaml_file : str or pathlib.Path
Path of .yml file to read.
section : str
Key of yaml section to read.
Returns
-------
The contents of the yaml section.
"""
yaml_contents = open_yaml(yaml_file)
contents = yaml_contents[section]
return contents


def open_yaml(yaml_file):
"""
Read the contents of a yaml file.
Parameters
----------
yaml_file : str or pathlib.Path
Path of .yml file to read.
Returns
-------
dict
The contents of the yaml file.
"""
with open(yaml_file) as f:
yaml_contents = yaml.load(f, Loader=yaml.FullLoader)
return yaml_contents


def save_yaml(yaml_contents, output_file, default_flow_style=False):
"""
Save contents to a yaml file.
Parameters
----------
yaml_contents : dict
Content to write to yaml file.
output_file : str or pathlib.Path
Path to .yml file to write to.
default_flow_style : bool, optional
If true, will use block style or flow style depending on whether there
are nested collections. If false, always uses block style.
"""
with open(output_file, "w") as outfile:
yaml.dump(
yaml_contents, outfile, default_flow_style=default_flow_style
Expand Down
Loading

0 comments on commit 3b632c2

Please sign in to comment.