Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add docstrings and expand tests for IO sub-module #65

Merged
merged 6 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
173 changes: 169 additions & 4 deletions brainglobe_utils/IO/cells.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@

import logging
import os
from typing import List, Optional
import pathlib
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
import pathlib
from pathlib import Path

As far as I can tell we only use Path, and the rest of the BG codebase errs towards from pathlib import Path.

from typing import List, Optional, Union
from xml.dom import minidom
from xml.etree import ElementTree
from xml.etree.ElementTree import Element as EtElement
Expand All @@ -29,6 +30,31 @@ def get_cells(
cells_only: bool = False,
cell_type: Optional[int] = None,
):
"""
Read cells from a file or directory.

Parameters
----------
cells_file_path : str
Path of cells file to read. Can be .xml, .yml, or a directory.
K-Meech marked this conversation as resolved.
Show resolved Hide resolved
cells_only : bool, optional
Only relevant for .xml files. If True, it will only include Cells with
K-Meech marked this conversation as resolved.
Show resolved Hide resolved
type Cell.CELL i.e. exclude all Cell.ARTIFACT, Cell.UNKNOWN, and
Cell.NO_CELL.
cell_type : int, optional
Only relevant for directories. This determines the type to assign
to all read cells. Refer to the Cell documentation for
a full explanation of cell_type options.

Returns
-------
list of Cell
A list of the cells contained in the file or directory.

See Also
--------
Cell : For full description of cell_type parameter.
"""
if cells_file_path.endswith(".xml"):
return get_cells_xml(cells_file_path, cells_only=cells_only)
elif cells_file_path.endswith(".yml"):
Expand All @@ -46,6 +72,8 @@ def get_cells(


def raise_cell_read_error(cells_file_path):
"""Raise a NotImplementedError, with an informative message including the
cells file path"""
logging.error(
"File format of: {} is not supported or contains errors. Please "
"supply an xml file, or a directory of files with positions in the "
Expand All @@ -61,6 +89,22 @@ def raise_cell_read_error(cells_file_path):


def get_cells_xml(xml_file_path, cells_only=False):
"""
Read cells from an xml file.

Parameters
----------
xml_file_path : str or pathlib.Path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
xml_file_path : str or pathlib.Path
xml_file_path : str or Path

Path of xml file to read from.
cells_only : bool, optional
Whether to only include Cells with type Cell.CELL i.e. exclude all
Cell.ARTIFACT, Cell.UNKNOWN, and Cell.NO_CELL.

Returns
-------
list of Cell
A list of the cells contained in the file.
"""
with open(xml_file_path, "r") as xml_file:
root = ElementTree.parse(xml_file).getroot()
cells = []
Expand All @@ -78,6 +122,24 @@ def get_cells_xml(xml_file_path, cells_only=False):


def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):
"""
Read cells from a yml file.

Parameters
----------
cells_file_path : str or pathlib.Path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cells_file_path : str or pathlib.Path
cells_file_path : str or Path

Path of yml file to read from.
ignore_type : bool, optional
Whether to ignore the type of cells - all will be assigned type
Cell.UNKNOWN. Currently only True is supported.
marker : str, optional
Yaml key under which cells information is stored.

Returns
-------
list of Cell
A list of the cells contained in the file.
"""
if not ignore_type:
raise NotImplementedError(
"Parsing cell types is not yet implemented for YAML files. "
Expand All @@ -97,6 +159,29 @@ def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):


def get_cells_dir(cells_file_path, cell_type=None):
"""
Read cells from a directory. Cells will be created based on the filenames
of files in the directory, one cell per file.

Parameters
----------
cells_file_path : str or pathlib.Path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cells_file_path : str or pathlib.Path
cells_file_path : str or Path

Path of directory containing Cells. Each file in the directory will
K-Meech marked this conversation as resolved.
Show resolved Hide resolved
create one Cell, based on its filename. This filename must contain the
cell's x, y and z position E.g. 'Cell_z358_y564_x4056'
cell_type : int or str or None, optional
What type to assign all cells. Refer to the Cell documentation for
a full explanation of cell_type options.

Returns
-------
list of Cell
A list of the cells contained in the directory.

See Also
--------
Cell : For full description of cell_type parameter.
"""
cells = []
for file in os.listdir(cells_file_path):
# ignore hidden files
Expand All @@ -112,6 +197,25 @@ def save_cells(
indentation_str=" ",
artifact_keep=True,
):
"""
Save cells to a file.

Parameters
----------
cells : list of Cell
Cells to save to file.
xml_file_path : str
File path of xml file to save cells to.
save_csv : bool, optional
If True, will save cells to a csv file (rather than xml). This will use
the given xml_file_path with the .xml extension replaced with .csv.
indentation_str : str, optional
String to use for indent in xml file.
artifact_keep : bool, optional
Whether to keep cells with type Cell.ARTIFACT in the xml file. If True,
they are kept but their type is changed to Cell.UNKNOWN. If False, they
are removed.
"""
# Assume always save xml file, and maybe save other formats
cells_to_xml(
cells,
Expand All @@ -128,12 +232,29 @@ def save_cells(
def cells_to_xml(
cells, xml_file_path, indentation_str=" ", artifact_keep=True
):
"""
Save cells to xml file.
K-Meech marked this conversation as resolved.
Show resolved Hide resolved

Parameters
----------
cells : list of Cell
Cells to save to file.
xml_file_path : str or pathlib.Path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
xml_file_path : str or pathlib.Path
xml_file_path : str or Path

Xml file path to write cells to.
indentation_str : str, optional
String to use for indent in xml file.
artifact_keep : bool, optional
Whether to keep cells with type Cell.ARTIFACT. If True, they
are kept but their type is changed to Cell.UNKNOWN. If False, they are
removed.
"""
xml_data = make_xml(cells, indentation_str, artifact_keep=artifact_keep)
with open(xml_file_path, "w") as xml_file:
xml_file.write(str(xml_data, "UTF-8"))


def cells_xml_to_df(xml_file_path):
"""Read cells from xml file and convert to dataframe"""
cells = get_cells(xml_file_path)
return cells_to_dataframe(cells)

Expand All @@ -142,12 +263,33 @@ def cells_to_dataframe(cells: List[Cell]) -> pd.DataFrame:
return pd.DataFrame([c.to_dict() for c in cells])


def cells_to_csv(cells, csv_file_path):
def cells_to_csv(cells: List[Cell], csv_file_path: Union[str, pathlib.Path]):
"""Save cells to csv file"""

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
def cells_to_csv(cells: List[Cell], csv_file_path: Union[str, pathlib.Path]):
"""Save cells to csv file"""
def cells_to_csv(cells: List[Cell], csv_file_path: Union[str, Path]):
"""Save cells to csv file"""

To be consistent with the import change above.

Also there's a trailing newlines between the docstring and first command in the function which isn't present in the others.

df = cells_to_dataframe(cells)
df.to_csv(csv_file_path)


def make_xml(cell_list, indentration_str, artifact_keep=True):
def make_xml(cell_list, indentation_str, artifact_keep=True):
"""
Convert a list of cells to xml.

Parameters
----------
cell_list : list of Cell
Cells to convert to xml.
indentation_str : str
String to use for indent in xml file.
artifact_keep : bool, optional
Whether to keep cells with type Cell.ARTIFACT. If True, they
are kept but their type is changed to Cell.UNKNOWN. If False, they are
removed.
willGraham01 marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
bytes
Cell list as xml, ready to be saved into a file.
"""
root = EtElement("CellCounter_Marker_File")
image_properties = EtElement("Image_Properties")
file_name = EtElement("Image_Filename")
Expand Down Expand Up @@ -176,10 +318,13 @@ def make_xml(cell_list, indentration_str, artifact_keep=True):
marker_data.append(mt)
root.append(marker_data)

return pretty_xml(root, indentration_str)
return pretty_xml(root, indentation_str)


def deal_with_artifacts(cell_dict, artifact_keep=True):
"""Deal with cells with type Cell.ARTIFACT in a cell dict. If artifact_keep
is True, then convert all Cell.ARTIFACT to Cell.UNKNOWN. Otherwise, remove
all Cell.ARTIFACT"""
# What do we want to do with the artifacts (if they exist)?
# Might want to keep them for training
if artifact_keep:
willGraham01 marked this conversation as resolved.
Show resolved Hide resolved
Expand All @@ -196,6 +341,8 @@ def deal_with_artifacts(cell_dict, artifact_keep=True):


def make_type_dict(cell_list):
"""Convert a list of Cells to a dictionary with keys of cell type and
values of the list of corresponding cells."""
types = sorted(set([cell.type for cell in cell_list]))
return {
cell_type: [cell for cell in cell_list if cell.type == cell_type]
Expand All @@ -204,12 +351,30 @@ def make_type_dict(cell_list):


def pretty_xml(elem, indentation_str=" "):
"""Convert xml element to pretty version, using given indentation string"""
ugly_xml = ElementTree.tostring(elem, "utf-8")
md_parsed = minidom.parseString(ugly_xml)
return md_parsed.toprettyxml(indent=indentation_str, encoding="UTF-8")


def find_relevant_tiffs(tiffs, cell_def):
"""
Find tiffs that match those in cell_def.

Parameters
----------
tiffs : list of str
List of paths to tiff files. Each tiff filename must contain the cell's
x, y and z position.
cell_def : str
Path of cells file to read. Can be .xml, .yml, or a directory.
willGraham01 marked this conversation as resolved.
Show resolved Hide resolved

Returns
-------
list of str
List of paths to tiff files, only including those that match cells
within cell_def.
"""
cells = [UntypedCell(tiff) for tiff in tiffs]
if os.path.isdir(cell_def):
relevant_cells = set(
Expand Down
10 changes: 6 additions & 4 deletions brainglobe_utils/IO/surfaces.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ def marching_cubes_to_obj(marching_cubes_out, output_file):
"""
Saves the output of skimage.measure.marching_cubes as an .obj file
:param marching_cubes_out: tuple
:param output_file: str
Parameters
----------
marching_cubes_out : tuple of np.ndarray
Output from skimage.measure.marching_cubes.
output_file : str or pathlib.Path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Noticed another pathlib.Path here, but in another file. Could you double-check if we're using anything else from pathlib in this file?

Path of .obj file to write output into.
"""

verts, faces, normals, _ = marching_cubes_out
with open(output_file, "w") as f:
for item in verts:
Expand Down
40 changes: 40 additions & 0 deletions brainglobe_utils/IO/yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,58 @@


def read_yaml_section(yaml_file, section):
"""
Read section from yaml file.
Parameters
----------
yaml_file : str or pathlib.Path
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Will's pathlib.Path rant continues)

Path of .yml file to read.
section : str
Key of yaml section to read.
Returns
-------
The contents of the yaml section.
"""
yaml_contents = open_yaml(yaml_file)
contents = yaml_contents[section]
return contents


def open_yaml(yaml_file):
"""
Read the contents of a yaml file.
Parameters
----------
yaml_file : str or pathlib.Path
Path of .yml file to read.
Returns
-------
dict
The contents of the yaml file.
"""
with open(yaml_file) as f:
yaml_contents = yaml.load(f, Loader=yaml.FullLoader)
return yaml_contents


def save_yaml(yaml_contents, output_file, default_flow_style=False):
"""
Save contents to a yaml file.
Parameters
----------
yaml_contents : dict
Content to write to yaml file.
output_file : str or pathlib.Path
Path to .yml file to write to.
default_flow_style : bool, optional
If true, will use block style or flow style depending on whether there
are nested collections. If false, always uses block style.
"""
with open(output_file, "w") as outfile:
yaml.dump(
yaml_contents, outfile, default_flow_style=default_flow_style
Expand Down
Loading
Loading