diff --git a/brainglobe_utils/IO/cells.py b/brainglobe_utils/IO/cells.py index 68592f8..b39ad2c 100644 --- a/brainglobe_utils/IO/cells.py +++ b/brainglobe_utils/IO/cells.py @@ -7,7 +7,8 @@ import logging import os -from typing import List, Optional +import pathlib +from typing import List, Optional, Union from xml.dom import minidom from xml.etree import ElementTree from xml.etree.ElementTree import Element as EtElement @@ -29,6 +30,31 @@ def get_cells( cells_only: bool = False, cell_type: Optional[int] = None, ): + """ + Read cells from a file or directory. + + Parameters + ---------- + cells_file_path : str + Path to cells file to read. Can be .xml, .yml, or a directory. + cells_only : bool, optional + Only relevant for .xml files. If True, will only read Cells with + type Cell.CELL i.e. exclude all Cell.ARTIFACT, Cell.UNKNOWN, and + Cell.NO_CELL. + cell_type : int, optional + Only relevant for directories. This determines the type to assign + to all read cells. Refer to the Cell documentation for + a full explanation of cell_type options. + + Returns + ------- + list of Cell + A list of the cells contained in the file or directory. + + See Also + -------- + Cell : For full description of cell_type parameter. + """ if cells_file_path.endswith(".xml"): return get_cells_xml(cells_file_path, cells_only=cells_only) elif cells_file_path.endswith(".yml"): @@ -46,6 +72,8 @@ def get_cells( def raise_cell_read_error(cells_file_path): + """Raise a NotImplementedError, with an informative message including the + cells file path""" logging.error( "File format of: {} is not supported or contains errors. Please " "supply an xml file, or a directory of files with positions in the " @@ -61,6 +89,22 @@ def raise_cell_read_error(cells_file_path): def get_cells_xml(xml_file_path, cells_only=False): + """ + Read cells from an xml file. + + Parameters + ---------- + xml_file_path : str or pathlib.Path + Path to xml file to read from. + cells_only : bool, optional + Whether to only read Cells with type Cell.CELL i.e. exclude all + Cell.ARTIFACT, Cell.UNKNOWN, and Cell.NO_CELL. + + Returns + ------- + list of Cell + A list of the cells contained in the file. + """ with open(xml_file_path, "r") as xml_file: root = ElementTree.parse(xml_file).getroot() cells = [] @@ -78,6 +122,24 @@ def get_cells_xml(xml_file_path, cells_only=False): def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"): + """ + Read cells from a yml file. + + Parameters + ---------- + cells_file_path : str or pathlib.Path + Path to yml file to read from. + ignore_type : bool, optional + Whether to ignore the type of cells - all will be assigned type + Cell.UNKNOWN. Currently only True is supported. + marker : str, optional + Yaml key under which cells information is stored. + + Returns + ------- + list of Cell + A list of the cells contained in the file. + """ if not ignore_type: raise NotImplementedError( "Parsing cell types is not yet implemented for YAML files. " @@ -97,6 +159,29 @@ def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"): def get_cells_dir(cells_file_path, cell_type=None): + """ + Read cells from a directory. Cells will be created based on the filenames + of files in the directory, one cell per file. + + Parameters + ---------- + cells_file_path : str or pathlib.Path + Path to directory containing Cells. Each file in the directory will + create one Cell, based on its filename. This filename must contain the + cell's x, y and z position E.g. 'Cell_z358_y564_x4056' + cell_type : int or str or None, optional + What type to assign all cells. Refer to the Cell documentation for + a full explanation of cell_type options. + + Returns + ------- + list of Cell + A list of the cells contained in the directory. + + See Also + -------- + Cell : For full description of cell_type parameter. + """ cells = [] for file in os.listdir(cells_file_path): # ignore hidden files @@ -112,6 +197,25 @@ def save_cells( indentation_str=" ", artifact_keep=True, ): + """ + Save cells to a file. + + Parameters + ---------- + cells : list of Cell + Cells to save to file. + xml_file_path : str + File path of xml file to save cells to. + save_csv : bool, optional + If True, will save cells to a csv file (rather than xml). This will use + the given xml_file_path with the .xml extension replaced with .csv. + indentation_str : str, optional + String to use for indent in xml file. + artifact_keep : bool, optional + Whether to keep cells with type Cell.ARTIFACT in the xml file. If True, + they are kept but their type is changed to Cell.UNKNOWN. If False, they + are removed. + """ # Assume always save xml file, and maybe save other formats cells_to_xml( cells, @@ -128,12 +232,29 @@ def save_cells( def cells_to_xml( cells, xml_file_path, indentation_str=" ", artifact_keep=True ): + """ + Save cells to an xml file. + + Parameters + ---------- + cells : list of Cell + Cells to save to file. + xml_file_path : str or pathlib.Path + Xml file path to write cells to. + indentation_str : str, optional + String to use for indent in xml file. + artifact_keep : bool, optional + Whether to keep cells with type Cell.ARTIFACT. If True, they + are kept but their type is changed to Cell.UNKNOWN. If False, they are + removed. + """ xml_data = make_xml(cells, indentation_str, artifact_keep=artifact_keep) with open(xml_file_path, "w") as xml_file: xml_file.write(str(xml_data, "UTF-8")) def cells_xml_to_df(xml_file_path): + """Read cells from xml file and convert to dataframe""" cells = get_cells(xml_file_path) return cells_to_dataframe(cells) @@ -142,12 +263,32 @@ def cells_to_dataframe(cells: List[Cell]) -> pd.DataFrame: return pd.DataFrame([c.to_dict() for c in cells]) -def cells_to_csv(cells, csv_file_path): +def cells_to_csv(cells: List[Cell], csv_file_path: Union[str, pathlib.Path]): + """Save cells to csv file""" df = cells_to_dataframe(cells) df.to_csv(csv_file_path) -def make_xml(cell_list, indentration_str, artifact_keep=True): +def make_xml(cell_list, indentation_str, artifact_keep=True): + """ + Convert a list of cells to xml. + + Parameters + ---------- + cell_list : list of Cell + Cells to convert to xml. + indentation_str : str + String to use for indent in xml file. + artifact_keep : bool, optional + Whether to keep cells with type Cell.ARTIFACT. If True, they + are kept but their type is changed to Cell.UNKNOWN. If False, they are + removed. + + Returns + ------- + bytes + Cell list as xml, ready to be saved into a file. + """ root = EtElement("CellCounter_Marker_File") image_properties = EtElement("Image_Properties") file_name = EtElement("Image_Filename") @@ -176,10 +317,13 @@ def make_xml(cell_list, indentration_str, artifact_keep=True): marker_data.append(mt) root.append(marker_data) - return pretty_xml(root, indentration_str) + return pretty_xml(root, indentation_str) def deal_with_artifacts(cell_dict, artifact_keep=True): + """Deal with cells with type Cell.ARTIFACT in a cell dict. If artifact_keep + is True, then convert all Cell.ARTIFACT to Cell.UNKNOWN. Otherwise, remove + all Cell.ARTIFACT""" # What do we want to do with the artifacts (if they exist)? # Might want to keep them for training if artifact_keep: @@ -196,6 +340,8 @@ def deal_with_artifacts(cell_dict, artifact_keep=True): def make_type_dict(cell_list): + """Convert a list of Cells to a dictionary with keys of cell type and + values of the list of corresponding cells.""" types = sorted(set([cell.type for cell in cell_list])) return { cell_type: [cell for cell in cell_list if cell.type == cell_type] @@ -204,12 +350,33 @@ def make_type_dict(cell_list): def pretty_xml(elem, indentation_str=" "): + """Convert xml element to pretty version, using given indentation string""" ugly_xml = ElementTree.tostring(elem, "utf-8") md_parsed = minidom.parseString(ugly_xml) return md_parsed.toprettyxml(indent=indentation_str, encoding="UTF-8") def find_relevant_tiffs(tiffs, cell_def): + """ + Find tiffs that match those read from cell_def. + + Parameters + ---------- + tiffs : list of str + List of paths to tiff files. Each tiff filename must contain the cell's + x, y and z position. + cell_def : str + Path to cells to read. Can be an .xml file, .yml file, or a directory. + If a directory is passed, each file in the directory will + create one Cell, based on its filename. Each filename must contain the + cell's x, y and z position. + + Returns + ------- + list of str + Filtered list of paths to tiff files, only including those that match + cells read from cell_def. + """ cells = [UntypedCell(tiff) for tiff in tiffs] if os.path.isdir(cell_def): relevant_cells = set( diff --git a/brainglobe_utils/IO/surfaces.py b/brainglobe_utils/IO/surfaces.py index f256908..9da3fea 100644 --- a/brainglobe_utils/IO/surfaces.py +++ b/brainglobe_utils/IO/surfaces.py @@ -2,11 +2,13 @@ def marching_cubes_to_obj(marching_cubes_out, output_file): """ Saves the output of skimage.measure.marching_cubes as an .obj file - :param marching_cubes_out: tuple - :param output_file: str - + Parameters + ---------- + marching_cubes_out : tuple of np.ndarray + Output from skimage.measure.marching_cubes. + output_file : str or pathlib.Path + Path of .obj file to write output into. """ - verts, faces, normals, _ = marching_cubes_out with open(output_file, "w") as f: for item in verts: diff --git a/brainglobe_utils/IO/yaml.py b/brainglobe_utils/IO/yaml.py index 61c4eab..be7f239 100644 --- a/brainglobe_utils/IO/yaml.py +++ b/brainglobe_utils/IO/yaml.py @@ -2,18 +2,58 @@ def read_yaml_section(yaml_file, section): + """ + Read section from yaml file. + + Parameters + ---------- + yaml_file : str or pathlib.Path + Path of .yml file to read. + section : str + Key of yaml section to read. + + Returns + ------- + The contents of the yaml section. + """ yaml_contents = open_yaml(yaml_file) contents = yaml_contents[section] return contents def open_yaml(yaml_file): + """ + Read the contents of a yaml file. + + Parameters + ---------- + yaml_file : str or pathlib.Path + Path of .yml file to read. + + Returns + ------- + dict + The contents of the yaml file. + """ with open(yaml_file) as f: yaml_contents = yaml.load(f, Loader=yaml.FullLoader) return yaml_contents def save_yaml(yaml_contents, output_file, default_flow_style=False): + """ + Save contents to a yaml file. + + Parameters + ---------- + yaml_contents : dict + Content to write to yaml file. + output_file : str or pathlib.Path + Path to .yml file to write to. + default_flow_style : bool, optional + If true, will use block style or flow style depending on whether there + are nested collections. If false, always uses block style. + """ with open(output_file, "w") as outfile: yaml.dump( yaml_contents, outfile, default_flow_style=default_flow_style diff --git a/tests/tests/test_IO/test_cell_io.py b/tests/tests/test_IO/test_cell_io.py index 02b543c..a0e15d4 100644 --- a/tests/tests/test_IO/test_cell_io.py +++ b/tests/tests/test_IO/test_cell_io.py @@ -1,8 +1,11 @@ +import os +from pathlib import Path + import pandas as pd import pytest from natsort import natsorted -from brainglobe_utils.cells.cells import Cell +from brainglobe_utils.cells.cells import Cell, UntypedCell, pos_from_file_name from brainglobe_utils.IO import cells as cell_io @@ -16,16 +19,6 @@ def yml_path(data_path): return str(data_path / "cells" / "cells.yml") -@pytest.fixture -def cubes_dir(data_path): - return str(data_path / "cube_extract" / "cubes") - - -@pytest.fixture -def roi_sorter_output_dir(data_path): - return str(data_path / "IO" / "roi_sorter_output") - - @pytest.fixture def type_vals(): return [1] * 65 @@ -55,82 +48,158 @@ def z_vals(data_path): @pytest.fixture -def cubes_cells(): +def cells_with_artifacts(): return [ - Cell([340, 1004, 15], 1), - Cell([340, 1004, 15], 1), - Cell([392, 522, 10], 1), + Cell([340, 1004, 15], -1), + Cell([340, 1004, 15], -1), Cell([392, 522, 10], 1), + Cell([392, 522, 10], 2), + Cell([392, 522, 10], 2), ] -@pytest.fixture -def roi_sorter_cells(): - return [ - Cell([4056, 564, 358], 1), - Cell([3989, 267, 570], 1), - Cell([4351, 735, 439], 1), - Cell([4395, 677, 367], 1), - ] - - -def test_get_cells( - cubes_cells, - roi_sorter_cells, - xml_path, - yml_path, - cubes_dir, - roi_sorter_output_dir, -): +def test_get_cells_xml(xml_path): + """ + Test that cells can be read from an xml file. + """ cells = cell_io.get_cells(xml_path) assert len(cells) == 65 assert Cell([2536, 523, 1286], 1) == cells[64] + +@pytest.mark.parametrize("cells_only", [True, False]) +def test_get_cells_xml_cells_only(cells_only, tmp_path, cells_with_artifacts): + """ + Test that cells not of type Cell.CELL (type 2) are correctly removed or + kept when reading from xml with the cells_only option. + """ + tmp_cells_out_path = tmp_path / "cells.xml" + cell_io.cells_to_xml( + cells_with_artifacts, tmp_cells_out_path, artifact_keep=True + ) + cells = cell_io.get_cells(str(tmp_cells_out_path), cells_only=cells_only) + + if cells_only: + assert len(cells) == 2 + for cell in cells: + assert cell.type == 2 + else: + assert len(cells) == len(cells_with_artifacts) + + +def test_get_cells_yml(yml_path): + """ + Test that cells can be read from a yml file. + """ cells = cell_io.get_cells(yml_path) assert len(cells) == 250 assert Cell([9170, 2537, 311], 1) == cells[194] - cells = cell_io.get_cells(cubes_dir) - assert len(cells) == 4 - assert natsorted(cubes_cells) == natsorted(cells) - cells = cell_io.get_cells(roi_sorter_output_dir) +@pytest.mark.parametrize( + "cells_dir, expected_cells", + [ + ( + Path("cube_extract") / "cubes", + [ + Cell([340, 1004, 15], 1), + Cell([340, 1004, 15], 1), + Cell([392, 522, 10], 1), + Cell([392, 522, 10], 1), + ], + ), + ( + Path("IO") / "roi_sorter_output", + [ + Cell([4056, 564, 358], 1), + Cell([3989, 267, 570], 1), + Cell([4351, 735, 439], 1), + Cell([4395, 677, 367], 1), + ], + ), + ], +) +def test_get_cells_dir(data_path, cells_dir, expected_cells): + """ + Test that cells can be read from a directory. + """ + cells = cell_io.get_cells(str(data_path / cells_dir)) assert len(cells) == 4 - assert natsorted(roi_sorter_cells) == natsorted(cells) + assert natsorted(cells) == natsorted(expected_cells) - with pytest.raises(NotImplementedError): - assert cell_io.get_cells("misc_format.abc") +def test_get_cells_error(tmp_path): + """ + Test that get_cells throws an error for unknown file formats, and + directories containing files that can't be read. + """ + unknown_file = tmp_path / "misc_format.abc" + unknown_file.touch() -def assert_cells_csv(csv_path, x_vals, y_vals, z_vals, type_vals): - cells_df = pd.read_csv(csv_path) - assert len(cells_df) == 65 - assert cells_df.type.tolist() == type_vals - assert cells_df.x.tolist() == x_vals - assert cells_df.y.tolist() == y_vals - assert cells_df.z.tolist() == z_vals + with pytest.raises(NotImplementedError): + # raise for unknown file format + assert cell_io.get_cells(str(unknown_file)) + + with pytest.raises(NotImplementedError): + # raise for directory with files that can't be read + cell_io.get_cells(str(tmp_path)) def test_save_cells(tmp_path, xml_path, x_vals, y_vals, z_vals, type_vals): + """ + Test that cells can be written to a csv file via the save_csv option of + cell_io.save_cells. + """ cells = cell_io.get_cells(xml_path) tmp_cells_out_path = tmp_path / "cells.xml" cell_io.save_cells(cells, tmp_cells_out_path, save_csv=True) assert cells == cell_io.get_cells(str(tmp_cells_out_path)) - tmp_cells_out_path = tmp_path / "cells.csv" - cell_io.cells_to_csv(cells, tmp_cells_out_path) - assert_cells_csv(tmp_cells_out_path, x_vals, y_vals, z_vals, type_vals) - def test_cells_to_xml(tmp_path, xml_path): + """ + Test that cells can be written to an xml file. + """ cells = cell_io.get_cells(xml_path) tmp_cells_out_path = tmp_path / "cells.xml" cell_io.cells_to_xml(cells, tmp_cells_out_path) assert cells == cell_io.get_cells(str(tmp_cells_out_path)) -def test_cells_xml_to_dataframe(xml_path, x_vals, y_vals, z_vals, type_vals): - cells_df = cell_io.cells_xml_to_df(xml_path) +@pytest.mark.parametrize("artifact_keep", [True, False]) +def test_cells_to_xml_artifacts_keep( + cells_with_artifacts, tmp_path, artifact_keep +): + """ + Test that artifact cells (type -1) are correctly removed or kept when + writing to xml with the artifact_keep option. + """ + tmp_cells_out_path = tmp_path / "cells.xml" + cell_io.cells_to_xml( + cells_with_artifacts, tmp_cells_out_path, artifact_keep=artifact_keep + ) + written_cells = cell_io.get_cells(str(tmp_cells_out_path)) + + if artifact_keep: + # Check that artifact cells (type -1) have been kept, and their + # type changed to 1 + assert len(written_cells) == len(cells_with_artifacts) + for i, cell in enumerate(cells_with_artifacts): + if cell.type == -1: + assert written_cells[i] == 1 + else: + assert written_cells[i].type == cell.type + else: + # Check artifact cells (type -1) have been removed + assert len(written_cells) == 3 + assert written_cells == cells_with_artifacts[2:] + + +def assert_cells_df(cells_df, x_vals, y_vals, z_vals, type_vals): + """ + Check that there are the correct number of cells in the Dataframe, and + that each has the correct type and position. + """ assert len(cells_df) == 65 assert cells_df.type.tolist() == type_vals assert cells_df.x.tolist() == x_vals @@ -138,8 +207,51 @@ def test_cells_xml_to_dataframe(xml_path, x_vals, y_vals, z_vals, type_vals): assert cells_df.z.tolist() == z_vals +def test_cells_xml_to_dataframe(xml_path, x_vals, y_vals, z_vals, type_vals): + """ + Test that cells can be read from an xml file, and correctly converted + into a DataFrame. + """ + cells_df = cell_io.cells_xml_to_df(xml_path) + assert_cells_df(cells_df, x_vals, y_vals, z_vals, type_vals) + + def test_cells_to_csv(tmp_path, xml_path, x_vals, y_vals, z_vals, type_vals): + """ + Test that cells can be written to a csv file. + """ cells = cell_io.get_cells(xml_path) tmp_cells_out_path = tmp_path / "cells.csv" cell_io.cells_to_csv(cells, tmp_cells_out_path) - assert_cells_csv(tmp_cells_out_path, x_vals, y_vals, z_vals, type_vals) + cells_df = pd.read_csv(tmp_cells_out_path) + assert_cells_df(cells_df, x_vals, y_vals, z_vals, type_vals) + + +@pytest.mark.parametrize( + "cell_def_path", ["", "cells.xml"], ids=["directory", "xml"] +) +def test_find_relevant_tiffs(data_path, tmp_path, cell_def_path): + """ + Test that tiff paths can be selected that match cells from an xml file + or directory. + """ + tiff_dir = data_path / "cube_extract" / "cubes" + + # Write a cell_def xml file or directory that only contains 1 of the + # cells from tiff_dir + cell_def_path = tmp_path / cell_def_path + chosen_tiff = os.listdir(tiff_dir)[0] + cell = UntypedCell(chosen_tiff) + if cell_def_path.suffix == ".xml": + cell_io.cells_to_xml([cell], cell_def_path) + else: + cell_path = cell_def_path / f"x{cell.x}_y{cell.y}_z{cell.z}.tif" + cell_path.touch() + + selected = cell_io.find_relevant_tiffs( + os.listdir(tiff_dir), str(cell_def_path) + ) + for selected_path in selected: + assert pos_from_file_name(selected_path) == pos_from_file_name( + chosen_tiff + )