diff --git a/brainglobe_utils/IO/cells.py b/brainglobe_utils/IO/cells.py
index 68592f8..b39ad2c 100644
--- a/brainglobe_utils/IO/cells.py
+++ b/brainglobe_utils/IO/cells.py
@@ -7,7 +7,8 @@
 
 import logging
 import os
-from typing import List, Optional
+import pathlib
+from typing import List, Optional, Union
 from xml.dom import minidom
 from xml.etree import ElementTree
 from xml.etree.ElementTree import Element as EtElement
@@ -29,6 +30,31 @@ def get_cells(
     cells_only: bool = False,
     cell_type: Optional[int] = None,
 ):
+    """
+    Read cells from a file or directory.
+
+    Parameters
+    ----------
+    cells_file_path : str
+        Path to cells file to read. Can be .xml, .yml, or a directory.
+    cells_only : bool, optional
+        Only relevant for .xml files. If True, will only read Cells with
+        type Cell.CELL i.e. exclude all Cell.ARTIFACT, Cell.UNKNOWN, and
+        Cell.NO_CELL.
+    cell_type : int, optional
+        Only relevant for directories. This determines the type to assign
+        to all read cells. Refer to the Cell documentation for
+        a full explanation of cell_type options.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the file or directory.
+
+    See Also
+    --------
+    Cell : For full description of cell_type parameter.
+    """
     if cells_file_path.endswith(".xml"):
         return get_cells_xml(cells_file_path, cells_only=cells_only)
     elif cells_file_path.endswith(".yml"):
@@ -46,6 +72,8 @@ def get_cells(
 
 
 def raise_cell_read_error(cells_file_path):
+    """Raise a NotImplementedError, with an informative message including the
+    cells file path"""
     logging.error(
         "File format of: {} is not supported or contains errors. Please "
         "supply an xml file, or a directory of files with positions in the "
@@ -61,6 +89,22 @@ def raise_cell_read_error(cells_file_path):
 
 
 def get_cells_xml(xml_file_path, cells_only=False):
+    """
+    Read cells from an xml file.
+
+    Parameters
+    ----------
+    xml_file_path : str or pathlib.Path
+        Path to xml file to read from.
+    cells_only : bool, optional
+        Whether to only read Cells with type Cell.CELL i.e. exclude all
+        Cell.ARTIFACT, Cell.UNKNOWN, and Cell.NO_CELL.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the file.
+    """
     with open(xml_file_path, "r") as xml_file:
         root = ElementTree.parse(xml_file).getroot()
         cells = []
@@ -78,6 +122,24 @@ def get_cells_xml(xml_file_path, cells_only=False):
 
 
 def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):
+    """
+    Read cells from a yml file.
+
+    Parameters
+    ----------
+    cells_file_path : str or pathlib.Path
+        Path to yml file to read from.
+    ignore_type : bool, optional
+        Whether to ignore the type of cells - all will be assigned type
+        Cell.UNKNOWN. Currently only True is supported.
+    marker : str, optional
+        Yaml key under which cells information is stored.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the file.
+    """
     if not ignore_type:
         raise NotImplementedError(
             "Parsing cell types is not yet implemented for YAML files. "
@@ -97,6 +159,29 @@ def get_cells_yml(cells_file_path, ignore_type=False, marker="markers"):
 
 
 def get_cells_dir(cells_file_path, cell_type=None):
+    """
+    Read cells from a directory. Cells will be created based on the filenames
+    of files in the directory, one cell per file.
+
+    Parameters
+    ----------
+    cells_file_path : str or pathlib.Path
+        Path to directory containing Cells. Each file in the directory will
+        create one Cell, based on its filename. This filename must contain the
+        cell's x, y and z position E.g. 'Cell_z358_y564_x4056'
+    cell_type : int or str or None, optional
+        What type to assign all cells. Refer to the Cell documentation for
+        a full explanation of cell_type options.
+
+    Returns
+    -------
+    list of Cell
+        A list of the cells contained in the directory.
+
+    See Also
+    --------
+    Cell : For full description of cell_type parameter.
+    """
     cells = []
     for file in os.listdir(cells_file_path):
         # ignore hidden files
@@ -112,6 +197,25 @@ def save_cells(
     indentation_str="  ",
     artifact_keep=True,
 ):
+    """
+    Save cells to a file.
+
+    Parameters
+    ----------
+    cells : list of Cell
+        Cells to save to file.
+    xml_file_path : str
+        File path of xml file to save cells to.
+    save_csv : bool, optional
+        If True, will save cells to a csv file (rather than xml). This will use
+        the given xml_file_path with the .xml extension replaced with .csv.
+    indentation_str : str, optional
+        String to use for indent in xml file.
+    artifact_keep : bool, optional
+        Whether to keep cells with type Cell.ARTIFACT in the xml file. If True,
+        they are kept but their type is changed to Cell.UNKNOWN. If False, they
+        are removed.
+    """
     # Assume always save xml file, and maybe save other formats
     cells_to_xml(
         cells,
@@ -128,12 +232,29 @@ def save_cells(
 def cells_to_xml(
     cells, xml_file_path, indentation_str="  ", artifact_keep=True
 ):
+    """
+    Save cells to an xml file.
+
+    Parameters
+    ----------
+    cells : list of Cell
+        Cells to save to file.
+    xml_file_path : str or pathlib.Path
+        Xml file path to write cells to.
+    indentation_str : str, optional
+        String to use for indent in xml file.
+    artifact_keep : bool, optional
+        Whether to keep cells with type Cell.ARTIFACT. If True, they
+        are kept but their type is changed to Cell.UNKNOWN. If False, they are
+        removed.
+    """
     xml_data = make_xml(cells, indentation_str, artifact_keep=artifact_keep)
     with open(xml_file_path, "w") as xml_file:
         xml_file.write(str(xml_data, "UTF-8"))
 
 
 def cells_xml_to_df(xml_file_path):
+    """Read cells from xml file and convert to dataframe"""
     cells = get_cells(xml_file_path)
     return cells_to_dataframe(cells)
 
@@ -142,12 +263,32 @@ def cells_to_dataframe(cells: List[Cell]) -> pd.DataFrame:
     return pd.DataFrame([c.to_dict() for c in cells])
 
 
-def cells_to_csv(cells, csv_file_path):
+def cells_to_csv(cells: List[Cell], csv_file_path: Union[str, pathlib.Path]):
+    """Save cells to csv file"""
     df = cells_to_dataframe(cells)
     df.to_csv(csv_file_path)
 
 
-def make_xml(cell_list, indentration_str, artifact_keep=True):
+def make_xml(cell_list, indentation_str, artifact_keep=True):
+    """
+    Convert a list of cells to xml.
+
+    Parameters
+    ----------
+    cell_list : list of Cell
+        Cells to convert to xml.
+    indentation_str : str
+        String to use for indent in xml file.
+    artifact_keep : bool, optional
+        Whether to keep cells with type Cell.ARTIFACT. If True, they
+        are kept but their type is changed to Cell.UNKNOWN. If False, they are
+        removed.
+
+    Returns
+    -------
+    bytes
+        Cell list as xml, ready to be saved into a file.
+    """
     root = EtElement("CellCounter_Marker_File")
     image_properties = EtElement("Image_Properties")
     file_name = EtElement("Image_Filename")
@@ -176,10 +317,13 @@ def make_xml(cell_list, indentration_str, artifact_keep=True):
         marker_data.append(mt)
     root.append(marker_data)
 
-    return pretty_xml(root, indentration_str)
+    return pretty_xml(root, indentation_str)
 
 
 def deal_with_artifacts(cell_dict, artifact_keep=True):
+    """Deal with cells with type Cell.ARTIFACT in a cell dict. If artifact_keep
+    is True, then convert all Cell.ARTIFACT to Cell.UNKNOWN. Otherwise, remove
+    all Cell.ARTIFACT"""
     # What do we want to do with the artifacts (if they exist)?
     # Might want to keep them for training
     if artifact_keep:
@@ -196,6 +340,8 @@ def deal_with_artifacts(cell_dict, artifact_keep=True):
 
 
 def make_type_dict(cell_list):
+    """Convert a list of Cells to a dictionary with keys of cell type and
+    values of the list of corresponding cells."""
     types = sorted(set([cell.type for cell in cell_list]))
     return {
         cell_type: [cell for cell in cell_list if cell.type == cell_type]
@@ -204,12 +350,33 @@ def make_type_dict(cell_list):
 
 
 def pretty_xml(elem, indentation_str="  "):
+    """Convert xml element to pretty version, using given indentation string"""
     ugly_xml = ElementTree.tostring(elem, "utf-8")
     md_parsed = minidom.parseString(ugly_xml)
     return md_parsed.toprettyxml(indent=indentation_str, encoding="UTF-8")
 
 
 def find_relevant_tiffs(tiffs, cell_def):
+    """
+    Find tiffs that match those read from cell_def.
+
+    Parameters
+    ----------
+    tiffs : list of str
+        List of paths to tiff files. Each tiff filename must contain the cell's
+        x, y and z position.
+    cell_def : str
+        Path to cells to read. Can be an .xml file, .yml file, or a directory.
+        If a directory is passed, each file in the directory will
+        create one Cell, based on its filename. Each filename must contain the
+        cell's x, y and z position.
+
+    Returns
+    -------
+    list of str
+        Filtered list of paths to tiff files, only including those that match
+        cells read from cell_def.
+    """
     cells = [UntypedCell(tiff) for tiff in tiffs]
     if os.path.isdir(cell_def):
         relevant_cells = set(
diff --git a/brainglobe_utils/IO/surfaces.py b/brainglobe_utils/IO/surfaces.py
index f256908..9da3fea 100644
--- a/brainglobe_utils/IO/surfaces.py
+++ b/brainglobe_utils/IO/surfaces.py
@@ -2,11 +2,13 @@ def marching_cubes_to_obj(marching_cubes_out, output_file):
     """
     Saves the output of skimage.measure.marching_cubes as an .obj file
 
-    :param marching_cubes_out: tuple
-    :param output_file: str
-
+    Parameters
+    ----------
+    marching_cubes_out : tuple of np.ndarray
+        Output from skimage.measure.marching_cubes.
+    output_file : str or pathlib.Path
+        Path of .obj file to write output into.
     """
-
     verts, faces, normals, _ = marching_cubes_out
     with open(output_file, "w") as f:
         for item in verts:
diff --git a/brainglobe_utils/IO/yaml.py b/brainglobe_utils/IO/yaml.py
index 61c4eab..be7f239 100644
--- a/brainglobe_utils/IO/yaml.py
+++ b/brainglobe_utils/IO/yaml.py
@@ -2,18 +2,58 @@
 
 
 def read_yaml_section(yaml_file, section):
+    """
+    Read section from yaml file.
+
+    Parameters
+    ----------
+    yaml_file : str or pathlib.Path
+        Path of .yml file to read.
+    section : str
+        Key of yaml section to read.
+
+    Returns
+    -------
+    The contents of the yaml section.
+    """
     yaml_contents = open_yaml(yaml_file)
     contents = yaml_contents[section]
     return contents
 
 
 def open_yaml(yaml_file):
+    """
+    Read the contents of a yaml file.
+
+    Parameters
+    ----------
+    yaml_file : str or pathlib.Path
+        Path of .yml file to read.
+
+    Returns
+    -------
+    dict
+        The contents of the yaml file.
+    """
     with open(yaml_file) as f:
         yaml_contents = yaml.load(f, Loader=yaml.FullLoader)
     return yaml_contents
 
 
 def save_yaml(yaml_contents, output_file, default_flow_style=False):
+    """
+    Save contents to a yaml file.
+
+    Parameters
+    ----------
+    yaml_contents : dict
+        Content to write to yaml file.
+    output_file : str or pathlib.Path
+        Path to .yml file to write to.
+    default_flow_style : bool, optional
+        If true, will use block style or flow style depending on whether there
+        are nested collections. If false, always uses block style.
+    """
     with open(output_file, "w") as outfile:
         yaml.dump(
             yaml_contents, outfile, default_flow_style=default_flow_style
diff --git a/tests/tests/test_IO/test_cell_io.py b/tests/tests/test_IO/test_cell_io.py
index 02b543c..a0e15d4 100644
--- a/tests/tests/test_IO/test_cell_io.py
+++ b/tests/tests/test_IO/test_cell_io.py
@@ -1,8 +1,11 @@
+import os
+from pathlib import Path
+
 import pandas as pd
 import pytest
 from natsort import natsorted
 
-from brainglobe_utils.cells.cells import Cell
+from brainglobe_utils.cells.cells import Cell, UntypedCell, pos_from_file_name
 from brainglobe_utils.IO import cells as cell_io
 
 
@@ -16,16 +19,6 @@ def yml_path(data_path):
     return str(data_path / "cells" / "cells.yml")
 
 
-@pytest.fixture
-def cubes_dir(data_path):
-    return str(data_path / "cube_extract" / "cubes")
-
-
-@pytest.fixture
-def roi_sorter_output_dir(data_path):
-    return str(data_path / "IO" / "roi_sorter_output")
-
-
 @pytest.fixture
 def type_vals():
     return [1] * 65
@@ -55,82 +48,158 @@ def z_vals(data_path):
 
 
 @pytest.fixture
-def cubes_cells():
+def cells_with_artifacts():
     return [
-        Cell([340, 1004, 15], 1),
-        Cell([340, 1004, 15], 1),
-        Cell([392, 522, 10], 1),
+        Cell([340, 1004, 15], -1),
+        Cell([340, 1004, 15], -1),
         Cell([392, 522, 10], 1),
+        Cell([392, 522, 10], 2),
+        Cell([392, 522, 10], 2),
     ]
 
 
-@pytest.fixture
-def roi_sorter_cells():
-    return [
-        Cell([4056, 564, 358], 1),
-        Cell([3989, 267, 570], 1),
-        Cell([4351, 735, 439], 1),
-        Cell([4395, 677, 367], 1),
-    ]
-
-
-def test_get_cells(
-    cubes_cells,
-    roi_sorter_cells,
-    xml_path,
-    yml_path,
-    cubes_dir,
-    roi_sorter_output_dir,
-):
+def test_get_cells_xml(xml_path):
+    """
+    Test that cells can be read from an xml file.
+    """
     cells = cell_io.get_cells(xml_path)
     assert len(cells) == 65
     assert Cell([2536, 523, 1286], 1) == cells[64]
 
+
+@pytest.mark.parametrize("cells_only", [True, False])
+def test_get_cells_xml_cells_only(cells_only, tmp_path, cells_with_artifacts):
+    """
+    Test that cells not of type Cell.CELL (type 2) are correctly removed or
+    kept when reading from xml with the cells_only option.
+    """
+    tmp_cells_out_path = tmp_path / "cells.xml"
+    cell_io.cells_to_xml(
+        cells_with_artifacts, tmp_cells_out_path, artifact_keep=True
+    )
+    cells = cell_io.get_cells(str(tmp_cells_out_path), cells_only=cells_only)
+
+    if cells_only:
+        assert len(cells) == 2
+        for cell in cells:
+            assert cell.type == 2
+    else:
+        assert len(cells) == len(cells_with_artifacts)
+
+
+def test_get_cells_yml(yml_path):
+    """
+    Test that cells can be read from a yml file.
+    """
     cells = cell_io.get_cells(yml_path)
     assert len(cells) == 250
     assert Cell([9170, 2537, 311], 1) == cells[194]
 
-    cells = cell_io.get_cells(cubes_dir)
-    assert len(cells) == 4
-    assert natsorted(cubes_cells) == natsorted(cells)
 
-    cells = cell_io.get_cells(roi_sorter_output_dir)
+@pytest.mark.parametrize(
+    "cells_dir, expected_cells",
+    [
+        (
+            Path("cube_extract") / "cubes",
+            [
+                Cell([340, 1004, 15], 1),
+                Cell([340, 1004, 15], 1),
+                Cell([392, 522, 10], 1),
+                Cell([392, 522, 10], 1),
+            ],
+        ),
+        (
+            Path("IO") / "roi_sorter_output",
+            [
+                Cell([4056, 564, 358], 1),
+                Cell([3989, 267, 570], 1),
+                Cell([4351, 735, 439], 1),
+                Cell([4395, 677, 367], 1),
+            ],
+        ),
+    ],
+)
+def test_get_cells_dir(data_path, cells_dir, expected_cells):
+    """
+    Test that cells can be read from a directory.
+    """
+    cells = cell_io.get_cells(str(data_path / cells_dir))
     assert len(cells) == 4
-    assert natsorted(roi_sorter_cells) == natsorted(cells)
+    assert natsorted(cells) == natsorted(expected_cells)
 
-    with pytest.raises(NotImplementedError):
-        assert cell_io.get_cells("misc_format.abc")
 
+def test_get_cells_error(tmp_path):
+    """
+    Test that get_cells throws an error for unknown file formats, and
+    directories containing files that can't be read.
+    """
+    unknown_file = tmp_path / "misc_format.abc"
+    unknown_file.touch()
 
-def assert_cells_csv(csv_path, x_vals, y_vals, z_vals, type_vals):
-    cells_df = pd.read_csv(csv_path)
-    assert len(cells_df) == 65
-    assert cells_df.type.tolist() == type_vals
-    assert cells_df.x.tolist() == x_vals
-    assert cells_df.y.tolist() == y_vals
-    assert cells_df.z.tolist() == z_vals
+    with pytest.raises(NotImplementedError):
+        # raise for unknown file format
+        assert cell_io.get_cells(str(unknown_file))
+
+    with pytest.raises(NotImplementedError):
+        # raise for directory with files that can't be read
+        cell_io.get_cells(str(tmp_path))
 
 
 def test_save_cells(tmp_path, xml_path, x_vals, y_vals, z_vals, type_vals):
+    """
+    Test that cells can be written to a csv file via the save_csv option of
+    cell_io.save_cells.
+    """
     cells = cell_io.get_cells(xml_path)
     tmp_cells_out_path = tmp_path / "cells.xml"
     cell_io.save_cells(cells, tmp_cells_out_path, save_csv=True)
     assert cells == cell_io.get_cells(str(tmp_cells_out_path))
 
-    tmp_cells_out_path = tmp_path / "cells.csv"
-    cell_io.cells_to_csv(cells, tmp_cells_out_path)
-    assert_cells_csv(tmp_cells_out_path, x_vals, y_vals, z_vals, type_vals)
-
 
 def test_cells_to_xml(tmp_path, xml_path):
+    """
+    Test that cells can be written to an xml file.
+    """
     cells = cell_io.get_cells(xml_path)
     tmp_cells_out_path = tmp_path / "cells.xml"
     cell_io.cells_to_xml(cells, tmp_cells_out_path)
     assert cells == cell_io.get_cells(str(tmp_cells_out_path))
 
 
-def test_cells_xml_to_dataframe(xml_path, x_vals, y_vals, z_vals, type_vals):
-    cells_df = cell_io.cells_xml_to_df(xml_path)
+@pytest.mark.parametrize("artifact_keep", [True, False])
+def test_cells_to_xml_artifacts_keep(
+    cells_with_artifacts, tmp_path, artifact_keep
+):
+    """
+    Test that artifact cells (type -1) are correctly removed or kept when
+    writing to xml with the artifact_keep option.
+    """
+    tmp_cells_out_path = tmp_path / "cells.xml"
+    cell_io.cells_to_xml(
+        cells_with_artifacts, tmp_cells_out_path, artifact_keep=artifact_keep
+    )
+    written_cells = cell_io.get_cells(str(tmp_cells_out_path))
+
+    if artifact_keep:
+        # Check that artifact cells (type -1) have been kept, and their
+        # type changed to 1
+        assert len(written_cells) == len(cells_with_artifacts)
+        for i, cell in enumerate(cells_with_artifacts):
+            if cell.type == -1:
+                assert written_cells[i] == 1
+            else:
+                assert written_cells[i].type == cell.type
+    else:
+        # Check artifact cells (type -1) have been removed
+        assert len(written_cells) == 3
+        assert written_cells == cells_with_artifacts[2:]
+
+
+def assert_cells_df(cells_df, x_vals, y_vals, z_vals, type_vals):
+    """
+    Check that there are the correct number of cells in the Dataframe, and
+    that each has the correct type and position.
+    """
     assert len(cells_df) == 65
     assert cells_df.type.tolist() == type_vals
     assert cells_df.x.tolist() == x_vals
@@ -138,8 +207,51 @@ def test_cells_xml_to_dataframe(xml_path, x_vals, y_vals, z_vals, type_vals):
     assert cells_df.z.tolist() == z_vals
 
 
+def test_cells_xml_to_dataframe(xml_path, x_vals, y_vals, z_vals, type_vals):
+    """
+    Test that cells can be read from an xml file, and correctly converted
+    into a DataFrame.
+    """
+    cells_df = cell_io.cells_xml_to_df(xml_path)
+    assert_cells_df(cells_df, x_vals, y_vals, z_vals, type_vals)
+
+
 def test_cells_to_csv(tmp_path, xml_path, x_vals, y_vals, z_vals, type_vals):
+    """
+    Test that cells can be written to a csv file.
+    """
     cells = cell_io.get_cells(xml_path)
     tmp_cells_out_path = tmp_path / "cells.csv"
     cell_io.cells_to_csv(cells, tmp_cells_out_path)
-    assert_cells_csv(tmp_cells_out_path, x_vals, y_vals, z_vals, type_vals)
+    cells_df = pd.read_csv(tmp_cells_out_path)
+    assert_cells_df(cells_df, x_vals, y_vals, z_vals, type_vals)
+
+
+@pytest.mark.parametrize(
+    "cell_def_path", ["", "cells.xml"], ids=["directory", "xml"]
+)
+def test_find_relevant_tiffs(data_path, tmp_path, cell_def_path):
+    """
+    Test that tiff paths can be selected that match cells from an xml file
+    or directory.
+    """
+    tiff_dir = data_path / "cube_extract" / "cubes"
+
+    # Write a cell_def xml file or directory that only contains 1 of the
+    # cells from tiff_dir
+    cell_def_path = tmp_path / cell_def_path
+    chosen_tiff = os.listdir(tiff_dir)[0]
+    cell = UntypedCell(chosen_tiff)
+    if cell_def_path.suffix == ".xml":
+        cell_io.cells_to_xml([cell], cell_def_path)
+    else:
+        cell_path = cell_def_path / f"x{cell.x}_y{cell.y}_z{cell.z}.tif"
+        cell_path.touch()
+
+    selected = cell_io.find_relevant_tiffs(
+        os.listdir(tiff_dir), str(cell_def_path)
+    )
+    for selected_path in selected:
+        assert pos_from_file_name(selected_path) == pos_from_file_name(
+            chosen_tiff
+        )