|
| 1 | +import os |
| 2 | + |
| 3 | +import numpy as np |
| 4 | +import pandas as pd |
| 5 | + |
| 6 | +from fafbseg import flywire |
| 7 | + |
| 8 | +from .core import DataSet |
| 9 | +from .utils import ( |
| 10 | + _add_types, |
| 11 | + _get_table, |
| 12 | + _get_flywire_types, |
| 13 | + _get_fw_sides, |
| 14 | + _is_int |
| 15 | +) |
| 16 | + |
# Names exported by a star-import of this module.
__all__ = ["FlyWire"]

# Module-level placeholders, both initialised to None. Nothing in the visible
# code reads or assigns them -- presumably table caches; TODO confirm.
itable = otable = None
| 21 | + |
| 22 | + |
class FlyWire(DataSet):
    """FlyWire dataset.

    Parameters
    ----------
    label :         str
                    A label.
    upstream :      bool
                    Whether to use upstream connectivity.
    downstream :    bool
                    Whether to use downstream connectivity.
    use_types :     bool
                    Whether to group by type. This will use `hemibrain_type`
                    first and where that doesn't exist fall back to
                    `cell_type`.
    use_sides :     bool | 'relative'
                    Only relevant if `use_types=True`:
                     - if `True`, will split cell types into
                       left/right/center
                     - if `relative`, will label cell types as `ipsi` or
                       `contra` depending on the side of the connected neuron
    file :          str, optional
                    Filepath to one of the connectivity dumps.

    Raises
    ------
    ValueError
                    If `use_sides` is not one of `True`, `False` or
                    `"relative"`, or if `file` is given but is not an
                    existing file.

    """

    def __init__(
        self,
        label="FlyWire",
        upstream=True,
        downstream=True,
        use_types=True,
        use_sides=False,
        file=None,
    ):
        # Raise explicitly instead of using `assert`: assertions are stripped
        # when Python runs with `-O`, which would silently skip validation.
        if use_sides not in (True, False, "relative"):
            raise ValueError(
                f'`use_sides` must be True, False or "relative", got {use_sides!r}'
            )
        if file and not os.path.isfile(file):
            raise ValueError(f"`file` is not an existing file: {file}")

        super().__init__(label=label)
        self.file = file
        self.upstream = upstream
        self.downstream = downstream
        self.use_types = use_types
        self.use_sides = use_sides

    @staticmethod
    def _ids_for_type(table, x, exact, left, right):
        """Return root IDs (list of int) in `table` whose type matches `x`.

        `table` must provide `cell_type`, `hemibrain_type`, `side` and
        `root_id` columns. With `exact=False`, `x` is matched via
        `Series.str.contains`.
        """
        if exact:
            filt = (table.cell_type == x) | (table.hemibrain_type == x)
        else:
            filt = table.cell_type.str.contains(
                x, na=False
            ) | table.hemibrain_type.str.contains(x, na=False)

        # Optionally drop neurons from either side
        if not left:
            filt = filt & (table.side != "left")
        if not right:
            filt = filt & (table.side != "right")

        return table.loc[filt, "root_id"].values.astype(np.int64).tolist()

    @staticmethod
    def _mat_from_filename(file):
        """Parse the materialization version from a synapse dump filename.

        Expects the basename to end in `_<version>.<ext>`,
        e.g. "syn_proof_[...]_587.feather" -> 587.
        """
        # Work on the basename so underscores in directory names don't matter
        token = os.path.basename(file).split("_")[-1].split(".")[0]
        try:
            return int(token)
        except ValueError as err:
            raise ValueError(
                f"Unable to extract materialization version from filename: {file}"
            ) from err

    def _add_neurons(self, x, exact=True, left=True, right=True):
        """Turn `x` into FlyWire root IDs.

        Parameters
        ----------
        x :         None | int | str | iterable thereof
                    Root ID(s) and/or cell type(s). If `exact=False`, a string
                    containing commas is split into multiple queries.
        exact :     bool
                    If True, types must match exactly. If False, types are
                    matched via `Series.str.contains`.
        left/right : bool
                    Set to False to exclude neurons of the respective side.
                    Only applied to type queries, not to explicit IDs.

        Returns
        -------
        np.ndarray
                    Sorted, unique root IDs (int64).

        """
        if x is None:
            return np.array([], dtype=np.int64)

        if not exact and isinstance(x, str) and "," in x:
            x = x.split(",")

        if isinstance(x, (list, np.ndarray, set, tuple)):
            # Resolve each entry recursively, then combine once instead of
            # re-allocating the array on every iteration
            parts = [
                self._add_neurons(t, exact=exact, left=left, right=right) for t in x
            ]
            if not parts:
                return np.array([], dtype=np.int64)
            return np.unique(np.concatenate(parts).astype(np.int64))

        if _is_int(x):
            return np.array([int(x)], dtype=np.int64)

        # Anything else is treated as a cell type and looked up in both the
        # "info" and the "optic" tables
        ids = []
        for which in ("info", "optic"):
            table = _get_table(which=which)
            ids += self._ids_for_type(table, x, exact=exact, left=left, right=right)

        return np.unique(np.array(ids, dtype=np.int64))

    def get_labels(self, x):
        """Fetch labels for given IDs.

        Parameters
        ----------
        x :         int | iterable of int | None
                    Root ID(s). `None` is treated as empty input.

        Returns
        -------
        np.ndarray
                    One entry per ID: the type if one is known, otherwise
                    the ID itself.

        """
        if x is None:
            x = []
        elif isinstance(x, (set, frozenset)):
            # Sets are not understood by `np.asarray`
            x = list(x)
        # `atleast_1d` makes single IDs work instead of silently dropping them
        x = np.atleast_1d(np.asarray(x)).astype(np.int64)

        # Nothing to look up
        if not len(x):
            return np.array([])

        # Find a matching materialization version
        mat = flywire.utils.find_mat_version(x)

        # Fetch all types for this version
        types = _get_flywire_types(mat, add_side=False)

        return np.array([types.get(i, i) for i in x])

    def compile(self):
        """Compile edges.

        Collects up- and/or downstream connectivity for `self.neurons` -
        either from the synapse dump in `self.file` or via
        `flywire.fetch_connectivity` - and stores the result as
        `self.edges_`, a DataFrame with `pre`, `post` and `weight` columns.

        Raises
        ------
        ValueError
                    If both `upstream` and `downstream` are False, if the
                    materialization version can't be parsed from the
                    filename, or if some root IDs did not exist at the time
                    of the synapse dump.

        """
        # Fail early, before reading files or hitting the remote service
        if not (self.upstream or self.downstream):
            raise ValueError("`upstream` and `downstream` must not both be False")

        # Make sure we're working on integers. Use int64 explicitly: plain
        # `int` can be 32 bit on some platforms, too small for root IDs
        x = np.asarray(self.neurons).astype(np.int64)

        us, ds = None, None
        us_mat, ds_mat = None, None
        if self.file:
            # Extract mat version from filename e.g. "syn_proof_[...]_587.feather"
            us_mat = ds_mat = self._mat_from_filename(self.file)

            # Check if root IDs existed at the time of the synapse dump
            il = flywire.is_latest_root(x, timestamp=f"mat_{us_mat}")
            if np.any(~il):
                raise ValueError(
                    "Some root IDs did not exist at the time of the "
                    f"synapse dump (mat {us_mat}): {x[~il]}"
                )

            cn = pd.read_feather(self.file).rename(
                columns={
                    "pre_pt_root_id": "pre",
                    "post_pt_root_id": "post",
                    "syn_count": "weight",
                }
            )
            if self.upstream:
                us = cn[cn.post.isin(x)]
                us = us.groupby(["pre", "post"], as_index=False).weight.sum()

            if self.downstream:
                ds = cn[cn.pre.isin(x)]
                ds = ds.groupby(["pre", "post"], as_index=False).weight.sum()
        else:
            if self.upstream:
                us = flywire.fetch_connectivity(
                    x, upstream=True, downstream=False, proofread_only=True
                )
                us_mat = us.attrs["materialization"]
            if self.downstream:
                ds = flywire.fetch_connectivity(
                    x, upstream=False, downstream=True, proofread_only=True
                )
                ds_mat = ds.attrs["materialization"]

        # For grouping by type simply replace pre and post IDs with their types
        # -> we'll aggregate later
        if self.use_types:
            sides_rel = self.use_sides == "relative"
            if self.upstream:
                us = _add_types(
                    us,
                    types=_get_flywire_types(us_mat, add_side=False),
                    col="pre",
                    expand_morphology_types=True,
                    sides=_get_fw_sides(us_mat) if self.use_sides else None,
                    sides_rel=sides_rel,
                )

            if self.downstream:
                ds = _add_types(
                    ds,
                    types=_get_flywire_types(ds_mat, add_side=False),
                    col="post",
                    expand_morphology_types=True,
                    sides=_get_fw_sides(ds_mat) if self.use_sides else None,
                    sides_rel=sides_rel,
                )

        # Aggregate each requested direction. `as_index=False` keeps the
        # result a DataFrame with pre/post/weight columns no matter whether
        # one or both directions were requested.
        edges = [
            df.groupby(["pre", "post"], as_index=False).weight.sum()
            for df in (us, ds)
            if df is not None
        ]
        # Rows that are identical in both directions are only kept once
        self.edges_ = pd.concat(edges, axis=0).drop_duplicates()

        # Translate morphology types into connectivity types
        # This makes it easier to align with hemibrain
        # self.connectivity_.columns = _morphology_to_connectivity_types(
        #     self.connectivity_.columns
        # )
0 commit comments