Skip to content

Commit 7630a16

Browse files
committed
first working prototype
1 parent 34485d4 commit 7630a16

17 files changed

+1377
-0
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -127,3 +127,5 @@ dmypy.json
127127

128128
# Pyre type checker
129129
.pyre/
130+
131+
*.DS_Store

README.md

+12
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,15 @@ A Python library for comparative connectomics analyses.
44

55
`cocoa` implements various dataset-agnostic as well as dataset-specific methods
66
for matching, co-clustering and cell typing.
7+
8+
## Install
9+
10+
```bash
11+
pip3 install git+https://github.com/flyconnectome/cocoa.git
12+
```
13+
14+
## Requirements
15+
16+
All dependencies should be installed automatically. However, to use the
17+
pre-defined datasets you will need to set a couple of environment variables and
18+
secrets:

cocoa/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .normalize import *
2+
from .plotting import *
3+
from .datasets import *

cocoa/__version__.py

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__version__ = '0.1.0'

cocoa/app.py

Whitespace-only changes.

cocoa/datasets/__init__.py

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from .flywire import *
2+
from .hemibrain import *
3+
from .malecns import *

cocoa/datasets/core.py

+63
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import numpy as np
2+
3+
from abc import ABC, abstractmethod
4+
5+
6+
class DataSet(ABC):
    """Abstract base class for a connectivity dataset.

    A dataset keeps a label and a sorted, de-duplicated array of neuron IDs.
    Subclasses implement how user input is turned into IDs
    (`_add_neurons`), how IDs are labelled (`get_labels`) and how the
    connectivity is compiled (`compile`).

    Parameters
    ----------
    label : str
        A label for this dataset.

    """

    def __init__(self, label):
        self.label = label
        # Start out with an empty set of neurons
        self.neurons = np.zeros((0,), dtype=np.int64)

    def __repr__(self):
        return f"class {self.type} <label={self.label};neurons={len(self.neurons)}>"

    @property
    def type(self):
        """Name of the concrete dataset class."""
        # `__name__` is robust where slicing `str(type(self))` is not
        return type(self).__name__

    @property
    def syn_counts(self):
        """Summed up- and downstream edge weights per neuron.

        Returns
        -------
        dict
            Maps each ID in `self.neurons` to the sum of the weights of all
            edges in `self.edges_` where that ID is `post` plus all edges
            where it is `pre`. IDs without any edges map to 0.

        Raises
        ------
        ValueError
            If `self.edges_` has not been set yet.

        """
        if not hasattr(self, "edges_"):
            raise ValueError("Must first compile connectivity")

        edges = self.edges_
        # Tolerate a (pre, post)-indexed weight Series, e.g. the result of a
        # groupby without `as_index=False`: turn the index back into columns
        if getattr(edges, "ndim", 2) == 1:
            edges = edges.reset_index()

        ids = self.neurons
        up = edges[edges.post.isin(ids)].groupby("post").weight.sum().to_dict()
        down = edges[edges.pre.isin(ids)].groupby("pre").weight.sum().to_dict()
        return {n: up.get(n, 0) + down.get(n, 0) for n in ids}

    def add_neurons(self, x, **kwargs):
        """Add neurons to dataset.

        Parameters
        ----------
        x : str | int | list thereof
            Something that can be parsed into IDs. Details depend on the
            dataset.
        **kwargs
            Passed through to the dataset-specific `_add_neurons`.

        """
        # `np.unique` both de-duplicates and sorts the combined IDs
        self.neurons = np.unique(
            np.append(self.neurons, self._add_neurons(x, **kwargs))
        )

    @abstractmethod
    def _add_neurons(self, x, **kwargs):
        """Turn `x` into IDs."""
        pass

    @abstractmethod
    def get_labels(self, x, **kwargs):
        """Get label for ID `x`."""
        pass

    @abstractmethod
    def compile(self):
        """Compile connectivity vector."""
        pass

cocoa/datasets/flywire.py

+215
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,215 @@
1+
import os
2+
3+
import numpy as np
4+
import pandas as pd
5+
6+
from fafbseg import flywire
7+
8+
from .core import DataSet
9+
from .utils import (
10+
_add_types,
11+
_get_table,
12+
_get_flywire_types,
13+
_get_fw_sides,
14+
_is_int
15+
)
16+
17+
__all__ = ["FlyWire"]
18+
19+
itable = None
20+
otable = None
21+
22+
23+
class FlyWire(DataSet):
    """FlyWire dataset.

    Parameters
    ----------
    label : str
        A label.
    upstream : bool
        Whether to use upstream connectivity.
    downstream : bool
        Whether to use downstream connectivity.
    use_types : bool
        Whether to group by type. This will use `hemibrain_type` first
        and where that doesn't exist fall back to `cell_type`.
    use_sides : bool | 'relative'
        Only relevant if `use_types=True`:
         - if `True`, will split cell types into left/right/center
         - if `relative`, will label cell types as `ipsi` or
           `contra` depending on the side of the connected neuron
    file : str, optional
        Filepath to one of the connectivity dumps. The materialization
        version is parsed from the filename, which must end in
        `_{version}.{extension}`, e.g. "syn_proof_[...]_587.feather".

    """

    def __init__(
        self,
        label="FlyWire",
        upstream=True,
        downstream=True,
        use_types=True,
        use_sides=False,
        file=None,
    ):
        # NOTE: kept as asserts so callers still see AssertionError
        assert use_sides in (
            True,
            False,
            "relative",
        ), "`use_sides` must be True, False or 'relative'"
        if file:
            assert os.path.isfile(file), f"File not found: {file}"

        super().__init__(label=label)
        self.file = file
        self.upstream = upstream
        self.downstream = downstream
        self.use_types = use_types
        self.use_sides = use_sides

    @staticmethod
    def _match_types(table, x, exact, left, right):
        """Return root IDs (list of int) in `table` whose type matches `x`."""
        if exact:
            filt = (table.cell_type == x) | (table.hemibrain_type == x)
        else:
            filt = table.cell_type.str.contains(
                x, na=False
            ) | table.hemibrain_type.str.contains(x, na=False)

        if not left:
            filt = filt & (table.side != "left")
        if not right:
            filt = filt & (table.side != "right")

        return table.loc[filt, "root_id"].values.astype(np.int64).tolist()

    def _add_neurons(self, x, exact=True, left=True, right=True):
        """Turn `x` into FlyWire root IDs.

        Parameters
        ----------
        x : int | str | list | np.ndarray | set | tuple | None
            Integer-like values are used as root IDs directly. Other
            values are matched against the `cell_type` and
            `hemibrain_type` columns of the "info" and "optic" tables.
            Containers are resolved element by element.
        exact : bool
            If True, types must match exactly. If False, `x` is matched
            via `str.contains` and a comma-separated string is split
            into individual queries.
        left, right : bool
            If False, rows whose `side` is "left"/"right" are excluded.
            Only applies when matching by type.

        Returns
        -------
        np.ndarray
            Sorted unique root IDs (int64).

        """
        if x is None:
            return np.array([], dtype=np.int64)

        if not exact and isinstance(x, str) and "," in x:
            x = x.split(",")

        if isinstance(x, (list, np.ndarray, set, tuple)):
            parts = [
                self._add_neurons(t, exact=exact, left=left, right=right)
                for t in x
            ]
            ids = np.concatenate(parts) if parts else []
        elif _is_int(x):
            ids = [int(x)]
        else:
            ids = []
            # Same lookup against both annotation tables
            for which in ("info", "optic"):
                table = _get_table(which=which)
                ids.extend(self._match_types(table, x, exact, left, right))

        return np.unique(np.array(ids, dtype=np.int64))

    def get_labels(self, x):
        """Fetch labels for given IDs.

        Parameters
        ----------
        x : int | list | tuple | set | np.ndarray | None
            Root ID(s). A single ID is treated as a one-element list.

        Returns
        -------
        np.ndarray
            One entry per ID: its type if one is known, else the ID itself.

        """
        if x is None:
            x = []
        elif isinstance(x, (set, tuple)):
            x = list(x)
        elif not isinstance(x, (list, np.ndarray)):
            # A single ID must be wrapped, not discarded
            x = [x]
        x = np.asarray(x).astype(np.int64)

        # Find a matching materialization version
        mat = flywire.utils.find_mat_version(x)

        # Fetch all types for this version
        types = _get_flywire_types(mat, add_side=False)

        return np.array([types.get(i, i) for i in x])

    def _typed_edges(self, edges, mat, col):
        """Replace the IDs in column `col` of `edges` with their types."""
        return _add_types(
            edges,
            types=_get_flywire_types(mat, add_side=False),
            col=col,
            expand_morphology_types=True,
            sides=None if not self.use_sides else _get_fw_sides(mat),
            sides_rel=self.use_sides == "relative",
        )

    def compile(self):
        """Compile edges.

        Populates `self.edges_` with a DataFrame with the columns `pre`,
        `post` and `weight`.

        Raises
        ------
        ValueError
            If both `upstream` and `downstream` are False, or if a
            connectivity dump is used and some root IDs did not exist at
            its materialization.

        """
        # Fail before doing any expensive I/O
        if not (self.upstream or self.downstream):
            raise ValueError("`upstream` and `downstream` must not both be False")

        # Make sure we're working on 64-bit integers: plain `int` can be
        # 32-bit on some platforms which would truncate root IDs
        x = np.asarray(self.neurons).astype(np.int64)

        us, ds = None, None
        if self.file:
            # Extract mat version from filename e.g. "syn_proof_[...]_587.feather"
            fname = os.path.basename(self.file)
            us_mat = ds_mat = int(fname.split("_")[-1].split(".")[0])

            # Check if root IDs existed at the time of the synapse dump
            il = np.asarray(flywire.is_latest_root(x, timestamp=f"mat_{us_mat}"))
            if not np.all(il):
                raise ValueError(
                    "Some root IDs did not exist at the time of the "
                    f"synapse dump (mat {us_mat}): {x[~il]}"
                )

            cn = pd.read_feather(self.file).rename(
                {
                    "pre_pt_root_id": "pre",
                    "post_pt_root_id": "post",
                    "syn_count": "weight",
                },
                axis=1,
            )
            if self.upstream:
                us = cn[cn.post.isin(x)]
                us = us.groupby(["pre", "post"], as_index=False).weight.sum()

            if self.downstream:
                ds = cn[cn.pre.isin(x)]
                ds = ds.groupby(["pre", "post"], as_index=False).weight.sum()
        else:
            if self.upstream:
                us = flywire.fetch_connectivity(
                    x, upstream=True, downstream=False, proofread_only=True
                )
                us_mat = us.attrs["materialization"]
            if self.downstream:
                ds = flywire.fetch_connectivity(
                    x, upstream=False, downstream=True, proofread_only=True
                )
                ds_mat = ds.attrs["materialization"]

        # For grouping by type simply replace pre and post IDs with their types
        # -> we'll aggregate later
        if self.use_types:
            if self.upstream:
                us = self._typed_edges(us, us_mat, col="pre")
            if self.downstream:
                ds = self._typed_edges(ds, ds_mat, col="post")

        # Always aggregate with `as_index=False` so that `edges_` is a
        # DataFrame with `pre`/`post`/`weight` columns in every mode
        if self.upstream and self.downstream:
            self.edges_ = pd.concat(
                (
                    us.groupby(["pre", "post"], as_index=False).weight.sum(),
                    ds.groupby(["pre", "post"], as_index=False).weight.sum(),
                ),
                axis=0,
            ).drop_duplicates()
        elif self.upstream:
            self.edges_ = us.groupby(["pre", "post"], as_index=False).weight.sum()
        else:
            self.edges_ = ds.groupby(["pre", "post"], as_index=False).weight.sum()

        # Translate morphology types into connectivity types
        # This makes it easier to align with hemibrain
        # self.connectivity_.columns = _morphology_to_connectivity_types(
        #     self.connectivity_.columns
        # )

0 commit comments

Comments
 (0)