Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Read .ndjson CryoET Files #493

Draft
wants to merge 13 commits into
base: main
Choose a base branch
from
Binary file modified molecularnodes/assets/MN_data_file_4.2.blend
Binary file not shown.
5 changes: 2 additions & 3 deletions molecularnodes/entities/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,13 @@
from . import molecule, trajectory
from .density import MN_OT_Import_Map
from .trajectory.dna import MN_OT_Import_OxDNA_Trajectory
from .ensemble import CellPack
from .ensemble import StarFile
from .ensemble import CellPack, StarFile, load_cellpack, load_starfile
from .ensemble.ui import MN_OT_Import_Cell_Pack, MN_OT_Import_Star_File
from .molecule.pdb import PDB
from .molecule.pdbx import BCIF, CIF
from .molecule.sdf import SDF
from .molecule.ui import fetch, load_local, parse
from .trajectory.trajectory import Trajectory
from .trajectory import Trajectory

CLASSES = (
[
Expand Down
104 changes: 95 additions & 9 deletions molecularnodes/entities/ensemble/star.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from pathlib import Path

import bpy
from bpy.types import Object
import mrcfile
import numpy as np
import starfile
Expand All @@ -12,27 +13,27 @@
from databpy import AttributeTypes, BlenderObject
import databpy
from .base import Ensemble
import json
from mathutils import Matrix


class StarFile(Ensemble):
def __init__(self, file_path):
super().__init__(file_path)
self.type = "starfile"
self.data = self._read()
self.df = self._assign_df()
self.current_image = -1

@classmethod
def from_starfile(cls, file_path):
self = cls(file_path)
self.data = self._read()
self.df = self._assign_df()
return self

@classmethod
def from_blender_object(cls, blender_object):
self = cls(blender_object["starfile_path"])
self.object = blender_object
self.data = self._read()
self._create_mn_columns()
bpy.app.handlers.depsgraph_update_post.append(self._update_micrograph_texture)
return self

Expand All @@ -45,9 +46,17 @@ def micrograph_material(self):
return bl.nodes.MN_micrograph_material()

def _read(self):
star: DataFrame = list(
starfile.read(self.file_path, always_dict=True).values()
)[0]
if self._is_json():
star = self._read_json()
return star
else:
starict: dict[str, DataFrame] = starfile.read(
self.file_path, always_dict=True
) # type: ignore
if "particles" in starict:
star = starict["particles"]
else:
star = list(starict.values())[0]

if not isinstance(star, DataFrame):
raise ValueError("Problem opening starfile as dataframe")
Expand All @@ -63,21 +72,32 @@ def n_images(self):
return len(self.data)
return 1

def _is_json(self):
return self.file_path.suffix == ".ndjson"

def _is_relion(self):
if self._is_json():
return False
return (
isinstance(self.data, dict)
and "particles" in self.data
and "optics" in self.data
) or ("rlnAnglePsi" in self.data)

def _is_cistem(self):
if self._is_json():
return False
if isinstance(self.data, np.ndarray):
return False
return "cisTEMAnglePsi" in self.data

def _assign_df(self):
if self._is_relion():
return RelionDataFrame(self.data)
elif self._is_cistem():
return CistemDataFrame(self.data)
elif self._is_json():
return NDJSONDataFrame(self.data)
else:
raise ValueError(
"File is not a valid RELION>=3.1 or cisTEM STAR file, other formats are not currently supported."
Expand Down Expand Up @@ -158,6 +178,34 @@ def _update_micrograph_texture(self, *_):
self.micrograph_material.node_tree.nodes["Image Texture"].image = image_obj
self.star_node.inputs["Micrograph"].default_value = image_obj

def _read_json(self):
with open(self.file_path, "r") as f:
lines = f.readlines()

has_rotation = bool(json.loads(lines[0]).get("xyz_rotation_matrix"))

arr = np.zeros((len(lines), 4, 4), float)

for i, line in enumerate(lines):
matrix = np.identity(4, float)
data = json.loads(line)
pos = [data["location"][axis] for axis in "xyz"]

matrix[:3, 3] = pos
if has_rotation:
matrix[:3, :3] = data["xyz_rotation_matrix"]
# fixes orientation issues for how the matrices are stored
# https://github.com/BradyAJohnston/MolecularNodes/pull/493#issuecomment-2127461280
matrix[:3, :3] = np.flip(matrix[:3, :3])
arr[i] = matrix

# returns a (n, 4, 4) matrix, where the 4x4 rotation matrix is returned for
# each point from the ndjson file
# this currently doesn't handle where there might be different points or different
# proteins being instanced on those points, at which point we will have to change
# what kind of array we are returning
return arr

def create_object(
self,
name="StarFileObject",
Expand Down Expand Up @@ -232,6 +280,7 @@ def image_id_values(self) -> np.ndarray:
"""
Returns the image ids as a numpy array.
"""

for name in [
"rlnImageName",
"rlnMicrographName",
Expand All @@ -243,9 +292,9 @@ def image_id_values(self) -> np.ndarray:
except KeyError:
pass

return np.zeros(len(self.data), dtype=int)
return np.repeat(1, len(self.data))

def store_data_on_object(self, obj: bpy.types.Object):
def store_data_on_object(self, obj: bpy.types.Object) -> None:
"""
Stores the data on the object.
"""
Expand All @@ -262,6 +311,9 @@ def store_data_on_object(self, obj: bpy.types.Object):
atype=AttributeTypes.INT,
)

if isinstance(self.data, np.ndarray):
return

for col in self.data.columns:
if isinstance(self.data[col].dtype, CategoricalDtype):
bob.object[f"{col}_categories"] = list(self.data[col].cat.categories)
Expand Down Expand Up @@ -315,3 +367,37 @@ def _adjust_defocus(self):
self.data["cisTEMZFromDefocus"] = (
self.data["cisTEMZFromDefocus"] - self.data["cisTEMZFromDefocus"].median()
)


class NDJSONDataFrame(EnsembleDataFrame):
def __init__(self, data: np.ndarray):
self.data = data
self.type = "json"
self._store_coordinates()

@property
def scale(self) -> float:
return 10.0

def image_id_values(self) -> np.ndarray:
return np.repeat(1, len(self.data))

@property
def coordinates(self):
return self._positions

def rotation_as_quaternion(self) -> np.ndarray:
return self._rotations

def _store_coordinates(self) -> None:
self._positions = np.zeros((len(self.data), 3), float)
self._rotations = np.zeros((len(self.data), 4), float)

for i, matrix in enumerate(self.data):
pos = matrix[:3, 3]
rot_mat = matrix[:3, :3]
# rot_mat = np.flip(rot_mat)
# rot = R.from_matrix(rot_mat).inv().as_quat(scalar_first=True)
rot = R.from_matrix(rot_mat).as_quat(scalar_first=True)
self._positions[i] = pos
self._rotations[i] = rot
109 changes: 109 additions & 0 deletions tests/__snapshots__/test_star.ambr
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# serializer version: 1
# name: test_read_ndjson_oriented
array([[23.87433052, 31.89172745, 25.37850761],
[33.02005386, 28.06398582, 18.75840378],
[23.46685791, 25.87920952, 19.5944519 ],
[18.09267235, 36.78095627, 17.35915375],
[32.35549927, 37.58826828, 11.6517458 ],
[31.01458931, 39.519207 , 13.43552589],
[28.10414505, 42.05084991, 15.00608635],
[30.01036263, 25.67038155, 13.95381832],
[32.14944458, 27.88355064, 14.40765381],
[34.20890808, 29.50944901, 13.86282635],
[21.27736664, 29.35665703, 8.96019936],
[26.42842674, 26.79534912, 8.91622066],
[31.82834625, 30.15784836, 8.03425217],
[26.9825058 , 37.82344818, 7.63521528],
[19.57832909, 32.44467926, 9.61724854],
[27.08240891, 36.46793365, 7.81259775],
[44.13663483, 53.67095184, 20.43483925],
[49.92914581, 66.25559998, 18.73733711],
[46.72801971, 66.48529816, 16.41119385],
[37.84198761, 56.45783234, 17.32278061],
[43.81853867, 47.19822311, 12.84123707],
[50.95117569, 68.43701172, 10.82436371],
[41.85073471, 65.05987549, 9.85518932],
[40.50820923, 52.35146713, 12.34503937],
[45.19417572, 48.17839432, 10.21124363],
[61.11759186, 56.44265747, 11.09936333],
[58.59801102, 61.78705978, 10.44991875],
[40.0730896 , 53.87436295, 5.11055803],
[48.20633316, 66.35647583, 3.79595304],
[46.47460938, 62.5362587 , 3.68710828],
[57.29676819, 53.93936157, 3.65148497],
[49.68291473, 62.75001526, 3.84358668],
[54.50014496, 53.35533524, 3.77865529],
[57.25868225, 55.54994965, 3.22070813],
[52.32493591, 58.24117279, 3.62839437],
[55.79870987, 65.13088989, 14.83298016],
[58.59644318, 61.86207581, 15.74641037],
[63.83924484, 73.4563446 , 14.62044716],
[67.77477264, 62.04064941, 13.9069519 ],
[58.85812759, 73.30673981, 14.63740253],
[57.69659042, 58.86594391, 11.43960476],
[68.71797943, 73.04043579, 12.37947083],
[60.41506195, 75.70902252, 12.08103752],
[67.9124527 , 58.83675003, 12.08523178],
[64.42563629, 74.6596756 , 6.21566582],
[70.59613037, 62.10912323, 9.58167458],
[71.3404541 , 64.22844696, 7.00058746],
[68.76173401, 71.54763031, 3.80314946],
[61.89402771, 75.00282288, 2.53922963],
[56.38106918, 73.17893219, 2.72062659],
[69.51998901, 63.39735794, 3.91468668],
[61.58407211, 73.56692505, 2.38359976]])
# ---
# name: test_read_ndjson_oriented.1
array([[-0.46720752, 0.66329151, -0.03304094, -0.58366925],
[ 0.12011801, 0.91485447, -0.32788453, 0.20274286],
[-0.03171144, -0.59320384, 0.35137537, 0.723629 ],
[-0.19200005, -0.24918218, -0.08924829, 0.94502854],
[ 0.01118462, -0.05068073, 0.93531823, -0.34998026],
[-0.03089962, -0.02655442, 0.99915159, 0.00601635],
[-0.27980441, 0.03517388, 0.81216043, -0.51075214],
[ 0.65255266, -0.56346714, 0.22439645, -0.45423129],
[-0.24432151, 0.86978418, -0.42279455, 0.07090327],
[-0.31966069, -0.48121914, 0.78034323, -0.2393942 ],
[ 0.70030981, -0.00739502, 0.22205707, -0.67838198],
[ 0.4968349 , -0.32295665, 0.80341798, 0.05808304],
[ 0.25621903, -0.50134301, 0.81793118, 0.11830288],
[ 0.45764095, 0.44784227, -0.05973107, 0.76578999],
[ 0.64386481, -0.03921127, 0.62544423, 0.43899903],
[ 0.13245901, 0.67954278, 0.57353872, 0.43786934],
[ 0.45280361, -0.55400658, 0.34156483, 0.6094088 ],
[ 0.62990534, -0.33379927, -0.46779251, 0.5224629 ],
[ 0.67541873, 0.12079696, -0.45652995, 0.56639034],
[ 0.34280553, -0.60027158, 0.15467614, 0.70585674],
[-0.30094582, 0.63507724, -0.44388458, -0.5559451 ],
[-0.14420539, 0.41927826, 0.69232762, -0.56929165],
[ 0.65801847, 0.14431359, -0.04408751, 0.73772728],
[ 0.58154052, -0.42297933, 0.14222495, 0.68019938],
[-0.07721653, -0.04280182, 0.12656845, 0.98802125],
[-0.25571635, -0.27441421, 0.87974936, -0.29214215],
[-0.39674431, -0.5430761 , 0.70551825, -0.22339706],
[ 0.65568638, -0.33034965, 0.54463178, 0.40536487],
[ 0.59983915, 0.41064155, 0.23391005, 0.64564121],
[ 0.4012568 , 0.48989102, 0.73561037, -0.24057664],
[-0.4065074 , 0.61161119, -0.42075971, 0.53258306],
[ 0.62283301, 0.3139337 , 0.15204787, 0.70029002],
[-0.06682844, 0.70369375, -0.47372463, 0.52529424],
[ 0.16581939, 0.58134323, 0.69784969, 0.38412207],
[-0.39641926, 0.34594041, -0.53667098, 0.65966749],
[ 0.85798866, -0.37084046, -0.0926818 , 0.34313682],
[ 0.72770751, -0.68348235, -0.02458482, -0.05185724],
[-0.17110497, 0.55774367, 0.68311793, -0.43931192],
[-0.51995379, -0.18816932, 0.56595784, 0.61149991],
[ 0.733832 , 0.62354684, 0.20824824, -0.17120926],
[ 0.6844275 , -0.67024094, 0.12461573, -0.25847042],
[-0.51616597, 0.28499767, 0.69851905, -0.4054876 ],
[ 0.29564893, -0.4324317 , -0.45579931, 0.71961206],
[-0.59977877, -0.29825485, 0.7119993 , 0.21063349],
[ 0.06755421, 0.59245598, 0.58896536, -0.54548341],
[-0.31654108, 0.53705853, 0.56126648, 0.54438019],
[-0.50906521, 0.40167496, 0.75398499, 0.10495941],
[-0.37205219, -0.23857826, 0.79400361, -0.41739169],
[ 0.1982006 , 0.64666992, 0.51341838, 0.52814406],
[ 0.81773907, -0.02891332, 0.11298642, 0.56364965],
[-0.13209042, 0.71269614, -0.30696592, 0.61675626],
[ 0.63431442, 0.35421583, -0.14755557, 0.67112124]])
# ---
Loading
Loading