-
Notifications
You must be signed in to change notification settings - Fork 0
/
parsers.py
67 lines (55 loc) · 2.55 KB
/
parsers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# Lint as: python3.
# Copyright 2019 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Parsers for various standard biology or AlphaFold-specific formats."""
import pickle
import tensorflow as tf # pylint: disable=g-explicit-tensorflow-version-import
def distance_histogram_dict(f):
  """Parses and validates a distance histogram dict pickle.

  Distance histograms are stored as pickles of dicts.

  Write one of these with contacts/write_rr_file.write_pickle_file()

  Args:
    f: File-like handle (binary mode) to distance histogram dict pickle.

  Returns:
    Dict with fields:
      sequence: sized object whose length must equal the first two dims of
        probs.
      probs: (an L x L x num_bins) histogram.
      num_bins: number of bins for each residue pair
      min_range: left hand edge of the distance histogram
      max_range: the extent of the histogram NOT the right hand edge.

  Raises:
    ValueError: If a required key is missing, if probs is not rank 3, if the
      third dim of probs differs from num_bins, or if the first two dims of
      probs differ from len(sequence).
  """
  # NOTE(security): unpickling can execute arbitrary code; only pass handles
  # to files from a trusted source.
  # encoding='latin1' is what the pickle docs prescribe for loading NumPy
  # arrays pickled by Python 2 -- presumably why it is used here.
  contact_dict = pickle.load(f, encoding='latin1')

  # Validate every key that is read below *before* touching any of them, so
  # that a malformed pickle gives a descriptive ValueError rather than a bare
  # KeyError.
  required_keys = ('sequence', 'probs', 'num_bins', 'min_range', 'max_range')
  missing_keys = [key for key in required_keys if key not in contact_dict]
  if missing_keys:
    raise ValueError('The pickled contact dict doesn\'t contain all required '
                     'keys: %s. Missing: %s. Found: %s.' %
                     (', '.join(required_keys), ', '.join(missing_keys),
                      list(contact_dict)))

  probs = contact_dict['probs']
  num_bins = contact_dict['num_bins']
  num_res = len(contact_dict['sequence'])

  if probs.ndim != 3:
    raise ValueError('Probs is not rank 3 but %d' % probs.ndim)
  if num_bins != probs.shape[2]:
    raise ValueError(
        'The probs shape doesn\'t match num_bins in the third dimension. '
        'Expected %d got %d.' % (num_bins, probs.shape[2]))
  if probs.shape[:2] != (num_res, num_res):
    raise ValueError(
        'The first two probs dims (%i, %i) aren\'t equal to len(sequence) %i'
        % (probs.shape[0], probs.shape[1], num_res))
  return contact_dict
def parse_distance_histogram_dict(filepath):
  """Parses the distance histogram pickle stored at filepath.

  Args:
    filepath: Path passed to tf.io.gfile.GFile and opened in binary read mode.

  Returns:
    The result of distance_histogram_dict() applied to the opened file handle.
  """
  with tf.io.gfile.GFile(filepath, 'rb') as pickle_file:
    histogram = distance_histogram_dict(pickle_file)
  return histogram