-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathEOL_tools.py
171 lines (128 loc) · 4.92 KB
/
EOL_tools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
'''
This is a set of functions used to process the Tecan Sunrise
data, to calculate area under the curve and compare AUCs.
'''
import pandas as pd
from itertools import product
import numpy as np
import datetime
import numpy as np
def read_rawdata(file, strain, plate):
    '''
    Reads output table from tecan sunrise and gets it into
    "tidy data" mode. Adds columns with strain and plate information

    Parameters
    ----------
    file : path or file-like object
        Passed to pandas.read_csv. Parsed as tab-separated text; the
        first line is skipped, the first column becomes the well index
        and the remaining column headers are read times written as
        "<seconds>s" (for example "600s").
    strain, plate :
        Values copied into every row of the result.

    Returns
    -------
    pandas.DataFrame
        One row per well and read time, with columns
        well, timedelta, OD600, strain, plate.
    '''
    # read dataframe
    df = pd.read_csv(file,
                     sep='\t',
                     skiprows=1,
                     index_col=0)
    # drop every column that contains a NaN; this removes the empty
    # column produced by a trailing tab, but also any read time in
    # which at least one well has a missing value
    df = df.dropna(axis='columns')
    # melt into tidy data format, naming the new columns explicitly
    df = pd.melt(df,
                 var_name='seconds',
                 value_name='OD600',
                 ignore_index=False)
    # name the index before resetting it, so the column is called
    # 'well' whether or not the first header cell of the file is empty
    df = df.rename_axis('well').reset_index()
    # "600s" -> "600" -> timedelta of 600 seconds
    df['seconds'] = df['seconds'].str.strip('s')
    df['timedelta'] = pd.to_timedelta(df['seconds'].astype(int), unit='s')
    # clean columns; copy so the assignments below write to an
    # independent frame and not to a slice of the melted table
    df = df[['well', 'timedelta', 'OD600']].copy()
    # add strain and plate data
    df['strain'] = [strain] * len(df)
    df['plate'] = [plate] * len(df)
    return df
def read_experiment(file, strain, plate, metadata_file, clean=True, crop_min=600):
    '''
    Load one plate with read_rawdata, join it to its metadata table
    and optionally filter the result.

    metadata_file is read as a tab-separated table and joined to the
    readings on the 'well' and 'plate' columns.

    With clean=True only rows that have a phage value and whose
    'culture' column equals 1 are kept. With a truthy crop_min only
    readings taken before that many minutes are kept.

    Returns a dataframe with the columns
    well, strain, phage, timedelta, OD600 and a fresh 0..n-1 index.
    '''
    # => READ
    readings = read_rawdata(file, strain, plate)
    metadata = pd.read_csv(metadata_file, sep='\t')
    merged = readings.merge(metadata, on=['well', 'plate'])

    # => CLEAN
    if clean:
        # wells without a phage annotation are dropped, and so are
        # wells without culture (the phage negative controls)
        has_phage = merged['phage'].notnull()
        has_culture = merged['culture'] == 1
        merged = merged[has_phage & has_culture]

    if crop_min:
        # keep only the readings taken before the cut-off
        cutoff = datetime.timedelta(minutes=crop_min)
        merged = merged[merged['timedelta'] < cutoff]

    wanted = ['well', 'strain', 'phage', 'timedelta', 'OD600']
    return merged[wanted].reset_index(drop=True)
def auc_experiment(df_experiment, mean=True):
    '''
    Calculates area under the curve for each well,
    from an experiment dataframe.
    It can return the auc per well,
    or the mean/sem per phage-strain pair

    Parameters
    ----------
    df_experiment : pandas.DataFrame
        Needs the columns well, strain, phage, OD600 and a time column:
        'minutes' is used when present, otherwise 'timedelta'.
    mean : bool
        True  -> one row per (strain, phage) with columns
                 strain, phage, auc_mean, auc_sem.
        False -> one row per (strain, well) with columns
                 well, phage, strain, auc.

    Notes
    -----
    Each curve is grounded by subtracting its lowest OD600, then the
    means of consecutive readings are summed. The spacing between
    readings is not used, so the result is in units of
    OD600 x read interval.
    '''
    auc_columns = ['well', 'phage', 'strain', 'auc']
    # keep honouring a 'minutes' column when the caller provides one,
    # otherwise order the readings by the 'timedelta' column
    time_col = 'minutes' if 'minutes' in df_experiment.columns else 'timedelta'

    auc_results = []
    # group by strain as well as well, so that equally named wells of
    # different strains are never pooled into a single curve
    per_curve = df_experiment.groupby(['strain', 'well'],
                                      sort=False,
                                      dropna=False)
    for (strain, well), df_well in per_curve:
        df_well = df_well.sort_values(time_col)
        # ground by subtracting lowest value
        od = df_well['OD600'].to_numpy(dtype=float)
        od_norm = od - od.min()
        # sum of the averages of every two consecutive entries
        auc = float(((od_norm[1:] + od_norm[:-1]) / 2).sum())
        phage = df_well['phage'].iloc[0]
        auc_results.append([well, phage, strain, auc])

    # passing the column names here also works when there are no rows
    df_auc = pd.DataFrame(auc_results, columns=auc_columns)
    if not mean:
        return df_auc

    if df_auc.empty:
        return pd.DataFrame(columns=['strain', 'phage', 'auc_mean', 'auc_sem'])

    # mean and standard error of the mean per strain-phage pair
    grouped = df_auc.groupby(['strain', 'phage'])['auc']
    df_auc_mean = grouped.agg(auc_mean='mean', auc_sem='sem').reset_index()
    return df_auc_mean
def las_experiment(df_experiment):
    '''
    Liquid assay score per strain-phage pair.

    The mean AUC table is obtained from auc_experiment. Within each
    strain the row whose phage is '-' is taken as the control, and
    every row gets
        las = (control auc - auc_mean) / control auc * 100

    Returns the mean AUC table with the extra column 'las'.
    '''
    # mean area under the curve for the whole experiment
    auc_table = auc_experiment(df_experiment)

    scored = []
    # the score is relative to each strain's own control
    for strain_name in auc_table['strain'].unique():
        per_strain = auc_table.loc[auc_table['strain'] == strain_name].copy()
        # growth of the control (no phage) for this strain
        is_control = per_strain['phage'] == '-'
        baseline = per_strain.loc[is_control, 'auc_mean'].to_list()[0]
        # relative loss of growth compared with the control, in percent
        reduction = baseline - per_strain['auc_mean']
        per_strain['las'] = reduction / baseline * 100
        scored.append(per_strain)

    return pd.concat(scored)