-
Notifications
You must be signed in to change notification settings - Fork 1
/
nonquarterly-processed.conseq
75 lines (68 loc) · 3.49 KB
/
nonquarterly-processed.conseq
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
include "xrefs-nonquarterly-unprocessed.conseq"
# Fetch the Taiga dataset named by a "download_from_taiga" artifact, write it
# to out.csv (optionally converting it to out.hdf5), and publish a copy of the
# input artifact whose "type" is the input's "target_type" and whose
# "filename" points at the generated file.
#
# Inputs:
#   download   - artifact of type "download_from_taiga"; this rule reads its
#                label, dataset_id, format and target_type properties.
#   hdf5_utils - helper script used for the CSV to HDF5 conversion.
#
# Output (written to results.json by the last step): the input artifact with
# "filename" added, "target_type" renamed to "type", and the
# "$manually-added" key removed when it is present.
rule process_taiga_pulled_artifact:
    inputs:
        download={"type": "download_from_taiga"},
        hdf5_utils=fileref("scripts/hdf5_utils.py")

    run "python" with """
        import shutil
        import taigapy
        import os

        tc = taigapy.TaigaClient() # TODO: Need to update to Taiga Client V3 but don't know what the requested format should be for download_to_cache

        ### The if blocks are here to convert the new oncref format to the old oncref format so that
        ### the oncref datasets can be ingested in the pipeline without much additional changes.
        # The template expressions below are evaluated when the script is rendered,
        # so each condition reaches Python as a literal True or False.
        if {{ inputs.download.label == "Prism_oncology_AUC" or inputs.download.label == "Prism_oncology_IC50"}}:
            # Matrix datasets: written out transposed, keeping the index column.
            prism_oncref_df = tc.get("{{ inputs.download.dataset_id }}")
            prism_oncref_df = prism_oncref_df.transpose()
            prism_oncref_df.to_csv("out.csv", index=True)
        elif {{ inputs.download.label == "Prism_oncology_per_curve" }}:
            # Per-curve table: only the column names change; the index is not written.
            prism_oncref_curves_df = tc.get("{{ inputs.download.dataset_id }}")
            prism_oncref_curves_df = prism_oncref_curves_df.rename(columns={
                'ModelID': 'cell_line_name',
                'SampleID': 'compound_name',
                'EC50': 'ec50',
                'LowerAsymptote': 'lower_asymptote',
                'UpperAsymptote': 'upper_asymptote',
                'Slope': 'slope'
            })
            prism_oncref_curves_df.to_csv("out.csv", index=False)
        else:
            # Every other dataset is copied from the Taiga cache unchanged.
            cached = tc.download_to_cache("{{ inputs.download.dataset_id }}")
            shutil.copy2(cached, "out.csv")

        assert os.path.exists("out.csv"), "Output file 'out.csv' not generated"
    """

    # Convert files to HDF5 if needed
    run "bash" with """
        if [[ "{{ inputs.download.format }}" == "hdf5" ]]; then
            python {{ inputs.hdf5_utils.filename }} to_hdf5 out.csv csv out.hdf5
        fi
    """

    # The input is polymorphic and we want the output to have (mostly) the same keys
    run "python" with """
        import json

        artifact = {{inputs.download | quoted}}
        artifact["filename"] = {"$filename": "out.{{ inputs.download.format }}"}
        # Publish under the type named by the input; a missing "target_type" is
        # a real configuration error, so this lookup is left to raise KeyError.
        artifact["type"] = artifact["target_type"]
        del artifact["target_type"]
        # Use pop with a default so an input that lacks this key does not fail
        # the rule with a KeyError.
        artifact.pop("$manually-added", None)

        with open("results.json", "w") as f:
            json.dump({"outputs": [artifact]}, f)
    """
# TODO: Need to update to Taiga Client V3 but don't know what the requested format should be for download_to_cache
# Turn a "download_aggregated_dose_from_taiga" artifact into an
# "aggregated-dose-replicate-level" artifact.
#
# Inputs:
#   download   - artifact of type "download_aggregated_dose_from_taiga"; its
#                label and dataset-id properties are copied onto the output.
#   hdf5_utils - helper script used for the CSV to HDF5 conversion.
#   script     - script that is handed the label and the three dataset ids.
#
# Output: one artifact that carries the input's label and dataset ids plus
# references to out.hdf5, cell_lines.csv and perturbations.csv.
#
# NOTE(review): the processing script is not visible here; presumably it
# writes out.csv, cell_lines.csv and perturbations.csv into the working
# directory -- confirm against the script.
# NOTE(review): the template values in the first command are not quoted, so a
# label containing whitespace would presumably be split into several
# arguments -- confirm how conseq executes the command.
rule process_taiga_pulled_aggregated_dose_artifact:
inputs:
download={"type": "download_aggregated_dose_from_taiga"},
hdf5_utils=fileref("scripts/hdf5_utils.py"),
script=fileref("scripts/process_taiga_pulled_aggregated_dose_artifact.py")
outputs: {
"type": "aggregated-dose-replicate-level",
"label": "{{ inputs.download.label }}",
"dataset_id": "{{ inputs.download.dataset_id }}",
"orig_dataset_id": "{{ inputs.download.orig_dataset_id }}",
"cell_lines_dataset_id": "{{ inputs.download.cell_lines_dataset_id }}",
"perturbations_dataset_id": "{{ inputs.download.perturbations_dataset_id }}",
"hdf5_filename": {"$filename": "out.hdf5"},
"cell_lines_filename": {"$filename": "cell_lines.csv"},
"perturbations": {"$filename": "perturbations.csv"}
}
# Argument order: label, cell lines dataset id, perturbations dataset id, dataset id.
run "python {{ inputs.script.filename }} {{ inputs.download.label }} {{ inputs.download.cell_lines_dataset_id }} {{ inputs.download.perturbations_dataset_id }} {{ inputs.download.dataset_id }}"
# Convert out.csv into the out.hdf5 file referenced by the output artifact.
run "python {{ inputs.hdf5_utils.filename }} to_hdf5 out.csv csv out.hdf5"