# setup.py (executable file, 129 lines, 104 loc, 4.02 KB)
# Forked from Matteopaluh/KEMET.
#!/usr/bin/env python
# coding: utf-8
import argparse
import os
import urllib.request
from os import path
def _download_kegg_organisms(
    destination, url="http://rest.kegg.jp/get/br:br08601", timeout=30
):
    """Fetch the KEGG organisms BRITE hierarchy and store it at ``destination``.

    The download is best-effort: a network failure is reported on stdout and
    the setup continues, leaving any previously downloaded file untouched.

    Args:
        destination (str): path of the file that receives the downloaded data
        url (str, optional): KEGG REST endpoint to query.
        timeout (float, optional): seconds to wait on the network before giving up.

    Returns:
        bool: True when the file was written, False when the download failed.
    """
    try:
        # Read the full payload first so a failed transfer never truncates an
        # existing copy of the hierarchy file.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            payload = response.read()
    except OSError as error:  # URLError, HTTPError and socket timeouts are OSErrors
        print(f"KEGG Organisms hierarchy NOT DOWNLOADED ({error})")
        return False
    with open(destination, "wb") as handle:
        handle.write(payload)
    print("KEGG Organisms hierarchy DOWNLOADED")
    return True


def _create_file_if_missing(file_path, exists_message, created_message, header=""):
    """Create ``file_path`` holding ``header`` unless it is already a file.

    Prints ``exists_message`` when the file is left alone and
    ``created_message`` after it has been created.
    """
    if path.isfile(file_path):
        print(exists_message)
        return
    # Plain open() in write mode works on every platform, unlike os.mknod.
    with open(file_path, "w", encoding="utf-8") as handle:
        handle.write(header)
    print(created_message)


def set_directories(dir_base, gapfill_usage=False):
    """
    Setup function to generate folders and instruction files for
    KEMET script execution.

    In order, it downloads the KEGG organisms hierarchy ("br08601.keg"),
    creates the three ".instruction" files when they are missing (the genome
    one starts with a tab-separated "id / taxonomy / universe" header line)
    and creates every working folder that does not exist yet. Existing files
    and folders are never overwritten.

    Args:
        dir_base (str): folder path in which "kemet.py" is going to be executed
        gapfill_usage (bool, optional): flag to indicate that GSMMs functionalities are wanted.
            Defaults to False.

    Raises:
        OSError: if a file or folder cannot be created inside ``dir_base``.
    """
    directories_to_make = [
        "KEGG_annotations",
        "ktests",
        "klists",
        "reports_txt",
        "reports_tsv",
        "taxonomies",
        "Knumber_ntsequences",
        "multiple_fasta",
        "HMM",
        "HMM_HITS",
        "genomes",
        "oneBM_modules",
    ]
    if gapfill_usage:
        directories_to_make += [
            "report_gapfill",
            "biggapi_download",
            "DB",
            "models",
            "models_gapfilled",
            "de_novo_models",
        ]

    _download_kegg_organisms(path.join(dir_base, "br08601.keg"))

    _create_file_if_missing(
        path.join(dir_base, "genomes.instruction"),
        "Instruction file ALREADY EXISTS",
        "genome_instruction file GENERATED",
        header="id\ttaxonomy\tuniverse\n",
    )
    _create_file_if_missing(
        path.join(dir_base, "module_file.instruction"),
        "module_file ALREADY EXISTS",
        "module_file GENERATED",
    )
    _create_file_if_missing(
        path.join(dir_base, "ko_file.instruction"),
        "ko_file ALREADY EXISTS",
        "ko_file GENERATED",
    )

    for folder_name in directories_to_make:
        folder = path.join(dir_base, folder_name)
        if path.isdir(folder):
            print(f"{folder} folder ALREADY EXISTS")
        else:
            os.mkdir(folder)
            print(f"{folder} folder CREATED")
def set_kk_database():
    """Placeholder for generating the KEGG Module DB (.kk files).

    Not implemented yet: the call performs no work and returns None.
    """
    return None
def update_kk_database():
    """Placeholder for updating an existing KEGG Module DB (.kk files).

    Not implemented yet: the call performs no work and returns None.
    """
    return None
###############################################################################
def main():
    """Command-line entry point of the KEMET setup script.

    Parses the -k / -u / -G switches, prepares the folders and instruction
    files in the current working directory, then calls the .kk database
    helpers for whichever of -k / -u were given.
    """
    cli = argparse.ArgumentParser(
        description=(
            "\n"
            "Setup command for KEMET pipeline.\n"
            "Create folders and manage KEGG Module .kk database\n"
        )
    )

    # (short flag, long flag, help text) for every boolean switch, in the
    # order they are registered with the parser.
    switches = (
        (
            "-k",
            "--set_kk_DB",
            "\n"
            "Choose this option to generate KEGG Module DB (.kk files),\n"
            "in order to perform KEGG Modules Completeness evaluation.\n"
            "Default: already generated",
        ),
        (
            "-u",
            "--update_kk_DB",
            "\n"
            "Choose this option to update already existing KEGG Module DB (.kk files).",
        ),
        (
            "-G",
            "--gapfill_usage",
            "\n"
            "Choose this option to create required folders for the GSMM Gapfilling,\n"
            "follow-up of the HMM search procedures.",
        ),
    )
    for short_flag, long_flag, help_text in switches:
        cli.add_argument(short_flag, long_flag, action="store_true", help=help_text)

    options = cli.parse_args()

    # The base directory is always the current working directory; there is no
    # command-line option to override it yet.
    set_directories(os.getcwd(), options.gapfill_usage)

    # The .kk database helpers are placeholders until the next version.
    if options.set_kk_DB:
        set_kk_database()
    if options.update_kk_DB:
        update_kk_database()
# Run the setup only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()