-
Notifications
You must be signed in to change notification settings - Fork 1
/
search
executable file
·131 lines (118 loc) · 4.79 KB
/
search
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python3
import argparse
import subprocess
import os
import json
import psutil
import re
print("Searching...")

# Hold back 512 MiB of system memory; whatever is left is offered to Terrier.
reserved_memory = 512 * 1024**2
# NOTE(review): despite the variable name, this is psutil's `total` field
# (index 0 of the virtual_memory() tuple), i.e. all physical RAM rather than
# the currently free portion.
available_memory = psutil.virtual_memory().total
# Heap budget expressed in whole megabytes.
usable_memory = (available_memory - reserved_memory) // (1024 * 1024)

# Private copy of the environment, handed to the retrieval subprocess, carrying
# the heap size as TERRIER_HEAP_MEM (presumably read by the terrier launcher).
my_env = dict(os.environ)
my_env["TERRIER_HEAP_MEM"] = str(usable_memory) + "M"
# The whole request arrives as one JSON document in --json; it is decoded by
# argparse itself (type=json.loads), so args.json is already a dict.
parser = argparse.ArgumentParser()
parser.add_argument("--json", type=json.loads, required=True, help="json arguments")
# parse_known_args: any additional command-line flags are ignored, not rejected.
args, _ = parser.parse_known_args()

# The mode must be an octal literal. The previous decimal 777 equals 0o1411
# (sticky bit, owner read-only) instead of rwxrwxrwx. exist_ok avoids the
# check-then-create race of a separate isdir() test.
os.makedirs("/output", mode=0o777, exist_ok=True)

collection_name = args.json["collection"]["name"]

# Default to plain BM25 unless the request selects a configuration.
active_configs = ["bm25"]
if "opts" in args.json and "config" in args.json["opts"]:
    # Remove "config" from opts so the remaining opts can be forwarded
    # verbatim to terrier as extra command-line options.
    active_configs = [args.json["opts"].pop("config")]
    print("Active configs are " + str(active_configs))
# Named retrieval configurations. Each maps a command-line flag to its value;
# an empty value means the flag stands alone. Java properties are written as a
# single "-Dkey=value" flag with an empty value so that they reach terrier as
# one argument.
configs = {
    "bm25": {"-w": "BM25"},
    "pl2": {"-w": "PL2"},
    "dph": {"-w": "DPH"},
    "ql": {"-w": "DirichletLM"},
    # *_qe variants add -q to the base weighting model.
    "bm25_qe": {"-w": "BM25", "-q": ""},
    "pl2_qe": {"-w": "PL2", "-q": ""},
    "dph_qe": {"-w": "DPH", "-q": ""},
    "ql_qe": {"-w": "DirichletLM", "-q": ""},
    "bm25_ltr_features": {
        "-w": "BM25",
        "-c": "labels:on",
        "-F": "Normalised2LETOROutputFormat",
        "-Dtrec.matching=FatFeaturedScoringMatching,org.terrier.matching.daat.FatFull": "",
        "-Dfat.featured.scoring.matching.features=FILE": "",
        "-Dproximity.dependency.type=SD": "",
        "-Dfat.featured.scoring.matching.features.file=/output/features.list": "",
    },
    "bm25_ltr_jforest": {
        "-w": "BM25",
        "-Dtrec.matching=JforestsModelMatching,FatFeaturedScoringMatching,org.terrier.matching.daat.FatFull": "",
        "-Dfat.featured.scoring.matching.features=FILE": "",
        "-Dfat.featured.scoring.matching.features.file=/output/features.list": "",
        "-Dfat.matching.learned.jforest.model=/output/ensemble.txt": "",
        "-Dfat.matching.learned.jforest.statistics=/output/jforests-feature-stats.txt": "",
    },
    # *_prox variants add a dependence score modifier.
    "bm25_prox": {
        "-w": "BM25",
        "-Dmatching.dsms=DFRDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
    },
    "bm25_prox_qe": {
        "-w": "BM25",
        "-Dmatching.dsms=DFRDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
        "-q": "",
    },
    "pl2_prox": {
        "-w": "PL2",
        "-Dmatching.dsms=DFRDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
    },
    "pl2_prox_qe": {
        "-w": "PL2",
        "-Dmatching.dsms=DFRDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
        "-q": "",
    },
    "dph_prox": {
        "-w": "DPH",
        "-Dmatching.dsms=DFRDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
    },
    "dph_prox_qe": {
        "-w": "DPH",
        "-Dmatching.dsms=DFRDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
        "-q": "",
    },
    # The property and its value must form one "-Dkey=value" token. Spelling
    # them as a separate key and value produced two whitespace-separated
    # arguments, unlike every other proximity configuration above.
    "ql_prox": {
        "-w": "DirichletLM",
        "-Dmatching.dsms=MRFDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
    },
    "ql_prox_qe": {
        "-w": "DirichletLM",
        "-Dmatching.dsms=MRFDependenceScoreModifier": "",
        "-Dproximity.dependency.type=SD": "",
        "-q": "",
    },
}
# Run one terrier batchretrieve per selected configuration.
for name in active_configs:
    if name not in configs:
        # Same exception type as the bare dict lookup, but with a usable message.
        raise KeyError("Unknown config {!r}; expected one of {}".format(name, sorted(configs)))

    collection = args.json["collection"]["name"]

    # Substring test on purpose: re.match() anchors at the start of the string,
    # so it never matched names such as "bm25_prox" and this check was skipped.
    if "prox" in name:
        # Check for blocks (positions) index
        stats_cmd = [
            "/work/terrier-core/bin/terrier",
            "indexstats",
            "-I",
            "/work/indexes/{}".format(collection),
        ]
        stats_out = subprocess.check_output(stats_cmd).decode("utf-8")
        # Scan every line instead of relying on the flag being second to last,
        # which raised IndexError on short output.
        blocks = any(line.strip() == "blocks: true" for line in stats_out.splitlines())
        if not blocks:
            raise Exception("Index was not built with --opts block.indexing=true")

    # Flatten the config's options, then any user-supplied opts, into argv
    # tokens. Building a token list fixes the missing separator that used to
    # glue the last config option to the first user option, and str() allows
    # non-string JSON values.
    option_pairs = list(configs[name].items())
    if "opts" in args.json:
        option_pairs.extend(args.json["opts"].items())
    opt_tokens = []
    for flag, value in option_pairs:
        opt_tokens.append(str(flag))
        # An empty (or null) value means the flag stands alone.
        if value not in ("", None):
            opt_tokens.append(str(value))

    # Pass argv as a list so paths containing whitespace stay intact.
    cmd = [
        "/work/terrier-core/bin/terrier",
        "batchretrieve",
        "-I",
        "/work/indexes/{}".format(collection),
        "-o",
        "/output/run.{}.{}.txt".format(collection, name),
        *opt_tokens,
        "-t",
        str(args.json["topic"]["path"]),
        "-DTrecQueryTags.skip=DESC,NARR,DOM,HEAD,SMRY,CON,FAC,DEF",
        "-Dtrec.querying.dump.settings=false",
        "-Dtrec.output.format.length={}".format(args.json["top_k"]),
    ]
    print("Executing terrier cmd " + " ".join(cmd))
    # NOTE(review): the exit status is not checked, so a failed retrieval does
    # not fail this script; confirm whether that is intended.
    subprocess.run(cmd, env=my_env)
    #subprocess.run("""rm /output/run.{0}.{1}.txt.settings""".format(args.json['collection']['name'], name).split())