-
Notifications
You must be signed in to change notification settings - Fork 2
/
config.yaml
105 lines (100 loc) · 4 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# Author
# Project metadata and top-level run settings.
project_name: masters_thesis
author: Jimmy Callin
description: >-
  This is the main project for my thesis in Shallow Discourse Parsing,
  where I develop models for sense classification.
# NOTE(review): this address looks like a redaction placeholder. Unquoted,
# the leading "[" makes it parse as a one-element list, so it is quoted to
# stay a string. Restore the real address.
email: '[email protected]'
# Machine-specific absolute path.
# NOTE(review): presumably the relative paths in this file are resolved
# against it -- confirm against the code that reads this config.
base_dir: /Users/jimmy/dev/edu/master-thesis/
# The values of `train` and `test` match the `training_data` / `dev_data`
# keys of the `resources` section.
train: training_data
test: dev_data
print_report: true
results_db_uri: 'mongodb://localhost:27017'
# Resources
# Data sets, keyed by the names used as values of `train` / `test`.
# Each data set carries its own copy of the settings below; the two class
# lists are identical and must be kept in sync by hand.
resources:
  training_data:
    name: conll16st-en-01-12-16-train
    path: resources/conll16st-en-zh-dev-train_LDC2016E50/conll16st-en-01-12-16-train/relations.json
    max_hierarchical_level: 3
    max_words_in_sentence: 50
    padding: false
    classes:
      - 'Temporal.Asynchronous.Precedence'
      - 'Temporal.Asynchronous.Succession'
      - 'Temporal.Synchrony'
      - 'Contingency.Cause.Reason'
      - 'Contingency.Cause.Result'
      - 'Contingency.Condition'
      - 'Comparison.Contrast'
      - 'Comparison.Concession'
      - 'Expansion.Conjunction'
      - 'Expansion.Instantiation'
      - 'Expansion.Restatement'
      - 'Expansion.Alternative'
      - 'Expansion.Alternative.Chosen alternative'
      - 'Expansion.Exception'
      - 'EntRel'
    separate_dual_classes: true
    filter_type: ['Implicit', 'EntRel']
  dev_data:
    name: conll16st-en-01-12-16-dev
    path: resources/conll16st-en-zh-dev-train_LDC2016E50/conll16st-en-01-12-16-dev/relations.json
    max_hierarchical_level: 3
    max_words_in_sentence: 50
    padding: false
    classes:
      - 'Temporal.Asynchronous.Precedence'
      - 'Temporal.Asynchronous.Succession'
      - 'Temporal.Synchrony'
      - 'Contingency.Cause.Reason'
      - 'Contingency.Cause.Result'
      - 'Contingency.Condition'
      - 'Comparison.Contrast'
      - 'Comparison.Concession'
      - 'Expansion.Conjunction'
      - 'Expansion.Instantiation'
      - 'Expansion.Restatement'
      - 'Expansion.Alternative'
      - 'Expansion.Alternative.Chosen alternative'
      - 'Expansion.Exception'
      - 'EntRel'
    # Unlike training_data, dual classes are not separated here.
    separate_dual_classes: false
    filter_type: ['Implicit', 'EntRel']
# Logging
# NOTE(review): the keys follow the layout of Python's
# logging.config.dictConfig schema -- presumably this mapping is passed to
# it; confirm in the code that loads this config.
logging:
  version: 1
  disable_existing_loggers: false
  formatters:
    default:
      # Quoted because a plain scalar may not start with '%'.
      format: '%(asctime)s %(name)-12s %(levelname)-8s %(message)s'
  handlers:
    console:
      class: logging.StreamHandler
      stream: ext://sys.stdout
      formatter: default
      level: DEBUG
    file:
      class: logging.FileHandler
      filename: main.log
      formatter: default
      level: DEBUG
  root:
    # Both handlers defined above are attached to the root logger.
    handlers: [console, file]
    level: DEBUG
# Deployment process
deploy:
  # Download these files to their location.
  # Each entry lists the source URLs (`from`) and a target directory (`to`).
  download:
    - name: brown_clusters
      from:
        - http://metaoptimize.s3.amazonaws.com/brown-clusters-ACL2010/README.txt
        - http://metaoptimize.s3.amazonaws.com/brown-clusters-ACL2010/brown-rcv1.clean.tokenized-CoNLL03.txt-c100-freq1.txt
        - http://metaoptimize.s3.amazonaws.com/brown-clusters-ACL2010/brown-rcv1.clean.tokenized-CoNLL03.txt-c320-freq1.txt
        - http://metaoptimize.s3.amazonaws.com/brown-clusters-ACL2010/brown-rcv1.clean.tokenized-CoNLL03.txt-c1000-freq1.txt
        - http://metaoptimize.s3.amazonaws.com/brown-clusters-ACL2010/brown-rcv1.clean.tokenized-CoNLL03.txt-c3200-freq1.txt
      to: resources/brown_clusters
  # Make sure these files are where they should be, with the correct SHA.
  # They cannot be downloaded from URLs, so they have to be put in place
  # manually.
  check_exists:
    - name: word2vec
      path: resources/GoogleNews-vectors-negative300.bin
      # Quoted so the digest is always read as a string.
      shasum: 'df6bee5cbaa95ec7fa389bf666d14d4a9ff91484'