-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
29 lines (22 loc) · 933 Bytes
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from gensim.models import word2vec
import time
def convert_corpora_to_sentence_iter(corpus_path='text8/text8'):
    """Wrap a text8-format corpus file in a ``word2vec.Text8Corpus``.

    Args:
        corpus_path: Location of the corpus file. Defaults to
            ``'text8/text8'``, the path this script has always used, so
            existing zero-argument callers are unaffected.

    Returns:
        The ``word2vec.Text8Corpus`` object built for ``corpus_path``; it is
        passed as the ``sentences`` argument when training.
    """
    return word2vec.Text8Corpus(corpus_path)
def train_and_save_model(sentences, fname, **kwargs):
    """Train a Word2Vec model, save it, and hand it back.

    Args:
        sentences: Training input, passed straight to ``word2vec.Word2Vec``.
        fname: Destination passed to the model's ``save`` method.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``word2vec.Word2Vec``.

    Returns:
        The trained model object (after it has been saved).
    """
    trained = word2vec.Word2Vec(sentences, **kwargs)
    trained.save(fname)
    return trained
def get_model_name(**kw):
    """Build the file name under which a model is saved.

    Args:
        **kw: Must contain ``model_size`` and ``window``; any other keys are
            ignored by the format template.

    Returns:
        A name of the form ``w2v_model_size_<model_size>_window_<window>.model``.

    Raises:
        KeyError: If ``model_size`` or ``window`` is not supplied.
    """
    template = u"w2v_model_size_{model_size}_window_{window}.model"
    return template.format(**kw)
# Debug trace kept from the original script: emits the module name whenever
# the file is imported or run. The single-argument parenthesised form prints
# the same text on Python 2 and also parses on Python 3.
print(__name__)
if __name__ == '__main__':
    # One corpus object is created up front and reused for every training run.
    sentences_iter = convert_corpora_to_sentence_iter()
    # Grid over vector sizes 10, 20, ..., 190 and three context-window widths;
    # each combination is trained and saved under its own file name.
    for model_size in range(10, 200, 10):
        for window in (5, 7, 10):
            # time.time() gives wall-clock seconds on every platform.
            # time.clock() (used previously) reported CPU time on Unix but
            # wall time on Windows, and no longer exists in Python 3.8+.
            t0 = time.time()
            print("Training with size={}, window={}".format(model_size, window))
            fname = get_model_name(model_size=model_size, window=window)
            # The returned model is not needed here; it is already on disk.
            train_and_save_model(sentences_iter, fname, size=model_size, window=window)
            t1 = time.time()
            print("Training took {} secs".format(t1 - t0))