forked from terrisbecker/codenames-ai
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoriginal_csv_maker.py
31 lines (22 loc) · 997 Bytes
/
original_csv_maker.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
import numpy as np
import spacy as sp
from codenames_bot import similarity
# Build the hint-word x guess-word similarity matrix and save it as a CSV.
#
# Rows are the hint words (column "hints" of data/hint_words.csv), columns are
# the guess words (first column of data/words.txt), and each cell holds
# nlp(guess).similarity(nlp(hint)). Labels are lower-cased before saving.
nlp = sp.load('en_core_web_lg')

hint_words = pd.read_csv('data/hint_words.csv', index_col=0).hints.tolist()
# NOTE(review): read_csv consumes the first line of words.txt as a header, so
# that line never becomes a guess word -- confirm the file really has a header.
guess_words = pd.read_csv('data/words.txt').iloc[:, 0].tolist()

# Keep only string hints longer than two characters. read_csv yields float NaN
# for empty cells (and for tokens such as "NA"/"null"), and len() raises
# TypeError on those, so the isinstance check comes first.
hint_words = [
    word for word in hint_words if isinstance(word, str) and len(word) > 2
]

# Parse each guess word once up front and each hint word once per row, instead
# of re-running the pipeline on both words for every (hint, guess) pair.
guess_docs = [nlp(word) for word in guess_words]

scores = np.zeros((len(hint_words), len(guess_words)))
for row, hint in enumerate(hint_words):
    hint_doc = nlp(hint)
    for col, guess_doc in enumerate(guess_docs):
        scores[row, col] = guess_doc.similarity(hint_doc)

data = pd.DataFrame(scores)
print(data.head())

data.index = hint_words
data.columns = guess_words
data.index = data.index.str.lower()
data.columns = data.columns.str.lower()

# The unfiltered matrix is written first and then read back for clean-up, so
# the file on disk already holds the raw scores before the filtering below.
data.to_csv('data/similarity_matrix.csv')

similarity_matrix = pd.read_csv(
    'data/similarity_matrix.csv', index_col=0
).drop_duplicates()

# Filter rows with a positional boolean mask rather than a list of labels:
# label-based .loc would return every row sharing a label once per occurrence,
# multiplying rows whenever two hints collapse to the same lower-cased word.
keep = np.array(
    [
        isinstance(label, str) and len(label) > 2
        for label in similarity_matrix.index
    ],
    dtype=bool,
)
similarity_matrix = similarity_matrix.loc[keep, :]
similarity_matrix.to_csv('data/similarity_matrix.csv')