Skip to content

Commit

Permalink
adjustments
Browse files Browse the repository at this point in the history
  • Loading branch information
Hämäläinen, Mika K committed Oct 5, 2019
1 parent f4be8e6 commit 11bf8b1
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 9 deletions.
34 changes: 34 additions & 0 deletions finmeter/download.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from mikatools import *
from uralicNLP import uralicApi
import time
import os

def make_dir(path):
    """Create the directory ``path`` if it does not exist yet.

    Missing parent directories are created as well, so nested targets such
    as ``sentiment/pickle`` work even when ``sentiment`` is absent.  An
    already existing directory is not an error.

    Args:
        path: File-system path of the directory to create.

    Raises:
        OSError: If the directory cannot be created for a reason other than
            it already existing (e.g. missing permissions, or ``path`` is an
            existing regular file).
    """
    # exist_ok=True replaces the former bare ``except: pass``, which also hid
    # real failures (and even KeyboardInterrupt) until the download broke.
    os.makedirs(path, exist_ok=True)

def main():
    """Download the optional model files used by finmeter.

    Prints a short notice, creates the target directories under the script
    path, pauses for two seconds, fetches every file listed in ``files``
    and finally downloads the Finnish models for uralicNLP.
    """
    notices = (
        "Starting to download... This will take a while",
        "These models are only needed for semantics, sentiment and metaphors",
        "If you only need to assess meter and rhyme or hyphenate, you DO NOT need these models",
        "Sentiment analysis requires tensorflow",
    )
    for notice in notices:
        print(notice)

    for directory in ("data", "data/metaphor", "sentiment/pickle"):
        make_dir(script_path(directory))

    # Short pause before the downloads start (kept from the original flow).
    time.sleep(2)

    # Local target path (relative to the script path) -> download URL.
    files = {
        "data/metaphor/unigrams_sorted_5k.txt": "https://zenodo.org/record/3473456/files/unigrams_sorted_5k.txt?download=1",
        "data/metaphor/rel_matrix_n_csr.hkl": "https://zenodo.org/record/3473456/files/rel_matrix_n_csr.hkl?download=1",
        "data/fin-word2vec-lemma.bin": "https://zenodo.org/record/3473456/files/fin-word2vec-lemma.bin?download=1",
        "sentiment/pickle/en.bin": "https://zenodo.org/record/3473456/files/en.bin?download=1",
        "sentiment/pickle/es.bin": "https://zenodo.org/record/3473456/files/es.bin?download=1",
        "data/fi_concreteness.json": "https://zenodo.org/record/3473456/files/fi_concreteness.txt?download=1",
    }
    total = len(files)
    for position, (target, url) in enumerate(files.items(), start=1):
        destination = script_path(target)
        print("Downloading", position, "out of", total)
        print(url, " -->", destination)
        download_file(url, destination, show_progress=True)

    print("Downloading Finnish models for uralicNLP")
    uralicApi.download("fin")

# Run the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__== "__main__":
    main()
32 changes: 30 additions & 2 deletions finmeter/metaphor.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from mikatools import *
from .meta4meaning_fi import Meta4meaningFi
from uralicNLP import uralicApi


rows_path = script_path('data/metaphor/unigrams_sorted_5k.txt')
Expand All @@ -10,5 +11,32 @@
def metaphoricity(tenor, vehicle, expression, k=0):
    """Return the metaphoricity score computed by the module-level model.

    Thin wrapper that forwards all arguments unchanged to
    ``m4m.metaphoricity``.

    Args:
        tenor: Tenor word of the metaphor.
        vehicle: Vehicle word of the metaphor.
        expression: The expression to score (the project's own usage passes
            a list of word strings).
        k: Forwarded as the ``k`` keyword argument; defaults to 0.

    Returns:
        Whatever ``m4m.metaphoricity`` returns for these arguments.
    """
    score = m4m.metaphoricity(tenor, vehicle, expression, k=k)
    return score

def interpret(tenor, vehicle, pos_tags=True, maximum=None):
    """Interpret the metaphor formed by ``tenor`` and ``vehicle``.

    Args:
        tenor: Tenor word of the metaphor.
        vehicle: Vehicle word of the metaphor.
        pos_tags: When true (default), the results are grouped by part of
            speech with ``_pos_tag``; otherwise the result list is returned
            as produced by the model.
        maximum: Optional cap on the number of results, applied as a slice
            from the start of the model output.  ``None`` (default) keeps
            everything; ``0`` keeps nothing.

    Returns:
        A dict of part-of-speech tag -> list of results when ``pos_tags`` is
        true, otherwise the (possibly truncated) result sequence.
    """
    res = m4m.interpret(tenor, vehicle)
    # Compare against None explicitly so that maximum=0 is honoured instead
    # of being treated as "no limit".
    if maximum is not None:
        res = res[:maximum]
    if pos_tags:
        return _pos_tag(res)
    return res

def _merge_compound_analysis(tags):
    """Collapse a ``#``-separated compound analysis into one field list.

    The analysis string is split on ``#`` into parts and each part on ``+``
    into fields.  The fields of the first part are kept, and the first field
    of every later part is appended to the first field of the result.

    Args:
        tags: Analysis string, e.g. ``"yö+N+Sg+Nom#juna+N+Sg+Nom"``.

    Returns:
        List of fields, e.g. ``["yöjuna", "N", "Sg", "Nom"]``.
    """
    first_part, *later_parts = tags.split("#")
    fields = first_part.split("+")
    fields[0] += "".join(part.split("+")[0] for part in later_parts)
    return fields

def _pos_tag(words):
    """Group result items by the part of speech of their word.

    Each item's word (``item[0]``) is analysed with the local Finnish
    uralicNLP analyser.  The first analysis whose merged first field equals
    the word itself and whose second field is one of the known tags decides
    the group; items without such an analysis go to ``"UNK"``.

    Args:
        words: Iterable of items whose first element is the word string
            (presumably the entries returned by ``m4m.interpret`` — confirm
            against the model).

    Returns:
        Dict with the keys ``"A"``, ``"Adv"``, ``"V"``, ``"N"`` and
        ``"UNK"``, each mapping to the list of input items in that group,
        in input order.
    """
    pos_tags = {"A": [], "Adv": [], "V": [], "N": [], "UNK": []}
    accepted_tags = set(pos_tags)
    for word in words:
        analyses = uralicApi.analyze(word[0], "fin", force_local=True)
        tag = "UNK"
        for analysis in analyses:
            fields = _merge_compound_analysis(analysis[0])
            # A merged analysis may consist of the lemma only (no "+TAG"
            # field); skip it instead of failing with IndexError.
            if len(fields) < 2:
                continue
            if word[0] == fields[0] and fields[1] in accepted_tags:
                tag = fields[1]
                break
        pos_tags[tag].append(word)
    return pos_tags
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,14 +63,14 @@

# You can just specify the packages manually here if your project is
# simple. Or you can use find_packages().
packages=["finmeter"],
packages=["finmeter", "finmeter.utils","finmeter.sentiment","finmeter.sentiment.utils"],
package_dir={'finmeter': 'finmeter'},

# List run-time dependencies here. These will be installed by pip when
# your project is installed. For an analysis of "install_requires" vs pip's
# requirements files see:
# https://packaging.python.org/en/latest/requirements.html
install_requires=["unidecode","sklearn","mikatools","numpy","scipy","tqdm","hickle","argparse"],
install_requires=["unidecode","sklearn","mikatools>=0.0.7","numpy","scipy","tqdm","hickle","argparse","uralicNLP"],

# List additional groups of dependencies here (e.g. development
# dependencies). You can install these using the following syntax,
Expand All @@ -82,7 +82,7 @@
# installed, specify them here. If using Python 2.6 or less, then these
# have to be included in MANIFEST.in as well.
package_data={
'finmeter': ['*.json'],
'finmeter': ['*.json', "sentiment/checkpoint", "sentiment/senti_model.bin.data-00000-of-00001", "sentiment/senti_model.bin.index", "sentiment/senti_model.bin.meta", "sentiment/checkpoints/en-es-bimap-1.bin"],
},

# Although 'package_data' is the preferred approach, in some case you may
Expand Down
11 changes: 7 additions & 4 deletions testi.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,17 @@
print(semantics.similarity_clusters(["koira", "kissa", "hevonen"], ["talo", "koti", "ovi"]))
#print(semantics.cluster_centroid(["koira", "kissa", "hevonen"]))
"""
from finmeter import metaphor

print(metaphor.metaphoricity("luovuus", "liekki", ["luovuus", "olla", "liekki", "se", "syttyä", "rinta", "ja", "polttaa"]))
print(metaphor.interpret("aika", "raha")[:10])

print(metaphor.interpret("mies", "susi", maximum=10))

"""
from finmeter import sentiment
print(sentiment.predict(["täällä on sika kivaa"]))
print(sentiment.predict(["tällä on tylsää ja huonoa"]))
print(sentiment.predict(["tällä on tylsää ja huonoa"]))
"""

0 comments on commit 11bf8b1

Please sign in to comment.