"""Download the optional model files used by finmeter.

According to the notices printed by ``main``, these models are only needed
for semantics, sentiment and metaphors; meter, rhyme and hyphenation work
without them.
"""
import os
import time


def make_dir(path):
    """Create the directory *path* if it does not exist yet.

    Missing parent directories are created as well, so nested targets such
    as ``sentiment/pickle`` work even when ``sentiment`` is absent. An
    already existing directory is not an error.

    Raises:
        OSError: if the directory cannot be created (for example because of
            missing permissions, or because *path* exists as a regular file).
    """
    os.makedirs(path, exist_ok=True)


def main():
    """Download every model file, then the Finnish models for uralicNLP."""
    # Imported here rather than at module level so that importing this module
    # does not require the third-party packages to be installed.
    from mikatools import download_file, script_path
    from uralicNLP import uralicApi

    print("Starting to download... This will take a while")
    print("These models are only needed for semantics, sentiment and metaphors")
    print("If you only need to assess meter and rhyme or hyphenate, you DO NOT need these models")
    print("Sentiment analysis requires tensorflow")

    for directory in ("data", "data/metaphor", "sentiment/pickle"):
        make_dir(script_path(directory))

    # NOTE(review): presumably a pause so the notices above can be read
    # before the progress output starts -- confirm.
    time.sleep(2)

    # Destination path (relative to this script) -> download URL.
    files = {
        "data/metaphor/unigrams_sorted_5k.txt": "https://zenodo.org/record/3473456/files/unigrams_sorted_5k.txt?download=1",
        "data/metaphor/rel_matrix_n_csr.hkl": "https://zenodo.org/record/3473456/files/rel_matrix_n_csr.hkl?download=1",
        "data/fin-word2vec-lemma.bin": "https://zenodo.org/record/3473456/files/fin-word2vec-lemma.bin?download=1",
        "sentiment/pickle/en.bin": "https://zenodo.org/record/3473456/files/en.bin?download=1",
        "sentiment/pickle/es.bin": "https://zenodo.org/record/3473456/files/es.bin?download=1",
        # NOTE(review): the remote file is named .txt but is stored as .json;
        # presumably intentional -- confirm.
        "data/fi_concreteness.json": "https://zenodo.org/record/3473456/files/fi_concreteness.txt?download=1",
    }

    total = len(files)
    for index, (target, url) in enumerate(files.items(), start=1):
        destination = script_path(target)
        print("Downloading", index, "out of", total)
        print(url, " -->", destination)
        download_file(url, destination, show_progress=True)

    print("Downloading Finnish models for uralicNLP")
    uralicApi.download("fin")


if __name__ == "__main__":
    main()
def interpret(tenor, vehicle, pos_tags=True, maximum=None):
    """Interpret the metaphor formed by *tenor* and *vehicle*.

    Args:
        tenor: passed unchanged to ``m4m.interpret``.
        vehicle: passed unchanged to ``m4m.interpret``.
        pos_tags: when true, group the results by part of speech.
        maximum: when truthy, keep only the first ``maximum`` results.
            ``None`` (the default) and ``0`` leave the results untruncated.

    Returns:
        The (possibly truncated) result of ``m4m.interpret``, or, when
        *pos_tags* is true, a dict mapping "A", "Adv", "V", "N" and "UNK"
        to lists of those results.
    """
    results = m4m.interpret(tenor, vehicle)
    if maximum:
        results = results[:maximum]
    if pos_tags:
        return _pos_tag(results)
    return results


def _merge_compound_analysis(tags):
    """Merge a "#"-separated compound analysis into one lemma-plus-tags list.

    Each "#"-separated part is split on "+". The first fields of all parts
    are concatenated into a single lemma; the remaining fields are taken
    from the first part only.
    """
    first, *others = tags.split("#")
    merged = first.split("+")
    merged[0] += "".join(part.split("+")[0] for part in others)
    return merged


def _pos_tag(words):
    """Group *words* by the part of speech of their first matching analysis.

    Args:
        words: iterable of items whose element ``[0]`` is the word string
            (presumably (word, score) pairs from ``m4m.interpret`` -- confirm).

    Returns:
        A dict with the keys "A", "Adv", "V", "N" and "UNK"; every input
        item is appended, whole, to exactly one of the lists. Items with no
        suitable analysis go to "UNK".
    """
    pos_tags = {"A": [], "Adv": [], "V": [], "N": [], "UNK": []}
    for word in words:
        analyses = uralicApi.analyze(word[0], "fin", force_local=True)
        tag = "UNK"
        for analysis in analyses:
            merged = _merge_compound_analysis(analysis[0])
            # An analysis string without any "+tag" yields a one-element
            # list; skip it instead of raising IndexError on merged[1].
            if len(merged) < 2:
                continue
            # Accept only analyses whose lemma is the word itself.
            if merged[0] == word[0] and merged[1] in pos_tags:
                tag = merged[1]
                break
        pos_tags[tag].append(word)
    return pos_tags
You can install these using the following syntax, @@ -82,7 +82,7 @@ # installed, specify them here. If using Python 2.6 or less, then these # have to be included in MANIFEST.in as well. package_data={ - 'finmeter': ['*.json'], + 'finmeter': ['*.json', "sentiment/checkpoint", "sentiment/senti_model.bin.data-00000-of-00001", "sentiment/senti_model.bin.index", "sentiment/senti_model.bin.meta", "sentiment/checkpoints/en-es-bimap-1.bin"], }, # Although 'package_data' is the preferred approach, in some case you may diff --git a/testi.py b/testi.py index c926d1b..797e35f 100644 --- a/testi.py +++ b/testi.py @@ -14,14 +14,17 @@ print(semantics.similarity_clusters(["koira", "kissa", "hevonen"], ["talo", "koti", "ovi"])) #print(semantics.cluster_centroid(["koira", "kissa", "hevonen"])) - +""" from finmeter import metaphor -print(metaphor.metaphoricity("luovuus", "liekki", ["luovuus", "olla", "liekki", "se", "syttyä", "rinta", "ja", "polttaa"])) -print(metaphor.interpret("aika", "raha")[:10]) + +print(metaphor.interpret("mies", "susi", maximum=10)) """ + from finmeter import sentiment print(sentiment.predict(["täällä on sika kivaa"])) -print(sentiment.predict(["tällä on tylsää ja huonoa"])) \ No newline at end of file +print(sentiment.predict(["tällä on tylsää ja huonoa"])) + +""" \ No newline at end of file