-
Notifications
You must be signed in to change notification settings - Fork 2
/
extract_from_text.py
63 lines (48 loc) · 1.91 KB
/
extract_from_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import argparse
from src import nlp_lib
from src.entities_lib import EntitiesLib
from src.relations_lib import RelationsLib
from src.wordnet_lib import WordNetDictionary, WordNetLemmatizerWrapped
# Command-line interface: the text to analyse is passed via -t/--text.
# Parsing happens at module level, so `args` is available to the script body.
parser = argparse.ArgumentParser(
    description="Extract entities, entity-property pairs and relations from text."
)
parser.add_argument(
    '-t', '--text', type=str, required=True,
    # surfaced in --help so users see the constraint without reading the source
    help='input text; make sure to use "." to end sentences, and only to end sentences',
)
args = parser.parse_args()
def split_sentences(text: str) -> list:
    """Split period-terminated input text into its individual sentences.

    The trailing "." is dropped and the remainder is split on ". ", so the
    returned sentences carry no terminating period.

    Args:
        text: Raw input text, e.g. "A dog runs. A cat sleeps."

    Returns:
        List of sentence strings without their final period.

    Raises:
        ValueError: If ``text`` is empty or does not end with ".".
    """
    # endswith() is simply False for "", whereas text[-1] would raise IndexError
    if not text.endswith("."):
        raise ValueError(
            "Error: please make sure to use \".\" to end sentences, and only to end sentences!"
        )
    return text[:-1].split(". ")


if __name__ == "__main__":
    try:
        sentences = split_sentences(args.text)
    except ValueError as err:
        # Same effect as exit(msg): message on stderr, exit status 1, but
        # without relying on the `site`-provided exit() helper.
        raise SystemExit(str(err))

    # The echoed input omits the final period (unchanged output format).
    text = args.text[:-1]

    # One placeholder [start, end] timestamp per sentence; the extraction
    # calls below take the timestamps as an argument. Each entry is a
    # separate list object.
    timestamps = [[0.0, 0.0] for _ in sentences]

    # load WordNet
    wn_dictionary = WordNetDictionary()
    wn_lemmatizer = WordNetLemmatizerWrapped()

    # create linguistic annotations using the language parser
    doc = nlp_lib.parse(sentences)

    # 1) extract video- and event-level entities and entity-property pairs
    #    (the second, event-level return value is not used here)
    video_level_entities, _, entity_property_pairs = EntitiesLib.extract_entities_and_properties(
        doc, timestamps, wn_dictionary, wn_lemmatizer
    )

    # 2) extract video- and event-level relations
    #    (again only the video-level result is printed)
    video_level_relations, _ = RelationsLib.extract_relations(
        doc, timestamps, wn_dictionary, wn_lemmatizer
    )

    # print results
    print(f"--------------------------------------------------------------\n"
          f"Input: {text}\n")

    print("Detected Sentences:")
    for sentence in sentences:
        # re-attach the period that splitting removed
        print(f"{sentence}.")

    print("\nEntities:")
    for e in video_level_entities:
        print(e.to_string())

    print("\nEntity-Property Pairs:")
    for ep in entity_property_pairs:
        print(ep.to_string())

    print("\nRelations:")
    for r in video_level_relations:
        print(r.to_string())
    print()