-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathchatbot.py
127 lines (100 loc) · 4.09 KB
/
chatbot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import json
import string
from sentence_transformers import SentenceTransformer, util
import spacy
# Dutch spaCy pipeline, used only for lemmatization in preprocess(); loaded once at import time.
nlp = spacy.load("nl_core_news_sm")
def read_qa(qa_path):
    """Load the JSON file with topics and their responses and return the parsed data."""
    with open(qa_path, encoding='utf8') as qa_file:
        return json.load(qa_file)
def initQAData():
    """Read the project's default Q&A dataset from disk."""
    qa_path = "./data/QandAdata.json"
    return read_qa(qa_path)
def initEmbeddings():
    """Instantiate and return the multilingual sentence-embedding model."""
    print("Loading embedding model...")
    model = SentenceTransformer('distiluse-base-multilingual-cased-v1')
    return model
def lemmatize(sentence):
    """Return *sentence* with every token replaced by its spaCy lemma, space-joined."""
    return ' '.join(token.lemma_ for token in nlp(sentence))
def preprocess(question):
    """Normalise a question for embedding: lowercase, strip punctuation, lemmatize."""
    lowered = question.lower()
    # one C-level pass that deletes every punctuation character
    without_punct = lowered.translate(str.maketrans('', '', string.punctuation))
    return lemmatize(without_punct)
# All generation logic is kept inside this one function.
def generate_response(encoded_input_question, encoded_questions, questions, answers):
    '''Generate a response from the chatbot.

    Parameters: the encoded user question, the encoded corpus questions, and the
    parallel lists of original question/answer strings.
    Returns a two-element list: [list of "question(Score: x.xxxx)" strings for the
    top-5 matches, the reply string for the front-end].
    '''
    answer = []
    hits = util.semantic_search(encoded_input_question, encoded_questions, top_k=5)
    hits = hits[0]  # Get the hits for the first query
    hit_list = [
        questions[hit['corpus_id']] + "(Score: {:.4f})".format(hit['score'])
        for hit in hits
    ]
    answer.append(hit_list)
    # Fix 1: the original raised NameError when `hits` was empty (top_score was
    # never bound). Fix 2: it looked the answer up via questions.index(...),
    # which returns the wrong entry when two questions share the same text;
    # hit['corpus_id'] already is the index into questions/answers.
    if hits and hits[0]['score'] > 0.4:
        target_idx = hits[0]['corpus_id']
        # return the answer matching the top-scoring question
        answer.append(f'\nCaetennia: {answers[target_idx]}')
    else:
        # low confidence (or no hits): polite Dutch fallback message
        answer.append('\nCaetennia: Ik heb daar helaas geen antwoord op. Heb je nog andere vragen?')
    return answer
# --- Module-level initialisation: runs once when this module is imported ---
embedding_model = initEmbeddings()
qa_data = initQAData()
# Load questions and answers from the QA json file
# ('Vraag' = question, 'Antwoord' = answer in Dutch)
questions = [info['Vraag'] for info in qa_data.values()]
answers = [info['Antwoord'] for info in qa_data.values()]
# Preprocessed copies are used only for embedding; the originals are shown to the user.
clean_questions = [preprocess(q) for q in questions]
# Encode the questions into vectors
encoded_questions = embedding_model.encode(clean_questions)
# This function is the direct bridge to the Flask front-end.
# TODO: handle static messages (greetings/goodbyes) inside generate_response
def returnResponse(user_input):
    """Encode the user's question and return the chatbot's response list."""
    cleaned_question = preprocess(user_input)
    encoded_input_question = embedding_model.encode(cleaned_question)
    return generate_response(encoded_input_question, encoded_questions, questions, answers)
# def test():
# embedding_model = initEmbeddings()
# qa_data = initQAData()
#
# # Load questions and answers from the QA json file
# questions = [info['Vraag'] for info in qa_data.values()]
# answers = [info['Antwoord'] for info in qa_data.values()]
#
# clean_questions = [preprocess(q) for q in questions]
#
# # Encode the questions into vectors
# encoded_questions = embedding_model.encode(clean_questions)
#
# # start conversation
# continue_dialogue = True
#
# while continue_dialogue:
#
# # get user input
# input_question = (input("\nStel een vraag aan Caetennia: "))
#
# # encode user question
# encoded_input_question = embedding_model.encode(preprocess(input_question))
#
# # generate response from chatbot
# answer = generate_response(encoded_input_question, encoded_questions, questions, answers)
# print(answer)