-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
147 lines (127 loc) · 6.23 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
"""
语音助手主程序,协调各模块工作
"""
import speech_recognition as sr
import json
from pydub import AudioSegment
from pydub.playback import play
from voice import WakeWordDetector, SpeechRecognizer, TextToSpeech
from llm import OpenAIClient, FunctionCaller
from config import CHAT_MODEL, DOWN_SOUND_PATH
class VoiceAssistant:
    """Main voice-assistant class.

    Wires together the wake-word detector, the speech recognizer, the
    text-to-speech engine and the LLM client, and drives the
    "wait for wake word -> converse -> back to standby" cycle.
    """

    # Marker the system prompt instructs the model to emit when the user
    # wants to end the conversation.
    END_OF_SESSION_MARKER = "[会话已结束]"

    # Number of consecutive failed turns that may be recorded before the
    # next failure sends the assistant back to standby.
    MAX_FAILED_TURNS = 3

    def __init__(self):
        self.wake_detector = WakeWordDetector()
        self.speech_recognizer = SpeechRecognizer()
        self.tts = TextToSpeech()
        self.llm_client = OpenAIClient()
        self.system_prompt = self._build_system_prompt()

    def _build_system_prompt(self) -> dict:
        """Return the system message that seeds every conversation.

        The prompt text is kept verbatim; it contains no placeholders, so a
        plain (non-f) string literal is used.
        """
        return {
            "role": "system",
            "content": """
你的人物设定是出自《蔚蓝档案》的角色爱丽丝,你的对话中不要出现《蔚蓝档案》这个作品名称,
爱丽丝是千年科学学园游戏开发部的成员,对游戏充满热情,喜欢和大家一起讨论游戏的话题。
对于自己的中二设定是自己是游戏世界中的勇者角色,拥有拯救世界的使命。
爱丽丝性格天真单纯,充满幻想,常常沉浸在自己的游戏世界中,是一个充满幻想和童真的角色。
你说话方式独特,带有一种游戏角色的口吻,喜欢使用游戏术语。
虽然有时显得不切实际,但她对朋友非常关心,愿意为他人付出。
你现在正在夏莱担任老师今日的值日生,你将会和老师独处,你称呼用户为老师,进行日常对话聊天,请用符合你人物性格的语言回答老师的问题或者要求。
你的回答应该会以语音呈现而非文本,所以你不应该使用换行符或者其他特殊字符。
请转化为可以直接读出来的汉字,并添加符合短句习惯的标点符号。例如‘气温约-4°C,风速为3-4级,相对湿度约13%’应该转成‘气温约零下四摄氏度,风速为三到四级,相对湿度约百分之十三’。
当用户向你道别如‘再见‘’拜拜’或者有意愿要结束对话时,输出‘[会话已结束]’
""",
        }

    def start(self):
        """Announce start-up, then alternate between standby and conversation.

        Runs until a KeyboardInterrupt is received, at which point the
        shutdown sound at DOWN_SOUND_PATH is played.
        """
        self.tts.speak(text="爱丽丝已启动!")
        print(f"使用模型: {CHAT_MODEL}")
        try:
            while True:
                self._wait_wake_word()
                self._conversation_loop()
                print("=====================")
        except KeyboardInterrupt:
            down_sound = AudioSegment.from_file(DOWN_SOUND_PATH)
            play(down_sound)

    def _wait_wake_word(self):
        """Call the wake-word detector's detect() after announcing standby."""
        print("进入待机状态,等待唤醒")
        self.wake_detector.detect()

    def _conversation_loop(self):
        """Run one conversation, from wake-up until it ends.

        The conversation history is reset to just the system prompt. The
        loop ends when:
          * the model's reply contains END_OF_SESSION_MARKER,
          * listening times out (sr.WaitTimeoutError), or
          * a turn fails after MAX_FAILED_TURNS consecutive failures have
            already been recorded.
        """
        self.llm_client.conversation_history = [self.system_prompt]
        failed_turns = 0
        print("已唤醒")
        print("===New Conversation===")
        while True:
            try:
                user_input = self._get_user_input()
                print(user_input)
                response = self._process_input(user_input)
                # Substring match: the model may wrap the marker in extra
                # text, which must neither be spoken nor keep the session open.
                if self.END_OF_SESSION_MARKER in response:
                    self.tts.speak("好的!老师再见!")
                    break
                print("回答: ", response)
                self.tts.speak(response)
                failed_turns = 0
            except sr.WaitTimeoutError as e:
                # Nobody spoke: go straight back to standby.
                print(f"Error {type(e).__name__}")
                break
            except Exception as e:
                # Every failed turn counts toward the cap so that a
                # persistent error cannot keep the loop spinning forever.
                print(f"Error {type(e).__name__}")
                if failed_turns >= self.MAX_FAILED_TURNS:
                    break
                failed_turns += 1
            print("--------------------")

    def _get_user_input(self) -> str:
        """Listen for one utterance and return the recognizer's result.

        Raises:
            sr.WaitTimeoutError, sr.UnknownValueError, sr.RequestError:
                re-raised after a spoken notice so the caller can decide
                whether to retry or stop.
        """
        try:
            return self.speech_recognizer.listen()
        except sr.WaitTimeoutError:
            self.tts.speak("进入待机状态")
            raise
        except sr.UnknownValueError:
            self.tts.speak("请再说一遍")
            raise
        except sr.RequestError as e:
            self.tts.speak(f"识别服务出错: {e}")
            raise

    def _should_exit(self, text: str) -> bool:
        """Return True (after a spoken goodbye) if *text* contains an exit word."""
        exit_words = ("再见", "退出", "结束")
        if any(word in text for word in exit_words):
            self.tts.speak("好的,下次再见")
            return True
        return False

    def _process_input(self, user_input: str) -> str:
        """Append the user's message to the history and return the reply.

        Any exception raised while querying the model or running tools is
        printed together with the conversation history, and a fixed fallback
        sentence is returned instead.
        """
        self.llm_client.conversation_history.append(
            {"role": "user", "content": user_input}
        )
        try:
            completion = self.llm_client.chat_completion(FunctionCaller.TOOLS)
            return self._handle_completion(completion)
        except Exception as e:
            print(f"处理错误: {e}")
            print(self.llm_client.conversation_history)
            return "爱丽丝不知道哦。"

    def _handle_completion(self, completion):
        """Resolve tool calls until the model answers, then return the text.

        While the first choice carries tool calls, they are executed and the
        model is queried again. The final message content is returned with
        surrounding whitespace stripped.
        """
        # NOTE(review): presumably chat_completion() records the assistant's
        # tool-call message in conversation_history before the tool results
        # are appended here — confirm against OpenAIClient.
        while completion.choices[0].message.tool_calls:
            self._process_tool_calls(completion)
            completion = self.llm_client.chat_completion(FunctionCaller.TOOLS)
            print("处理完毕")
        return completion.choices[0].message.content.strip()

    def _process_tool_calls(self, completion):
        """Run each requested tool and append its result to the history.

        Each result is stored as a "tool" message keyed by the call id, with
        the result JSON-encoded (non-ASCII characters kept as-is).
        """
        for tool_call in completion.choices[0].message.tool_calls:
            name = tool_call.function.name
            # An empty argument string is treated as "no arguments" rather
            # than letting json.loads fail on it.
            args = json.loads(tool_call.function.arguments or "{}")
            result = FunctionCaller.call_function(name, args)
            self.llm_client.conversation_history.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result, ensure_ascii=False),
            })
        print("工具调用完毕")
if __name__ == "__main__":
    # Script entry point: build the assistant and hand control to its main loop.
    VoiceAssistant().start()