# overlay.py
import sys
import base64
import threading
from PyQt5.QtCore import Qt, QTimer, QPoint, QByteArray, QBuffer, QIODevice
from PyQt5.QtWidgets import QApplication, QLabel, QMainWindow, QVBoxLayout, QHBoxLayout, QWidget, QGridLayout, QScrollArea, QSizeGrip, QPushButton
from PyQt5.QtGui import QPixmap
from openai_wrapper import text_fallacy_classification, openAI_TTS
from real_time_classifier import continuous_audio_transcription
from real_time_classifier import WHISPER_TEXTS
from audio import play_audio, change_playback_speed
GPT_TEXTS = []
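# GPT_TEXTS (and the imported WHISPER_TEXTS) act as simple shared buffers: worker
# threads append new text and the GUI timer polls them periodically.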
class TransparentOverlay(QMainWindow):
def __init__(self, whs_model, auto):
super().__init__()
self.whs_model = whs_model
self.auto = auto
self.dragPos = QPoint()
self.opacity = 0.6
self.is_tts_enabled = False
self.initUI()
def initUI(self):
self.setWindowTitle('Transparent Overlay')
self.setGeometry(0, 0, 1000, 600)
self.setWindowOpacity(self.opacity)
self.setAttribute(Qt.WA_TranslucentBackground)
self.setWindowFlags(Qt.Window | Qt.FramelessWindowHint | Qt.WindowStaysOnTopHint)
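        # Frameless, translucent, always-on-top flags make the window behave as an
        # overlay; dragging is handled manually in mousePressEvent/mouseMoveEvent.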
# Create Scroll Areas
self.scroll_area1 = QScrollArea(self)
self.scroll_area2 = QScrollArea(self)
# Increase the dimensions of the scroll areas
self.scroll_area1.setMinimumSize(380, 120)
self.scroll_area2.setMinimumSize(380, 120)
# Create Labels
self.whisper_label = QLabel('Whisper Output Will Appear Here')
self.chatgpt_label = QLabel('ChatGPT Output Will Appear Here')
# Enable word-wrap on labels
self.whisper_label.setWordWrap(True)
self.chatgpt_label.setWordWrap(True)
# Add labels to Scroll Areas
self.scroll_area1.setWidget(self.whisper_label)
self.scroll_area2.setWidget(self.chatgpt_label)
# Enable scroll bars on the scroll areas
self.scroll_area1.setWidgetResizable(True)
self.scroll_area2.setWidgetResizable(True)
# Style labels with bold text and increased font size, using professional grey shades
self.whisper_label.setStyleSheet('background-color: #dcdcdc; font-weight: bold; font-size: 12px; color: black;')
self.chatgpt_label.setStyleSheet('background-color: #696969; font-weight: bold; font-size: 15px; color: white;')
# Layout setup
# QVBoxLayout for the scroll areas
vbox_layout = QVBoxLayout()
vbox_layout.addWidget(self.scroll_area2)
vbox_layout.addWidget(self.scroll_area1)
# QGridLayout to include QVBoxLayout and QSizeGrip
grid_layout = QGridLayout()
grid_layout.addLayout(vbox_layout, 0, 0)
# Add QSizeGrip to the QGridLayout
size_grip = QSizeGrip(self)
grid_layout.addWidget(size_grip, 1, 1, Qt.AlignBottom | Qt.AlignRight)
container = QWidget()
container.setLayout(grid_layout)
self.setCentralWidget(container)
# Run the continuous transcription and classification in a separate thread
self.stop_event = threading.Event()
self.transcription_thread = threading.Thread(target=continuous_audio_transcription, args=(self.whs_model, self.stop_event))
self.transcription_thread.start()
# Timer to update Whisper and ChatGPT outputs
self.timer = QTimer(self)
self.timer.timeout.connect(self.update_labels)
self.timer.start(500)
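        # Polling with a QTimer keeps all widget updates on the GUI thread; Qt
        # widgets must not be modified directly from the worker threads.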
# Create a label to display the screenshot
# self.screenshot_label = QLabel(self)
# self.screenshot_label.setWordWrap(True)
# vbox_layout.addWidget(self.screenshot_label)
# Add a button for screen capture
self.capture_button = QPushButton('Analyze Transcript', self)
self.capture_button.clicked.connect(self.start_text_thread)
        # Toggle buttons for showing/hiding the panels and enabling TTS
self.toggle_whisper_button = QPushButton('Toggle Transcript', self)
self.toggle_whisper_button.clicked.connect(self.toggle_whisper_box)
self.toggle_chatgpt_button = QPushButton('Toggle Analysis', self)
self.toggle_chatgpt_button.clicked.connect(self.toggle_chatgpt_box)
self.toggle_tts_button = QPushButton('Toggle TTS', self)
self.toggle_tts_button.clicked.connect(self.toggle_tts)
# Style buttons
self.capture_button.setStyleSheet("QPushButton { background-color: grey; font-weight: bold; }")
self.toggle_whisper_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold; }")
self.toggle_chatgpt_button.setStyleSheet("QPushButton { background-color: green; font-weight: bold; }")
self.toggle_tts_button.setStyleSheet("QPushButton { background-color: red; font-weight: bold; }")
# Create a horizontal layout for the buttons
button_layout = QHBoxLayout()
# Add buttons to the horizontal layout
button_layout.addWidget(self.capture_button)
button_layout.addWidget(self.toggle_whisper_button)
button_layout.addWidget(self.toggle_chatgpt_button)
button_layout.addWidget(self.toggle_tts_button)
# Now add the horizontal layout of buttons to the main vertical layout
vbox_layout.addLayout(button_layout)
def update_labels(self):
# get_whisper_transcription returns a list of text segments, newest last.
whisper_segments = get_whisper_transcription()
# Concatenate the segments and set the label text.
self.whisper_label.setText("Transcript: " + '- '.join(whisper_segments))
        # Show the most recent ChatGPT analysis in the upper panel
chatgpt_output_list = get_chatgpt_output()
chatgpt_text = "".join(chatgpt_output_list)
self.chatgpt_label.setText(f"{chatgpt_text}")
self.whisper_label.setMouseTracking(True)
self.chatgpt_label.setMouseTracking(True)
self.scroll_area1.setMouseTracking(True)
self.scroll_area2.setMouseTracking(True)
def toggle_whisper_box(self):
is_visible = self.scroll_area1.isVisible()
self.scroll_area1.setVisible(not is_visible)
self.toggle_whisper_button.setStyleSheet(
"QPushButton { background-color: %s; }" % ('green' if not is_visible else 'red')
)
def toggle_chatgpt_box(self):
is_visible = self.scroll_area2.isVisible()
self.scroll_area2.setVisible(not is_visible)
self.toggle_chatgpt_button.setStyleSheet(
"QPushButton { background-color: %s; }" % ('green' if not is_visible else 'red')
)
def toggle_tts(self):
        self.is_tts_enabled = not self.is_tts_enabled  # Flag initialised in __init__
# Update the button color based on the state
self.toggle_tts_button.setStyleSheet(
"QPushButton { background-color: %s; }" % ('green' if self.is_tts_enabled else 'red')
)
print(f'TTS is set to {self.is_tts_enabled}')
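    # Because the window is frameless, the two handlers below implement manual
    # click-and-drag repositioning of the overlay.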
def mousePressEvent(self, event):
self.dragPos = event.globalPos()
def mouseMoveEvent(self, event):
if event.buttons() == Qt.LeftButton:
self.move(self.pos() + event.globalPos() - self.dragPos)
self.dragPos = event.globalPos()
    def keyPressEvent(self, event):
        if event.key() == Qt.Key_Escape:
            # Signal the transcription thread to stop, wait for it to finish, then close
            self.stop_event.set()
            self.transcription_thread.join()
            self.close()
def start_img_text_thread(self):
capture_thread = threading.Thread(target=self.capture_and_process)
capture_thread.start()
def start_text_thread(self):
process_thread = threading.Thread(target=self.process_text)
process_thread.start()
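    # The capture/classification work below runs on background threads so the
    # blocking OpenAI calls do not freeze the Qt event loop.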
    def capture_and_process(self):
        # Make the overlay fully transparent so it is not visible in the screenshot
        self.setWindowOpacity(0.0)
        # Flush pending events so the opacity change takes effect before capturing
        QApplication.processEvents()
        self.capture_screen()
def capture_screen(self):
# Use the overlay's geometry as the capture area
capture_area = self.geometry()
# Capture the screen
screen = QApplication.primaryScreen()
screenshot = screen.grabWindow(0, capture_area.x()-50, capture_area.y()-50, capture_area.width()+100, capture_area.height()+100)
        # Restore the overlay's default opacity (set in __init__)
        self.setWindowOpacity(self.opacity)
# Process all pending application events
QApplication.processEvents()
# Display the screenshot
self.process_screenshot(screenshot)
    def process_screenshot(self, screenshot):
        # The preview label is commented out in initUI, so only update it if it exists
        if hasattr(self, 'screenshot_label'):
            self.screenshot_label.setPixmap(screenshot.scaled(self.screenshot_label.size(), Qt.KeepAspectRatio))
        # Convert the QPixmap to a QImage for scaling and encoding
        image = screenshot.toImage()
        # Downscale the image (0.3 keeps roughly a third of each dimension) to shrink the payload
        scale_factor = 0.3
        new_width = int(image.width() * scale_factor)
        new_height = int(image.height() * scale_factor)
        scaled_image = image.scaled(new_width, new_height, Qt.KeepAspectRatio, Qt.SmoothTransformation)
        print(scaled_image.width(), scaled_image.height())
# Prepare a byte array and a buffer to hold the image data
byte_array = QByteArray()
buffer = QBuffer(byte_array)
buffer.open(QIODevice.WriteOnly)
# Save the image to the buffer in PNG format
scaled_image.save(buffer, "PNG")
# Save the image to a file
file_path = "img/screenshot.png" # Specify your directory path and file name here
scaled_image.save(file_path, "PNG") # Saving as a PNG file
# Convert byte array to base64
base64_data = base64.b64encode(byte_array.data()).decode()
# Format the base64 string for API use
formatted_base64_image = "data:image/png;base64," + base64_data
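        # The "data:image/png;base64,..." prefix is the data-URL form that
        # vision-capable chat APIs typically expect for inline images.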
        # Send the encoded screenshot together with the recent transcript to the classifier
        text = text_fallacy_classification(formatted_base64_image, get_whisper_transcription())
GPT_TEXTS.append(text)
if self.is_tts_enabled:
            # Read the analysis aloud using OpenAI text-to-speech
audio_file = openAI_TTS(text)
audio_file = change_playback_speed(audio_file)
play_audio(audio_file)
    def process_text(self):
        # Text-only analysis: classify the recent transcript without a screenshot
        text = text_fallacy_classification(None, get_whisper_transcription())
GPT_TEXTS.append(text)
if self.is_tts_enabled:
            # Read the analysis aloud using OpenAI text-to-speech
audio_file = openAI_TTS(text)
audio_file = change_playback_speed(audio_file)
play_audio(audio_file)
def get_whisper_transcription():
    # Return the most recent transcript segments from the shared buffer (newest last)
    return WHISPER_TEXTS[-9:]  # the last nine segments
def get_chatgpt_output():
    # Return the latest analysis text, or an empty placeholder if none exists yet
    if GPT_TEXTS:
        return GPT_TEXTS[-1]
    return [""]
def launch_overlay(whs_model, use_gpt):
app = QApplication(sys.argv)
overlay = TransparentOverlay(whs_model, use_gpt)
overlay.show()
sys.exit(app.exec_())
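# Example usage (illustrative; assumes the transcription model is loaded with the
# openai-whisper package, which may differ from this project's actual loader):
#   import whisper
#   launch_overlay(whisper.load_model("base"), False)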
if __name__ == '__main__':
    launch_overlay()  # Note: launch_overlay expects a loaded Whisper model and the auto flag