
Commit

video labelling attempt
hbarnard committed Oct 26, 2022
1 parent 0979d95 commit 05861d1
Showing 6 changed files with 105 additions and 28 deletions.
Binary file modified __pycache__/intent_server.cpython-37.pyc
33 changes: 18 additions & 15 deletions etc/mema.ini
@@ -1,19 +1,22 @@
[main]
db = /var/spool/mema/db/memories.db
media_directory = /var/spool/mema/
picture_program = /home/pi/projects/mema/picture.py
story_program = /home/pi/projects/mema/record_story.py
video_command = raspivid -o - -t 60000 -ex night -br 50 -w 1920 -h 1080 -rot 279 -fps 8 | sudo ffmpeg -nostats -loglevel 0 -r 8 -i - -y -vcodec copy
record_command = arecord -f cd -c 2 -D plug:dsnooped --duration 10
intent_server = http://localhost:8000
confidence = 0.98
db = /var/spool/mema/db/memories.db
media_directory = /var/spool/mema/
picture_program = /home/pi/projects/mema/picture.py
story_program = /home/pi/projects/mema/record_story.py
video_command = raspivid -o - -t 60000 -ex night -br 50 -w 1920 -h 1080 -rot 279 -fps 8 | sudo ffmpeg -nostats -loglevel 0 -r 8 -i - -y -vcodec copy
record_command = arecord -f cd -c 2 -D plug:dsnooped --duration 30
label_program = /home/pi/projects/mema/label_video.py
intent_server = http://localhost:8000
confidence = 0.98

# some small prompts within the intent server, should probably be [en][prompts]
# some small prompts within the intent server, should probably be [en][prompts] to give full multilingual support
[en_prompts]
sorry = I_cannot_find_that_sorry
not_understood = Sorry_I_cannot_understand_that
start_record = starting to record in a second or two
end_record = finished recording starting to transcribe
sorry = I_cannot_find_that_sorry
not_understood = Sorry_I_cannot_understand_that
start_record = starting to record in a second or two
end_record = finished recording starting to transcribe
end_transcription = finished transcription
taking_picture = taking picture now
trying_caption = trying to caption picture, please wait a moment
taking_picture = taking picture now
trying_caption = trying to caption picture, please wait a moment
not_video = Sorry_this_is_not_a_video
didnt_get = Sorry_no_transcription try again
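
For orientation, intent_server.py and label_video.py read these keys as config['main'][...] and config['en_prompts'][...], presumably via Python's configparser. A minimal sketch of that access pattern (the ini path here is an assumption, not taken from the repo):

import configparser

config = configparser.ConfigParser()
config.read('etc/mema.ini')  # assumed location; the deployed path may differ

# the two keys this commit adds
label_program = config['main']['label_program']   # /home/pi/projects/mema/label_video.py
not_video = config['en_prompts']['not_video']     # Sorry_this_is_not_a_video
print(label_program, not_video)
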
51 changes: 49 additions & 2 deletions intent_server.py
@@ -57,6 +57,27 @@ def curl_speak(phrase):
uvicorn.run(app, host="0.0.0.0", port=8000, log_config=log_config)
'''

'''
or start to use a URL for each intent
def handle_one():
    do_stuff
def handle_two():
    do_stuff
def handle_three():
    do_stuff
{'one': handle_one,
 'two': handle_two,
 'three': handle_three}[option]()
'''





# Python 3.10 has 'match' which would tidy this up a little.
# needs tidying anyway
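# A sketch, not part of this commit: the same dispatch written with the
# Python 3.10 'match' statement mentioned above. Intent names and calls are
# copied from the elif chain below; nothing new is assumed.
'''
match intent:
    case "RecordVideo":
        story_number = re.findall(r'\b\d+\b', raw_speech)
        run_video_command()
    case "LabelVideo":
        video_number = re.findall(r'\b\d+\b', raw_speech)
        run_label_video_command(video_number)
    case _:
        print("nothing found")
'''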

@@ -91,8 +112,12 @@ async def getInformation(info : Request):
        run_record_command()
        #print("record story found")
    elif intent == "RecordVideo":
        print("record video found")
        run_video_command()
        story_number = re.findall(r'\b\d+\b', raw_speech)
        run_video_command()
    elif intent == "LabelVideo":
        video_number = re.findall(r'\b\d+\b', raw_speech)
        run_label_video_command(video_number)
        #print("label video found for " + str(video_number))
    else:
        print("nothing found")
    return {
@@ -145,6 +170,28 @@ def run_video_command():
    con.commit()
    return text

def run_label_video_command(video_number):
    cur = con.cursor()
    result = cur.execute("SELECT * FROM memories WHERE memory_id=?",(video_number))
    fields = result.fetchone()
    if fields is not None:
        if (fields[7] != 'video'):
            curl_speak(config['en_prompts']['not_video'])
            return
        else:
            result = subprocess.run([config['main']['label_program']], check=True, capture_output=True, text=True).stdout
            #print(result)
            (text, file_path) = result.split('|')
            if (text != 'empty'):
                #print('video number is ' + video_number)
                cur.execute("update memories set description = ? WHERE memory_id=?",(text, video_number[0]))
            else:
                curl_speak(config['en_prompts']['didnt_get'])
    else:
        curl_speak(config['en_prompts']['sorry'])
        print(fields)
    return


def run_associate_command():
print("running classifier")
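
A side note on the number extraction in the RecordVideo and LabelVideo branches above: re.findall returns a list of digit strings, which is why run_label_video_command indexes video_number[0] before binding it into the UPDATE. A small self-contained illustration (the utterance is invented):

import re

raw_speech = "please label video 21"               # hypothetical recognised utterance
video_number = re.findall(r'\b\d+\b', raw_speech)
print(video_number)      # ['21'] -- a list of strings, not an int
print(video_number[0])   # '21'  -- the value used in the memories UPDATE
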
29 changes: 18 additions & 11 deletions label_video.py
100644 → 100755
@@ -1,12 +1,14 @@
#!/usr/bin/env python3

import os
import sys

from time import sleep
import board
import subprocess
import requests
import datetime
from pathlib import Path
import threading

import replicate

@@ -45,8 +47,8 @@ def curl_speak(phrase):

phrase = config['en_prompts']['start_record'].replace(' ','_')
curl_speak(phrase)
sleep(1)

dots[2] = (0,0,255) # red

try:
subprocess.run(["docker", "stop", "rhasspy"], check=True, capture_output=True, text=True).stdout
@@ -55,10 +57,10 @@ def curl_speak(phrase):

# make a file name from the current unix timestamp
unix_time = int(datetime.datetime.now().timestamp())
file_path = config['main']['media_directory'] + "rec/" + str(unix_time) + ".wav"
file_path = config['main']['media_directory'] + "tmp/" + str(unix_time) + ".wav"

sleep(1)
dots[0] = (255,0,0) # green
sleep(10)
dots[2] = (255,0,0) # green

try:
    record_command = config['main']['record_command'] + ' ' + file_path
@@ -68,37 +70,42 @@ def curl_speak(phrase):
except subprocess.CalledProcessError as e:
raise RuntimeError("command '{}' return here with error (code {}): {}".format(e.cmd, e.returncode, e.output))

dots[0] = (0,0,255) # red
dots[2] = (0,0,255) # red

try:
subprocess.run(["docker", "start", "rhasspy"], check=True, capture_output=True, text=True).stdout
except subprocess.CalledProcessError as e:
raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))

sleep(5) # give rhasspy time to reload!
sleep(10) # give rhasspy time to reload!

phrase = config['en_prompts']['end_record'].replace(' ','_')
curl_speak(phrase)


# select speech to text model
model = replicate.models.get("openai/whisper")
# format file as path object (openai needs this)
audio_file = Path(file_path)

# give a little feedback
dots[0] = (0,255,0) # blue
dots[2] = (0,255,0) # blue

# speech to text on remote server
# signal transcription empty if something goes wrong
result = {"transcription" : "empty"}
result = model.predict(audio=audio_file)

phrase = config['en_prompts']['end_transcription'].replace(' ','_')
curl_speak(phrase)

# done, feedback, stop blinking lights
dots[0] = (255,0,0) # green
dots[2] = (255,0,0) # green
sleep(5)
dots.deinit()

# delete temporary file
os.remove(file_path)

# return result and file path to intent server
print(result['transcription'] + "|" + file_path)
# the format of reply is maintained although this doesn't keep a file
print(result['transcription'] + "|" + 'no_path')
Binary file modified static/favicon.ico
20 changes: 20 additions & 0 deletions templates/list.html
@@ -9,8 +9,28 @@
background-color: LightBlue;
}

.dotred {
height: 25px;
width: 25px;
background-color: #f00;
border-radius: 50%;
display: inline-block;
}


.dotgreen {
height: 25px;
width: 25px;
background-color: #0f0;
border-radius: 50%;
display: inline-block;
}



</style>
<link rel="icon" type="image/x-icon" href="/static/favicon.ico">
<title>Memory Lister: Testing Only</title>
</head>

<h2>Memory Lister: Testing Only</h2>
