
Commit

video labelling attempt
hbarnard committed Oct 26, 2022
1 parent 0979d95 commit 05861d1
Showing 6 changed files with 105 additions and 28 deletions.
Binary file modified __pycache__/intent_server.cpython-37.pyc
33 changes: 18 additions & 15 deletions etc/mema.ini
@@ -1,19 +1,22 @@
[main]
db = /var/spool/mema/db/memories.db
media_directory = /var/spool/mema/
picture_program = /home/pi/projects/mema/picture.py
story_program = /home/pi/projects/mema/record_story.py
video_command = raspivid -o - -t 60000 -ex night -br 50 -w 1920 -h 1080 -rot 279 -fps 8 | sudo ffmpeg -nostats -loglevel 0 -r 8 -i - -y -vcodec copy
record_command = arecord -f cd -c 2 -D plug:dsnooped --duration 10
intent_server = http://localhost:8000
confidence = 0.98
db = /var/spool/mema/db/memories.db
media_directory = /var/spool/mema/
picture_program = /home/pi/projects/mema/picture.py
story_program = /home/pi/projects/mema/record_story.py
video_command = raspivid -o - -t 60000 -ex night -br 50 -w 1920 -h 1080 -rot 279 -fps 8 | sudo ffmpeg -nostats -loglevel 0 -r 8 -i - -y -vcodec copy
record_command = arecord -f cd -c 2 -D plug:dsnooped --duration 30
label_program = /home/pi/projects/mema/label_video.py
intent_server = http://localhost:8000
confidence = 0.98

# some small prompts within the intent server, should probably be [en][prompts]
# some small prompts within the intent server, should probably be [en][prompts] to give full multilingual support
[en_prompts]
sorry = I_cannot_find_that_sorry
not_understood = Sorry_I_cannot_understand_that
start_record = starting to record in a second or two
end_record = finished recording starting to transcribe
sorry = I_cannot_find_that_sorry
not_understood = Sorry_I_cannot_understand_that
start_record = starting to record in a second or two
end_record = finished recording starting to transcribe
end_transcription = finished transcription
taking_picture = taking picture now
trying_caption = trying to caption picture, please wait a moment
taking_picture = taking picture now
trying_caption = trying to caption picture, please wait a moment
not_video = Sorry_this_is_not_a_video
didnt_get = Sorry_no_transcription try again
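
For orientation, intent_server.py and label_video.py read these keys as config['main'][...] and config['en_prompts'][...], presumably via Python's configparser. A minimal sketch of that access pattern (the ini path here is an assumption, not taken from the repo):

import configparser

config = configparser.ConfigParser()
config.read('etc/mema.ini')  # assumed location; the deployed path may differ

# the two keys this commit adds
label_program = config['main']['label_program']   # /home/pi/projects/mema/label_video.py
not_video = config['en_prompts']['not_video']     # Sorry_this_is_not_a_video
print(label_program, not_video)
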
51 changes: 49 additions & 2 deletions intent_server.py
@@ -57,6 +57,27 @@ def curl_speak(phrase):
uvicorn.run(app, host="0.0.0.0", port=8000, log_config=log_config)
'''

'''
or start to use a URL for each intent
def handle_one():
    do_stuff
def handle_two():
    do_stuff
def handle_three():
    do_stuff
{'one': handle_one,
 'two': handle_two,
 'three': handle_three}[option]()
'''





# Python 3.10 has 'match' which would tidy this up a little.
# needs tidying anyway
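# A sketch, not part of this commit: the same dispatch written with the
# Python 3.10 'match' statement mentioned above. Intent names and calls are
# copied from the elif chain below; nothing new is assumed.
'''
match intent:
    case "RecordVideo":
        story_number = re.findall(r'\b\d+\b', raw_speech)
        run_video_command()
    case "LabelVideo":
        video_number = re.findall(r'\b\d+\b', raw_speech)
        run_label_video_command(video_number)
    case _:
        print("nothing found")
'''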

@@ -91,8 +112,12 @@ async def getInformation(info : Request):
        run_record_command()
        #print("record story found")
    elif intent == "RecordVideo":
        print("record video found")
        run_video_command()
        story_number = re.findall(r'\b\d+\b', raw_speech)
        run_video_command()
    elif intent == "LabelVideo":
        video_number = re.findall(r'\b\d+\b', raw_speech)
        run_label_video_command(video_number)
        #print("label video found for " + str(video_number))
    else:
        print("nothing found")
    return {
@@ -145,6 +170,28 @@ def run_video_command():
    con.commit()
    return text

def run_label_video_command(video_number):
    cur = con.cursor()
    result = cur.execute("SELECT * FROM memories WHERE memory_id=?",(video_number))
    fields = result.fetchone()
    if fields is not None:
        if (fields[7] != 'video'):
            curl_speak(config['en_prompts']['not_video'])
            return
        else:
            result = subprocess.run([config['main']['label_program']], check=True, capture_output=True, text=True).stdout
            #print(result)
            (text, file_path) = result.split('|')
            if (text != 'empty'):
                #print('video number is ' + video_number)
                cur.execute("update memories set description = ? WHERE memory_id=?",(text, video_number[0]))
            else:
                curl_speak(config['en_prompts']['didnt_get'])
    else:
        curl_speak(config['en_prompts']['sorry'])
        print(fields)
    return


def run_associate_command():
print("running classifier")
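
A side note on the number extraction in the RecordVideo and LabelVideo branches above: re.findall returns a list of digit strings, which is why run_label_video_command indexes video_number[0] before binding it into the UPDATE. A small self-contained illustration (the utterance is invented):

import re

raw_speech = "please label video 21"               # hypothetical recognised utterance
video_number = re.findall(r'\b\d+\b', raw_speech)
print(video_number)      # ['21'] -- a list of strings, not an int
print(video_number[0])   # '21'  -- the value used in the memories UPDATE
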
29 changes: 18 additions & 11 deletions label_video.py
100644 → 100755
@@ -1,12 +1,14 @@
#!/usr/bin/env python3

import os
import sys

from time import sleep
import board
import subprocess
import requests
import datetime
from pathlib import Path
import threading

import replicate

@@ -45,8 +47,8 @@ def curl_speak(phrase):

phrase = config['en_prompts']['start_record'].replace(' ','_')
curl_speak(phrase)
sleep(1)

dots[2] = (0,0,255) # red

try:
subprocess.run(["docker", "stop", "rhasspy"], check=True, capture_output=True, text=True).stdout
@@ -55,10 +57,10 @@ def curl_speak(phrase):

# make a file name from the current unix timestamp
unix_time = int(datetime.datetime.now().timestamp())
file_path = config['main']['media_directory'] + "rec/" + str(unix_time) + ".wav"
file_path = config['main']['media_directory'] + "tmp/" + str(unix_time) + ".wav"

sleep(1)
dots[0] = (255,0,0) # green
sleep(10)
dots[2] = (255,0,0) # green

try:
    record_command = config['main']['record_command'] + ' ' + file_path
@@ -68,37 +70,42 @@ def curl_speak(phrase):
except subprocess.CalledProcessError as e:
raise RuntimeError("command '{}' return here with error (code {}): {}".format(e.cmd, e.returncode, e.output))

dots[0] = (0,0,255) # red
dots[2] = (0,0,255) # red

try:
subprocess.run(["docker", "start", "rhasspy"], check=True, capture_output=True, text=True).stdout
except subprocess.CalledProcessError as e:
raise RuntimeError("command '{}' return with error (code {}): {}".format(e.cmd, e.returncode, e.output))

sleep(5) # give rhasspy time to reload!
sleep(10) # give rhasspy time to reload!

phrase = config['en_prompts']['end_record'].replace(' ','_')
curl_speak(phrase)


# select speech to text model
model = replicate.models.get("openai/whisper")
# format file as path object (openai needs this)
audio_file = Path(file_path)

# give a little feedback
dots[0] = (0,255,0) # blue
dots[2] = (0,255,0) # blue

# speech to text on remote server
# signal transcription empty if something goes wrong
result = {"transcription" : "empty"}
result = model.predict(audio=audio_file)

phrase = config['en_prompts']['end_transcription'].replace(' ','_')
curl_speak(phrase)

# done, feedback, stop blinking lights
dots[0] = (255,0,0) # green
dots[2] = (255,0,0) # green
sleep(5)
dots.deinit()

# delete temporary file
os.remove(file_path)

# return result and file path to intent server
print(result['transcription'] + "|" + file_path)
# the format of reply is maintained although this doesn't keep a file
print(result['transcription'] + "|" + 'no_path')
Binary file modified static/favicon.ico
20 changes: 20 additions & 0 deletions templates/list.html
@@ -9,8 +9,28 @@
background-color: LightBlue;
}

.dotred {
height: 25px;
width: 25px;
background-color: #f00;
border-radius: 50%;
display: inline-block;
}


.dotgreen {
height: 25px;
width: 25px;
background-color: #0f0;
border-radius: 50%;
display: inline-block;
}



</style>
<link rel="icon" type="image/x-icon" href="/static/favicon.ico">
<title>Memory Lister: Testing Only</title>
</head>

<h2>Memory Lister: Testing Only</h2>
