Skip to content

Commit

Permalink
Make audio handling generic & start Whisper
Browse files: browse the repository at this point in the history
  • Loading branch information
ButterscotchV committed Mar 10, 2024
1 parent 1f9ab13 commit fb07166
Show file tree
Hide file tree
Showing 5 changed files with 274 additions and 179 deletions.
2 changes: 1 addition & 1 deletion ButterSTT/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
};
oscHandler.StartMessageLoop();

using var speechToTextHandler = new SpeechToTextHandler(
using var speechToTextHandler = new ButterSTT.STT.AprilAsr(
modelFile,
oscHandler.MessageQueue,
deviceNumber: config.MicrophoneDeviceNumber
Expand Down
110 changes: 110 additions & 0 deletions ButterSTT/STT/AprilAsr.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
using System.Text;
using AprilAsr;
using ButterSTT.MessageSystem;
using ButterSTT.TextProcessing;

namespace ButterSTT.STT
{
    /// <summary>
    /// Speech-to-text backend that pipes microphone samples from an
    /// <see cref="STT.AudioHandler"/> into an April ASR session and pushes the
    /// recognized text into a <see cref="MessageQueue"/>.
    /// </summary>
    public class AprilAsr : IDisposable
    {
        // Audio
        public readonly AudioHandler AudioHandler;

        // Model
        public readonly FileInfo ModelFile;
        private readonly AprilModel _model;

        // Session
        private readonly AprilSession _session;

        // Output
        private readonly StringBuilder _consoleOutput = new();
        private readonly StringBuilder _aprilOutput = new();
        private readonly MessageQueue _messageQueue;

        /// <summary>
        /// Loads the model, opens a recognition session and wires up the microphone.
        /// Recording is not started here.
        /// </summary>
        /// <param name="modelFile">The April model file to load.</param>
        /// <param name="messageQueue">Queue that receives the recognized paragraphs.</param>
        /// <param name="deviceNumber">Index of the input device handed to the audio handler.</param>
        public AprilAsr(FileInfo modelFile, MessageQueue messageQueue, int deviceNumber = 0)
        {
            _messageQueue = messageQueue;

            // Model
            ModelFile = modelFile;
            _model = new AprilModel(modelFile.FullName);

            Console.WriteLine(
                $"Model loaded from \"{modelFile.FullName}\":\n > Name: {_model.Name}\n > Description: {_model.Description}\n > Language: {_model.Language}\n > Sample Rate: {_model.SampleRate} Hz"
            );

            // Session, results are delivered through OnAprilTokens
            _session = new AprilSession(_model, OnAprilTokens, async: true);

            // Microphone, captured at the sample rate the model reports
            AudioHandler = new AudioHandler(_model.SampleRate, deviceNumber);
            AudioHandler.OnMicData += OnMicData;
            AudioHandler.OnMicStop += OnMicStop;
        }

        /// <summary>
        /// Forwards a non-empty chunk of 16-bit samples to the session.
        /// </summary>
        private void OnMicData(object? sender, (short[] data, int length) data)
        {
            var (samples, count) = data;
            if (count > 0)
            {
                _session.FeedPCM16(samples, count);
            }
        }

        /// <summary>
        /// Flushes the session once the microphone reports that it has stopped.
        /// </summary>
        private void OnMicStop(object? sender, EventArgs e)
        {
            _session.Flush();
        }

        /// <summary>
        /// Session callback: rebuilds the text from the tokens, updates the message queue
        /// and echoes the result to the console with a marker for the result kind.
        /// </summary>
        /// <param name="result">The kind of result being reported.</param>
        /// <param name="tokens">The tokens making up the current recognition.</param>
        private void OnAprilTokens(AprilResultKind result, AprilToken[] tokens)
        {
            // Stitch the token fragments back into one raw string
            _aprilOutput.Clear();
            for (var i = 0; i < tokens.Length; i++)
            {
                _aprilOutput.Append(tokens[i].Token);
            }

            var text =
                tokens.Length == 0
                    ? ""
                    : EnglishCapitalization.Capitalize(_aprilOutput.ToString().Trim());

            // Final results are parsed with the URL-keeping word regex and close the paragraph,
            // anything else only replaces the in-progress paragraph
            if (result != AprilResultKind.FinalRecognition)
            {
                _messageQueue.CurParagraph = EnglishTextParser.ParseParagraph(text);
            }
            else
            {
                _messageQueue.CurParagraph = EnglishTextParser.ParseParagraph(
                    text,
                    wordRegex: EnglishTextParser.WordKeepUrl()
                );
                _messageQueue.FinishCurrentParagraph();
            }

            // Console echo: "- " partial, "@ " final, " " for every other kind
            var marker = result switch
            {
                AprilResultKind.PartialRecognition => "- ",
                AprilResultKind.FinalRecognition => "@ ",
                _ => " ",
            };

            _consoleOutput.Clear();
            _consoleOutput.Append(marker).Append(text);
            Console.WriteLine(_consoleOutput);
        }

        /// <summary>
        /// Disposes the audio handler owned by this instance.
        /// </summary>
        public void Dispose()
        {
            AudioHandler.Dispose();
            GC.SuppressFinalize(this);
        }
    }
}
107 changes: 107 additions & 0 deletions ButterSTT/STT/AudioHandler.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
using ButterSTT.Config;
using NAudio.Wave;

namespace ButterSTT.STT
{
    /// <summary>
    /// Wraps an NAudio <see cref="WaveInEvent"/> microphone capture (16-bit, single channel)
    /// and re-publishes it through start, stop and sample-data events.
    /// </summary>
    public class AudioHandler : IDisposable
    {
        // Audio
        private readonly WaveInEvent _audioIn;

        // One-shot flag: when set, the next "recording stopped" event restarts the capture
        // instead of being reported through OnMicStop.
        private bool _restartRecordingNextStop = false;

        /// <summary>The input device index currently selected.</summary>
        public int WaveDeviceNumber { get; private set; } =
            STTConfig.Default.MicrophoneDeviceNumber;

        /// <summary>
        /// True from <see cref="StartRecording"/> until the capture reports that it stopped.
        /// </summary>
        public bool IsMicrophoneRecording { get; private set; } = false;

        /// <summary>Raised after the capture has been started.</summary>
        public event EventHandler? OnMicStart;

        /// <summary>Raised when the capture has stopped and is not being restarted.</summary>
        public event EventHandler? OnMicStop;

        /// <summary>Raised for every captured chunk with the samples and their count.</summary>
        public event EventHandler<(short[] data, int length)>? OnMicData;

        /// <summary>
        /// Creates the capture device wrapper. Recording is not started here.
        /// </summary>
        /// <param name="sampleRate">Capture sample rate in Hz.</param>
        /// <param name="deviceNumber">Index of the input device to capture from.</param>
        public AudioHandler(int sampleRate = 16000, int deviceNumber = 0)
        {
            // Initialize microphone
            _audioIn = new WaveInEvent()
            {
                DeviceNumber = deviceNumber,
                WaveFormat = new(sampleRate, 16, 1)
            };
            WaveDeviceNumber = deviceNumber;

            // Register microphone events
            _audioIn.DataAvailable += OnWaveData;
            _audioIn.RecordingStopped += OnWaveStop;
        }

        /// <summary>
        /// Starts capturing and raises <see cref="OnMicStart"/>.
        /// </summary>
        public void StartRecording()
        {
            _audioIn.StartRecording();
            IsMicrophoneRecording = true;
            OnMicStart?.Invoke(this, EventArgs.Empty);
        }

        /// <summary>
        /// Requests the capture to stop and cancels any pending automatic restart.
        /// </summary>
        public void StopRecording()
        {
            // Tell the recording not to restart
            _restartRecordingNextStop = false;

            // This keeps recording for a little bit longer, it will call the event when it's done
            _audioIn.StopRecording();
        }

        /// <summary>
        /// Switches to another input device, resuming the capture on the new device if it
        /// was recording when the swap was requested.
        /// </summary>
        /// <param name="deviceNumber">Index of the input device to switch to.</param>
        public void SwapMicrophoneDevice(int deviceNumber)
        {
            // If it's already using this device, ignore it and continue
            if (_audioIn.DeviceNumber == deviceNumber)
                return;

            var wasRecording = IsMicrophoneRecording;

            // Make sure the recording is stopped
            StopRecording();

            // Swap devices
            _audioIn.DeviceNumber = deviceNumber;
            WaveDeviceNumber = deviceNumber;

            // Nothing was recording, so there is nothing to resume. Arming the restart flag
            // here would make a later, unrelated stop restart the capture unexpectedly.
            if (!wasRecording)
                return;

            // If it's already stopped, restart it immediately
            // Otherwise, start it again when it's done stopping
            if (!IsMicrophoneRecording)
            {
                StartRecording();
            }
            else
            {
                _restartRecordingNextStop = true;
            }
        }

        /// <summary>
        /// Converts a captured byte buffer to 16-bit samples and raises <see cref="OnMicData"/>.
        /// </summary>
        private void OnWaveData(object? sender, WaveInEventArgs args)
        {
            // Only whole samples are forwarded; a trailing odd byte is dropped so the copy
            // below can never exceed the destination array
            var sampleCount = args.BytesRecorded / sizeof(short);
            if (sampleCount <= 0)
                return;

            // Convert the bytes to shorts
            var shorts = new short[sampleCount];
            Buffer.BlockCopy(args.Buffer, 0, shorts, 0, sampleCount * sizeof(short));

            OnMicData?.Invoke(this, (shorts, shorts.Length));
        }

        /// <summary>
        /// Handles the capture having stopped: either performs the pending restart or
        /// raises <see cref="OnMicStop"/>.
        /// </summary>
        private void OnWaveStop(object? sender, StoppedEventArgs args)
        {
            IsMicrophoneRecording = false;

            if (_restartRecordingNextStop)
            {
                // Consume the flag so only this stop triggers a restart
                _restartRecordingNextStop = false;
                StartRecording();
            }
            else
            {
                OnMicStop?.Invoke(this, EventArgs.Empty);
            }
        }

        /// <summary>
        /// Stops the capture and disposes the underlying device.
        /// </summary>
        public void Dispose()
        {
            StopRecording();
            _audioIn.Dispose();
            GC.SuppressFinalize(this);
        }
    }
}
56 changes: 56 additions & 0 deletions ButterSTT/STT/WhisperAsr.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
using Whisper.net;

namespace ButterSTT.STT
{
    /// <summary>
    /// Speech-to-text backend that converts microphone samples from an
    /// <see cref="STT.AudioHandler"/> to floats and hands them to a Whisper.net processor,
    /// printing each recognized segment to the console.
    /// </summary>
    public class WhisperAsr : IDisposable
    {
        // Capture rate requested from the microphone.
        // NOTE(review): Whisper models are documented as expecting 16 kHz input - confirm.
        private const int SampleRate = 16000;

        // Audio
        public readonly AudioHandler AudioHandler;

        // Model
        public readonly FileInfo ModelFile;

        // The factory is kept for the lifetime of this instance: the processor is built from
        // it, so it must not be disposed while the processor is still in use.
        private readonly WhisperFactory _factory;
        private readonly WhisperProcessor _processor;

        /// <summary>
        /// Loads the model, builds the processor and wires up the microphone.
        /// Recording is not started here.
        /// </summary>
        /// <param name="modelFile">The Whisper model file to load.</param>
        /// <param name="deviceNumber">Index of the input device handed to the audio handler.</param>
        public WhisperAsr(FileInfo modelFile, int deviceNumber = 0)
        {
            // Load model
            ModelFile = modelFile;
            _factory = WhisperFactory.FromPath(modelFile.FullName);

            try
            {
                _processor = _factory
                    .CreateBuilder()
                    .WithLanguage("auto")
                    .WithSegmentEventHandler(OnSegmentEvent)
                    .Build();

                Console.WriteLine($"Model loaded from \"{modelFile.FullName}\".");

                // Initialize microphone
                AudioHandler = new(SampleRate, deviceNumber);
                AudioHandler.OnMicData += OnMicData;
            }
            catch
            {
                // Construction failed part-way: release what was already created, since
                // Dispose() will never be called on this instance
                _processor?.Dispose();
                _factory.Dispose();
                throw;
            }
        }

        /// <summary>
        /// Converts a non-empty chunk of 16-bit samples to floats and processes it.
        /// </summary>
        private void OnMicData(object? sender, (short[] data, int length) data)
        {
            if (data.length <= 0)
                return;

            // Scale the 16-bit samples by 1/32768
            var floats = new float[data.length];
            for (var i = 0; i < data.length; i++)
                floats[i] = data.data[i] / 32768f;

            _processor.Process(floats);
        }

        /// <summary>
        /// Prints the trimmed text of a recognized segment.
        /// </summary>
        private void OnSegmentEvent(SegmentData segment)
        {
            Console.WriteLine(segment.Text.Trim());
        }

        /// <summary>
        /// Stops feeding audio, then disposes the audio handler, the processor and finally
        /// the factory the processor was built from.
        /// </summary>
        public void Dispose()
        {
            // Unsubscribe first so trailing audio is not fed to a disposed processor
            AudioHandler.OnMicData -= OnMicData;
            AudioHandler.Dispose();

            _processor.Dispose();
            _factory.Dispose();
            GC.SuppressFinalize(this);
        }
    }
}
Loading

0 comments on commit fb07166

Please sign in to comment.