-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Make audio handling generic & start Whisper
- Loading branch information
1 parent
1f9ab13
commit fb07166
Showing
5 changed files
with
274 additions
and
179 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
using System.Text; | ||
using AprilAsr; | ||
using ButterSTT.MessageSystem; | ||
using ButterSTT.TextProcessing; | ||
|
||
namespace ButterSTT.STT | ||
{ | ||
public class AprilAsr : IDisposable | ||
{ | ||
// Audio | ||
public readonly AudioHandler AudioHandler; | ||
|
||
// Model | ||
public readonly FileInfo ModelFile; | ||
private readonly AprilModel _model; | ||
|
||
// Session | ||
private readonly AprilSession _session; | ||
|
||
// Output | ||
private readonly StringBuilder _consoleOutput = new(); | ||
private readonly StringBuilder _aprilOutput = new(); | ||
private readonly MessageQueue _messageQueue; | ||
|
||
public AprilAsr(FileInfo modelFile, MessageQueue messageQueue, int deviceNumber = 0) | ||
{ | ||
_messageQueue = messageQueue; | ||
|
||
// Load model | ||
ModelFile = modelFile; | ||
_model = new AprilModel(modelFile.FullName); | ||
|
||
Console.WriteLine( | ||
$"Model loaded from \"{modelFile.FullName}\":\n > Name: {_model.Name}\n > Description: {_model.Description}\n > Language: {_model.Language}\n > Sample Rate: {_model.SampleRate} Hz" | ||
); | ||
|
||
// Initialize session | ||
_session = new AprilSession(_model, OnAprilTokens, async: true); | ||
|
||
// Initialize microphone | ||
AudioHandler = new(_model.SampleRate, deviceNumber); | ||
AudioHandler.OnMicData += OnMicData; | ||
AudioHandler.OnMicStop += OnMicStop; | ||
} | ||
|
||
private void OnMicData(object? sender, (short[] data, int length) data) | ||
{ | ||
if (data.length <= 0) | ||
return; | ||
|
||
_session.FeedPCM16(data.data, data.length); | ||
} | ||
|
||
private void OnMicStop(object? sender, EventArgs e) | ||
{ | ||
_session.Flush(); | ||
} | ||
|
||
private void OnAprilTokens(AprilResultKind result, AprilToken[] tokens) | ||
{ | ||
_consoleOutput.Clear(); | ||
_aprilOutput.Clear(); | ||
|
||
switch (result) | ||
{ | ||
case AprilResultKind.PartialRecognition: | ||
_consoleOutput.Append("- "); | ||
break; | ||
case AprilResultKind.FinalRecognition: | ||
_consoleOutput.Append("@ "); | ||
break; | ||
default: | ||
_consoleOutput.Append(' '); | ||
break; | ||
} | ||
|
||
foreach (var token in tokens) | ||
{ | ||
_aprilOutput.Append(token.Token); | ||
} | ||
|
||
var aprilOutputString = | ||
tokens.Length > 0 | ||
? EnglishCapitalization.Capitalize(_aprilOutput.ToString().Trim()) | ||
: ""; | ||
|
||
if (result == AprilResultKind.FinalRecognition) | ||
{ | ||
_messageQueue.CurParagraph = EnglishTextParser.ParseParagraph( | ||
aprilOutputString, | ||
wordRegex: EnglishTextParser.WordKeepUrl() | ||
); | ||
_messageQueue.FinishCurrentParagraph(); | ||
} | ||
else | ||
{ | ||
_messageQueue.CurParagraph = EnglishTextParser.ParseParagraph(aprilOutputString); | ||
} | ||
|
||
_consoleOutput.Append(aprilOutputString); | ||
Console.WriteLine(_consoleOutput); | ||
} | ||
|
||
public void Dispose() | ||
{ | ||
AudioHandler.Dispose(); | ||
GC.SuppressFinalize(this); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
using ButterSTT.Config; | ||
using NAudio.Wave; | ||
|
||
namespace ButterSTT.STT | ||
{ | ||
public class AudioHandler : IDisposable | ||
{ | ||
// Audio | ||
private readonly WaveInEvent _audioIn; | ||
private bool _restartRecordingNextStop = false; | ||
|
||
public int WaveDeviceNumber { get; private set; } = | ||
STTConfig.Default.MicrophoneDeviceNumber; | ||
public bool IsMicrophoneRecording { get; private set; } = false; | ||
|
||
public event EventHandler? OnMicStart; | ||
public event EventHandler? OnMicStop; | ||
public event EventHandler<(short[] data, int length)>? OnMicData; | ||
|
||
public AudioHandler(int sampleRate = 16000, int deviceNumber = 0) | ||
{ | ||
// Initialize microphone | ||
_audioIn = new WaveInEvent() | ||
{ | ||
DeviceNumber = deviceNumber, | ||
WaveFormat = new(sampleRate, 16, 1) | ||
}; | ||
WaveDeviceNumber = deviceNumber; | ||
|
||
// Register microphone events | ||
_audioIn.DataAvailable += OnWaveData; | ||
_audioIn.RecordingStopped += OnWaveStop; | ||
} | ||
|
||
public void StartRecording() | ||
{ | ||
_audioIn.StartRecording(); | ||
IsMicrophoneRecording = true; | ||
OnMicStart?.Invoke(this, EventArgs.Empty); | ||
} | ||
|
||
public void StopRecording() | ||
{ | ||
// Tell the recording not to restart | ||
_restartRecordingNextStop = false; | ||
|
||
// This keeps recording for a little bit longer, it will call the event when it's done | ||
_audioIn.StopRecording(); | ||
} | ||
|
||
public void SwapMicrophoneDevice(int deviceNumber) | ||
{ | ||
// If it's already using this device, ignore it and continue | ||
if (_audioIn.DeviceNumber == deviceNumber) | ||
return; | ||
|
||
var wasRecording = IsMicrophoneRecording; | ||
|
||
// Make sure the recording is stopped | ||
StopRecording(); | ||
|
||
// Swap devices | ||
_audioIn.DeviceNumber = deviceNumber; | ||
WaveDeviceNumber = deviceNumber; | ||
|
||
// If it's already stopped, restart it immediately | ||
// Otherwise, start it again when it's done stopping | ||
if (wasRecording && !IsMicrophoneRecording) | ||
{ | ||
StartRecording(); | ||
} | ||
else | ||
{ | ||
_restartRecordingNextStop = true; | ||
} | ||
} | ||
|
||
private void OnWaveData(object? sender, WaveInEventArgs args) | ||
{ | ||
if (args.BytesRecorded <= 0) | ||
return; | ||
|
||
// Convert the bytes to shorts | ||
var shorts = new short[args.BytesRecorded / sizeof(short)]; | ||
Buffer.BlockCopy(args.Buffer, 0, shorts, 0, args.BytesRecorded); | ||
|
||
OnMicData?.Invoke(this, (shorts, shorts.Length)); | ||
} | ||
|
||
private void OnWaveStop(object? sender, StoppedEventArgs args) | ||
{ | ||
IsMicrophoneRecording = false; | ||
|
||
if (_restartRecordingNextStop) | ||
StartRecording(); | ||
else | ||
OnMicStop?.Invoke(this, EventArgs.Empty); | ||
} | ||
|
||
public void Dispose() | ||
{ | ||
StopRecording(); | ||
_audioIn.Dispose(); | ||
GC.SuppressFinalize(this); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
using Whisper.net; | ||
|
||
namespace ButterSTT.STT | ||
{ | ||
public class WhisperAsr : IDisposable | ||
{ | ||
// Audio | ||
public readonly AudioHandler AudioHandler; | ||
|
||
// Model | ||
public readonly FileInfo ModelFile; | ||
private readonly WhisperProcessor _processor; | ||
|
||
public WhisperAsr(FileInfo modelFile, int deviceNumber = 0) | ||
{ | ||
// Load model | ||
ModelFile = modelFile; | ||
using var whisperFactory = WhisperFactory.FromPath(modelFile.FullName); | ||
_processor = whisperFactory | ||
.CreateBuilder() | ||
.WithLanguage("auto") | ||
.WithSegmentEventHandler(OnSegmentEvent) | ||
.Build(); | ||
|
||
Console.WriteLine($"Model loaded from \"{modelFile.FullName}\"."); | ||
|
||
// Initialize microphone | ||
AudioHandler = new(16000, deviceNumber); | ||
AudioHandler.OnMicData += OnMicData; | ||
} | ||
|
||
private void OnMicData(object? sender, (short[] data, int length) data) | ||
{ | ||
if (data.length <= 0) | ||
return; | ||
|
||
var floats = new float[data.length]; | ||
for (var i = 0; i < data.length; i++) | ||
floats[i] = data.data[i] / 32768f; | ||
|
||
_processor.Process(floats); | ||
} | ||
|
||
private void OnSegmentEvent(SegmentData segment) | ||
{ | ||
Console.WriteLine(segment.Text.Trim()); | ||
} | ||
|
||
public void Dispose() | ||
{ | ||
AudioHandler?.Dispose(); | ||
_processor.Dispose(); | ||
GC.SuppressFinalize(this); | ||
} | ||
} | ||
} |
Oops, something went wrong.