diff --git a/ButterSTT/SpeechToTextHandler.cs b/ButterSTT/SpeechToTextHandler.cs
index 09b45f3..31b2ca4 100644
--- a/ButterSTT/SpeechToTextHandler.cs
+++ b/ButterSTT/SpeechToTextHandler.cs
@@ -147,9 +147,20 @@ private void OnAprilTokens(AprilResultKind result, AprilToken[] tokens)
                 ? EnglishCapitalization.Capitalize(aprilOutput.ToString().Trim())
                 : "";
 
-            messageQueue.CurParagraph = EnglishTextParser.ParseParagraph(aprilOutputString);
             if (result == AprilResultKind.FinalRecognition)
+            {
+                messageQueue.CurParagraph = EnglishTextParser.ParseParagraph(
+                    aprilOutputString,
+                    wordRegex: EnglishTextParser.WordKeepUrl()
+                );
                 messageQueue.FinishCurrentParagraph();
+            }
+            else
+            {
+                messageQueue.CurParagraph = EnglishTextParser.ParseParagraph(aprilOutputString);
+            }
+
+            Console.WriteLine(messageQueue.GetCurrentMessage());
 
             try
             {
diff --git a/ButterSTT/TextProcessing/EnglishCapitalization.cs b/ButterSTT/TextProcessing/EnglishCapitalization.cs
index 54bbea9..6aeac70 100644
--- a/ButterSTT/TextProcessing/EnglishCapitalization.cs
+++ b/ButterSTT/TextProcessing/EnglishCapitalization.cs
@@ -4,9 +4,9 @@ namespace ButterSTT.TextProcessing
 {
     public static partial class EnglishCapitalization
     {
-        public static string Capitalize(string message)
+        public static string Capitalize(string message, Regex? regex = null)
         {
-            return BasicCapitals().Replace(message.ToLower(), c => c.Value.ToUpper());
+            return (regex ?? BasicCapitals()).Replace(message.ToLower(), c => c.Value.ToUpper());
         }
 
         // Capitalizes starts of sentences and standalone "I"s, must be run on a lowercase string
diff --git a/ButterSTT/TextProcessing/EnglishTextParser.cs b/ButterSTT/TextProcessing/EnglishTextParser.cs
index f77e9bb..c456957 100644
--- a/ButterSTT/TextProcessing/EnglishTextParser.cs
+++ b/ButterSTT/TextProcessing/EnglishTextParser.cs
@@ -5,20 +5,31 @@ namespace ButterSTT.TextProcessing
 {
     public static partial class EnglishTextParser
     {
-        public static Paragraph ParseParagraph(string text)
+        public static Paragraph ParseParagraph(
+            string text,
+            Regex? regex = null,
+            Regex? wordRegex = null,
+            bool addSpaces = true
+        )
         {
-            Sentence[] sentences = SentenceKeepUrl()
+            Sentence[] sentences = (regex ?? SentenceKeepUrl())
                 .Matches(text)
-                .Select(m => ParseSentence(m.Value))
+                .Select(m => ParseSentence(m.Value, regex: wordRegex, addSpaces: addSpaces))
                 .ToArray();
             return new Paragraph(sentences);
         }
 
-        public static Sentence ParseSentence(string text)
+        public static Sentence ParseSentence(
+            string text,
+            Regex? regex = null,
+            bool addSpaces = true
+        )
         {
-            Word[] words = WordOnlyCompleteKeepUrl()
+            Word[] words = (regex ?? WordOnlyCompleteKeepUrl())
                 .Matches(text)
-                .Select(m => new Word(m.Value.EndsWith(' ') ? m.Value : m.Value + " "))
+                .Select(m => new Word(
+                    addSpaces && !m.Value.EndsWith(' ') ? m.Value + " " : m.Value
+                ))
                 .ToArray();
             return new Sentence(words);
         }