Skip to content

Commit

Permalink
Improved regex to allow decimal durations. Added configurable fade-in and fade-out times. Needs timing tests.
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffrey Wright committed Feb 20, 2023
1 parent e332c98 commit d22b39b
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 26 deletions.
24 changes: 20 additions & 4 deletions audio_program_generator/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@
command-line interface for audio program generator
"""
import typer
from enum import Enum
from io import StringIO
from typing import Optional
from pathlib import Path
from enum import Enum
from typing import Optional

import typer

from audio_program_generator.apg import AudioProgramGenerator

CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"])
Expand Down Expand Up @@ -82,6 +84,20 @@ def generate_subcommand(
show_default=True,
help="Set background file attenuation in dB.",
),
fadein: int = typer.Option(
3000,
"--fi",
"--fadein",
show_default=True,
help="Set fade-in duration in milliseconds.",
),
fadeout: int = typer.Option(
6000,
"--fo",
"--fadeout",
show_default=True,
help="Set fade-out duration in milliseconds.",
),
slow: bool = typer.Option(
False,
"-s",
Expand All @@ -108,6 +124,7 @@ def generate_subcommand(
hide_progress_bar: bool = typer.Option(
False,
"-H",
"--hide",
"--hide-progress-bar",
is_flag=True,
show_default=True,
Expand All @@ -121,7 +138,6 @@ def generate_subcommand(
callback=version_callback,
),
) -> None:

try:
sound_file = sound_path.open("rb")
except (AttributeError, FileNotFoundError):
Expand Down
60 changes: 38 additions & 22 deletions audio_program_generator/apg.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
"""
Generate audio program of spoken phrases, with optional background sound file mixed in
"""
import re
import math
import concurrent.futures

import math
import re
from dataclasses import dataclass
from io import StringIO, TextIOWrapper, BytesIO, BufferedReader
from typing import Union
from io import BufferedReader, BytesIO, StringIO, TextIOWrapper
from pathlib import Path
from typing import Union

from alive_progress import alive_bar, config_handler
from gtts import gTTS
from pydub import AudioSegment
from alive_progress import alive_bar, config_handler
from sentence_splitter import split_text_into_sentences
from single_source import get_version

Expand All @@ -22,24 +22,22 @@ def parse_textfile(phrase_file_contents: str = "") -> list:
"""

def clean(dirty: str = "") -> str:
    """Remove every character the phrase-file grammar does not use.

    Kept characters: ASCII letters and digits, the period (needed for
    decimal durations), the asterisk (the phrase value that marks a
    silent segment), the semicolon (phrase/duration separator) and
    whitespace. Everything else is deleted.

    Args:
        dirty: Raw phrase-file text.

    Returns:
        The text with all other characters removed.
    """
    # Only one pattern is defined here: the earlier variant without "\."
    # was dead code, overwritten by this assignment before it was used.
    cleaner = r"[^A-Za-z0-9\.\*\s;\v]"
    cleaned = re.compile(cleaner, flags=re.MULTILINE | re.UNICODE)
    return cleaned.sub("", dirty)

def capture(cleaned: str = "") -> list:
    """Extract ``(phrase, duration)`` pairs from cleaned phrase-file text.

    An entry has the form ``<phrase> ; <duration>`` and must end at a line
    end. The duration may be an integer (``10``, ``0``) or a decimal
    (``1.5``, ``.25``, ``2.``); it is returned as the matched string, not
    converted to a number.

    Args:
        cleaned: Phrase-file text, normally the output of ``clean``.

    Returns:
        A list of ``(phrase, duration)`` 2-tuples of strings, in file
        order. Text that does not fit the grammar produces no entry.
    """
    # Anchored at both ends (^ ... $ under MULTILINE) so a duration is
    # always captured whole and stray numbers cannot match on their own.
    # The fractional part is a non-capturing group, so findall() yields
    # exactly two fields per entry.
    # "." is accepted inside the phrase because clean() now lets periods
    # through; without it, any phrase containing one would be dropped.
    capturer = r"^\s*([\w\s\*\.]+?)\s*;\s*(\d+(?:\.\d*)?|\.\d+)\s*$"
    captured = re.compile(capturer, flags=re.MULTILINE | re.UNICODE)
    return captured.findall(cleaned)

cln = clean(phrase_file_contents)
cpt = capture(cln)

cpt = [c[:2] for c in capture(cln)]
return cpt

# return capture(clean(phrase_file_contents))



class AudioProgramGenerator:
"""Main class to generate speech output file with mixed-in background sound"""

Expand Down Expand Up @@ -76,6 +74,8 @@ def __init__(
self.book_mode = kwargs.get("book_mode", False)
self.output_format = kwargs.get("output_format", "wav")
self.phrase_handlers = []
self.fadein = kwargs.get("fadein", 3000)
self.fadeout = kwargs.get("fadeout", 6000)

# Config items for progress bar
config_handler.set_global(
Expand All @@ -92,10 +92,14 @@ def _gen_speech(self):
snippets from each line in the phrase_file + corresponding silence.
"""

def _create_tmp_speech_file(phrase_handler: AudioProgramGenerator.PhraseHandler) -> None:
def _create_tmp_speech_file(
phrase_handler: AudioProgramGenerator.PhraseHandler,
) -> None:
"""Thread worker function to turn a phrase into encoded snippet or silence"""
if phrase_handler.phrase == "*":
tempfile = audio_segment = AudioSegment.silent(duration=int(float(phrase_handler.duration)*1000))
tempfile = audio_segment = AudioSegment.silent(
duration=phrase_handler.duration * 1000
)
else:
tempfile = BytesIO(None)
speech = gTTS(phrase_handler.phrase, slow=self.slow, tld=self.tld)
Expand All @@ -110,7 +114,11 @@ def _create_tmp_speech_file(phrase_handler: AudioProgramGenerator.PhraseHandler)
if self.book_mode:
for sentence in split_text_into_sentences(self.phrases, language="en"):
phrase_handler = AudioProgramGenerator.PhraseHandler(
index=i, phrase=sentence, duration=1, tempfile=None, audio_segment=None
index=i,
phrase=sentence,
duration=1,
tempfile=None,
audio_segment=None,
)
self.phrase_handlers.append(phrase_handler)
i += 1
Expand All @@ -130,7 +138,11 @@ def _create_tmp_speech_file(phrase_handler: AudioProgramGenerator.PhraseHandler)
with concurrent.futures.ThreadPoolExecutor() as executor:
futures = []
for phrase_handler in self.phrase_handlers:
futures.append(executor.submit(_create_tmp_speech_file, phrase_handler=phrase_handler))
futures.append(
executor.submit(
_create_tmp_speech_file, phrase_handler=phrase_handler
)
)
concurrent.futures.as_completed(futures)

self.speech_file = AudioSegment.empty()
Expand All @@ -140,21 +152,23 @@ def _create_tmp_speech_file(phrase_handler: AudioProgramGenerator.PhraseHandler)
if type(phrase_handler.tempfile) == AudioSegment:
self.speech_file += phrase_handler.tempfile
elif type(phrase_handler.tempfile) == BytesIO:
self.speech_file += AudioSegment.from_file(phrase_handler.tempfile, format="mp3")
self.speech_file += AudioSegment.from_file(
phrase_handler.tempfile, format="mp3"
)
if phrase_handler.duration:
self.speech_file += AudioSegment.silent(
duration=1000 * int(phrase_handler.duration)
duration=1000 * float(phrase_handler.duration)
)
else:
raise TypeError(f"Unexpected type {type(phrase_handler.tempfile)} for phrase_handler.tempfile")
raise TypeError(
f"Unexpected type {type(phrase_handler.tempfile)} for phrase_handler.tempfile"
)

def _mix(
self,
segment1: AudioSegment,
segment2: AudioSegment,
seg2_atten: int = 0,
fadein: int = 3000,
fadeout: int = 6000,
) -> AudioSegment:
"""
Mixes two pydub AudioSegments, then fades the result in/out.
Expand All @@ -171,7 +185,9 @@ def _mix(
segment2_normalized = segment2[:duration1]

return segment1.overlay(
(segment2_normalized - float(seg2_atten)).fade_in(fadein).fade_out(fadeout)
(segment2_normalized - float(seg2_atten))
.fade_in(self.fadein)
.fade_out(self.fadeout)
)

def invoke(self) -> BytesIO:
Expand Down

0 comments on commit d22b39b

Please sign in to comment.