Skip to content

Commit

Permalink
refactor: converter methods do not need to ingest segments
Browse files Browse the repository at this point in the history
  • Loading branch information
winstxnhdw committed Sep 6, 2024
1 parent 7f7184e commit 946e7f5
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 37 deletions.
49 changes: 36 additions & 13 deletions capgen/transcriber/converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@

from faster_whisper.transcribe import Segment

from capgen.utils import convert_seconds_to_hhmmssmmm


class Converter:
"""
Expand All @@ -17,17 +15,42 @@ class Converter:
Methods
-------
convert_seconds_to_hhmmssmmm(seconds: float) -> str:
convert_seconds_to_hhmmssmmm(seconds: float, millisecond_separator: str) -> str:
converts seconds to hh:mm:ss,mmm format
as_srt(segments: Iterable[Segment]) -> str:
to_srt() -> str:
converts transcription segments into a SRT file
to_vtt() -> str:
converts transcription segments into a VTT file
"""

__slots__ = ('segments',)

def __init__(self, segments: Iterable[Segment]) -> None:
    """
    Summary
    -------
    stores the transcription segments so the conversion methods can read them

    Parameters
    ----------
    segments (Iterable[Segment]) : the transcription segments to convert
    """
    # NOTE(review): the segments are stored as given, not copied; if a one-shot
    # iterator is passed it can presumably be converted only once — confirm with callers
    self.segments = segments

def to_srt(self, segments: Iterable[Segment]) -> str:
def convert_seconds_to_hhmmssmmm(self, seconds: float, millisecond_separator: str) -> str:
    """
    Summary
    -------
    converts seconds to hh:mm:ss<separator>mmm format

    Parameters
    ----------
    seconds (float) : the number of seconds to convert
    millisecond_separator (str) : the text placed between the seconds and the milliseconds
        (e.g. ',' for SRT and '.' for VTT)

    Returns
    -------
    converted_time (str) : the converted time in hh:mm:ss<separator>mmm format
    """
    # Round to whole milliseconds first: truncating `(seconds % 1) * 1000` loses a
    # millisecond whenever the float sits just below its decimal value
    # (e.g. 1.14 would be rendered as 01,139 instead of 01,140).
    total_milliseconds = int(round(seconds * 1000))

    # From here on everything is exact integer arithmetic.
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    total_minutes, whole_seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    return f'{hours:02}:{minutes:02}:{whole_seconds:02}{millisecond_separator}{milliseconds:03}'

def to_srt(self) -> str:
"""
Summary
-------
Expand All @@ -42,13 +65,13 @@ def to_srt(self, segments: Iterable[Segment]) -> str:
subrip_subtitle (str) : the SRT subtitles
"""
return '\n\n'.join(
f'{segment.id}\n'
f'{convert_seconds_to_hhmmssmmm(segment.start, ",")} --> '
f'{convert_seconds_to_hhmmssmmm(segment.end, ",")}\n{segment.text[1:]}'
for segment in segments
f'{id}\n'
f'{self.convert_seconds_to_hhmmssmmm(start, ",")} --> '
f'{self.convert_seconds_to_hhmmssmmm(end, ",")}\n{text[1:]}'
for id, _, start, end, text, *_ in self.segments
)

def to_vtt(self, segments: Iterable[Segment]) -> str:
def to_vtt(self) -> str:
"""
Summary
-------
Expand All @@ -63,9 +86,9 @@ def to_vtt(self, segments: Iterable[Segment]) -> str:
video_text_tracks_subtitle (str) : the VTT subtitles
"""
captions = '\n\n'.join(
f'{convert_seconds_to_hhmmssmmm(segment.start, ".")} --> '
f'{convert_seconds_to_hhmmssmmm(segment.end, ".")}\n{segment.text[1:]}'
for segment in segments
f'{self.convert_seconds_to_hhmmssmmm(start, ".")} --> '
f'{self.convert_seconds_to_hhmmssmmm(end, ".")}\n{text[1:]}'
for _, _, start, end, text, *_ in self.segments
)

return f'WEBVTT\n\n{captions}'
4 changes: 2 additions & 2 deletions capgen/transcriber/transcriber.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ def transcribe(self, file: str | BinaryIO, caption_format: str) -> str | None:
converter = Converter(segments)

if caption_format == 'srt':
return converter.to_srt(segments)
return converter.to_srt()

if caption_format == 'vtt':
return converter.to_vtt(segments)
return converter.to_vtt()

return None
3 changes: 0 additions & 3 deletions capgen/utils/__init__.py

This file was deleted.

19 changes: 0 additions & 19 deletions capgen/utils/convert_seconds_to_hhmmssmmm.py

This file was deleted.

0 comments on commit 946e7f5

Please sign in to comment.