diff --git a/assemblyai/__init__.py b/assemblyai/__init__.py
index 241d536..1531dfc 100644
--- a/assemblyai/__init__.py
+++ b/assemblyai/__init__.py
@@ -19,6 +19,7 @@
     IABResponse,
     IABResult,
     LanguageCode,
+    LanguageDetectionOptions,
     LemurActionItemsResponse,
     LemurError,
     LemurModel,
@@ -92,6 +93,7 @@
     "IABResponse",
     "IABResult",
     "LanguageCode",
+    "LanguageDetectionOptions",
     "Lemur",
     "LemurActionItemsResponse",
     "LemurError",
diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py
index 3d8b602..1a92310 100644
--- a/assemblyai/__version__.py
+++ b/assemblyai/__version__.py
@@ -1 +1 @@
-__version__ = "0.42.1"
+__version__ = "0.43.0"
diff --git a/assemblyai/types.py b/assemblyai/types.py
index 1585068..5e74540 100644
--- a/assemblyai/types.py
+++ b/assemblyai/types.py
@@ -489,6 +489,18 @@ class SpeechModel(str, Enum):
     "The model optimized for accuracy, low latency, ease of use, and multi-language support"
 
 
+class LanguageDetectionOptions(BaseModel):
+    """Options for controlling the behavior of Automatic Language Detection"""
+
+    expected_languages: Optional[List[str]] = Field(
+        None, description="A list of languages that the audio could be expected to be."
+    )
+    fallback_language: Optional[str] = Field(
+        None,
+        description="The language to fallback to in case the language detection does not predict any of the expected ones.",
+    )
+
+
 class SpeakerOptions(BaseModel):
     """
     Speaker options for controlling speaker diarization parameters
@@ -633,6 +645,9 @@ class RawTranscriptionConfig(BaseModel):
     if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
     """
 
+    language_detection_options: Optional[LanguageDetectionOptions] = None
+    "Options for controlling the behavior of Automatic Language Detection."
+
     speech_threshold: Optional[float] = None
     "Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive."
 
@@ -688,6 +703,7 @@ def __init__(
         auto_highlights: Optional[bool] = None,
         language_detection: Optional[bool] = None,
         language_confidence_threshold: Optional[float] = None,
+        language_detection_options: Optional[LanguageDetectionOptions] = None,
         speech_threshold: Optional[float] = None,
         raw_transcription_config: Optional[RawTranscriptionConfig] = None,
         speech_model: Optional[SpeechModel] = None,
@@ -731,6 +747,7 @@ def __init__(
             language_detection: Identify the dominant language that's spoken in an audio file, and route the file to the appropriate model for the detected language.
             language_confidence_threshold: The confidence threshold that must be reached if `language_detection` is enabled. An error will be returned
                 if the language confidence is below this threshold. Valid values are in the range [0,1] inclusive.
+            language_detection_options: Options for controlling the behavior of Automatic Language Detection.
             speech_threshold: Reject audio files that contain less than this fraction of speech. Valid values are in the range [0,1] inclusive.
             raw_transcription_config: Create the config from a `RawTranscriptionConfig`
         """
@@ -780,6 +797,7 @@ def __init__(
         self.auto_highlights = auto_highlights
         self.language_detection = language_detection
         self.language_confidence_threshold = language_confidence_threshold
+        self.language_detection_options = language_detection_options
         self.speech_threshold = speech_threshold
         self.speech_model = speech_model
         self.prompt = prompt
@@ -1175,6 +1193,20 @@ def language_confidence_threshold(self, threshold: Optional[float]) -> None:
 
         self._raw_transcription_config.language_confidence_threshold = threshold
 
+    @property
+    def language_detection_options(self) -> Optional[LanguageDetectionOptions]:
+        "Returns the options for controlling the behavior of Automatic Language Detection."
+
+        return self._raw_transcription_config.language_detection_options
+
+    @language_detection_options.setter
+    def language_detection_options(
+        self, options: Optional[LanguageDetectionOptions]
+    ) -> None:
+        "Set the options for controlling the behavior of Automatic Language Detection."
+
+        self._raw_transcription_config.language_detection_options = options
+
     @property
     def speech_threshold(self) -> Optional[float]:
         "Returns the current speech threshold."
@@ -1441,6 +1473,50 @@ def set_summarize(
 
         return self
 
+    def set_language_detection(
+        self,
+        enable: Optional[bool] = True,
+        confidence_threshold: Optional[float] = None,
+        expected_languages: Optional[List[str]] = None,
+        fallback_language: Optional[str] = None,
+    ) -> Self:
+        """
+        Enable Automatic Language Detection with optional configuration.
+
+        Every call replaces the previous language detection settings, so
+        arguments that are omitted are reset rather than carried over.
+
+        Args:
+            enable: whether to enable or disable the Language Detection feature.
+            confidence_threshold: The confidence threshold that must be reached.
+            expected_languages: A list of languages that the audio could be expected to be.
+            fallback_language: The language to fallback to if detection fails.
+        """
+
+        if not enable:
+            self._raw_transcription_config.language_detection = None
+            self._raw_transcription_config.language_confidence_threshold = None
+            self._raw_transcription_config.language_detection_options = None
+            return self
+
+        self._raw_transcription_config.language_detection = True
+        self._raw_transcription_config.language_confidence_threshold = (
+            confidence_threshold
+        )
+
+        # Always assign the options, even when none were passed, so that
+        # options left over from an earlier call are not silently kept.
+        self._raw_transcription_config.language_detection_options = (
+            LanguageDetectionOptions(
+                expected_languages=expected_languages,
+                fallback_language=fallback_language,
+            )
+            if expected_languages or fallback_language
+            else None
+        )
+
+        return self
+
     # endregion
 
 
@@ -1818,6 +1894,9 @@ class BaseTranscript(BaseModel):
     language_confidence_threshold: Optional[float] = None
     "The confidence threshold that must be reached if `language_detection` is enabled."
 
+    language_detection_options: Optional[LanguageDetectionOptions] = None
+    "Options for controlling the behavior of Automatic Language Detection."
+
     language_confidence: Optional[float] = None
     "The confidence score for the detected language, between 0.0 (low confidence) and 1.0 (high confidence)."
 
diff --git a/tests/unit/test_domains.py b/tests/unit/test_domains.py
index 030a187..ef531be 100644
--- a/tests/unit/test_domains.py
+++ b/tests/unit/test_domains.py
@@ -20,6 +20,7 @@ def test_configuration_drift():
         "set_webhook",  # webhook
         "set_speaker_diarization",  # speaker diarization
         "set_content_safety",  # content safety
+        "set_language_detection",  # language detection
     }
 
     # get all members
diff --git a/tests/unit/test_language_detection_options.py b/tests/unit/test_language_detection_options.py
new file mode 100644
index 0000000..3ae2d17
--- /dev/null
+++ b/tests/unit/test_language_detection_options.py
@@ -0,0 +1,143 @@
+import assemblyai as aai
+
+
+def test_language_detection_options_creation():
+    """Test that LanguageDetectionOptions can be created with valid parameters."""
+    options = aai.LanguageDetectionOptions(
+        expected_languages=["en", "es", "fr"], fallback_language="en"
+    )
+    assert options.expected_languages == ["en", "es", "fr"]
+    assert options.fallback_language == "en"
+
+
+def test_language_detection_options_expected_languages_only():
+    """Test that LanguageDetectionOptions can be created with only expected_languages."""
+    options = aai.LanguageDetectionOptions(expected_languages=["en", "de"])
+    assert options.expected_languages == ["en", "de"]
+    assert options.fallback_language is None
+
+
+def test_language_detection_options_fallback_language_only():
+    """Test that LanguageDetectionOptions can be created with only fallback_language."""
+    options = aai.LanguageDetectionOptions(fallback_language="es")
+    assert options.expected_languages is None
+    assert options.fallback_language == "es"
+
+
+def test_language_detection_options_empty():
+    """Test that LanguageDetectionOptions can be created with no parameters."""
+    options = aai.LanguageDetectionOptions()
+    assert options.expected_languages is None
+    assert options.fallback_language is None
+
+
+def test_transcription_config_with_language_detection_options():
+    """Test that TranscriptionConfig accepts language_detection_options parameter."""
+    options = aai.LanguageDetectionOptions(
+        expected_languages=["en", "fr"], fallback_language="en"
+    )
+
+    config = aai.TranscriptionConfig(
+        language_detection=True, language_detection_options=options
+    )
+
+    assert config.language_detection is True
+    assert config.language_detection_options == options
+    assert config.language_detection_options.expected_languages == ["en", "fr"]
+    assert config.language_detection_options.fallback_language == "en"
+
+
+def test_language_detection_options_property_getter():
+    """Test the language_detection_options property getter."""
+    options = aai.LanguageDetectionOptions(
+        expected_languages=["ja", "ko"], fallback_language="ja"
+    )
+
+    config = aai.TranscriptionConfig()
+    config.language_detection_options = options
+
+    assert config.language_detection_options == options
+    assert config.language_detection_options.expected_languages == ["ja", "ko"]
+    assert config.language_detection_options.fallback_language == "ja"
+
+
+def test_language_detection_options_property_setter():
+    """Test the language_detection_options property setter."""
+    config = aai.TranscriptionConfig()
+
+    options = aai.LanguageDetectionOptions(
+        expected_languages=["zh", "zh_cn"], fallback_language="zh"
+    )
+    config.language_detection_options = options
+
+    assert config.language_detection_options == options
+
+
+def test_language_detection_options_property_setter_none():
+    """Test setting language_detection_options to None."""
+    options = aai.LanguageDetectionOptions(fallback_language="en")
+    config = aai.TranscriptionConfig(language_detection_options=options)
+
+    # Verify it was set
+    assert config.language_detection_options == options
+
+    # Now set to None
+    config.language_detection_options = None
+    assert config.language_detection_options is None
+
+
+def test_language_detection_options_in_raw_config():
+    """Test that language_detection_options is properly set in the raw config."""
+    options = aai.LanguageDetectionOptions(
+        expected_languages=["en", "es"], fallback_language="en"
+    )
+
+    config = aai.TranscriptionConfig(language_detection_options=options)
+
+    assert config.raw.language_detection_options == options
+
+
+def test_set_language_detection():
+    """Test the set_language_detection method."""
+    config = aai.TranscriptionConfig().set_language_detection(
+        confidence_threshold=0.8,
+        expected_languages=["en", "fr"],
+        fallback_language="en",
+    )
+
+    assert config.language_detection is True
+    assert config.language_confidence_threshold == 0.8
+    assert config.language_detection_options.expected_languages == ["en", "fr"]
+    assert config.language_detection_options.fallback_language == "en"
+
+
+def test_set_language_detection_disable():
+    """Test disabling language detection clears all related options."""
+    config = aai.TranscriptionConfig().set_language_detection(
+        expected_languages=["en", "es"], fallback_language="en"
+    )
+
+    # Verify it was set
+    assert config.language_detection is True
+    assert config.language_detection_options is not None
+
+    # Now disable
+    config.set_language_detection(enable=False)
+
+    assert config.language_detection is None
+    assert config.language_confidence_threshold is None
+    assert config.language_detection_options is None
+
+
+def test_set_language_detection_resets_stale_options():
+    """Test that re-enabling without options clears previously set options."""
+    config = aai.TranscriptionConfig().set_language_detection(
+        expected_languages=["en", "es"], fallback_language="en"
+    )
+
+    # Re-enable without options; the earlier ones must not be carried over
+    config.set_language_detection(confidence_threshold=0.5)
+
+    assert config.language_detection is True
+    assert config.language_confidence_threshold == 0.5
+    assert config.language_detection_options is None