From 94d8ea4339d99bcebfeaad56629dadf7aa302d29 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 5 Dec 2024 12:11:17 +0100 Subject: [PATCH 01/31] Endpoint to extract audio from brightcove files --- .../no/ndla/audioapi/AudioApiProperties.scala | 8 ++ .../no/ndla/audioapi/ComponentRegistry.scala | 30 ++++--- .../controller/TranscriptionController.scala | 40 +++++++++ .../service/TranscriptionService.scala | 90 +++++++++++++++++++ .../service/TranscriptionServiceTest.scala | 3 + .../no/ndla/common/aws/NdlaS3Client.scala | 28 ++++++ .../brightcove/NdlaBrightcoveClient.scala | 75 ++++++++++++++++ project/Dependencies.scala | 4 + project/Module.scala | 1 - project/audioapi.scala | 3 +- 10 files changed, 269 insertions(+), 13 deletions(-) create mode 100644 audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala create mode 100644 audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala create mode 100644 audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala create mode 100644 common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala diff --git a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala index 7a298818a..2b6cdf5d5 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala @@ -9,6 +9,7 @@ package no.ndla.audioapi import com.typesafe.scalalogging.StrictLogging +import no.ndla.common.Environment.prop import no.ndla.common.configuration.{BaseProps, HasBaseProps} import no.ndla.database.{DatabaseProps, HasDatabaseProps} import no.ndla.network.{AuthUser, Domains} @@ -36,6 +37,13 @@ class AudioApiProperties extends BaseProps with DatabaseProps with StrictLogging val StorageName: String = propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla") val StorageRegion: Option[String] = propOrNone("AUDIO_FILE_S3_BUCKET_REGION") + val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET", s"$Environment.transcribe.ndla") + val TranscribeStorageRegion: Option[String] = propOrNone("TRANSCRIBE_FILE_S3_BUCKET_REGION") + + val BrightcoveClientId: String = prop("BRIGHTCOVE_API_CLIENT_ID") + val BrightcoveClientSecret: String = prop("BRIGHTCOVE_API_CLIENT_SECRET") + val BrightcoveAccountId: String = prop("BRIGHTCOVE_ACCOUNT") + val SearchServer: String = propOrElse("SEARCH_SERVER", "http://search-audio-api.ndla-local") val RunWithSignedSearchRequests: Boolean = propOrElse("RUN_WITH_SIGNED_SEARCH_REQUESTS", "true").toBoolean val SearchIndex: String = propOrElse("SEARCH_INDEX_NAME", "audios") diff --git a/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala b/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala index 3684fcee8..56f2049cf 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala @@ -17,6 +17,7 @@ import no.ndla.audioapi.service.* import no.ndla.audioapi.service.search.* import no.ndla.common.Clock import no.ndla.common.aws.NdlaS3Client +import no.ndla.common.brightcove.NdlaBrightcoveClient import no.ndla.common.configuration.BaseComponentRegistry import no.ndla.database.{DBMigrator, DataSource} import no.ndla.network.NdlaClient @@ -38,6 +39,7 @@ class ComponentRegistry(properties: AudioApiProperties) with HealthController with AudioController with SeriesController + with TranscriptionController with SearchService with AudioSearchService with SeriesSearchService @@ -54,7 +56,9 @@ class ComponentRegistry(properties: AudioApiProperties) with DBMigrator with ErrorHandling with SwaggerDocControllerConfig - with NdlaS3Client { + with NdlaS3Client + with TranscriptionService + with NdlaBrightcoveClient { override val props: AudioApiProperties = properties override val migrator: DBMigrator = DBMigrator( new V5__AddAgreementToAudio, @@ -63,7 +67,8 @@ class ComponentRegistry(properties: AudioApiProperties) override val dataSource: HikariDataSource = DataSource.getHikariDataSource DataSource.connectToDatabase() - lazy val s3Client = new NdlaS3Client(props.StorageName, props.StorageRegion) + lazy val s3Client = new NdlaS3Client(props.StorageName, props.StorageRegion) + lazy val brightcoveClient = new NdlaBrightcoveClient() lazy val audioRepository = new AudioRepository lazy val seriesRepository = new SeriesRepository @@ -71,15 +76,17 @@ class ComponentRegistry(properties: AudioApiProperties) lazy val ndlaClient = new NdlaClient lazy val myndlaApiClient: MyNDLAApiClient = new MyNDLAApiClient - lazy val readService = new ReadService - lazy val writeService = new WriteService - lazy val validationService = new ValidationService - lazy val converterService = new ConverterService + lazy val readService = new ReadService + lazy val writeService = new WriteService + lazy val validationService = new ValidationService + lazy val converterService = new ConverterService + lazy val transcriptionService = new TranscriptionService - lazy val internController = new InternController - lazy val audioApiController = new AudioController - lazy val seriesController = new SeriesController - lazy val healthController = new HealthController + lazy val internController = new InternController + lazy val audioApiController = new AudioController + lazy val seriesController = new SeriesController + lazy val healthController = new HealthController + lazy val transcriptionController = new TranscriptionController var e4sClient: NdlaE4sClient = Elastic4sClientFactory.getClient(props.SearchServer) lazy val searchConverterService = new SearchConverterService @@ -97,7 +104,8 @@ class ComponentRegistry(properties: AudioApiProperties) audioApiController, seriesController, internController, - healthController + healthController, + transcriptionController ), SwaggerDocControllerConfig.swaggerInfo ) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala new file mode 100644 index 000000000..1b77682c2 --- /dev/null +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -0,0 +1,40 @@ +package no.ndla.audioapi.controller + +import no.ndla.audioapi.Props +import no.ndla.audioapi.service.{ReadService, TranscriptionService} +import no.ndla.network.tapir.TapirController +import no.ndla.network.tapir.TapirUtil.errorOutputsFor +import sttp.tapir.server.ServerEndpoint +import sttp.tapir.{EndpointInput, endpoint, path, query} +import sttp.tapir.* + +import scala.util.{Failure, Success} +trait TranscriptionController { + this: Props & TapirController & ReadService & TranscriptionService => + val transcriptionController: TranscriptionController + class TranscriptionController() extends TapirController { + import props.* + + override val serviceName: String = "transcription" + override val prefix: EndpointInput[Unit] = "audio-api" / "v1" / serviceName + + private val videoId = path[String]("videoId").description("The video id to transcribe") + private val language = query[String]("language").description("The language to transcribe the video to") + + def postExtractAudio: ServerEndpoint[Any, Eff] = endpoint.post + .summary("Extract audio from video") + .description("Extracts audio from a Brightcove video and uploads it to S3.") + .in(videoId) + .in(language) + .errorOut(errorOutputsFor(400, 500)) + .serverLogicPure { case (videoId, language) => + transcriptionService.extractAudioFromVideo(videoId, language) match { + case Success(_) => Right(()) + case Failure(ex) => returnLeftError(ex) + } + } + + override val endpoints: List[ServerEndpoint[Any, Eff]] = List(postExtractAudio) + } + +} diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala new file mode 100644 index 000000000..a0e1b1274 --- /dev/null +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -0,0 +1,90 @@ +package no.ndla.audioapi.service + +import no.ndla.audioapi.Props +import no.ndla.common.aws.NdlaS3Client +import no.ndla.common.brightcove.NdlaBrightcoveClient +import sttp.client3.{HttpURLConnectionBackend, UriContext, asFile, basicRequest} +import ws.schild.jave.{Encoder, MultimediaObject} +import ws.schild.jave.encode.{AudioAttributes, EncodingAttributes} + +import java.io.File +import scala.util.{Failure, Success, Try} + +trait TranscriptionService { + this: NdlaS3Client & Props & NdlaBrightcoveClient => + val transcriptionService: TranscriptionService + class TranscriptionService { + + private lazy val s3TranscribeClient = new NdlaS3Client(props.TranscribeStorageName, props.TranscribeStorageRegion) + + def extractAudioFromVideo(videoId: String, language: String): Try[Unit] = { + val accountId = props.BrightcoveAccountId + val videoUrl = getVideo(accountId, videoId) match { + case Right(sources) => sources.head + case Left(error) => throw new RuntimeException(s"Failed to get video sources: $error") + } + val videoFile = downloadVideo(videoId, videoUrl) + + val audioFile = new File(s"/tmp/audio_${videoId}.mp3") + + val audioAttributes = new AudioAttributes() + audioAttributes.setCodec("libmp3lame") + audioAttributes.setBitRate(128000) + audioAttributes.setChannels(2) + audioAttributes.setSamplingRate(44100) + + val encodingAttributes = new EncodingAttributes() + encodingAttributes.setOutputFormat("mp3") + encodingAttributes.setAudioAttributes(audioAttributes) + + val encoder = new Encoder() + Try { + encoder.encode(new MultimediaObject(videoFile), audioFile, encodingAttributes) + } match { + case Success(_) => + val s3Key = s"/audio/$language/${videoId}.mp3" + s3TranscribeClient.putObject(s3Key, audioFile, "audio/mpeg") match { + case Success(_) => + s3TranscribeClient.deleteObject(videoFile.getName).map(_ => ()) + case _ => + Failure(new RuntimeException(s"Failed to upload audio file to S3.")) + } + case Failure(exception) => Failure(exception) + + } + } + + private def getVideo(accountId: String, videoId: String): Either[String, Vector[String]] = { + val clientId = props.BrightcoveClientId + val clientSecret = props.BrightcoveClientSecret + val token = brightcoveClient.getToken(clientId, clientSecret) + token match { + case Right(bearerToken) => + val cake = brightcoveClient.getVideoSource(accountId, videoId, bearerToken) + cake match { + case Right(videoSources) => + val mp4Sources = videoSources + .filter(source => source.hcursor.get[String]("container").toOption.contains("MP4")) + .map(source => source.hcursor.get[String]("src").toOption.getOrElse("")) + if (mp4Sources.nonEmpty) Right(mp4Sources) + else Left("No MP4 sources found for video.") + case Left(error) => Left(s"Failed to get video sources: $error") + } + case Left(error) => + Left(s"Failed to retrieve bearer token: $error") + } + } + + private def downloadVideo(videoId: String, videoUrl: String): File = { + val videoFile = new File(s"/tmp/video_$videoId.mp4") + val connection = HttpURLConnectionBackend() + + val response = basicRequest.get(uri"$videoUrl").response(asFile(videoFile)).send(connection) + + response.body match { + case Right(file) => file + case Left(error) => throw new RuntimeException(s"Failed to download video: $error") + } + } + } +} diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala new file mode 100644 index 000000000..c15bf9f21 --- /dev/null +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -0,0 +1,3 @@ +package no.ndla.audioapi.service class TranscriptionServiceTest { + +} diff --git a/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala b/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala index 8803eb385..f8a7e43a2 100644 --- a/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala +++ b/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala @@ -13,6 +13,7 @@ import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.model.* import software.amazon.awssdk.services.s3.{S3Client, S3ClientBuilder} +import java.io.File import scala.util.Try trait NdlaS3Client { @@ -81,6 +82,33 @@ trait NdlaS3Client { ) } + def putObject( + key: String, + file: File, + contentType: String, + cacheControl: Option[String] = None + ): Try[PutObjectResponse] = Try { + + val por = PutObjectRequest + .builder() + .bucket(bucket) + .key(key) + .contentLength(file.length()) + .contentType(contentType) + + val porWithCacheControl = cacheControl match { + case Some(value) => por.cacheControl(value) + case None => por + } + + val requestBody = RequestBody.fromFile(file) + + client.putObject( + porWithCacheControl.build(), + requestBody + ) + } + def updateMetadata(key: String, metadata: java.util.Map[String, String]): Try[_] = Try { val cor = CopyObjectRequest diff --git a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala new file mode 100644 index 000000000..25b646304 --- /dev/null +++ b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala @@ -0,0 +1,75 @@ +package no.ndla.common.brightcove + +import io.circe.{Decoder, Json} +import io.circe.generic.codec.DerivedAsObjectCodec.deriveCodec +import io.circe.generic.semiauto.deriveDecoder +import io.circe.parser.* +import sttp.client3.{HttpClientSyncBackend, UriContext, basicRequest} + +case class TokenResponse(access_token: String, token_type: String, expires_in: Int) +case class VideoSource( + src: String, + `type`: String, + container: String, + codec: Option[String] = None, + encoding_rate: Option[Int] = None, + duration: Option[Int] = None, + height: Option[Int] = None, + width: Option[Int] = None, + size: Option[Long] = None, + uploaded_at: Option[String] = None, + ext_x_version: Option[String] = None, + profiles: Option[String] = None, + remote: Option[Boolean] = None +) + +trait NdlaBrightcoveClient { + val brightcoveClient: NdlaBrightcoveClient + + class NdlaBrightcoveClient { + private val authUrl = "https://oauth.brightcove.com/v4/access_token" + private val backend = HttpClientSyncBackend() // Or AsyncHttpClientFutureBackend() + + def getToken(clientID: String, clientSecret: String): Either[String, String] = { + val request = + basicRequest.auth + .basic(clientID, clientSecret) + .post(uri"$authUrl?grant_type=client_credentials") + val authResponse = request.send(backend) + + authResponse.body match { + case Right(jsonString) => + decode[TokenResponse](jsonString) match { + case Right(tokenResponse) => Right(tokenResponse.access_token) + case Left(error) => Left(s"Failed to decode token response: ${error.getMessage}") + } + case Left(error) => Left(s"Failed to get token: ${error}") + } + } + + def getVideoSource(accountId: String, videoId: String, bearerToken: String): Either[String, Vector[Json]] = { + + val videoSourceUrl = uri"https://cms.api.brightcove.com/v1/accounts/$accountId/videos/$videoId/sources" + val request = basicRequest + .header("Authorization", s"Bearer $bearerToken") + .get(videoSourceUrl) + + implicit val backend = HttpClientSyncBackend() + + val response = request.send(backend) + + response.body match { + case Right(jsonString) => + parse(jsonString) match { + case Right(json) => + json.asArray match { + case Some(videoSources) => Right(videoSources) + case None => Left("Expected a JSON array but got something else.") + } + case Left(error) => Left(s"Failed to decode video source response: ${error.getMessage}") + } + case Left(error) => Left(s"Failed to get video source: ${error}") + } + } + } +} diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 99c72f0c3..42fee02c0 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -134,5 +134,9 @@ object Dependencies { "org.apache.httpcomponents" % "httpclient" % "4.5.14", "org.yaml" % "snakeyaml" % "2.0" ) + lazy val jave: Seq[ModuleID] = Seq( + "ws.schild" % "jave-core" % "3.5.0", + "ws.schild" % "jave-all-deps" % "3.5.0" + ) } } diff --git a/project/Module.scala b/project/Module.scala index ed4fd485d..2ec80029a 100644 --- a/project/Module.scala +++ b/project/Module.scala @@ -168,7 +168,6 @@ trait Module { ) ) } - val checkfmt = taskKey[Unit]("Check for code style errors") val fmt = taskKey[Unit]("Automatically apply code style fixes") diff --git a/project/audioapi.scala b/project/audioapi.scala index 716f544e7..7ca36a8b3 100644 --- a/project/audioapi.scala +++ b/project/audioapi.scala @@ -23,7 +23,8 @@ object audioapi extends Module { elastic4s, database, tapirHttp4sCirce, - vulnerabilityOverrides + vulnerabilityOverrides, + jave ) lazy val tsSettings: Seq[Def.Setting[?]] = typescriptSettings( From f46fd636188db4a8d864338792351a560f7646e0 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Fri, 6 Dec 2024 12:56:21 +0100 Subject: [PATCH 02/31] Add get endpoint --- .../no/ndla/audioapi/AudioApiProperties.scala | 4 +-- .../no/ndla/audioapi/ComponentRegistry.scala | 5 +-- .../controller/TranscriptionController.scala | 35 ++++++++++++++----- .../service/TranscriptionService.scala | 15 +++++--- .../brightcove/NdlaBrightcoveClient.scala | 18 +--------- 5 files changed, 44 insertions(+), 33 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala index 2b6cdf5d5..3430227d1 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala @@ -37,12 +37,12 @@ class AudioApiProperties extends BaseProps with DatabaseProps with StrictLogging val StorageName: String = propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla") val StorageRegion: Option[String] = propOrNone("AUDIO_FILE_S3_BUCKET_REGION") - val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET", s"$Environment.transcribe.ndla") + val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET_NAME", s"$Environment.transcribe.ndla") val TranscribeStorageRegion: Option[String] = propOrNone("TRANSCRIBE_FILE_S3_BUCKET_REGION") val BrightcoveClientId: String = prop("BRIGHTCOVE_API_CLIENT_ID") val BrightcoveClientSecret: String = prop("BRIGHTCOVE_API_CLIENT_SECRET") - val BrightcoveAccountId: String = prop("BRIGHTCOVE_ACCOUNT") + val BrightcoveAccountId: String = prop("NDLA_BRIGHTCOVE_ACCOUNT_ID") val SearchServer: String = propOrElse("SEARCH_SERVER", "http://search-audio-api.ndla-local") val RunWithSignedSearchRequests: Boolean = propOrElse("RUN_WITH_SIGNED_SEARCH_REQUESTS", "true").toBoolean diff --git a/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala b/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala index 56f2049cf..906104bb7 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala @@ -67,8 +67,9 @@ class ComponentRegistry(properties: AudioApiProperties) override val dataSource: HikariDataSource = DataSource.getHikariDataSource DataSource.connectToDatabase() - lazy val s3Client = new NdlaS3Client(props.StorageName, props.StorageRegion) - lazy val brightcoveClient = new NdlaBrightcoveClient() + lazy val s3Client = new NdlaS3Client(props.StorageName, props.StorageRegion) + lazy val s3TranscribeClient = new NdlaS3Client(props.TranscribeStorageName, props.TranscribeStorageRegion) + lazy val brightcoveClient = new NdlaBrightcoveClient() lazy val audioRepository = new AudioRepository lazy val seriesRepository = new SeriesRepository diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index 1b77682c2..55519e6f5 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -4,8 +4,9 @@ import no.ndla.audioapi.Props import no.ndla.audioapi.service.{ReadService, TranscriptionService} import no.ndla.network.tapir.TapirController import no.ndla.network.tapir.TapirUtil.errorOutputsFor +import no.ndla.network.tapir.auth.Permission.DRAFT_API_WRITE import sttp.tapir.server.ServerEndpoint -import sttp.tapir.{EndpointInput, endpoint, path, query} +import sttp.tapir.{EndpointInput, endpoint, path} import sttp.tapir.* import scala.util.{Failure, Success} @@ -13,13 +14,12 @@ trait TranscriptionController { this: Props & TapirController & ReadService & TranscriptionService => val transcriptionController: TranscriptionController class TranscriptionController() extends TapirController { - import props.* override val serviceName: String = "transcription" override val prefix: EndpointInput[Unit] = "audio-api" / "v1" / serviceName private val videoId = path[String]("videoId").description("The video id to transcribe") - private val language = query[String]("language").description("The language to transcribe the video to") + private val language = path[String]("language").description("The language to transcribe the video to") def postExtractAudio: ServerEndpoint[Any, Eff] = endpoint.post .summary("Extract audio from video") @@ -27,14 +27,33 @@ trait TranscriptionController { .in(videoId) .in(language) .errorOut(errorOutputsFor(400, 500)) - .serverLogicPure { case (videoId, language) => - transcriptionService.extractAudioFromVideo(videoId, language) match { - case Success(_) => Right(()) - case Failure(ex) => returnLeftError(ex) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => + { case (videoId, language) => + transcriptionService.extractAudioFromVideo(videoId, language) match { + case Success(_) => Right(()) + case Failure(ex) => returnLeftError(ex) + } } } - override val endpoints: List[ServerEndpoint[Any, Eff]] = List(postExtractAudio) + def getAudioExtraction: ServerEndpoint[Any, Eff] = endpoint.get + .summary("Get audio extraction status") + .description("Get the status of the audio extraction from a Brightcove video.") + .in(videoId) + .in(language) + .errorOut(errorOutputsFor(400, 500)) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => + { case (videoId, language) => + transcriptionService.getAudioExtractionStatus(videoId, language) match { + case Success(_) => Right(()) + case Failure(ex) => returnLeftError(ex) + } + } + } + + override val endpoints: List[ServerEndpoint[Any, Eff]] = List(postExtractAudio, getAudioExtraction) } } diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index a0e1b1274..53e9af0e6 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -13,9 +13,9 @@ import scala.util.{Failure, Success, Try} trait TranscriptionService { this: NdlaS3Client & Props & NdlaBrightcoveClient => val transcriptionService: TranscriptionService - class TranscriptionService { + val s3TranscribeClient: NdlaS3Client - private lazy val s3TranscribeClient = new NdlaS3Client(props.TranscribeStorageName, props.TranscribeStorageRegion) + class TranscriptionService { def extractAudioFromVideo(videoId: String, language: String): Try[Unit] = { val accountId = props.BrightcoveAccountId @@ -46,14 +46,21 @@ trait TranscriptionService { s3TranscribeClient.putObject(s3Key, audioFile, "audio/mpeg") match { case Success(_) => s3TranscribeClient.deleteObject(videoFile.getName).map(_ => ()) - case _ => - Failure(new RuntimeException(s"Failed to upload audio file to S3.")) + case Failure(ex) => + Failure(new RuntimeException(s"Failed to upload audio file to S3.", ex)) } case Failure(exception) => Failure(exception) } } + def getAudioExtractionStatus(videoId: String, language: String): Try[Unit] = { + s3TranscribeClient.getObject(s"audio/$language/${videoId}.mp3") match { + case Success(_) => Success(()) + case Failure(exception) => Failure(exception) + } + } + private def getVideo(accountId: String, videoId: String): Either[String, Vector[String]] = { val clientId = props.BrightcoveClientId val clientSecret = props.BrightcoveClientSecret diff --git a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala index 25b646304..135c7f399 100644 --- a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala +++ b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala @@ -1,27 +1,11 @@ package no.ndla.common.brightcove -import io.circe.{Decoder, Json} +import io.circe.Json import io.circe.generic.codec.DerivedAsObjectCodec.deriveCodec -import io.circe.generic.semiauto.deriveDecoder import io.circe.parser.* import sttp.client3.{HttpClientSyncBackend, UriContext, basicRequest} case class TokenResponse(access_token: String, token_type: String, expires_in: Int) -case class VideoSource( - src: String, - `type`: String, - container: String, - codec: Option[String] = None, - encoding_rate: Option[Int] = None, - duration: Option[Int] = None, - height: Option[Int] = None, - width: Option[Int] = None, - size: Option[Long] = None, - uploaded_at: Option[String] = None, - ext_x_version: Option[String] = None, - profiles: Option[String] = None, - remote: Option[Boolean] = None -) trait NdlaBrightcoveClient { val brightcoveClient: NdlaBrightcoveClient From 873028897b1a1abb4468d3416ce5447f44a66820 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 12 Dec 2024 09:13:31 +0100 Subject: [PATCH 03/31] Update NotFound Error handling in audio-api --- .../scala/no/ndla/audioapi/model/api/Error.scala | 15 +++++++-------- .../no/ndla/audioapi/service/ReadService.scala | 5 ++--- .../no/ndla/audioapi/service/WriteService.scala | 6 +++--- .../ndla/audioapi/service/WriteServiceTest.scala | 4 ++-- .../ndla/common/aws/NdlaAWSTranscribeClient.scala | 3 +++ 5 files changed, 17 insertions(+), 16 deletions(-) create mode 100644 common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala diff --git a/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala b/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala index bfc3d8c7c..f2b294366 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala @@ -10,7 +10,7 @@ package no.ndla.audioapi.model.api import no.ndla.audioapi.Props import no.ndla.common.Clock -import no.ndla.common.errors.{AccessDeniedException, FileTooBigException, ValidationException} +import no.ndla.common.errors.{AccessDeniedException, FileTooBigException, NotFoundException, ValidationException} import no.ndla.database.DataSource import no.ndla.network.model.HttpRequestException import no.ndla.network.tapir.{AllErrors, ErrorBody, TapirErrorHandling, ValidationErrorBody} @@ -59,10 +59,9 @@ trait ErrorHandling extends TapirErrorHandling { } -class NotFoundException(message: String = "The audio was not found") extends RuntimeException(message) -case class MissingIdException(message: String) extends RuntimeException(message) -case class CouldNotFindLanguageException(message: String) extends RuntimeException(message) -class AudioStorageException(message: String) extends RuntimeException(message) -class LanguageMappingException(message: String) extends RuntimeException(message) -class ImportException(message: String) extends RuntimeException(message) -case class ElasticIndexingException(message: String) extends RuntimeException(message) +case class MissingIdException(message: String) extends RuntimeException(message) +case class CouldNotFindLanguageException(message: String) extends RuntimeException(message) +class AudioStorageException(message: String) extends RuntimeException(message) +class LanguageMappingException(message: String) extends RuntimeException(message) +class ImportException(message: String) extends RuntimeException(message) +case class ElasticIndexingException(message: String) extends RuntimeException(message) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/ReadService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/ReadService.scala index d9682baf9..fd954c354 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/ReadService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/ReadService.scala @@ -8,12 +8,11 @@ package no.ndla.audioapi.service -import cats.implicits._ +import cats.implicits.* import no.ndla.audioapi.model.api -import no.ndla.audioapi.model.api.NotFoundException import no.ndla.audioapi.repository.{AudioRepository, SeriesRepository} import no.ndla.audioapi.service.search.{SearchConverterService, TagSearchService} -import no.ndla.common.errors.ValidationException +import no.ndla.common.errors.{NotFoundException, ValidationException} import scala.util.{Failure, Success, Try} diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/WriteService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/WriteService.scala index 47a0a6c23..a94213c07 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/WriteService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/WriteService.scala @@ -9,14 +9,14 @@ package no.ndla.audioapi.service import cats.implicits.* import com.typesafe.scalalogging.StrictLogging -import no.ndla.audioapi.model.api.{AudioStorageException, MissingIdException, NotFoundException} +import no.ndla.audioapi.model.api.{AudioStorageException, MissingIdException} import no.ndla.audioapi.model.domain.Audio import no.ndla.audioapi.model.{api, domain} import no.ndla.audioapi.repository.{AudioRepository, SeriesRepository} import no.ndla.audioapi.service.search.{AudioIndexService, SeriesIndexService, TagIndexService} import no.ndla.common.Clock import no.ndla.common.aws.NdlaS3Client -import no.ndla.common.errors.ValidationException +import no.ndla.common.errors.{NotFoundException, ValidationException} import no.ndla.common.model.domain.UploadedFile import no.ndla.common.model.domain as common import no.ndla.language.Language.findByLanguageOrBestEffort @@ -278,7 +278,7 @@ trait WriteService { user: TokenUser ): Try[api.AudioMetaInformation] = { audioRepository.withId(id) match { - case None => Failure(new NotFoundException) + case None => Failure(NotFoundException("Audio not found")) case Some(existingMetadata) => val metadataAndFile = fileOpt match { case None => mergeAudioMeta(existingMetadata, metadataToUpdate, None, user) diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/WriteServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/WriteServiceTest.scala index dba17278f..1f15dba0b 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/WriteServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/WriteServiceTest.scala @@ -12,7 +12,7 @@ import no.ndla.audioapi.model.api.* import no.ndla.audioapi.model.domain.{Audio, AudioType} import no.ndla.audioapi.model.{api, domain} import no.ndla.audioapi.{TestData, TestEnvironment, UnitSuite} -import no.ndla.common.errors.{ValidationException, ValidationMessage} +import no.ndla.common.errors.{NotFoundException, ValidationException, ValidationMessage} import no.ndla.common.model import no.ndla.common.model.api.{Copyright, License} import no.ndla.common.model.domain.UploadedFile @@ -393,7 +393,7 @@ class WriteServiceTest extends UnitSuite with TestEnvironment { val result = writeService.updateAudio(1, updatedAudioMeta, None, testUser) result.isFailure should be(true) - result.failed.get.getMessage should equal(new NotFoundException().getMessage) + result.failed.get.getMessage should equal(NotFoundException("Audio not found").getMessage) } test("that updateAudio returns Failure when audio file validation fails") { diff --git a/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala new file mode 100644 index 000000000..a17bc22a3 --- /dev/null +++ b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala @@ -0,0 +1,3 @@ +package no.ndla.common.aws trait NdlaAWSTranscribeClient { + +} From 37edf26a0d7073e11595c87080deb100acf29e2c Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 12 Dec 2024 09:16:19 +0100 Subject: [PATCH 04/31] Add transcription client --- .../no/ndla/audioapi/ComponentRegistry.scala | 4 +- .../controller/InternController.scala | 5 +- .../no/ndla/audioapi/TestEnvironment.scala | 20 +++-- .../common/aws/NdlaAWSTranscribeClient.scala | 75 ++++++++++++++++++- project/Dependencies.scala | 4 + project/commonlib.scala | 1 + 6 files changed, 99 insertions(+), 10 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala b/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala index 906104bb7..4d055dd79 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/ComponentRegistry.scala @@ -16,7 +16,7 @@ import no.ndla.audioapi.repository.{AudioRepository, SeriesRepository} import no.ndla.audioapi.service.* import no.ndla.audioapi.service.search.* import no.ndla.common.Clock -import no.ndla.common.aws.NdlaS3Client +import no.ndla.common.aws.{NdlaAWSTranscribeClient, NdlaS3Client} import no.ndla.common.brightcove.NdlaBrightcoveClient import no.ndla.common.configuration.BaseComponentRegistry import no.ndla.database.{DBMigrator, DataSource} @@ -58,6 +58,7 @@ class ComponentRegistry(properties: AudioApiProperties) with SwaggerDocControllerConfig with NdlaS3Client with TranscriptionService + with NdlaAWSTranscribeClient with NdlaBrightcoveClient { override val props: AudioApiProperties = properties override val migrator: DBMigrator = DBMigrator( @@ -70,6 +71,7 @@ class ComponentRegistry(properties: AudioApiProperties) lazy val s3Client = new NdlaS3Client(props.StorageName, props.StorageRegion) lazy val s3TranscribeClient = new NdlaS3Client(props.TranscribeStorageName, props.TranscribeStorageRegion) lazy val brightcoveClient = new NdlaBrightcoveClient() + lazy val transcribeClient = new NdlaAWSTranscribeClient(props.TranscribeStorageRegion) lazy val audioRepository = new AudioRepository lazy val seriesRepository = new SeriesRepository diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/InternController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/InternController.scala index 6e02d4803..0848543b4 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/InternController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/InternController.scala @@ -12,11 +12,12 @@ import cats.implicits.* import io.circe.generic.auto.* import no.ndla.audioapi.Props import no.ndla.audioapi.model.api -import no.ndla.audioapi.model.api.{AudioMetaDomainDump, ErrorHandling, NotFoundException} +import no.ndla.audioapi.model.api.{AudioMetaDomainDump, ErrorHandling} import no.ndla.audioapi.model.domain.AudioMetaInformation import no.ndla.audioapi.repository.AudioRepository import no.ndla.audioapi.service.search.{AudioIndexService, SeriesIndexService, TagIndexService} import no.ndla.audioapi.service.{ConverterService, ReadService} +import no.ndla.common.errors.NotFoundException import no.ndla.network.tapir.NoNullJsonPrinter.jsonBody import no.ndla.network.tapir.TapirController import no.ndla.network.tapir.TapirUtil.errorOutputsFor @@ -116,7 +117,7 @@ trait InternController { .serverLogicPure { id => audioRepository.withId(id) match { case Some(image) => image.asRight - case None => returnLeftError(new NotFoundException(s"Could not find audio with id: '$id'")) + case None => returnLeftError(NotFoundException(s"Could not find audio with id: '$id'")) } }, endpoint.post diff --git a/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala b/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala index 5f006c814..101529331 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala @@ -15,7 +15,8 @@ import no.ndla.audioapi.repository.{AudioRepository, SeriesRepository} import no.ndla.audioapi.service.* import no.ndla.audioapi.service.search.* import no.ndla.common.Clock -import no.ndla.common.aws.NdlaS3Client +import no.ndla.common.aws.{NdlaAWSTranscribeClient, NdlaS3Client} +import no.ndla.common.brightcove.NdlaBrightcoveClient import no.ndla.database.DataSource import no.ndla.network.NdlaClient import no.ndla.network.tapir.TapirApplication @@ -51,6 +52,9 @@ trait TestEnvironment with MockitoSugar with Clock with Props + with TranscriptionService + with NdlaAWSTranscribeClient + with NdlaBrightcoveClient with ErrorHandling { override val props: AudioApiProperties = new AudioApiProperties @@ -59,15 +63,19 @@ trait TestEnvironment val audioRepository: AudioRepository = mock[AudioRepository] val seriesRepository: SeriesRepository = mock[SeriesRepository] - val s3Client: NdlaS3Client = mock[NdlaS3Client] + val s3Client: NdlaS3Client = mock[NdlaS3Client] + val brightcoveClient: NdlaBrightcoveClient = mock[NdlaBrightcoveClient] + val transcribeClient: NdlaAWSTranscribeClient = mock[NdlaAWSTranscribeClient] val ndlaClient: NdlaClient = mock[NdlaClient] val myndlaApiClient: MyNDLAApiClient = mock[MyNDLAApiClient] - val readService: ReadService = mock[ReadService] - val writeService: WriteService = mock[WriteService] - val validationService: ValidationService = mock[ValidationService] - val converterService: ConverterService = mock[ConverterService] + val readService: ReadService = mock[ReadService] + val writeService: WriteService = mock[WriteService] + val validationService: ValidationService = mock[ValidationService] + val converterService: ConverterService = mock[ConverterService] + val transcriptionService: TranscriptionService = mock[TranscriptionService] + val s3TranscribeClient: NdlaS3Client = mock[NdlaS3Client] val internController: InternController = mock[InternController] val audioApiController: AudioController = mock[AudioController] diff --git a/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala index a17bc22a3..4689207f6 100644 --- a/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala +++ b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala @@ -1,3 +1,76 @@ -package no.ndla.common.aws trait NdlaAWSTranscribeClient { +/* + * Part of NDLA common + * Copyright (C) 2024 NDLA + * + * See LICENSE + */ +package no.ndla.common.aws + +import software.amazon.awssdk.services.transcribe.{TranscribeClient, TranscribeClientBuilder} +import software.amazon.awssdk.services.transcribe.model.* + +import scala.util.{Failure, Try} + +trait NdlaAWSTranscribeClient { + val transcribeClient: NdlaAWSTranscribeClient + + class NdlaAWSTranscribeClient(region: Option[String]) { + + private val builder: TranscribeClientBuilder = TranscribeClient.builder() + + val client: TranscribeClient = region match { + case Some(value) => builder.region(software.amazon.awssdk.regions.Region.of(value)).build() + case None => builder.build() + } + + def startTranscriptionJob( + jobName: String, + mediaUri: String, + mediaFormat: String, + languageCode: String + ): Try[StartTranscriptionJobResponse] = Try { + val request = StartTranscriptionJobRequest + .builder() + .transcriptionJobName(jobName) + .media(Media.builder().mediaFileUri(mediaUri).build()) + .mediaFormat(mediaFormat) + .languageCode(languageCode) + .build() + + client.startTranscriptionJob(request) + } + + def getTranscriptionJob(jobName: String): Try[GetTranscriptionJobResponse] = { + Try { + val request = GetTranscriptionJobRequest + .builder() + .transcriptionJobName(jobName) + .build() + client.getTranscriptionJob(request) + }.recoverWith { case e: BadRequestException => + val nfe = no.ndla.common.errors.NotFoundException("Transcription job not found") + Failure(nfe.initCause(e)) + } + } + + def listTranscriptionJobs(status: Option[String] = None): Try[ListTranscriptionJobsResponse] = Try { + val requestBuilder = ListTranscriptionJobsRequest.builder() + val request = status match { + case Some(jobStatus) => requestBuilder.status(jobStatus).build() + case None => requestBuilder.build() + } + + client.listTranscriptionJobs(request) + } + + def deleteTranscriptionJob(jobName: String): Try[DeleteTranscriptionJobResponse] = Try { + val request = DeleteTranscriptionJobRequest + .builder() + .transcriptionJobName(jobName) + .build() + + client.deleteTranscriptionJob(request) + } + } } diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 42fee02c0..b15962d1d 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -65,6 +65,10 @@ object Dependencies { "software.amazon.awssdk" % "s3" % AwsSdkV ) + lazy val awsTranscribe: Seq[ModuleID] = Seq( + "software.amazon.awssdk" % "transcribe" % AwsSdkV + ) + lazy val awsCloudwatch: Seq[ModuleID] = Seq( "software.amazon.awssdk" % "cloudwatch" % AwsSdkV ) diff --git a/project/commonlib.scala b/project/commonlib.scala index 0e8dfabff..40582a16c 100644 --- a/project/commonlib.scala +++ b/project/commonlib.scala @@ -17,6 +17,7 @@ object commonlib extends Module { scalaTsi ), awsS3, + awsTranscribe, melody, tapirHttp4sCirce ) From 87f3560a8c92189bd66693201dc852e5193a8281 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 12 Dec 2024 09:18:36 +0100 Subject: [PATCH 05/31] Add Transcription endpoints --- .../controller/TranscriptionController.scala | 54 +++++++++++++++++- .../service/TranscriptionService.scala | 56 +++++++++++++++++-- .../service/TranscriptionServiceTest.scala | 27 ++++++++- 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index 55519e6f5..fefbbaa9e 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -26,6 +26,7 @@ trait TranscriptionController { .description("Extracts audio from a Brightcove video and uploads it to S3.") .in(videoId) .in(language) + .in("extract-audio") .errorOut(errorOutputsFor(400, 500)) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => @@ -42,6 +43,7 @@ trait TranscriptionController { .description("Get the status of the audio extraction from a Brightcove video.") .in(videoId) .in(language) + .in("extract-audio") .errorOut(errorOutputsFor(400, 500)) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => @@ -53,7 +55,57 @@ trait TranscriptionController { } } - override val endpoints: List[ServerEndpoint[Any, Eff]] = List(postExtractAudio, getAudioExtraction) + def postTranscription: ServerEndpoint[Any, Eff] = endpoint.post + .summary("Transcribe video") + .description("Transcribes a video to a specific language, and uploads the transcription to S3.") + .in(videoId) + .in(language) + .errorOut(errorOutputsFor(400, 500)) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => + { case (videoId, language) => + transcriptionService.transcribeVideo(videoId, language) match { + case Success(_) => Right(()) + case Failure(ex) => returnLeftError(ex) + } + } + } + + def getTranscription: ServerEndpoint[Any, Eff] = endpoint.get + .summary("Get transcription") + .description("Get the transcription of a video.") + .in(videoId) + .in(language) + .errorOut(errorOutputsFor(400, 404, 500)) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => + { case (videoId, language) => + transcriptionService.getTranscription(videoId, language) match { + case Success(_) => Right(()) + case Failure(ex: NoSuchElementException) => returnLeftError(ex) + case Failure(ex) => returnLeftError(ex) + } + } + } + + /*def postTranscriptionFormatting: ServerEndpoint[Any, Eff] = endpoint.post + .summary("Format transcription") + .description("Formats a transcription to a specific format.") + .in(videoId) + .in(language) + .errorOut(errorOutputsFor(400, 500)) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => + { case (videoId, language) => + transcriptionService.formatTranscription(videoId, language) match { + case Success(_) => Right(()) + case Failure(ex) => returnLeftError(ex) + } + } + }*/ + + override val endpoints: List[ServerEndpoint[Any, Eff]] = + List(postExtractAudio, getAudioExtraction, postTranscription, getTranscription) } } diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index 53e9af0e6..90dfb1173 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -1,7 +1,8 @@ package no.ndla.audioapi.service +import com.typesafe.scalalogging.StrictLogging import no.ndla.audioapi.Props -import no.ndla.common.aws.NdlaS3Client +import no.ndla.common.aws.{NdlaAWSTranscribeClient, NdlaS3Client} import no.ndla.common.brightcove.NdlaBrightcoveClient import sttp.client3.{HttpURLConnectionBackend, UriContext, asFile, basicRequest} import ws.schild.jave.{Encoder, MultimediaObject} @@ -11,11 +12,50 @@ import java.io.File import scala.util.{Failure, Success, Try} trait TranscriptionService { - this: NdlaS3Client & Props & NdlaBrightcoveClient => + this: NdlaS3Client & Props & NdlaBrightcoveClient & NdlaAWSTranscribeClient => val transcriptionService: TranscriptionService val s3TranscribeClient: NdlaS3Client - class TranscriptionService { + class TranscriptionService extends StrictLogging { + + def transcribeVideo(videoId: String, language: String): Try[Unit] = { + getAudioExtractionStatus(videoId, language) match { + case Success(_) => + logger.info(s"Audio already extracted for videoId: $videoId") + case Failure(_) => + logger.info(s"Audio extraction required for videoId: $videoId") + extractAudioFromVideo(videoId, language) match { + case Success(_) => + logger.info(s"Audio extracted for videoId: $videoId") + case Failure(exception) => + return Failure(new RuntimeException(s"Failed to extract audio for videoId: $videoId", exception)) + + } + } + + val audioUri = s"s3://${props.TranscribeStorageName}/audio/$language/$videoId.mp3" + logger.info(s"Transcribing audio from: $audioUri") + val jobName = s"transcription-$videoId-$language" + val mediaFormat = "mp3" + val languageCode = language + + transcribeClient.startTranscriptionJob(jobName, audioUri, mediaFormat, languageCode) match { + case Success(_) => + logger.info(s"Transcription job started for videoId: $videoId") + Success(()) + case Failure(exception) => + Failure(new RuntimeException(s"Failed to start transcription for videoId: $videoId", exception)) + } + } + + def getTranscription(videoId: String, language: String): Try[String] = { + val jobName = s"transcription-$videoId-$language" + + transcribeClient.getTranscriptionJob(jobName).map { transcriptionJobResponse => + val transcriptionJobStatus = transcriptionJobResponse.transcriptionJob().transcriptionJobStatus() + transcriptionJobStatus.toString + } + } def extractAudioFromVideo(videoId: String, language: String): Try[Unit] = { val accountId = props.BrightcoveAccountId @@ -42,10 +82,16 @@ trait TranscriptionService { encoder.encode(new MultimediaObject(videoFile), audioFile, encodingAttributes) } match { case Success(_) => - val s3Key = s"/audio/$language/${videoId}.mp3" + logger.info("dasjhkdaidashjdas") + val s3Key = s"audio/$language/$videoId.mp3" + logger.info(s"Uploading audio file to S3: $s3Key") s3TranscribeClient.putObject(s3Key, audioFile, "audio/mpeg") match { case Success(_) => - s3TranscribeClient.deleteObject(videoFile.getName).map(_ => ()) + logger.info(s"Audio file uploaded to S3: $s3Key") + for { + _ <- Try(audioFile.delete()) + _ <- Try(videoFile.delete()) + } yield () case Failure(ex) => Failure(new RuntimeException(s"Failed to upload audio file to S3.", ex)) } diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala index c15bf9f21..5d8673f25 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -1,3 +1,28 @@ -package no.ndla.audioapi.service class TranscriptionServiceTest { +package no.ndla.audioapi.service +import no.ndla.audioapi.{AudioApiProperties, TestEnvironment, UnitSuite} +import no.ndla.common.aws.NdlaS3Object +import org.mockito.ArgumentMatchers.any +import org.mockito.Mockito.when + +import scala.util.{Failure, Success, Try} + +class TranscriptionServiceTest extends UnitSuite with TestEnvironment { + override val transcriptionService: TranscriptionService = new TranscriptionService + override val brightcoveClient: NdlaBrightcoveClient = new NdlaBrightcoveClient + override val props: AudioApiProperties = new AudioApiProperties { + override val BrightcoveAccountId: String = "123" + override val BrightcoveClientId: String = "123" + override val BrightcoveClientSecret: String = "123" + } + + test("getAudioExtractionStatus returns Success when audio file exists") { + val videoId = "1" + val language = "en" + val fakeS3Object = mock[NdlaS3Object] + when(s3TranscribeClient.getObject(any)).thenReturn(Success(fakeS3Object)) + val result = transcriptionService.getAudioExtractionStatus(videoId, language) + + result should be(Success(())) + } } From 68690a5e6f76f5120ba41bb6972b49004737a0e4 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 12 Dec 2024 12:28:49 +0100 Subject: [PATCH 06/31] Add maxSpeakers and subtitles --- .../controller/TranscriptionController.scala | 23 ++++--------------- .../service/TranscriptionService.scala | 13 +++++++++-- .../common/aws/NdlaAWSTranscribeClient.scala | 23 +++++++++++++++++-- 3 files changed, 37 insertions(+), 22 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index fefbbaa9e..ea2344344 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -20,6 +20,8 @@ trait TranscriptionController { private val videoId = path[String]("videoId").description("The video id to transcribe") private val language = path[String]("language").description("The language to transcribe the video to") + private val maxSpeaker = + query[Int]("maxSpeaker").description("The maximum number of speakers in the video").default(2) def postExtractAudio: ServerEndpoint[Any, Eff] = endpoint.post .summary("Extract audio from video") @@ -60,11 +62,12 @@ trait TranscriptionController { .description("Transcribes a video to a specific language, and uploads the transcription to S3.") .in(videoId) .in(language) + .in(maxSpeaker) .errorOut(errorOutputsFor(400, 500)) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => - { case (videoId, language) => - transcriptionService.transcribeVideo(videoId, language) match { + { case (videoId, language, maxSpeakerOpt) => + transcriptionService.transcribeVideo(videoId, language, maxSpeakerOpt) match { case Success(_) => Right(()) case Failure(ex) => returnLeftError(ex) } @@ -88,22 +91,6 @@ trait TranscriptionController { } } - /*def postTranscriptionFormatting: ServerEndpoint[Any, Eff] = endpoint.post - .summary("Format transcription") - .description("Formats a transcription to a specific format.") - .in(videoId) - .in(language) - .errorOut(errorOutputsFor(400, 500)) - .requirePermission(DRAFT_API_WRITE) - .serverLogicPure { _ => - { case (videoId, language) => - transcriptionService.formatTranscription(videoId, language) match { - case Success(_) => Right(()) - case Failure(ex) => returnLeftError(ex) - } - } - }*/ - override val endpoints: List[ServerEndpoint[Any, Eff]] = List(postExtractAudio, getAudioExtraction, postTranscription, getTranscription) } diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index 90dfb1173..d197cc1c2 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -18,7 +18,7 @@ trait TranscriptionService { class TranscriptionService extends StrictLogging { - def transcribeVideo(videoId: String, language: String): Try[Unit] = { + def transcribeVideo(videoId: String, language: String, maxSpeakers: Int): Try[Unit] = { getAudioExtractionStatus(videoId, language) match { case Success(_) => logger.info(s"Audio already extracted for videoId: $videoId") @@ -37,9 +37,18 @@ trait TranscriptionService { logger.info(s"Transcribing audio from: $audioUri") val jobName = s"transcription-$videoId-$language" val mediaFormat = "mp3" + val outputKey = s"transcription/$language/$videoId" val languageCode = language - transcribeClient.startTranscriptionJob(jobName, audioUri, mediaFormat, languageCode) match { + transcribeClient.startTranscriptionJob( + jobName, + audioUri, + mediaFormat, + languageCode, + props.TranscribeStorageName, + outputKey, + maxSpeakers + ) match { case Success(_) => logger.info(s"Transcription job started for videoId: $videoId") Success(()) diff --git a/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala index 4689207f6..3ba02563f 100644 --- a/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala +++ b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala @@ -7,8 +7,8 @@ package no.ndla.common.aws -import software.amazon.awssdk.services.transcribe.{TranscribeClient, TranscribeClientBuilder} import software.amazon.awssdk.services.transcribe.model.* +import software.amazon.awssdk.services.transcribe.{TranscribeClient, TranscribeClientBuilder} import scala.util.{Failure, Try} @@ -28,7 +28,11 @@ trait NdlaAWSTranscribeClient { jobName: String, mediaUri: String, mediaFormat: String, - languageCode: String + languageCode: String, + outputBucket: String, + outputKey: String, + maxSpeakers: Int, + outputSubtitleFormat: String = "VTT" ): Try[StartTranscriptionJobResponse] = Try { val request = StartTranscriptionJobRequest .builder() @@ -36,6 +40,21 @@ trait NdlaAWSTranscribeClient { .media(Media.builder().mediaFileUri(mediaUri).build()) .mediaFormat(mediaFormat) .languageCode(languageCode) + .outputBucketName(outputBucket) + .outputKey(outputKey) + .settings( + Settings + .builder() + .showSpeakerLabels(true) + .maxSpeakerLabels(maxSpeakers) + .build() + ) + .subtitles( + Subtitles + .builder() + .formats(SubtitleFormat.valueOf(outputSubtitleFormat)) + .build() + ) .build() client.startTranscriptionJob(request) From 117883855463b9387813c7ce898f67fb565564cb Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 12 Dec 2024 13:44:59 +0100 Subject: [PATCH 07/31] Fix test coverage for transcription service --- .../no/ndla/audioapi/TestEnvironment.scala | 1 - .../scala/no/ndla/audioapi/UnitSuite.scala | 3 ++ .../service/TranscriptionServiceTest.scala | 40 +++++++++++++++++++ 3 files changed, 43 insertions(+), 1 deletion(-) diff --git a/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala b/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala index 101529331..a20e8992a 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/TestEnvironment.scala @@ -59,7 +59,6 @@ trait TestEnvironment override val props: AudioApiProperties = new AudioApiProperties val dataSource: HikariDataSource = mock[HikariDataSource] - val storageName: String = props.StorageName val audioRepository: AudioRepository = mock[AudioRepository] val seriesRepository: SeriesRepository = mock[SeriesRepository] diff --git a/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala b/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala index e06de1ad8..76d6f9edf 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala @@ -24,4 +24,7 @@ trait UnitSuite extends UnitTestSuite with PrivateMethodTester { setPropEnv("SEARCH_REGION", "some-region") setPropEnv("RUN_WITH_SIGNED_SEARCH_REQUESTS", "false") setPropEnv("SEARCH_INDEX_NAME", "audio-integration-test-index") + setPropEnv("BRIGHTCOVE_API_CLIENT_ID", "client-id") + setPropEnv("BRIGHTCOVE_API_CLIENT_SECRET", "client") + setPropEnv("NDLA_BRIGHTCOVE_ACCOUNT_ID", "312532") } diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala index 5d8673f25..2976f15c1 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -4,6 +4,12 @@ import no.ndla.audioapi.{AudioApiProperties, TestEnvironment, UnitSuite} import no.ndla.common.aws.NdlaS3Object import org.mockito.ArgumentMatchers.any import org.mockito.Mockito.when +import software.amazon.awssdk.services.transcribe.model.{ + GetTranscriptionJobResponse, + StartTranscriptionJobResponse, + TranscriptionJob, + TranscriptionJobStatus +} import scala.util.{Failure, Success, Try} @@ -25,4 +31,38 @@ class TranscriptionServiceTest extends UnitSuite with TestEnvironment { result should be(Success(())) } + + test("getTranscription returns status of a transcription") { + val videoId = "1" + val language = "en" + val fakeS3Object = mock[NdlaS3Object] + val fakeTranscribeResponse = mock[GetTranscriptionJobResponse] + val fakeJob = mock[TranscriptionJob] + val fakeJobStatus = mock[TranscriptionJobStatus] + when(s3TranscribeClient.getObject(any)).thenReturn(Success(fakeS3Object)) + + when(fakeJob.transcriptionJobStatus()).thenReturn(fakeJobStatus) + when(fakeTranscribeResponse.transcriptionJob()).thenReturn(fakeJob) + when(transcribeClient.getTranscriptionJob(any)).thenReturn(Success(fakeTranscribeResponse)) + + val result = transcriptionService.getTranscription(videoId, language) + + result should be(Success(fakeJobStatus.toString)) + } + + test("transcribeVideo returns Success when transcription is started") { + val videoId = "1" + val language = "no-NO" + val maxSpeakers = 2 + val fakeS3Object = mock[NdlaS3Object] + val fakeTranscribeMock = mock[StartTranscriptionJobResponse] + when(s3TranscribeClient.getObject(any)).thenReturn(Success(fakeS3Object)) + when(transcriptionService.getAudioExtractionStatus(videoId, language)).thenReturn(Success(())) + when(transcribeClient.startTranscriptionJob(any, any, any, any, any, any, any, any)) + .thenReturn(Success(fakeTranscribeMock)) + val result = transcriptionService.transcribeVideo(videoId, language, maxSpeakers) + + result should be(Success(())) + } + } From 461940ce99f4429038f66692a074eda2f592dd7c Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 12 Dec 2024 13:49:35 +0100 Subject: [PATCH 08/31] remove unused import --- .../no/ndla/audioapi/service/TranscriptionServiceTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala index 2976f15c1..d8d1ce258 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -11,7 +11,7 @@ import software.amazon.awssdk.services.transcribe.model.{ TranscriptionJobStatus } -import scala.util.{Failure, Success, Try} +import scala.util.Success class TranscriptionServiceTest extends UnitSuite with TestEnvironment { override val transcriptionService: TranscriptionService = new TranscriptionService From 2337237cb7912acaadc7f109c800f9b16f016666 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Fri, 13 Dec 2024 08:14:50 +0100 Subject: [PATCH 09/31] Add 400 if job already exist --- .../controller/TranscriptionController.scala | 5 ++++- .../scala/no/ndla/audioapi/model/api/Error.scala | 2 ++ .../ndla/audioapi/service/TranscriptionService.scala | 12 ++++++++++++ .../no/ndla/network/tapir/TapirErrorHandling.scala | 1 + 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index ea2344344..e11967e30 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -1,6 +1,7 @@ package no.ndla.audioapi.controller import no.ndla.audioapi.Props +import no.ndla.audioapi.model.api.JobAlreadyFoundException import no.ndla.audioapi.service.{ReadService, TranscriptionService} import no.ndla.network.tapir.TapirController import no.ndla.network.tapir.TapirUtil.errorOutputsFor @@ -68,7 +69,9 @@ trait TranscriptionController { .serverLogicPure { _ => { case (videoId, language, maxSpeakerOpt) => transcriptionService.transcribeVideo(videoId, language, maxSpeakerOpt) match { - case Success(_) => Right(()) + case Success(_) => Right(()) + case Failure(ex: JobAlreadyFoundException) => + returnLeftError(ex) case Failure(ex) => returnLeftError(ex) } } diff --git a/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala b/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala index 9ff4fad79..2548d19af 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/model/api/Error.scala @@ -55,6 +55,7 @@ trait ErrorHandling extends TapirErrorHandling { if rf.error.rootCause .exists(x => x.`type` == "search_context_missing_exception" || x.reason == "Cannot parse scroll id") => invalidSearchContext + case jafe: JobAlreadyFoundException => ErrorBody(JOB_ALREADY_FOUND, jafe.getMessage, clock.now(), 400) } } @@ -64,3 +65,4 @@ case class CouldNotFindLanguageException(message: String) extends RuntimeExcepti class AudioStorageException(message: String) extends RuntimeException(message) class LanguageMappingException(message: String) extends RuntimeException(message) class ImportException(message: String) extends RuntimeException(message) +case class JobAlreadyFoundException(message: String) extends RuntimeException(message) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index d197cc1c2..3eb2e7b5c 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -2,6 +2,7 @@ package no.ndla.audioapi.service import com.typesafe.scalalogging.StrictLogging import no.ndla.audioapi.Props +import no.ndla.audioapi.model.api.JobAlreadyFoundException import no.ndla.common.aws.{NdlaAWSTranscribeClient, NdlaS3Client} import no.ndla.common.brightcove.NdlaBrightcoveClient import sttp.client3.{HttpURLConnectionBackend, UriContext, asFile, basicRequest} @@ -19,6 +20,17 @@ trait TranscriptionService { class TranscriptionService extends StrictLogging { def transcribeVideo(videoId: String, language: String, maxSpeakers: Int): Try[Unit] = { + getTranscription(videoId, language) match { + case Success(status) if status == "COMPLETED" => + logger.info(s"Transcription already completed for videoId: $videoId") + return Failure(new JobAlreadyFoundException(s"Transcription already completed for videoId: $videoId")) + case Success(status) if status == "IN_PROGRESS" => + logger.info(s"Transcription already in progress for videoId: $videoId") + return Failure(new JobAlreadyFoundException(s"Transcription already in progress for videoId: $videoId")) + case _ => + logger.info(s"No existing transcription job for videoId: $videoId") + } + getAudioExtractionStatus(videoId, language) match { case Success(_) => logger.info(s"Audio already extracted for videoId: $videoId") diff --git a/network/src/main/scala/no/ndla/network/tapir/TapirErrorHandling.scala b/network/src/main/scala/no/ndla/network/tapir/TapirErrorHandling.scala index 9c7fb791a..066b249b5 100644 --- a/network/src/main/scala/no/ndla/network/tapir/TapirErrorHandling.scala +++ b/network/src/main/scala/no/ndla/network/tapir/TapirErrorHandling.scala @@ -78,6 +78,7 @@ trait TapirErrorHandling extends StrictLogging { val VALIDATION = "VALIDATION_ERROR" val METHOD_NOT_ALLOWED = "METHOD_NOT_ALLOWED" val CONFLICT = "CONFLICT" + val JOB_ALREADY_FOUND = "JOB_ALREADY_FOUND" val PARAMETER_MISSING = "PARAMETER MISSING" val PROVIDER_NOT_SUPPORTED = "PROVIDER NOT SUPPORTED" From 5cdb3310b26cc0c5045ddff51f69f3779d51bbfa Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Fri, 13 Dec 2024 08:30:01 +0100 Subject: [PATCH 10/31] Clean up folder names and logs --- .../no/ndla/audioapi/service/TranscriptionService.scala | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index 3eb2e7b5c..7d6d463f9 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -45,7 +45,7 @@ trait TranscriptionService { } } - val audioUri = s"s3://${props.TranscribeStorageName}/audio/$language/$videoId.mp3" + val audioUri = s"s3://${props.TranscribeStorageName}/audio-extraction/$language/$videoId.mp3" logger.info(s"Transcribing audio from: $audioUri") val jobName = s"transcription-$videoId-$language" val mediaFormat = "mp3" @@ -103,8 +103,7 @@ trait TranscriptionService { encoder.encode(new MultimediaObject(videoFile), audioFile, encodingAttributes) } match { case Success(_) => - logger.info("dasjhkdaidashjdas") - val s3Key = s"audio/$language/$videoId.mp3" + val s3Key = s"audio-extraction/$language/$videoId.mp3" logger.info(s"Uploading audio file to S3: $s3Key") s3TranscribeClient.putObject(s3Key, audioFile, "audio/mpeg") match { case Success(_) => @@ -117,12 +116,11 @@ trait TranscriptionService { Failure(new RuntimeException(s"Failed to upload audio file to S3.", ex)) } case Failure(exception) => Failure(exception) - } } def getAudioExtractionStatus(videoId: String, language: String): Try[Unit] = { - s3TranscribeClient.getObject(s"audio/$language/${videoId}.mp3") match { + s3TranscribeClient.getObject(s"audio-extraction/$language/${videoId}.mp3") match { case Success(_) => Success(()) case Failure(exception) => Failure(exception) } From d6e64b2b24fdac56b8c861437d9ca5f456849787 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Fri, 13 Dec 2024 10:02:48 +0100 Subject: [PATCH 11/31] Update descriptions of transcription endpoints --- .../no/ndla/audioapi/controller/TranscriptionController.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index e11967e30..f0068b747 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -60,7 +60,7 @@ trait TranscriptionController { def postTranscription: ServerEndpoint[Any, Eff] = endpoint.post .summary("Transcribe video") - .description("Transcribes a video to a specific language, and uploads the transcription to S3.") + .description("Transcribes a video and uploads the transcription to S3.") .in(videoId) .in(language) .in(maxSpeaker) @@ -78,7 +78,7 @@ trait TranscriptionController { } def getTranscription: ServerEndpoint[Any, Eff] = endpoint.get - .summary("Get transcription") + .summary("Get the transcription status of a video") .description("Get the transcription of a video.") .in(videoId) .in(language) From 47834f40fbe6f366f93f450874c83e78ef117bbf Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 16 Dec 2024 09:41:07 +0100 Subject: [PATCH 12/31] Update function to send subtitles as response if Get returns 200 --- .../controller/TranscriptionController.scala | 9 +++-- .../service/TranscriptionService.scala | 34 ++++++++++++++----- 2 files changed, 33 insertions(+), 10 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index f0068b747..d675ec62e 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -6,9 +6,11 @@ import no.ndla.audioapi.service.{ReadService, TranscriptionService} import no.ndla.network.tapir.TapirController import no.ndla.network.tapir.TapirUtil.errorOutputsFor import no.ndla.network.tapir.auth.Permission.DRAFT_API_WRITE +import software.amazon.awssdk.services.transcribe.model.TranscriptionJobStatus import sttp.tapir.server.ServerEndpoint import sttp.tapir.{EndpointInput, endpoint, path} import sttp.tapir.* +import sttp.tapir.json.circe.jsonBody import scala.util.{Failure, Success} trait TranscriptionController { @@ -82,12 +84,15 @@ trait TranscriptionController { .description("Get the transcription of a video.") .in(videoId) .in(language) - .errorOut(errorOutputsFor(400, 404, 500)) + .errorOut(errorOutputsFor(400, 404, 405, 500)) + .out(stringBody) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => { case (videoId, language) => transcriptionService.getTranscription(videoId, language) match { - case Success(_) => Right(()) + case Success(Right(transcriptionContent)) => Right(transcriptionContent) + case Success(Left(jobStatus)) => + Right(jobStatus.toString) case Failure(ex: NoSuchElementException) => returnLeftError(ex) case Failure(ex) => returnLeftError(ex) } diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index 7d6d463f9..aeb07ec4c 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -21,12 +21,14 @@ trait TranscriptionService { def transcribeVideo(videoId: String, language: String, maxSpeakers: Int): Try[Unit] = { getTranscription(videoId, language) match { - case Success(status) if status == "COMPLETED" => + case Success(Right(string)) => logger.info(s"Transcription already completed for videoId: $videoId") return Failure(new JobAlreadyFoundException(s"Transcription already completed for videoId: $videoId")) - case Success(status) if status == "IN_PROGRESS" => + case Success(Left("IN_PROGRESS")) => logger.info(s"Transcription already in progress for videoId: $videoId") return Failure(new JobAlreadyFoundException(s"Transcription already in progress for videoId: $videoId")) + case Success(Left(_)) => + logger.info(s"Error occurred while checking transcription status for videoId") case _ => logger.info(s"No existing transcription job for videoId: $videoId") } @@ -47,7 +49,7 @@ trait TranscriptionService { val audioUri = s"s3://${props.TranscribeStorageName}/audio-extraction/$language/$videoId.mp3" logger.info(s"Transcribing audio from: $audioUri") - val jobName = s"transcription-$videoId-$language" + val jobName = s"transcribe-$videoId-$language" val mediaFormat = "mp3" val outputKey = s"transcription/$language/$videoId" val languageCode = language @@ -69,12 +71,28 @@ trait TranscriptionService { } } - def getTranscription(videoId: String, language: String): Try[String] = { - val jobName = s"transcription-$videoId-$language" + def getTranscription( + videoId: String, + language: String, + subtitles: Boolean = true + ): Try[Either[String, String]] = { + val jobName = s"transcribe-$videoId-$language" - transcribeClient.getTranscriptionJob(jobName).map { transcriptionJobResponse => - val transcriptionJobStatus = transcriptionJobResponse.transcriptionJob().transcriptionJobStatus() - transcriptionJobStatus.toString + transcribeClient.getTranscriptionJob(jobName).flatMap { transcriptionJobResponse => + val transcriptionJob = transcriptionJobResponse.transcriptionJob() + val transcriptionJobStatus = transcriptionJob.transcriptionJobStatus().toString + + if (transcriptionJobStatus == "COMPLETED") { + val transcribeUri = s"transcription/$language/${videoId}" + (if (subtitles) ".vtt" else "") + + s3TranscribeClient.getObject(transcribeUri).map { s3Object => + val content = scala.io.Source.fromInputStream(s3Object.stream).mkString + s3Object.stream.close() + Right(content) + } + } else { + Success(Left(transcriptionJobStatus)) + } } } From 493e7f137dd15921757ce4cbc3424157da54044d Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 16 Dec 2024 09:53:40 +0100 Subject: [PATCH 13/31] Remove unused imports --- .../no/ndla/audioapi/controller/TranscriptionController.scala | 2 -- 1 file changed, 2 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index d675ec62e..52025f04c 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -6,11 +6,9 @@ import no.ndla.audioapi.service.{ReadService, TranscriptionService} import no.ndla.network.tapir.TapirController import no.ndla.network.tapir.TapirUtil.errorOutputsFor import no.ndla.network.tapir.auth.Permission.DRAFT_API_WRITE -import software.amazon.awssdk.services.transcribe.model.TranscriptionJobStatus import sttp.tapir.server.ServerEndpoint import sttp.tapir.{EndpointInput, endpoint, path} import sttp.tapir.* -import sttp.tapir.json.circe.jsonBody import scala.util.{Failure, Success} trait TranscriptionController { From 0342b7fd24936b5e79c494435fab78140deefa36 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 16 Dec 2024 10:25:25 +0100 Subject: [PATCH 14/31] Add json return for transcription results --- .../controller/TranscriptionController.scala | 11 +++++++---- .../model/api/TranscriptionResultDTO.scala | 15 +++++++++++++++ .../audioapi/service/TranscriptionService.scala | 5 ++--- 3 files changed, 24 insertions(+), 7 deletions(-) create mode 100644 audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index 52025f04c..881d7d9a9 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -1,14 +1,16 @@ package no.ndla.audioapi.controller import no.ndla.audioapi.Props -import no.ndla.audioapi.model.api.JobAlreadyFoundException +import no.ndla.audioapi.model.api.{JobAlreadyFoundException, TranscriptionResultDTO} import no.ndla.audioapi.service.{ReadService, TranscriptionService} +import no.ndla.network.tapir.NoNullJsonPrinter.jsonBody import no.ndla.network.tapir.TapirController import no.ndla.network.tapir.TapirUtil.errorOutputsFor import no.ndla.network.tapir.auth.Permission.DRAFT_API_WRITE import sttp.tapir.server.ServerEndpoint import sttp.tapir.{EndpointInput, endpoint, path} import sttp.tapir.* +import sttp.tapir.generic.auto.schemaForCaseClass import scala.util.{Failure, Success} trait TranscriptionController { @@ -83,14 +85,15 @@ trait TranscriptionController { .in(videoId) .in(language) .errorOut(errorOutputsFor(400, 404, 405, 500)) - .out(stringBody) + .out(jsonBody[TranscriptionResultDTO]) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => { case (videoId, language) => transcriptionService.getTranscription(videoId, language) match { - case Success(Right(transcriptionContent)) => Right(transcriptionContent) + case Success(Right(transcriptionContent)) => + Right(TranscriptionResultDTO("COMPLETED", Some(transcriptionContent))) case Success(Left(jobStatus)) => - Right(jobStatus.toString) + Right(TranscriptionResultDTO(jobStatus.toString, None)) case Failure(ex: NoSuchElementException) => returnLeftError(ex) case Failure(ex) => returnLeftError(ex) } diff --git a/audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala b/audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala new file mode 100644 index 000000000..106abc93c --- /dev/null +++ b/audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala @@ -0,0 +1,15 @@ +package no.ndla.audioapi.model.api + +import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} +import io.circe.{Decoder, Encoder} +import sttp.tapir.Schema.annotations.description + +@description("The result of a transcription job") +case class TranscriptionResultDTO( + @description("The status of the transcription job") status: String, + @description("The transcription of the audio") transcription: Option[String] +) +object TranscriptionResultDTO { + implicit val encoder: Encoder[TranscriptionResultDTO] = deriveEncoder + implicit val decoder: Decoder[TranscriptionResultDTO] = deriveDecoder +} diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index aeb07ec4c..aa12a5ae0 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -73,8 +73,7 @@ trait TranscriptionService { def getTranscription( videoId: String, - language: String, - subtitles: Boolean = true + language: String ): Try[Either[String, String]] = { val jobName = s"transcribe-$videoId-$language" @@ -83,7 +82,7 @@ trait TranscriptionService { val transcriptionJobStatus = transcriptionJob.transcriptionJobStatus().toString if (transcriptionJobStatus == "COMPLETED") { - val transcribeUri = s"transcription/$language/${videoId}" + (if (subtitles) ".vtt" else "") + val transcribeUri = s"transcription/$language/${videoId}.vtt" s3TranscribeClient.getObject(transcribeUri).map { s3Object => val content = scala.io.Source.fromInputStream(s3Object.stream).mkString From 7d7e95eee11d10ff6c0b09c1af1661ff25e8865a Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 16 Dec 2024 10:34:45 +0100 Subject: [PATCH 15/31] Fix syntax error --- .../scala/no/ndla/audioapi/service/TranscriptionService.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index aa12a5ae0..acb5dfc6c 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -21,7 +21,7 @@ trait TranscriptionService { def transcribeVideo(videoId: String, language: String, maxSpeakers: Int): Try[Unit] = { getTranscription(videoId, language) match { - case Success(Right(string)) => + case Success(Right(_)) => logger.info(s"Transcription already completed for videoId: $videoId") return Failure(new JobAlreadyFoundException(s"Transcription already completed for videoId: $videoId")) case Success(Left("IN_PROGRESS")) => From e0109cd059b61a396aa83fcaff22c4afe9801cf0 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 16 Dec 2024 11:35:06 +0100 Subject: [PATCH 16/31] Add mock for getting transcription results --- .../no/ndla/audioapi/service/TranscriptionServiceTest.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala index d8d1ce258..34f5f9080 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -47,7 +47,7 @@ class TranscriptionServiceTest extends UnitSuite with TestEnvironment { val result = transcriptionService.getTranscription(videoId, language) - result should be(Success(fakeJobStatus.toString)) + result should be(Success(Left(fakeJobStatus.toString))) } test("transcribeVideo returns Success when transcription is started") { @@ -56,6 +56,7 @@ class TranscriptionServiceTest extends UnitSuite with TestEnvironment { val maxSpeakers = 2 val fakeS3Object = mock[NdlaS3Object] val fakeTranscribeMock = mock[StartTranscriptionJobResponse] + when(transcribeClient.getTranscriptionJob(any)).thenReturn(Success(mock[GetTranscriptionJobResponse])) when(s3TranscribeClient.getObject(any)).thenReturn(Success(fakeS3Object)) when(transcriptionService.getAudioExtractionStatus(videoId, language)).thenReturn(Success(())) when(transcribeClient.startTranscriptionJob(any, any, any, any, any, any, any, any)) From 850dbd9e116fe4dc0d0949708b29dba15e673ca2 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Tue, 17 Dec 2024 10:23:59 +0100 Subject: [PATCH 17/31] add a temp audio endpoint --- .../no/ndla/audioapi/AudioApiProperties.scala | 2 +- .../controller/TranscriptionController.scala | 60 ++++++++++++++++-- .../service/TranscriptionService.scala | 61 ++++++++++++++++++- .../service/TranscriptionServiceTest.scala | 2 +- 4 files changed, 117 insertions(+), 8 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala index 3430227d1..fb8571450 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala @@ -34,7 +34,7 @@ class AudioApiProperties extends BaseProps with DatabaseProps with StrictLogging val MaxAudioFileSizeBytes: Int = 1024 * 1024 * 100 // 100 MiB - val StorageName: String = propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla") + val StorageName: String = "test.audio.2.ndla" // propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla") val StorageRegion: Option[String] = propOrNone("AUDIO_FILE_S3_BUCKET_REGION") val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET_NAME", s"$Environment.transcribe.ndla") diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index 881d7d9a9..552daa531 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -21,10 +21,12 @@ trait TranscriptionController { override val serviceName: String = "transcription" override val prefix: EndpointInput[Unit] = "audio-api" / "v1" / serviceName - private val videoId = path[String]("videoId").description("The video id to transcribe") - private val language = path[String]("language").description("The language to transcribe the video to") + private val videoId = path[String]("videoId").description("The video id to transcribe") + private val audioName = path[String]("audioName").description("The audio name to transcribe") + private val language = path[String]("language").description("The language to transcribe the video to") private val maxSpeaker = query[Int]("maxSpeaker").description("The maximum number of speakers in the video").default(2) + private val format = query[String]("format").description("The format of the audio file").default("mp3") def postExtractAudio: ServerEndpoint[Any, Eff] = endpoint.post .summary("Extract audio from video") @@ -89,7 +91,50 @@ trait TranscriptionController { .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => { case (videoId, language) => - transcriptionService.getTranscription(videoId, language) match { + transcriptionService.getVideoTranscription(videoId, language) match { + case Success(Right(transcriptionContent)) => + Right(TranscriptionResultDTO("COMPLETED", Some(transcriptionContent))) + case Success(Left(jobStatus)) => + Right(TranscriptionResultDTO(jobStatus.toString, None)) + case Failure(ex: NoSuchElementException) => returnLeftError(ex) + case Failure(ex) => returnLeftError(ex) + } + } + } + + def postAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.post + .summary("Transcribe audio") + .description("Transcribes a video and uploads the transcription to S3.") + .in(audioName) + .in(language) + .in(maxSpeaker) + .in(format) + .in("audio") + .errorOut(errorOutputsFor(400, 500)) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => + { case (videoId, language, maxSpeakerOpt, format) => + transcriptionService.transcribeAudio(videoId, language, maxSpeakerOpt, format) match { + case Success(_) => Right(()) + case Failure(ex: JobAlreadyFoundException) => + returnLeftError(ex) + case Failure(ex) => returnLeftError(ex) + } + } + } + + def getAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.get + .summary("Get the transcription status of a video") + .description("Get the transcription of a video.") + .in(audioName) + .in(language) + .in("audio") + .errorOut(errorOutputsFor(400, 404, 405, 500)) + .out(jsonBody[TranscriptionResultDTO]) + .requirePermission(DRAFT_API_WRITE) + .serverLogicPure { _ => + { case (videoId, language) => + transcriptionService.getAudioTranscription(videoId, language) match { case Success(Right(transcriptionContent)) => Right(TranscriptionResultDTO("COMPLETED", Some(transcriptionContent))) case Success(Left(jobStatus)) => @@ -101,7 +146,14 @@ trait TranscriptionController { } override val endpoints: List[ServerEndpoint[Any, Eff]] = - List(postExtractAudio, getAudioExtraction, postTranscription, getTranscription) + List( + postExtractAudio, + getAudioExtraction, + postTranscription, + getTranscription, + postAudioTranscription, + getAudioTranscription + ) } } diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index acb5dfc6c..9379a6bf8 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -20,7 +20,7 @@ trait TranscriptionService { class TranscriptionService extends StrictLogging { def transcribeVideo(videoId: String, language: String, maxSpeakers: Int): Try[Unit] = { - getTranscription(videoId, language) match { + getVideoTranscription(videoId, language) match { case Success(Right(_)) => logger.info(s"Transcription already completed for videoId: $videoId") return Failure(new JobAlreadyFoundException(s"Transcription already completed for videoId: $videoId")) @@ -71,7 +71,7 @@ trait TranscriptionService { } } - def getTranscription( + def getVideoTranscription( videoId: String, language: String ): Try[Either[String, String]] = { @@ -95,6 +95,63 @@ trait TranscriptionService { } } + def transcribeAudio(audioName: String, language: String, maxSpeakers: Int, format: String): Try[Unit] = { + getVideoTranscription(audioName, language) match { + case Success(Right(_)) => + logger.info(s"Transcription already completed for audio: $audioName") + return Failure(new JobAlreadyFoundException(s"Transcription already completed for audio: $audioName")) + case Success(Left("IN_PROGRESS")) => + logger.info(s"Transcription already in progress for videoId: $audioName") + return Failure(new JobAlreadyFoundException(s"Transcription already in progress for audio: $audioName")) + case Success(Left(_)) => + logger.info(s"Error occurred while checking transcription status for audio") + case _ => + logger.info(s"No existing transcription job for audio name: $audioName") + } + val audioUri = s"s3://${props.StorageName}/$audioName.mp3" + logger.info(s"Transcribing audio from: $audioUri") + val jobName = s"transcribe-$audioName-$language" + val mediaFormat = format + val outputKey = s"audio-transcription/$language/$audioName" + val languageCode = language + + transcribeClient.startTranscriptionJob( + jobName, + audioUri, + mediaFormat, + languageCode, + props.TranscribeStorageName, + outputKey, + maxSpeakers + ) match { + case Success(_) => + logger.info(s"Transcription job started for audio: $audioName") + Success(()) + case Failure(exception) => + Failure(new RuntimeException(s"Failed to start transcription for audio file: $audioName", exception)) + } + } + + def getAudioTranscription(audioName: String, language: String): Try[Either[String, String]] = { + val jobName = s"transcribe-$audioName-$language" + + transcribeClient.getTranscriptionJob(jobName).flatMap { transcriptionJobResponse => + val transcriptionJob = transcriptionJobResponse.transcriptionJob() + val transcriptionJobStatus = transcriptionJob.transcriptionJobStatus().toString + + if (transcriptionJobStatus == "COMPLETED") { + val transcribeUri = s"audio-transcription/$language/${audioName}" + + s3TranscribeClient.getObject(transcribeUri).map { s3Object => + val content = scala.io.Source.fromInputStream(s3Object.stream).mkString + s3Object.stream.close() + Right(content) + } + } else { + Success(Left(transcriptionJobStatus)) + } + } + } def extractAudioFromVideo(videoId: String, language: String): Try[Unit] = { val accountId = props.BrightcoveAccountId val videoUrl = getVideo(accountId, videoId) match { diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala index 34f5f9080..95c2c79c9 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -45,7 +45,7 @@ class TranscriptionServiceTest extends UnitSuite with TestEnvironment { when(fakeTranscribeResponse.transcriptionJob()).thenReturn(fakeJob) when(transcribeClient.getTranscriptionJob(any)).thenReturn(Success(fakeTranscribeResponse)) - val result = transcriptionService.getTranscription(videoId, language) + val result = transcriptionService.getVideoTranscription(videoId, language) result should be(Success(Left(fakeJobStatus.toString))) } From 9fb082ae583d470c245b9a81b33f97a7ad483cd5 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Tue, 17 Dec 2024 10:25:01 +0100 Subject: [PATCH 18/31] fix audioprop --- .../src/main/scala/no/ndla/audioapi/AudioApiProperties.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala index fb8571450..3430227d1 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala @@ -34,7 +34,7 @@ class AudioApiProperties extends BaseProps with DatabaseProps with StrictLogging val MaxAudioFileSizeBytes: Int = 1024 * 1024 * 100 // 100 MiB - val StorageName: String = "test.audio.2.ndla" // propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla") + val StorageName: String = propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla") val StorageRegion: Option[String] = propOrNone("AUDIO_FILE_S3_BUCKET_REGION") val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET_NAME", s"$Environment.transcribe.ndla") From 60efcd712fbc9cbd7f56d4eb0bcd2744b9323b96 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Wed, 18 Dec 2024 07:53:17 +0100 Subject: [PATCH 19/31] fix audio endpoints --- .../controller/TranscriptionController.scala | 15 ++++++----- .../service/TranscriptionService.scala | 27 +++++++++++-------- project/audioapi.scala | 3 ++- 3 files changed, 27 insertions(+), 18 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index 552daa531..97ddb368c 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -23,6 +23,7 @@ trait TranscriptionController { private val videoId = path[String]("videoId").description("The video id to transcribe") private val audioName = path[String]("audioName").description("The audio name to transcribe") + private val audioId = path[Long]("audioId").description("The audio id to transcribe") private val language = path[String]("language").description("The language to transcribe the video to") private val maxSpeaker = query[Int]("maxSpeaker").description("The maximum number of speakers in the video").default(2) @@ -105,16 +106,17 @@ trait TranscriptionController { def postAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.post .summary("Transcribe audio") .description("Transcribes a video and uploads the transcription to S3.") + .in("audio") .in(audioName) + .in(audioId) .in(language) .in(maxSpeaker) .in(format) - .in("audio") .errorOut(errorOutputsFor(400, 500)) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => - { case (videoId, language, maxSpeakerOpt, format) => - transcriptionService.transcribeAudio(videoId, language, maxSpeakerOpt, format) match { + { case (audioName, audioId, language, maxSpeakerOpt, format) => + transcriptionService.transcribeAudio(audioName, audioId, language, maxSpeakerOpt, format) match { case Success(_) => Right(()) case Failure(ex: JobAlreadyFoundException) => returnLeftError(ex) @@ -126,15 +128,16 @@ trait TranscriptionController { def getAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.get .summary("Get the transcription status of a video") .description("Get the transcription of a video.") + .in("audio") .in(audioName) + .in(audioId) .in(language) - .in("audio") .errorOut(errorOutputsFor(400, 404, 405, 500)) .out(jsonBody[TranscriptionResultDTO]) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => - { case (videoId, language) => - transcriptionService.getAudioTranscription(videoId, language) match { + { case (audioName, audioId, language) => + transcriptionService.getAudioTranscription(audioName, audioId, language) match { case Success(Right(transcriptionContent)) => Right(TranscriptionResultDTO("COMPLETED", Some(transcriptionContent))) case Success(Left(jobStatus)) => diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index 9379a6bf8..afb4ab9fc 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -49,7 +49,7 @@ trait TranscriptionService { val audioUri = s"s3://${props.TranscribeStorageName}/audio-extraction/$language/$videoId.mp3" logger.info(s"Transcribing audio from: $audioUri") - val jobName = s"transcribe-$videoId-$language" + val jobName = s"transcribe-video-$videoId-$language" val mediaFormat = "mp3" val outputKey = s"transcription/$language/$videoId" val languageCode = language @@ -75,7 +75,7 @@ trait TranscriptionService { videoId: String, language: String ): Try[Either[String, String]] = { - val jobName = s"transcribe-$videoId-$language" + val jobName = s"transcribe-video-$videoId-$language" transcribeClient.getTranscriptionJob(jobName).flatMap { transcriptionJobResponse => val transcriptionJob = transcriptionJobResponse.transcriptionJob() @@ -95,8 +95,14 @@ trait TranscriptionService { } } - def transcribeAudio(audioName: String, language: String, maxSpeakers: Int, format: String): Try[Unit] = { - getVideoTranscription(audioName, language) match { + def transcribeAudio( + audioName: String, + audioId: Long, + language: String, + maxSpeakers: Int, + format: String + ): Try[Unit] = { + getAudioTranscription(audioName, audioId, language) match { case Success(Right(_)) => logger.info(s"Transcription already completed for audio: $audioName") return Failure(new JobAlreadyFoundException(s"Transcription already completed for audio: $audioName")) @@ -108,13 +114,12 @@ trait TranscriptionService { case _ => logger.info(s"No existing transcription job for audio name: $audioName") } - val audioUri = s"s3://${props.StorageName}/$audioName.mp3" + val audioUri = s"s3://${props.StorageName}/$audioName" logger.info(s"Transcribing audio from: $audioUri") - val jobName = s"transcribe-$audioName-$language" + val jobName = s"transcribe-audio-$audioId-$language" val mediaFormat = format - val outputKey = s"audio-transcription/$language/$audioName" + val outputKey = s"audio-transcription/$language/$audioId" val languageCode = language - transcribeClient.startTranscriptionJob( jobName, audioUri, @@ -132,15 +137,15 @@ trait TranscriptionService { } } - def getAudioTranscription(audioName: String, language: String): Try[Either[String, String]] = { - val jobName = s"transcribe-$audioName-$language" + def getAudioTranscription(audioName: String, audioId: Long, language: String): Try[Either[String, String]] = { + val jobName = s"transcribe-audio-$audioId-$language" transcribeClient.getTranscriptionJob(jobName).flatMap { transcriptionJobResponse => val transcriptionJob = transcriptionJobResponse.transcriptionJob() val transcriptionJobStatus = transcriptionJob.transcriptionJobStatus().toString if (transcriptionJobStatus == "COMPLETED") { - val transcribeUri = s"audio-transcription/$language/${audioName}" + val transcribeUri = s"audio-transcription/$language/${audioId}" s3TranscribeClient.getObject(transcribeUri).map { s3Object => val content = scala.io.Source.fromInputStream(s3Object.stream).mkString diff --git a/project/audioapi.scala b/project/audioapi.scala index 0e5cc75aa..5b5442375 100644 --- a/project/audioapi.scala +++ b/project/audioapi.scala @@ -42,7 +42,8 @@ object audioapi extends Module { "AudioMetaInformationDTO", "UpdatedAudioMetaInformationDTO", "SeriesSummarySearchResultDTO", - "SeriesSearchParamsDTO" + "SeriesSearchParamsDTO", + "TranscriptionResultDTO" ) ) From 8a24b8d61f51d6f6c100720b32aa15de0237ce67 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Wed, 18 Dec 2024 13:07:12 +0100 Subject: [PATCH 20/31] Fix licensing and unused audioName in get call --- .../controller/TranscriptionController.scala | 13 ++++++++++--- .../audioapi/model/api/TranscriptionResultDTO.scala | 8 ++++++++ .../audioapi/service/TranscriptionService.scala | 10 +++++++++- .../audioapi/service/TranscriptionServiceTest.scala | 8 ++++++++ .../common/brightcove/NdlaBrightcoveClient.scala | 8 ++++++++ 5 files changed, 43 insertions(+), 4 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index 97ddb368c..76e4f8699 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -1,3 +1,11 @@ +/* + * Part of NDLA audio-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + package no.ndla.audioapi.controller import no.ndla.audioapi.Props @@ -129,15 +137,14 @@ trait TranscriptionController { .summary("Get the transcription status of a video") .description("Get the transcription of a video.") .in("audio") - .in(audioName) .in(audioId) .in(language) .errorOut(errorOutputsFor(400, 404, 405, 500)) .out(jsonBody[TranscriptionResultDTO]) .requirePermission(DRAFT_API_WRITE) .serverLogicPure { _ => - { case (audioName, audioId, language) => - transcriptionService.getAudioTranscription(audioName, audioId, language) match { + { case (audioId, language) => + transcriptionService.getAudioTranscription(audioId, language) match { case Success(Right(transcriptionContent)) => Right(TranscriptionResultDTO("COMPLETED", Some(transcriptionContent))) case Success(Left(jobStatus)) => diff --git a/audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala b/audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala index 106abc93c..faf5ef5aa 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/model/api/TranscriptionResultDTO.scala @@ -1,3 +1,11 @@ +/* + * Part of NDLA audio-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + package no.ndla.audioapi.model.api import io.circe.generic.semiauto.{deriveDecoder, deriveEncoder} diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index afb4ab9fc..e286e07a4 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -1,3 +1,11 @@ +/* + * Part of NDLA audio-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + package no.ndla.audioapi.service import com.typesafe.scalalogging.StrictLogging @@ -137,7 +145,7 @@ trait TranscriptionService { } } - def getAudioTranscription(audioName: String, audioId: Long, language: String): Try[Either[String, String]] = { + def getAudioTranscription(audioId: Long, language: String): Try[Either[String, String]] = { val jobName = s"transcribe-audio-$audioId-$language" transcribeClient.getTranscriptionJob(jobName).flatMap { transcriptionJobResponse => diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala index 95c2c79c9..773e0061e 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -1,3 +1,11 @@ +/* + * Part of NDLA audio-api + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + package no.ndla.audioapi.service import no.ndla.audioapi.{AudioApiProperties, TestEnvironment, UnitSuite} diff --git a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala index 135c7f399..64f1011a7 100644 --- a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala +++ b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala @@ -1,3 +1,11 @@ +/* + * Part of NDLA common + * Copyright (C) 2024 NDLA + * + * See LICENSE + * + */ + package no.ndla.common.brightcove import io.circe.Json From e54886d753e3703929cc9c995f48493504c0cee0 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Wed, 18 Dec 2024 14:58:30 +0100 Subject: [PATCH 21/31] Make creation of subtitle file optional --- .../service/TranscriptionService.scala | 26 ++++++++++--------- .../common/aws/NdlaAWSTranscribeClient.scala | 11 +++++--- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index e286e07a4..46121c452 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -92,11 +92,7 @@ trait TranscriptionService { if (transcriptionJobStatus == "COMPLETED") { val transcribeUri = s"transcription/$language/${videoId}.vtt" - s3TranscribeClient.getObject(transcribeUri).map { s3Object => - val content = scala.io.Source.fromInputStream(s3Object.stream).mkString - s3Object.stream.close() - Right(content) - } + getObjectFromS3(transcribeUri) } else { Success(Left(transcriptionJobStatus)) } @@ -110,7 +106,7 @@ trait TranscriptionService { maxSpeakers: Int, format: String ): Try[Unit] = { - getAudioTranscription(audioName, audioId, language) match { + getAudioTranscription(audioId, language) match { case Success(Right(_)) => logger.info(s"Transcription already completed for audio: $audioName") return Failure(new JobAlreadyFoundException(s"Transcription already completed for audio: $audioName")) @@ -135,7 +131,8 @@ trait TranscriptionService { languageCode, props.TranscribeStorageName, outputKey, - maxSpeakers + maxSpeakers, + includeSubtitles = false ) match { case Success(_) => logger.info(s"Transcription job started for audio: $audioName") @@ -155,16 +152,21 @@ trait TranscriptionService { if (transcriptionJobStatus == "COMPLETED") { val transcribeUri = s"audio-transcription/$language/${audioId}" - s3TranscribeClient.getObject(transcribeUri).map { s3Object => - val content = scala.io.Source.fromInputStream(s3Object.stream).mkString - s3Object.stream.close() - Right(content) - } + getObjectFromS3(transcribeUri) } else { Success(Left(transcriptionJobStatus)) } } } + + private def getObjectFromS3(Uri: String): Try[Either[String, String]] = { + s3TranscribeClient.getObject(Uri).map { s3Object => + val content = scala.io.Source.fromInputStream(s3Object.stream).mkString + s3Object.stream.close() + Right(content) + } + } + def extractAudioFromVideo(videoId: String, language: String): Try[Unit] = { val accountId = props.BrightcoveAccountId val videoUrl = getVideo(accountId, videoId) match { diff --git a/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala index 3ba02563f..3704c86dc 100644 --- a/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala +++ b/common/src/main/scala/no/ndla/common/aws/NdlaAWSTranscribeClient.scala @@ -32,9 +32,10 @@ trait NdlaAWSTranscribeClient { outputBucket: String, outputKey: String, maxSpeakers: Int, + includeSubtitles: Boolean = true, outputSubtitleFormat: String = "VTT" ): Try[StartTranscriptionJobResponse] = Try { - val request = StartTranscriptionJobRequest + val requestBuilder = StartTranscriptionJobRequest .builder() .transcriptionJobName(jobName) .media(Media.builder().mediaFileUri(mediaUri).build()) @@ -49,15 +50,17 @@ trait NdlaAWSTranscribeClient { .maxSpeakerLabels(maxSpeakers) .build() ) - .subtitles( + + if (includeSubtitles) { + requestBuilder.subtitles( Subtitles .builder() .formats(SubtitleFormat.valueOf(outputSubtitleFormat)) .build() ) - .build() + } - client.startTranscriptionJob(request) + client.startTranscriptionJob(requestBuilder.build()) } def getTranscriptionJob(jobName: String): Try[GetTranscriptionJobResponse] = { From 31b918ef2cd2760b3e8e250bab505919aad4e08c Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Wed, 18 Dec 2024 15:07:45 +0100 Subject: [PATCH 22/31] fix test --- .../no/ndla/audioapi/service/TranscriptionServiceTest.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala index 773e0061e..8d64d3678 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/service/TranscriptionServiceTest.scala @@ -67,7 +67,7 @@ class TranscriptionServiceTest extends UnitSuite with TestEnvironment { when(transcribeClient.getTranscriptionJob(any)).thenReturn(Success(mock[GetTranscriptionJobResponse])) when(s3TranscribeClient.getObject(any)).thenReturn(Success(fakeS3Object)) when(transcriptionService.getAudioExtractionStatus(videoId, language)).thenReturn(Success(())) - when(transcribeClient.startTranscriptionJob(any, any, any, any, any, any, any, any)) + when(transcribeClient.startTranscriptionJob(any, any, any, any, any, any, any, any, any)) .thenReturn(Success(fakeTranscribeMock)) val result = transcriptionService.transcribeVideo(videoId, language, maxSpeakers) From 5163c808dfb972548160264f65d10417b5d265d1 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Fri, 20 Dec 2024 13:01:43 +0100 Subject: [PATCH 23/31] Update permissions and descriptions, refactor to existing s3 putobject --- .../controller/TranscriptionController.scala | 24 ++++++++-------- .../service/TranscriptionService.scala | 10 ++++++- .../no/ndla/common/aws/NdlaS3Client.scala | 28 ------------------- typescript/types-backend/audio-api.ts | 5 ++++ 4 files changed, 27 insertions(+), 40 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index 76e4f8699..d49beb040 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -14,7 +14,7 @@ import no.ndla.audioapi.service.{ReadService, TranscriptionService} import no.ndla.network.tapir.NoNullJsonPrinter.jsonBody import no.ndla.network.tapir.TapirController import no.ndla.network.tapir.TapirUtil.errorOutputsFor -import no.ndla.network.tapir.auth.Permission.DRAFT_API_WRITE +import no.ndla.network.tapir.auth.Permission.AUDIO_API_WRITE import sttp.tapir.server.ServerEndpoint import sttp.tapir.{EndpointInput, endpoint, path} import sttp.tapir.* @@ -32,7 +32,7 @@ trait TranscriptionController { private val videoId = path[String]("videoId").description("The video id to transcribe") private val audioName = path[String]("audioName").description("The audio name to transcribe") private val audioId = path[Long]("audioId").description("The audio id to transcribe") - private val language = path[String]("language").description("The language to transcribe the video to") + private val language = path[String]("language").description("The language to run the transcription in") private val maxSpeaker = query[Int]("maxSpeaker").description("The maximum number of speakers in the video").default(2) private val format = query[String]("format").description("The format of the audio file").default("mp3") @@ -44,7 +44,7 @@ trait TranscriptionController { .in(language) .in("extract-audio") .errorOut(errorOutputsFor(400, 500)) - .requirePermission(DRAFT_API_WRITE) + .requirePermission(AUDIO_API_WRITE) .serverLogicPure { _ => { case (videoId, language) => transcriptionService.extractAudioFromVideo(videoId, language) match { @@ -61,7 +61,7 @@ trait TranscriptionController { .in(language) .in("extract-audio") .errorOut(errorOutputsFor(400, 500)) - .requirePermission(DRAFT_API_WRITE) + .requirePermission(AUDIO_API_WRITE) .serverLogicPure { _ => { case (videoId, language) => transcriptionService.getAudioExtractionStatus(videoId, language) match { @@ -74,11 +74,12 @@ trait TranscriptionController { def postTranscription: ServerEndpoint[Any, Eff] = endpoint.post .summary("Transcribe video") .description("Transcribes a video and uploads the transcription to S3.") + .in("video") .in(videoId) .in(language) .in(maxSpeaker) .errorOut(errorOutputsFor(400, 500)) - .requirePermission(DRAFT_API_WRITE) + .requirePermission(AUDIO_API_WRITE) .serverLogicPure { _ => { case (videoId, language, maxSpeakerOpt) => transcriptionService.transcribeVideo(videoId, language, maxSpeakerOpt) match { @@ -93,11 +94,12 @@ trait TranscriptionController { def getTranscription: ServerEndpoint[Any, Eff] = endpoint.get .summary("Get the transcription status of a video") .description("Get the transcription of a video.") + .in("video") .in(videoId) .in(language) .errorOut(errorOutputsFor(400, 404, 405, 500)) .out(jsonBody[TranscriptionResultDTO]) - .requirePermission(DRAFT_API_WRITE) + .requirePermission(AUDIO_API_WRITE) .serverLogicPure { _ => { case (videoId, language) => transcriptionService.getVideoTranscription(videoId, language) match { @@ -113,7 +115,7 @@ trait TranscriptionController { def postAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.post .summary("Transcribe audio") - .description("Transcribes a video and uploads the transcription to S3.") + .description("Transcribes an audiofile and uploads the transcription to S3.") .in("audio") .in(audioName) .in(audioId) @@ -121,7 +123,7 @@ trait TranscriptionController { .in(maxSpeaker) .in(format) .errorOut(errorOutputsFor(400, 500)) - .requirePermission(DRAFT_API_WRITE) + .requirePermission(AUDIO_API_WRITE) .serverLogicPure { _ => { case (audioName, audioId, language, maxSpeakerOpt, format) => transcriptionService.transcribeAudio(audioName, audioId, language, maxSpeakerOpt, format) match { @@ -134,14 +136,14 @@ trait TranscriptionController { } def getAudioTranscription: ServerEndpoint[Any, Eff] = endpoint.get - .summary("Get the transcription status of a video") - .description("Get the transcription of a video.") + .summary("Get the transcription status of an audiofile") + .description("Get the transcription of an audiofile .") .in("audio") .in(audioId) .in(language) .errorOut(errorOutputsFor(400, 404, 405, 500)) .out(jsonBody[TranscriptionResultDTO]) - .requirePermission(DRAFT_API_WRITE) + .requirePermission(AUDIO_API_WRITE) .serverLogicPure { _ => { case (audioId, language) => transcriptionService.getAudioTranscription(audioId, language) match { diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index 46121c452..eb916823a 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -13,6 +13,7 @@ import no.ndla.audioapi.Props import no.ndla.audioapi.model.api.JobAlreadyFoundException import no.ndla.common.aws.{NdlaAWSTranscribeClient, NdlaS3Client} import no.ndla.common.brightcove.NdlaBrightcoveClient +import no.ndla.common.model.domain.UploadedFile import sttp.client3.{HttpURLConnectionBackend, UriContext, asFile, basicRequest} import ws.schild.jave.{Encoder, MultimediaObject} import ws.schild.jave.encode.{AudioAttributes, EncodingAttributes} @@ -194,7 +195,14 @@ trait TranscriptionService { case Success(_) => val s3Key = s"audio-extraction/$language/$videoId.mp3" logger.info(s"Uploading audio file to S3: $s3Key") - s3TranscribeClient.putObject(s3Key, audioFile, "audio/mpeg") match { + val uploadedFile = UploadedFile( // convert to uploadedFile object + partName = "", + fileName = Some(s"audio_$videoId.mp3"), + fileSize = audioFile.length(), + contentType = Some("audio/mpeg"), + file = audioFile + ) + s3TranscribeClient.putObject(s3Key, uploadedFile) match { case Success(_) => logger.info(s"Audio file uploaded to S3: $s3Key") for { diff --git a/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala b/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala index f8a7e43a2..8803eb385 100644 --- a/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala +++ b/common/src/main/scala/no/ndla/common/aws/NdlaS3Client.scala @@ -13,7 +13,6 @@ import software.amazon.awssdk.regions.Region import software.amazon.awssdk.services.s3.model.* import software.amazon.awssdk.services.s3.{S3Client, S3ClientBuilder} -import java.io.File import scala.util.Try trait NdlaS3Client { @@ -82,33 +81,6 @@ trait NdlaS3Client { ) } - def putObject( - key: String, - file: File, - contentType: String, - cacheControl: Option[String] = None - ): Try[PutObjectResponse] = Try { - - val por = PutObjectRequest - .builder() - .bucket(bucket) - .key(key) - .contentLength(file.length()) - .contentType(contentType) - - val porWithCacheControl = cacheControl match { - case Some(value) => por.cacheControl(value) - case None => por - } - - val requestBody = RequestBody.fromFile(file) - - client.putObject( - porWithCacheControl.build(), - requestBody - ) - } - def updateMetadata(key: String, metadata: java.util.Map[String, String]): Try[_] = Try { val cor = CopyObjectRequest diff --git a/typescript/types-backend/audio-api.ts b/typescript/types-backend/audio-api.ts index c53e0c2ad..a31ed95b7 100644 --- a/typescript/types-backend/audio-api.ts +++ b/typescript/types-backend/audio-api.ts @@ -196,6 +196,11 @@ export interface ITitleDTO { language: string } +export interface ITranscriptionResultDTO { + status: string + transcription?: string +} + export interface IUpdatedAudioMetaInformationDTO { revision: number title: string From 5759c2d4f8a763e3dcbf6dadf124882557bfa801 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 2 Jan 2025 08:13:50 +0100 Subject: [PATCH 24/31] Put urls into props --- .../no/ndla/common/brightcove/NdlaBrightcoveClient.scala | 6 ++++-- .../scala/no/ndla/common/configuration/BaseProps.scala | 8 ++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala index 64f1011a7..592848d19 100644 --- a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala +++ b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala @@ -12,14 +12,16 @@ import io.circe.Json import io.circe.generic.codec.DerivedAsObjectCodec.deriveCodec import io.circe.parser.* import sttp.client3.{HttpClientSyncBackend, UriContext, basicRequest} +import no.ndla.common.configuration.HasBaseProps case class TokenResponse(access_token: String, token_type: String, expires_in: Int) trait NdlaBrightcoveClient { + this: HasBaseProps => val brightcoveClient: NdlaBrightcoveClient class NdlaBrightcoveClient { - private val authUrl = "https://oauth.brightcove.com/v4/access_token" + private val authUrl = props.brightCoveAuthUri private val backend = HttpClientSyncBackend() // Or AsyncHttpClientFutureBackend() def getToken(clientID: String, clientSecret: String): Either[String, String] = { @@ -41,7 +43,7 @@ trait NdlaBrightcoveClient { def getVideoSource(accountId: String, videoId: String, bearerToken: String): Either[String, Vector[Json]] = { - val videoSourceUrl = uri"https://cms.api.brightcove.com/v1/accounts/$accountId/videos/$videoId/sources" + val videoSourceUrl = props.brightCoveVideoUri(accountId, videoId) val request = basicRequest .header("Authorization", s"Bearer $bearerToken") .get(videoSourceUrl) diff --git a/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala b/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala index 556cc52f6..6889bb36e 100644 --- a/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala +++ b/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala @@ -1,5 +1,8 @@ package no.ndla.common.configuration +import sttp.client3.UriContext +import sttp.model.Uri + import scala.util.Properties.{propOrElse, propOrNone} trait BaseProps { @@ -53,4 +56,9 @@ trait BaseProps { def SEARCH_INDEX_REPLICAS: Int = intPropOrDefault("SEARCH_INDEX_REPLICAS", 1) def TAPIR_THREADS: Int = intPropOrDefault("TAPIR_THREADS", 100) + + def brightCoveAuthUri: String = s"https://oauth.brightcove.com/v4/access_token" + def brightCoveVideoUri(accountId: String, videoId: String): Uri = + uri"https://cms.api.brightcove.com/v1/accounts/$accountId/videos/$videoId/sources" + } From 46d4e79e8cd07babdfb2bb5a08562e75eab2ca1f Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Thu, 2 Jan 2025 11:30:15 +0100 Subject: [PATCH 25/31] Fix naming and test --- .../no/ndla/common/brightcove/NdlaBrightcoveClient.scala | 7 +++---- .../scala/no/ndla/common/configuration/BaseProps.scala | 4 ++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala index 592848d19..0be52d591 100644 --- a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala +++ b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala @@ -21,14 +21,13 @@ trait NdlaBrightcoveClient { val brightcoveClient: NdlaBrightcoveClient class NdlaBrightcoveClient { - private val authUrl = props.brightCoveAuthUri - private val backend = HttpClientSyncBackend() // Or AsyncHttpClientFutureBackend() + private val backend = HttpClientSyncBackend() def getToken(clientID: String, clientSecret: String): Either[String, String] = { val request = basicRequest.auth .basic(clientID, clientSecret) - .post(uri"$authUrl?grant_type=client_credentials") + .post(uri"${props.BrightCoveAuthUri}?grant_type=client_credentials") val authResponse = request.send(backend) authResponse.body match { @@ -43,7 +42,7 @@ trait NdlaBrightcoveClient { def getVideoSource(accountId: String, videoId: String, bearerToken: String): Either[String, Vector[Json]] = { - val videoSourceUrl = props.brightCoveVideoUri(accountId, videoId) + val videoSourceUrl = props.BrightCoveVideoUri(accountId, videoId) val request = basicRequest .header("Authorization", s"Bearer $bearerToken") .get(videoSourceUrl) diff --git a/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala b/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala index 6889bb36e..7ecd5f951 100644 --- a/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala +++ b/common/src/main/scala/no/ndla/common/configuration/BaseProps.scala @@ -57,8 +57,8 @@ trait BaseProps { def TAPIR_THREADS: Int = intPropOrDefault("TAPIR_THREADS", 100) - def brightCoveAuthUri: String = s"https://oauth.brightcove.com/v4/access_token" - def brightCoveVideoUri(accountId: String, videoId: String): Uri = + def BrightCoveAuthUri: String = s"https://oauth.brightcove.com/v4/access_token" + def BrightCoveVideoUri(accountId: String, videoId: String): Uri = uri"https://cms.api.brightcove.com/v1/accounts/$accountId/videos/$videoId/sources" } From a2a17f593cdfff574e7f5f72e9879b419aa2a7e7 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 6 Jan 2025 08:30:30 +0100 Subject: [PATCH 26/31] Fix naming of props --- .../src/main/scala/no/ndla/audioapi/AudioApiProperties.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala index 3430227d1..d6a9b6360 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/AudioApiProperties.scala @@ -37,12 +37,12 @@ class AudioApiProperties extends BaseProps with DatabaseProps with StrictLogging val StorageName: String = propOrElse("AUDIO_FILE_S3_BUCKET", s"$Environment.audio.ndla") val StorageRegion: Option[String] = propOrNone("AUDIO_FILE_S3_BUCKET_REGION") - val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET_NAME", s"$Environment.transcribe.ndla") + val TranscribeStorageName: String = propOrElse("TRANSCRIBE_FILE_S3_BUCKET", s"$Environment.transcribe.ndla") val TranscribeStorageRegion: Option[String] = propOrNone("TRANSCRIBE_FILE_S3_BUCKET_REGION") val BrightcoveClientId: String = prop("BRIGHTCOVE_API_CLIENT_ID") val BrightcoveClientSecret: String = prop("BRIGHTCOVE_API_CLIENT_SECRET") - val BrightcoveAccountId: String = prop("NDLA_BRIGHTCOVE_ACCOUNT_ID") + val BrightcoveAccountId: String = prop("BRIGHTCOVE_ACCOUNT_ID") val SearchServer: String = propOrElse("SEARCH_SERVER", "http://search-audio-api.ndla-local") val RunWithSignedSearchRequests: Boolean = propOrElse("RUN_WITH_SIGNED_SEARCH_REQUESTS", "true").toBoolean From 0bd53391355b7bf8bc658467a725b61897462f22 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 6 Jan 2025 08:39:57 +0100 Subject: [PATCH 27/31] Fix error handling in api --- .../controller/TranscriptionController.scala | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index d49beb040..ea270a70d 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -83,9 +83,7 @@ trait TranscriptionController { .serverLogicPure { _ => { case (videoId, language, maxSpeakerOpt) => transcriptionService.transcribeVideo(videoId, language, maxSpeakerOpt) match { - case Success(_) => Right(()) - case Failure(ex: JobAlreadyFoundException) => - returnLeftError(ex) + case Success(_) => Right(()) case Failure(ex) => returnLeftError(ex) } } @@ -107,8 +105,7 @@ trait TranscriptionController { Right(TranscriptionResultDTO("COMPLETED", Some(transcriptionContent))) case Success(Left(jobStatus)) => Right(TranscriptionResultDTO(jobStatus.toString, None)) - case Failure(ex: NoSuchElementException) => returnLeftError(ex) - case Failure(ex) => returnLeftError(ex) + case Failure(ex) => returnLeftError(ex) } } } @@ -127,9 +124,7 @@ trait TranscriptionController { .serverLogicPure { _ => { case (audioName, audioId, language, maxSpeakerOpt, format) => transcriptionService.transcribeAudio(audioName, audioId, language, maxSpeakerOpt, format) match { - case Success(_) => Right(()) - case Failure(ex: JobAlreadyFoundException) => - returnLeftError(ex) + case Success(_) => Right(()) case Failure(ex) => returnLeftError(ex) } } @@ -151,8 +146,7 @@ trait TranscriptionController { Right(TranscriptionResultDTO("COMPLETED", Some(transcriptionContent))) case Success(Left(jobStatus)) => Right(TranscriptionResultDTO(jobStatus.toString, None)) - case Failure(ex: NoSuchElementException) => returnLeftError(ex) - case Failure(ex) => returnLeftError(ex) + case Failure(ex) => returnLeftError(ex) } } } From 628be060f8fa2a5a0c83ff885dd917cdf75d356a Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 6 Jan 2025 12:01:14 +0100 Subject: [PATCH 28/31] Improve error handling --- .../service/TranscriptionService.scala | 72 ++++++++++--------- .../scala/no/ndla/audioapi/UnitSuite.scala | 2 +- 2 files changed, 41 insertions(+), 33 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index eb916823a..c727b0477 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -36,8 +36,6 @@ trait TranscriptionService { case Success(Left("IN_PROGRESS")) => logger.info(s"Transcription already in progress for videoId: $videoId") return Failure(new JobAlreadyFoundException(s"Transcription already in progress for videoId: $videoId")) - case Success(Left(_)) => - logger.info(s"Error occurred while checking transcription status for videoId") case _ => logger.info(s"No existing transcription job for videoId: $videoId") } @@ -93,7 +91,7 @@ trait TranscriptionService { if (transcriptionJobStatus == "COMPLETED") { val transcribeUri = s"transcription/$language/${videoId}.vtt" - getObjectFromS3(transcribeUri) + getObjectFromS3(transcribeUri).map(Right(_)) } else { Success(Left(transcriptionJobStatus)) } @@ -114,8 +112,6 @@ trait TranscriptionService { case Success(Left("IN_PROGRESS")) => logger.info(s"Transcription already in progress for videoId: $audioName") return Failure(new JobAlreadyFoundException(s"Transcription already in progress for audio: $audioName")) - case Success(Left(_)) => - logger.info(s"Error occurred while checking transcription status for audio") case _ => logger.info(s"No existing transcription job for audio name: $audioName") } @@ -153,28 +149,32 @@ trait TranscriptionService { if (transcriptionJobStatus == "COMPLETED") { val transcribeUri = s"audio-transcription/$language/${audioId}" - getObjectFromS3(transcribeUri) + getObjectFromS3(transcribeUri).map(Right(_)) } else { Success(Left(transcriptionJobStatus)) } } } - private def getObjectFromS3(Uri: String): Try[Either[String, String]] = { + private def getObjectFromS3(Uri: String): Try[String] = { s3TranscribeClient.getObject(Uri).map { s3Object => val content = scala.io.Source.fromInputStream(s3Object.stream).mkString s3Object.stream.close() - Right(content) + content } } def extractAudioFromVideo(videoId: String, language: String): Try[Unit] = { val accountId = props.BrightcoveAccountId val videoUrl = getVideo(accountId, videoId) match { - case Right(sources) => sources.head - case Left(error) => throw new RuntimeException(s"Failed to get video sources: $error") + case Success(sources) if sources.nonEmpty => sources.head + case Success(_) => return Failure(new RuntimeException(s"No video sources found for videoId: $videoId")) + case Failure(ex) => return Failure(new RuntimeException(s"Failed to get video sources: $ex")) + } + val videoFile = downloadVideo(videoId, videoUrl) match { + case Success(file) => file + case Failure(ex) => throw new RuntimeException(s"Failed to download video: $ex") } - val videoFile = downloadVideo(videoId, videoUrl) val audioFile = new File(s"/tmp/audio_${videoId}.mp3") @@ -223,37 +223,45 @@ trait TranscriptionService { } } - private def getVideo(accountId: String, videoId: String): Either[String, Vector[String]] = { + private def getVideo(accountId: String, videoId: String): Try[Vector[String]] = { val clientId = props.BrightcoveClientId val clientSecret = props.BrightcoveClientSecret - val token = brightcoveClient.getToken(clientId, clientSecret) - token match { - case Right(bearerToken) => - val cake = brightcoveClient.getVideoSource(accountId, videoId, bearerToken) - cake match { - case Right(videoSources) => - val mp4Sources = videoSources - .filter(source => source.hcursor.get[String]("container").toOption.contains("MP4")) - .map(source => source.hcursor.get[String]("src").toOption.getOrElse("")) - if (mp4Sources.nonEmpty) Right(mp4Sources) - else Left("No MP4 sources found for video.") - case Left(error) => Left(s"Failed to get video sources: $error") - } - case Left(error) => - Left(s"Failed to retrieve bearer token: $error") + + Try { + val token = brightcoveClient.getToken(clientId, clientSecret) match { + case Right(token) => token + case Left(error) => throw new RuntimeException(s"Failed to retrieve bearer token: $error") + } + + val videoSources = brightcoveClient.getVideoSource(accountId, videoId, token) match { + case Right(sources) => sources + case Left(error) => throw new RuntimeException(s"Failed to get video sources: $error") + } + + val mp4Sources = videoSources + .filter(source => source.hcursor.get[String]("container").toOption.contains("MP4")) + .map(source => source.hcursor.get[String]("src").toOption.getOrElse("")) + + if (mp4Sources.isEmpty) mp4Sources + else throw new RuntimeException(s"No MP4 sources found for videoId: $videoId") } } - private def downloadVideo(videoId: String, videoUrl: String): File = { + private def downloadVideo(videoId: String, videoUrl: String): Try[File] = { val videoFile = new File(s"/tmp/video_$videoId.mp4") val connection = HttpURLConnectionBackend() val response = basicRequest.get(uri"$videoUrl").response(asFile(videoFile)).send(connection) - - response.body match { - case Right(file) => file - case Left(error) => throw new RuntimeException(s"Failed to download video: $error") + Try { + response.body match { + case Right(file) => file + case Left(error) => throw new RuntimeException(s"Failed to download video: $error") + } + } match { + case Success(file) => Success(file) + case Failure(exception) => Failure(exception) } + } } } diff --git a/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala b/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala index 76d6f9edf..7a8ddc91e 100644 --- a/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala +++ b/audio-api/src/test/scala/no/ndla/audioapi/UnitSuite.scala @@ -26,5 +26,5 @@ trait UnitSuite extends UnitTestSuite with PrivateMethodTester { setPropEnv("SEARCH_INDEX_NAME", "audio-integration-test-index") setPropEnv("BRIGHTCOVE_API_CLIENT_ID", "client-id") setPropEnv("BRIGHTCOVE_API_CLIENT_SECRET", "client") - setPropEnv("NDLA_BRIGHTCOVE_ACCOUNT_ID", "312532") + setPropEnv("BRIGHTCOVE_ACCOUNT_ID", "312532") } From 6c355db04b29fe18397a16e03474ad6328589c3e Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 6 Jan 2025 12:19:03 +0100 Subject: [PATCH 29/31] Fix unused import and try handling of brightcove client --- .../controller/TranscriptionController.scala | 2 +- .../brightcove/NdlaBrightcoveClient.scala | 48 ++++++++++--------- 2 files changed, 27 insertions(+), 23 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala index ea270a70d..933ed1cf9 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/controller/TranscriptionController.scala @@ -9,7 +9,7 @@ package no.ndla.audioapi.controller import no.ndla.audioapi.Props -import no.ndla.audioapi.model.api.{JobAlreadyFoundException, TranscriptionResultDTO} +import no.ndla.audioapi.model.api.TranscriptionResultDTO import no.ndla.audioapi.service.{ReadService, TranscriptionService} import no.ndla.network.tapir.NoNullJsonPrinter.jsonBody import no.ndla.network.tapir.TapirController diff --git a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala index 0be52d591..9a942ccd3 100644 --- a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala +++ b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala @@ -14,6 +14,8 @@ import io.circe.parser.* import sttp.client3.{HttpClientSyncBackend, UriContext, basicRequest} import no.ndla.common.configuration.HasBaseProps +import scala.util.{Failure, Success, Try} + case class TokenResponse(access_token: String, token_type: String, expires_in: Int) trait NdlaBrightcoveClient { @@ -23,24 +25,25 @@ trait NdlaBrightcoveClient { class NdlaBrightcoveClient { private val backend = HttpClientSyncBackend() - def getToken(clientID: String, clientSecret: String): Either[String, String] = { + def getToken(clientID: String, clientSecret: String): Try[String] = { val request = basicRequest.auth .basic(clientID, clientSecret) .post(uri"${props.BrightCoveAuthUri}?grant_type=client_credentials") val authResponse = request.send(backend) - - authResponse.body match { - case Right(jsonString) => - decode[TokenResponse](jsonString) match { - case Right(tokenResponse) => Right(tokenResponse.access_token) - case Left(error) => Left(s"Failed to decode token response: ${error.getMessage}") - } - case Left(error) => Left(s"Failed to get token: ${error}") + Try { + authResponse.body match { + case Right(jsonString) => + decode[TokenResponse](jsonString) match { + case Right(tokenResponse) => tokenResponse.access_token + case Left(error) => throw new Exception(s"Failed to decode token response: ${error.getMessage}") + } + case Left(error) => throw new Exception(s"Failed to get token: ${error}") + } } } - def getVideoSource(accountId: String, videoId: String, bearerToken: String): Either[String, Vector[Json]] = { + def getVideoSource(accountId: String, videoId: String, bearerToken: String): Try[Vector[Json]] = { val videoSourceUrl = props.BrightCoveVideoUri(accountId, videoId) val request = basicRequest @@ -50,18 +53,19 @@ trait NdlaBrightcoveClient { implicit val backend = HttpClientSyncBackend() val response = request.send(backend) - - response.body match { - case Right(jsonString) => - parse(jsonString) match { - case Right(json) => - json.asArray match { - case Some(videoSources) => Right(videoSources) - case None => Left("Expected a JSON array but got something else.") - } - case Left(error) => Left(s"Failed to decode video source response: ${error.getMessage}") - } - case Left(error) => Left(s"Failed to get video source: ${error}") + Try { + response.body match { + case Right(jsonString) => + parse(jsonString) match { + case Right(json) => + json.asArray match { + case Some(videoSources) => videoSources + case None => throw new Exception("Failed to parse video source") + } + case Left(error) => throw new Exception(s"Failed to parse video source: ${error.getMessage}") + } + case Left(error) => throw new Exception(s"Failed to get video source: ${error}") + } } } } From ea32cc9630f13aaffd520f2d1265d5bdda76d173 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 6 Jan 2025 12:25:06 +0100 Subject: [PATCH 30/31] Remove unused import --- .../scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala index 9a942ccd3..7bfc26302 100644 --- a/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala +++ b/common/src/main/scala/no/ndla/common/brightcove/NdlaBrightcoveClient.scala @@ -14,7 +14,7 @@ import io.circe.parser.* import sttp.client3.{HttpClientSyncBackend, UriContext, basicRequest} import no.ndla.common.configuration.HasBaseProps -import scala.util.{Failure, Success, Try} +import scala.util.Try case class TokenResponse(access_token: String, token_type: String, expires_in: Int) From 574bc3793a170ca612b79ae3e7f87359012f13d5 Mon Sep 17 00:00:00 2001 From: ekrojo77 Date: Mon, 6 Jan 2025 12:32:55 +0100 Subject: [PATCH 31/31] update getVideo after brightcove changes --- .../service/TranscriptionService.scala | 23 ++++++------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala index c727b0477..9169e8875 100644 --- a/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala +++ b/audio-api/src/main/scala/no/ndla/audioapi/service/TranscriptionService.scala @@ -227,24 +227,15 @@ trait TranscriptionService { val clientId = props.BrightcoveClientId val clientSecret = props.BrightcoveClientSecret - Try { - val token = brightcoveClient.getToken(clientId, clientSecret) match { - case Right(token) => token - case Left(error) => throw new RuntimeException(s"Failed to retrieve bearer token: $error") - } - - val videoSources = brightcoveClient.getVideoSource(accountId, videoId, token) match { - case Right(sources) => sources - case Left(error) => throw new RuntimeException(s"Failed to get video sources: $error") - } - - val mp4Sources = videoSources + for { + token <- brightcoveClient.getToken(clientId, clientSecret) + sources <- brightcoveClient.getVideoSource(accountId, videoId, token) + mp4Sources = sources .filter(source => source.hcursor.get[String]("container").toOption.contains("MP4")) .map(source => source.hcursor.get[String]("src").toOption.getOrElse("")) - - if (mp4Sources.isEmpty) mp4Sources - else throw new RuntimeException(s"No MP4 sources found for videoId: $videoId") - } + result <- + if (mp4Sources.nonEmpty) Success(mp4Sources) else Failure(new RuntimeException("No MP4 sources found")) + } yield result } private def downloadVideo(videoId: String, videoUrl: String): Try[File] = {