From 65e3fa79b77524050d83af0d6b96f7caa6e90aba Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Tue, 3 Oct 2023 17:37:14 +0200 Subject: [PATCH] Handle /robots.txt --- .../Routes.scala | 3 +- .../scalastream/it/core/RobotsSpec.scala | 65 +++++++++++++++++++ 2 files changed, 67 insertions(+), 1 deletion(-) create mode 100644 kinesis/src/it/scala/com/snowplowanalytics/snowplow/collectors/scalastream/it/core/RobotsSpec.scala diff --git a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala index 3f9cad421..f03071c9b 100644 --- a/http4s/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala +++ b/http4s/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala @@ -21,7 +21,8 @@ class Routes[F[_]: Sync](enableDefaultRedirect: Boolean, service: IService[F]) e ifTrue = Ok("OK"), ifFalse = ServiceUnavailable("Service Unavailable") ) - + case GET -> Root / "robots.txt" => + Ok("User-agent: *\nDisallow: /") } private val corsRoute = HttpRoutes.of[F] { diff --git a/kinesis/src/it/scala/com/snowplowanalytics/snowplow/collectors/scalastream/it/core/RobotsSpec.scala b/kinesis/src/it/scala/com/snowplowanalytics/snowplow/collectors/scalastream/it/core/RobotsSpec.scala new file mode 100644 index 000000000..561c5220d --- /dev/null +++ b/kinesis/src/it/scala/com/snowplowanalytics/snowplow/collectors/scalastream/it/core/RobotsSpec.scala @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2023-2023 Snowplow Analytics Ltd. All rights reserved. + * + * This program is licensed to you under the Apache License Version 2.0, and + * you may not use this file except in compliance with the Apache License + * Version 2.0. You may obtain a copy of the Apache License Version 2.0 at + * http://www.apache.org/licenses/LICENSE-2.0. 
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the Apache License Version 2.0 is distributed on an "AS
+ * IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the Apache License Version 2.0 for the specific language
+ * governing permissions and limitations there under.
+ */
+package com.snowplowanalytics.snowplow.collectors.scalastream.it.core
+
+import scala.concurrent.duration._
+
+import org.specs2.mutable.Specification
+import cats.effect.IO
+import org.http4s.{Method, Request, Uri}
+import cats.effect.testing.specs2.CatsEffect
+
+import com.snowplowanalytics.snowplow.collectors.scalastream.it.kinesis.Kinesis
+import com.snowplowanalytics.snowplow.collectors.scalastream.it.kinesis.containers._
+import com.snowplowanalytics.snowplow.collectors.scalastream.it.Http
+
+/**
+ * Integration spec: GET /robots.txt must answer 200 with the robots body and emit nothing to Kinesis.
+ */
+class RobotsSpec extends Specification with Localstack with CatsEffect {
+  // 5-minute Timeout override; presumably needed because the example starts a collector container.
+  override protected val Timeout = 5.minutes
+
+  "collector" should {
+    "respond to /robots.txt with 200 and not emit any event" in {
+      val testName = "robots"
+      val streamGood = s"$testName-raw"
+      val streamBad = s"$testName-bad-1"
+
+      Collector.container(
+        "kinesis/src/it/resources/collector.hocon",
+        testName,
+        streamGood,
+        streamBad
+      ).use { collector =>
+        val uri = Uri.unsafeFromString(s"http://${collector.host}:${collector.port}/robots.txt")
+        val request = Request[IO](Method.GET, uri)
+        for {
+          response <- Http.response(request)
+          bodyBytes <- response.body.compile.toList
+          // Decode explicitly as UTF-8: new String(bytes) alone uses the JVM's platform-default charset.
+          body = new String(bodyBytes.toArray, java.nio.charset.StandardCharsets.UTF_8)
+          _ <- IO.sleep(10.seconds) // presumably gives a stray event time to reach the streams
+          collectorOutput <- Kinesis.readOutput(streamGood, streamBad)
+        } yield {
+          response.status.code must beEqualTo(200)
+          body must beEqualTo("User-agent: *\nDisallow: /")
+          collectorOutput.good must beEmpty
+          collectorOutput.bad must beEmpty
+        }
+      }
+    }
+  }
+}