From 59438a4e7602b1c3e853fa3247564fa172b94f82 Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Wed, 10 Apr 2024 09:18:56 +0200 Subject: [PATCH 01/14] Return `Content-Type` for POST requests Previously `Content-Type` was not explicitly returned for POST requests. This would result in errors being logged for javascript tracker in Firefox[1]. Now, the behavior is rolled back to previous where collector would return `Content-Type` header for these requests. Addresses [BCPF-1102] and [PDP-1110] --- 1 - https://bugzilla.mozilla.org/show_bug.cgi?id=884693 --- .../Service.scala | 2 +- .../ServiceSpec.scala | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala index 317bc3020..9acff5b66 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala @@ -261,7 +261,7 @@ class Service[F[_]: Sync]( case _ => Response[F]( status = Ok, - headers = headers, + headers = headers.put(`Content-Type`(MediaType.text.plain)), body = Stream.emit("ok").through(fs2.text.utf8.encode) ) } diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala index 8d3bf9f45..164b30cf5 100644 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala @@ -535,15 +535,16 @@ class ServiceSpec extends Specification { res.headers shouldEqual testHeaders } "send back ok otherwise" in { + val headers = testHeaders.put(`Content-Type`(MediaType.text.plain)) val res = service.buildHttpResponse( queryParams = Map.empty, - headers = testHeaders, + headers = headers, redirect 
= false, pixelExpected = false, shouldBounce = false ) res.status shouldEqual Status.Ok - res.headers shouldEqual testHeaders + res.headers shouldEqual headers res.bodyText.compile.toList.unsafeRunSync() shouldEqual List("ok") } } @@ -560,13 +561,14 @@ class ServiceSpec extends Specification { res.body.compile.toList.unsafeRunSync().toArray shouldEqual Service.pixel } "send back ok otherwise" in { + val headers = testHeaders.put(`Content-Type`(MediaType.text.plain)) val res = service.buildUsualHttpResponse( - headers = testHeaders, + headers = headers, pixelExpected = false, shouldBounce = false ) res.status shouldEqual Status.Ok - res.headers shouldEqual testHeaders + res.headers shouldEqual headers res.bodyText.compile.toList.unsafeRunSync() shouldEqual List("ok") } } From 55f020c8b916034342685d54b431dca1b48f2e61 Mon Sep 17 00:00:00 2001 From: spenes Date: Mon, 13 May 2024 02:35:47 +0300 Subject: [PATCH 02/14] Bump sbt-snowplow-release to 0.3.2 --- project/plugins.sbt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/plugins.sbt b/project/plugins.sbt index d8e653fe8..1b5644fd1 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -3,4 +3,4 @@ addSbtPlugin("com.eed3si9n" % "sbt-buildinfo" % "0.10.0") addSbtPlugin("io.github.davidgregory084" % "sbt-tpolecat" % "0.1.17") addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.2") addSbtPlugin("com.dwijnand" % "sbt-dynver" % "4.1.1") -addSbtPlugin("com.snowplowanalytics" % "sbt-snowplow-release" % "0.3.1") +addSbtPlugin("com.snowplowanalytics" % "sbt-snowplow-release" % "0.3.2") From fdd19b05247071f1694e56ba9442cde3efdacc48 Mon Sep 17 00:00:00 2001 From: adatzer Date: Fri, 23 Aug 2024 18:13:35 +0300 Subject: [PATCH 03/14] Bump azure-identity to 1.13.2 --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 31ae80d44..847db28d1 100644 --- a/project/Dependencies.scala +++ 
b/project/Dependencies.scala @@ -41,7 +41,7 @@ object Dependencies { val tracker = "2.0.0" val dataDog4s = "0.32.0" val jnrPosix = "3.1.8" // force this version to mitigate security vulnerabilities - val azureIdentity = "1.11.0" + val azureIdentity = "1.13.2" } object Libraries { From ffff1e0488a1bfd934d0e58be7e246bb77405f65 Mon Sep 17 00:00:00 2001 From: adatzer Date: Fri, 23 Aug 2024 18:14:37 +0300 Subject: [PATCH 04/14] Bump aws sdk to 1.12.769 --- project/Dependencies.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 847db28d1..8505c73a5 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -13,7 +13,7 @@ import sbt._ object Dependencies { object V { - val awsSdk = "1.12.327" + val awsSdk = "1.12.769" val badRows = "2.2.1" val blaze = "0.23.15" val catsRetry = "3.1.0" From 548aaf6e4bf0aaefc98eb184246972aed5461690 Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Tue, 3 Sep 2024 21:17:36 +0200 Subject: [PATCH 05/14] Remove body read timeout feature The goal of the feature was to prevent long body reads in GCP, this however does not prevent the slow incoming connection handling at the framework level. Therefore, as this adds unnecessary complexity with possible negative performance impact, the feature is removed. The configuration parameter is no longer used, but can remain as is. 
--- core/src/main/resources/reference.conf | 1 - .../Config.scala | 1 - .../Pipes.scala | 29 ------------------- .../Routes.scala | 4 +-- .../Run.scala | 1 - .../PipesSpec.scala | 22 -------------- .../RoutesSpec.scala | 4 +-- .../TestUtils.scala | 1 - kafka/src/it/resources/collector.hocon | 1 - .../KafkaConfigSpec.scala | 1 - .../collector-cookie-anonymous.hocon | 1 - .../collector-cookie-attributes-1.hocon | 1 - .../collector-cookie-attributes-2.hocon | 1 - .../resources/collector-cookie-domain.hocon | 1 - .../resources/collector-cookie-fallback.hocon | 1 - .../collector-cookie-no-domain.hocon | 1 - .../it/resources/collector-custom-paths.hocon | 1 - .../collector-doNotTrackCookie-disabled.hocon | 1 - .../collector-doNotTrackCookie-enabled.hocon | 1 - kinesis/src/it/resources/collector.hocon | 1 - .../sinks/KinesisConfigSpec.scala | 1 - .../NsqConfigSpec.scala | 1 - pubsub/src/it/resources/collector.hocon | 1 - .../ConfigSpec.scala | 1 - .../SqsConfigSpec.scala | 1 - 25 files changed, 2 insertions(+), 78 deletions(-) delete mode 100644 core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Pipes.scala delete mode 100644 core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/PipesSpec.scala diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 4ad566183..aa92fc969 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -95,7 +95,6 @@ maxConnections = 1024 idleTimeout = 610 seconds responseHeaderTimeout = 5 seconds - bodyReadTimeout = 1 second maxRequestLineLength = 20480 maxHeadersLength = 40960 } diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala index 5d2d335a3..8f9061018 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala +++ 
b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala @@ -161,7 +161,6 @@ object Config { maxConnections: Int, idleTimeout: FiniteDuration, responseHeaderTimeout: FiniteDuration, - bodyReadTimeout: FiniteDuration, maxRequestLineLength: Int, maxHeadersLength: Int ) diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Pipes.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Pipes.scala deleted file mode 100644 index 10f1b1aea..000000000 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Pipes.scala +++ /dev/null @@ -1,29 +0,0 @@ -/** - * Copyright (c) 2013-present Snowplow Analytics Ltd. - * All rights reserved. - * - * This software is made available by Snowplow Analytics, Ltd., - * under the terms of the Snowplow Limited Use License Agreement, Version 1.0 - * located at https://docs.snowplow.io/limited-use-license-1.0 - * BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION - * OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. 
- */ -package com.snowplowanalytics.snowplow.collector.core - -import scala.concurrent.duration.FiniteDuration -import cats.effect.Async -import fs2.{Pipe, Pull} - -object Pipes { - def timeoutOnIdle[F[_]: Async, A](duration: FiniteDuration): Pipe[F, A, A] = - _.pull.timed { timedPull => - def go(timedPull: Pull.Timed[F, A]): Pull[F, A, Unit] = - timedPull.timeout(duration) >> - timedPull.uncons.flatMap { - case Some((Right(elems), next)) => Pull.output(elems) >> go(next) - case _ => Pull.done - } - - go(timedPull) - }.stream -} diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala index e80f239ba..fd699a15a 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala @@ -10,7 +10,6 @@ */ package com.snowplowanalytics.snowplow.collector.core -import scala.concurrent.duration.FiniteDuration import cats.implicits._ import cats.effect.{Async, Sync} import org.http4s._ @@ -22,7 +21,6 @@ class Routes[F[_]: Async]( enableDefaultRedirect: Boolean, enableRootResponse: Boolean, enableCrossdomainTracking: Boolean, - bodyReadTimeout: FiniteDuration, service: IService[F] ) extends Http4sDsl[F] { @@ -51,7 +49,7 @@ class Routes[F[_]: Async]( case req @ POST -> Root / vendor / version => val path = service.determinePath(vendor, version) service.cookie( - body = req.bodyText.through(Pipes.timeoutOnIdle(bodyReadTimeout)).compile.string.map(Some(_)), + body = req.bodyText.compile.string.map(Some(_)), path = path, request = req, pixelExpected = false, diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala index 4553955ff..bcf0c60d9 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala +++ 
b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala @@ -98,7 +98,6 @@ object Run { config.enableDefaultRedirect, config.rootResponse.enabled, config.crossDomain.enabled, - config.networking.responseHeaderTimeout, collectorService ).value, if (config.ssl.enable) config.ssl.port else config.port, diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/PipesSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/PipesSpec.scala deleted file mode 100644 index bfd6803b1..000000000 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/PipesSpec.scala +++ /dev/null @@ -1,22 +0,0 @@ -package com.snowplowanalytics.snowplow.collector.core - -import scala.concurrent.duration._ -import org.specs2.mutable.Specification -import cats.effect.IO -import cats.effect.unsafe.implicits.global -import fs2.Stream - -class PipesSpec extends Specification { - - "Pipes#timeoutOnIdle" should { - "allow terminating a stream early when idle" in { - Stream - .emits[IO, Int](Vector(1, 2, 3)) - .onComplete(Stream.empty[IO].delayBy(20.seconds)) - .through(Pipes.timeoutOnIdle(100.millis)) - .compile - .count - .unsafeRunSync() must beEqualTo(3) - } - } -} diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala index 2df2627f8..d75249b8b 100644 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala @@ -73,9 +73,7 @@ class RoutesSpec extends Specification { ) = { val service = new TestService() val routes = - new Routes(enabledDefaultRedirect, enableRootResponse, enableCrossdomainTracking, 500.millis, service) - .value - .orNotFound + new Routes(enabledDefaultRedirect, enableRootResponse, enableCrossdomainTracking, service).value.orNotFound val routesWithHsts = 
HttpServer.hstsMiddleware(Config.HSTS(enableHsts, 180.days), routes) (service, routesWithHsts) } diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala index 1c0a5da72..e30ea43b4 100644 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala @@ -121,7 +121,6 @@ object TestUtils { 1024, 610.seconds, 5.seconds, - 1.second, 20480, 40960 ), diff --git a/kafka/src/it/resources/collector.hocon b/kafka/src/it/resources/collector.hocon index e46b803ad..03b97888d 100644 --- a/kafka/src/it/resources/collector.hocon +++ b/kafka/src/it/resources/collector.hocon @@ -26,6 +26,5 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } } \ No newline at end of file diff --git a/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala b/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala index 3b402dafc..ba4593e60 100644 --- a/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala +++ b/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala @@ -172,7 +172,6 @@ object KafkaConfigSpec { maxConnections = 1024, idleTimeout = 610.seconds, responseHeaderTimeout = 5.seconds, - bodyReadTimeout = 1.second, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git a/kinesis/src/it/resources/collector-cookie-anonymous.hocon b/kinesis/src/it/resources/collector-cookie-anonymous.hocon index 1b2ae929e..85c51eca6 100644 --- a/kinesis/src/it/resources/collector-cookie-anonymous.hocon +++ b/kinesis/src/it/resources/collector-cookie-anonymous.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 
seconds } "cookie": { diff --git a/kinesis/src/it/resources/collector-cookie-attributes-1.hocon b/kinesis/src/it/resources/collector-cookie-attributes-1.hocon index 7e316aecd..34d393521 100644 --- a/kinesis/src/it/resources/collector-cookie-attributes-1.hocon +++ b/kinesis/src/it/resources/collector-cookie-attributes-1.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } "cookie": { diff --git a/kinesis/src/it/resources/collector-cookie-attributes-2.hocon b/kinesis/src/it/resources/collector-cookie-attributes-2.hocon index 1b2ae929e..85c51eca6 100644 --- a/kinesis/src/it/resources/collector-cookie-attributes-2.hocon +++ b/kinesis/src/it/resources/collector-cookie-attributes-2.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } "cookie": { diff --git a/kinesis/src/it/resources/collector-cookie-domain.hocon b/kinesis/src/it/resources/collector-cookie-domain.hocon index 94298cb0d..1f2749e23 100644 --- a/kinesis/src/it/resources/collector-cookie-domain.hocon +++ b/kinesis/src/it/resources/collector-cookie-domain.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } "cookie": { diff --git a/kinesis/src/it/resources/collector-cookie-fallback.hocon b/kinesis/src/it/resources/collector-cookie-fallback.hocon index 45d5786a2..17361150d 100644 --- a/kinesis/src/it/resources/collector-cookie-fallback.hocon +++ b/kinesis/src/it/resources/collector-cookie-fallback.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } "cookie": { diff --git a/kinesis/src/it/resources/collector-cookie-no-domain.hocon b/kinesis/src/it/resources/collector-cookie-no-domain.hocon index 1b2ae929e..85c51eca6 100644 --- a/kinesis/src/it/resources/collector-cookie-no-domain.hocon +++ b/kinesis/src/it/resources/collector-cookie-no-domain.hocon @@ -33,7 +33,6 
@@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } "cookie": { diff --git a/kinesis/src/it/resources/collector-custom-paths.hocon b/kinesis/src/it/resources/collector-custom-paths.hocon index c100e92fc..cc4d7cc0c 100644 --- a/kinesis/src/it/resources/collector-custom-paths.hocon +++ b/kinesis/src/it/resources/collector-custom-paths.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } diff --git a/kinesis/src/it/resources/collector-doNotTrackCookie-disabled.hocon b/kinesis/src/it/resources/collector-doNotTrackCookie-disabled.hocon index 8e63d4ee2..4e1481b63 100644 --- a/kinesis/src/it/resources/collector-doNotTrackCookie-disabled.hocon +++ b/kinesis/src/it/resources/collector-doNotTrackCookie-disabled.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } "doNotTrackCookie": { diff --git a/kinesis/src/it/resources/collector-doNotTrackCookie-enabled.hocon b/kinesis/src/it/resources/collector-doNotTrackCookie-enabled.hocon index db836b82f..32720eebf 100644 --- a/kinesis/src/it/resources/collector-doNotTrackCookie-enabled.hocon +++ b/kinesis/src/it/resources/collector-doNotTrackCookie-enabled.hocon @@ -33,7 +33,6 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } "doNotTrackCookie": { diff --git a/kinesis/src/it/resources/collector.hocon b/kinesis/src/it/resources/collector.hocon index 4040f1764..f3e058e8a 100644 --- a/kinesis/src/it/resources/collector.hocon +++ b/kinesis/src/it/resources/collector.hocon @@ -33,6 +33,5 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } } \ No newline at end of file diff --git a/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala 
b/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala index 1574ee9c5..2ac2b4c6f 100644 --- a/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala +++ b/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala @@ -126,7 +126,6 @@ object KinesisConfigSpec { maxConnections = 1024, idleTimeout = 610.seconds, responseHeaderTimeout = 5.seconds, - bodyReadTimeout = 1.second, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git a/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala b/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala index 8cc536d12..f08badb6a 100644 --- a/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala +++ b/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala @@ -159,7 +159,6 @@ object NsqConfigSpec { maxConnections = 1024, idleTimeout = 610.seconds, responseHeaderTimeout = 5.seconds, - bodyReadTimeout = 1.second, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git a/pubsub/src/it/resources/collector.hocon b/pubsub/src/it/resources/collector.hocon index 0439687df..89005cbed 100644 --- a/pubsub/src/it/resources/collector.hocon +++ b/pubsub/src/it/resources/collector.hocon @@ -18,6 +18,5 @@ collector { networking { responseHeaderTimeout = 10 seconds - bodyReadTimeout = 2 seconds } } \ No newline at end of file diff --git a/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala b/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala index 1392133de..e1d66d3fc 100644 --- a/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala +++ 
b/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala @@ -115,7 +115,6 @@ object ConfigSpec { maxConnections = 1024, idleTimeout = 610.seconds, responseHeaderTimeout = 5.seconds, - bodyReadTimeout = 1.second, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git a/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala b/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala index 86a8a8d76..762be70ac 100644 --- a/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala +++ b/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala @@ -116,7 +116,6 @@ object SqsConfigSpec { maxConnections = 1024, idleTimeout = 610.seconds, responseHeaderTimeout = 5.seconds, - bodyReadTimeout = 1.second, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), From 314e128e8a716d65b620dd7e974e261cffeed865 Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Tue, 13 Aug 2024 15:51:47 +0200 Subject: [PATCH 06/14] Disable timeouts on healthcheck calls Currently, healthchecks reside behind the same timeout settings as any other endpoint. We observed that when autoscaling under massive load, it is possible for collector to be taken down because of long health check responses. Which previously did not happen. We therefore move healthchecks above the timeout middleware to return to previous behavior. Additionally, this allows us to set arbitrarily short (or long) response times for the regular endpoints when necessary. --- Part of [PDP-1408]. 
--- .../HttpServer.scala | 23 ++++++++------ .../Routes.scala | 30 +++++++++---------- .../Run.scala | 14 +++++---- .../RoutesSpec.scala | 5 ++-- 4 files changed, 40 insertions(+), 32 deletions(-) diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala index bc59ba783..31c26c1f6 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala @@ -33,6 +33,7 @@ object HttpServer { def build[F[_]: Async]( routes: HttpRoutes[F], + healthRoutes: HttpRoutes[F], port: Int, secure: Boolean, hsts: Config.HSTS, @@ -42,7 +43,7 @@ object HttpServer { ): Resource[F, Server] = for { withMetricsMiddleware <- createMetricsMiddleware(routes, metricsConfig) - server <- buildBlazeServer[F](withMetricsMiddleware, port, secure, hsts, networking, debugHttp) + server <- buildBlazeServer[F](withMetricsMiddleware, healthRoutes, port, secure, hsts, networking, debugHttp) } yield server private def createMetricsMiddleware[F[_]: Async]( @@ -64,14 +65,14 @@ object HttpServer { StatsDMetricFactoryConfig(Some(metricsConfig.statsd.prefix), server, defaultTags = tags) } - private[core] def hstsMiddleware[F[_]: Async](hsts: Config.HSTS, routes: HttpApp[F]): HttpApp[F] = + private[core] def hstsApp[F[_]: Async](hsts: Config.HSTS, routes: HttpRoutes[F]): HttpApp[F] = if (hsts.enable) - HSTS(routes, `Strict-Transport-Security`.unsafeFromDuration(hsts.maxAge)) - else routes + HSTS(routes.orNotFound, `Strict-Transport-Security`.unsafeFromDuration(hsts.maxAge)) + else routes.orNotFound - private def loggerMiddleware[F[_]: Async](routes: HttpApp[F], config: Config.Debug.Http): HttpApp[F] = + private def loggerMiddleware[F[_]: Async](routes: HttpRoutes[F], config: Config.Debug.Http): HttpRoutes[F] = if (config.enable) { - LoggerMiddleware.httpApp[F]( + 
LoggerMiddleware.httpRoutes[F]( logHeaders = config.logHeaders, logBody = config.logBody, redactHeadersWhen = config.redactHeaders.map(CIString(_)).contains(_), @@ -79,11 +80,12 @@ object HttpServer { )(routes) } else routes - private def timeoutMiddleware[F[_]: Async](routes: HttpApp[F], networking: Config.Networking): HttpApp[F] = - Timeout.httpApp[F](timeout = networking.responseHeaderTimeout)(routes) + private def timeoutMiddleware[F[_]: Async](routes: HttpRoutes[F], networking: Config.Networking): HttpRoutes[F] = + Timeout.httpRoutes[F](timeout = networking.responseHeaderTimeout)(routes) private def buildBlazeServer[F[_]: Async]( routes: HttpRoutes[F], + healthRoutes: HttpRoutes[F], port: Int, secure: Boolean, hsts: Config.HSTS, @@ -94,7 +96,10 @@ object HttpServer { BlazeServerBuilder[F] .bindSocketAddress(new InetSocketAddress(port)) .withHttpApp( - loggerMiddleware(timeoutMiddleware(hstsMiddleware(hsts, routes.orNotFound), networking), debugHttp) + hstsApp( + hsts, + loggerMiddleware(timeoutMiddleware(routes, networking) <+> healthRoutes, debugHttp) + ) ) .withIdleTimeout(networking.idleTimeout) .withMaxConnections(networking.maxConnections) diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala index fd699a15a..b340113f6 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Routes.scala @@ -26,20 +26,6 @@ class Routes[F[_]: Async]( implicit val dns: Dns[F] = Dns.forSync[F] - private val healthRoutes = HttpRoutes.of[F] { - case GET -> Root / "health" => - Ok("ok") - case GET -> Root / "sink-health" => - service - .sinksHealthy - .ifM( - ifTrue = Ok("ok"), - ifFalse = ServiceUnavailable("Service Unavailable") - ) - case GET -> Root / "robots.txt" => - Ok("User-agent: *\nDisallow: /\n\nUser-agent: Googlebot\nDisallow: /\n\nUser-agent: 
AdsBot-Google\nDisallow: /") - } - private val corsRoute = HttpRoutes.of[F] { case req @ OPTIONS -> _ => service.preflightResponse(req) @@ -91,8 +77,22 @@ class Routes[F[_]: Async]( service.crossdomainResponse } + val health = HttpRoutes.of[F] { + case GET -> Root / "health" => + Ok("ok") + case GET -> Root / "sink-health" => + service + .sinksHealthy + .ifM( + ifTrue = Ok("ok"), + ifFalse = ServiceUnavailable("Service Unavailable") + ) + case GET -> Root / "robots.txt" => + Ok("User-agent: *\nDisallow: /\n\nUser-agent: Googlebot\nDisallow: /\n\nUser-agent: AdsBot-Google\nDisallow: /") + } + val value: HttpRoutes[F] = { - val routes = healthRoutes <+> corsRoute <+> cookieRoutes <+> rootRoute <+> crossdomainRoute + val routes = corsRoute <+> cookieRoutes <+> rootRoute <+> crossdomainRoute if (enableDefaultRedirect) routes else rejectRedirect <+> routes } } diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala index bcf0c60d9..512b355fc 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala @@ -93,13 +93,15 @@ object Run { Sinks(sinks.good, sinks.bad), appInfo ) + routes = new Routes[F]( + config.enableDefaultRedirect, + config.rootResponse.enabled, + config.crossDomain.enabled, + collectorService + ) httpServer = HttpServer.build[F]( - new Routes[F]( - config.enableDefaultRedirect, - config.rootResponse.enabled, - config.crossDomain.enabled, - collectorService - ).value, + routes.value, + routes.health, if (config.ssl.enable) config.ssl.port else config.port, config.ssl.enable, config.hsts, diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala index d75249b8b..43d96d175 100644 --- 
a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/RoutesSpec.scala @@ -5,6 +5,7 @@ import cats.data.NonEmptyList import scala.collection.mutable.ListBuffer import org.specs2.mutable.Specification import cats.effect.IO +import cats.syntax.all._ import cats.effect.unsafe.implicits.global import org.http4s.implicits._ import org.http4s._ @@ -73,8 +74,8 @@ class RoutesSpec extends Specification { ) = { val service = new TestService() val routes = - new Routes(enabledDefaultRedirect, enableRootResponse, enableCrossdomainTracking, service).value.orNotFound - val routesWithHsts = HttpServer.hstsMiddleware(Config.HSTS(enableHsts, 180.days), routes) + new Routes(enabledDefaultRedirect, enableRootResponse, enableCrossdomainTracking, service) + val routesWithHsts = HttpServer.hstsApp(Config.HSTS(enableHsts, 180.days), (routes.value <+> routes.health)) (service, routesWithHsts) } From 3be22e42bf01eac998fa63920be95f56de720223 Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Tue, 20 Aug 2024 17:24:28 +0200 Subject: [PATCH 07/14] Extend default timeouts to match upstream defaults The reference defaults should be less strict and match the settings we define upstream. 
--- core/src/main/resources/reference.conf | 2 +- .../TestUtils.scala | 2 +- .../KafkaConfigSpec.scala | 2 +- .../sinks/KinesisConfigSpec.scala | 2 +- .../NsqConfigSpec.scala | 2 +- .../ConfigSpec.scala | 2 +- .../SqsConfigSpec.scala | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index aa92fc969..2704141aa 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -94,7 +94,7 @@ networking { maxConnections = 1024 idleTimeout = 610 seconds - responseHeaderTimeout = 5 seconds + responseHeaderTimeout = 30 seconds maxRequestLineLength = 20480 maxHeadersLength = 40960 } diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala index e30ea43b4..11250a127 100644 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala @@ -120,7 +120,7 @@ object TestUtils { networking = Networking( 1024, 610.seconds, - 5.seconds, + 30.seconds, 20480, 40960 ), diff --git a/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala b/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala index ba4593e60..2b5b06611 100644 --- a/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala +++ b/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala @@ -171,7 +171,7 @@ object KafkaConfigSpec { networking = Config.Networking( maxConnections = 1024, idleTimeout = 610.seconds, - responseHeaderTimeout = 5.seconds, + responseHeaderTimeout = 30.seconds, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git 
a/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala b/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala index 2ac2b4c6f..44609c59b 100644 --- a/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala +++ b/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala @@ -125,7 +125,7 @@ object KinesisConfigSpec { networking = Config.Networking( maxConnections = 1024, idleTimeout = 610.seconds, - responseHeaderTimeout = 5.seconds, + responseHeaderTimeout = 30.seconds, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git a/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala b/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala index f08badb6a..90acf07b6 100644 --- a/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala +++ b/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala @@ -158,7 +158,7 @@ object NsqConfigSpec { networking = Config.Networking( maxConnections = 1024, idleTimeout = 610.seconds, - responseHeaderTimeout = 5.seconds, + responseHeaderTimeout = 30.seconds, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git a/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala b/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala index e1d66d3fc..a0091b626 100644 --- a/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala +++ b/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala @@ -114,7 +114,7 @@ object ConfigSpec { networking = Config.Networking( maxConnections = 1024, idleTimeout = 610.seconds, - 
responseHeaderTimeout = 5.seconds, + responseHeaderTimeout = 30.seconds, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), diff --git a/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala b/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala index 762be70ac..f02d3665f 100644 --- a/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala +++ b/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala @@ -115,7 +115,7 @@ object SqsConfigSpec { networking = Config.Networking( maxConnections = 1024, idleTimeout = 610.seconds, - responseHeaderTimeout = 5.seconds, + responseHeaderTimeout = 30.seconds, maxRequestLineLength = 20480, maxHeadersLength = 40960 ), From 202a93730b77c72c84f07450e4c7365f4d15b685 Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Tue, 20 Aug 2024 17:26:59 +0200 Subject: [PATCH 08/14] Explicitly return 408 when timeout is hit Previously, we would return 503 Service Unavailable, suggesting that failures should not be retried and leading to confusion with early timeout being hit. Now, we return 408 Request Timeout which is more explicit and easier to monitor. 
--- .../HttpServer.scala | 72 +++++++++++-------- .../Run.scala | 2 +- .../HttpServerSpec.scala | 52 ++++++++++++++ 3 files changed, 95 insertions(+), 31 deletions(-) create mode 100644 core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala index 31c26c1f6..b0d30c424 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala @@ -26,6 +26,8 @@ import org.typelevel.log4cats.slf4j.Slf4jLogger import java.net.InetSocketAddress import javax.net.ssl.SSLContext +import org.http4s.Response +import org.http4s.Status object HttpServer { @@ -40,12 +42,46 @@ object HttpServer { networking: Config.Networking, metricsConfig: Config.Metrics, debugHttp: Config.Debug.Http + )( + mkServer: ((HttpApp[F], Int, Boolean, Config.Networking) => Resource[F, Server]) ): Resource[F, Server] = for { withMetricsMiddleware <- createMetricsMiddleware(routes, metricsConfig) - server <- buildBlazeServer[F](withMetricsMiddleware, healthRoutes, port, secure, hsts, networking, debugHttp) + httpApp <- Resource.pure(httpApp(withMetricsMiddleware, healthRoutes, hsts, networking, debugHttp)) + server <- mkServer(httpApp, port, secure, networking) } yield server + def buildBlazeServer[F[_]: Async]( + httpApp: HttpApp[F], + port: Int, + secure: Boolean, + networking: Config.Networking + ): Resource[F, Server] = + Resource.eval(Logger[F].info("Building blaze server")) >> + BlazeServerBuilder[F] + .bindSocketAddress(new InetSocketAddress(port)) + .withHttpApp(httpApp) + .withIdleTimeout(networking.idleTimeout) + .withMaxConnections(networking.maxConnections) + .withResponseHeaderTimeout(networking.responseHeaderTimeout) + .withLengthLimits( + maxRequestLineLen = 
networking.maxRequestLineLength, + maxHeadersLen = networking.maxHeadersLength + ) + .cond(secure, _.withSslContext(SSLContext.getDefault)) + .resource + + def httpApp[F[_]: Async]( + routes: HttpRoutes[F], + healthRoutes: HttpRoutes[F], + hsts: Config.HSTS, + networking: Config.Networking, + debugHttp: Config.Debug.Http + ): HttpApp[F] = hstsApp( + hsts, + loggerMiddleware(timeoutMiddleware(routes, networking) <+> healthRoutes, debugHttp) + ) + private def createMetricsMiddleware[F[_]: Async]( routes: HttpRoutes[F], metricsConfig: Config.Metrics @@ -81,35 +117,11 @@ object HttpServer { } else routes private def timeoutMiddleware[F[_]: Async](routes: HttpRoutes[F], networking: Config.Networking): HttpRoutes[F] = - Timeout.httpRoutes[F](timeout = networking.responseHeaderTimeout)(routes) - - private def buildBlazeServer[F[_]: Async]( - routes: HttpRoutes[F], - healthRoutes: HttpRoutes[F], - port: Int, - secure: Boolean, - hsts: Config.HSTS, - networking: Config.Networking, - debugHttp: Config.Debug.Http - ): Resource[F, Server] = - Resource.eval(Logger[F].info("Building blaze server")) >> - BlazeServerBuilder[F] - .bindSocketAddress(new InetSocketAddress(port)) - .withHttpApp( - hstsApp( - hsts, - loggerMiddleware(timeoutMiddleware(routes, networking) <+> healthRoutes, debugHttp) - ) - ) - .withIdleTimeout(networking.idleTimeout) - .withMaxConnections(networking.maxConnections) - .withResponseHeaderTimeout(networking.responseHeaderTimeout) - .withLengthLimits( - maxRequestLineLen = networking.maxRequestLineLength, - maxHeadersLen = networking.maxHeadersLength - ) - .cond(secure, _.withSslContext(SSLContext.getDefault)) - .resource + Timeout.httpRoutes[F](timeout = networking.responseHeaderTimeout)(routes).map { + case Response(Status.ServiceUnavailable, httpVersion, headers, body, attributes) => + Response[F](Status.RequestTimeout, httpVersion, headers, body, attributes) + case response => response + } implicit class ConditionalAction[A](item: A) { def cond(cond: 
Boolean, action: A => A): A = diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala index 512b355fc..d422f2c5c 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala @@ -108,7 +108,7 @@ object Run { config.networking, config.monitoring.metrics, config.debug.http - ) + )(HttpServer.buildBlazeServer) _ <- withGracefulShutdown(config.preTerminationPeriod)(httpServer) httpClient <- BlazeClientBuilder[F].resource } yield httpClient diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala new file mode 100644 index 000000000..dc91f0835 --- /dev/null +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala @@ -0,0 +1,52 @@ +package com.snowplowanalytics.snowplow.collector.core + +import org.specs2.mutable.Specification +import cats.effect.IO + +import org.http4s.client.Client +import org.http4s._ +import org.http4s.dsl.io._ +import org.http4s.implicits._ +import scala.concurrent.duration._ +import cats.effect.testing.specs2._ + +class HttpServerSpec extends Specification with CatsEffect { + val routes = HttpRoutes.of[IO] { + case _ -> Root / "fast" => + Ok("Fast") + case _ -> Root / "never" => + IO.never[Response[IO]] + } + val healthRoutes = HttpRoutes.of[IO] { + case _ -> Root / "health" => + Ok("ok") + } + + "HttpServer" should { + "manage request timeout" should { + "timeout threshold is configured" in { + val config = + TestUtils + .testConfig + .copy(networking = TestUtils.testConfig.networking.copy(responseHeaderTimeout = 100.millis)) + val httpApp = HttpServer.httpApp( + routes, + healthRoutes, + config.hsts, + config.networking, + config.debug.http + ) + val client: 
Client[IO] = Client.fromHttpApp(httpApp) + val request: Request[IO] = Request(method = Method.GET, uri = uri"/never") + val res: IO[String] = client.expect[String](request) + + res + .attempt + .map(_ must beLeft[Throwable].which { + case org.http4s.client.UnexpectedStatus(Status.RequestTimeout, _, _) => true + case _ => false + }) + } + } + } +} From e494f56bef08350df7b5af241552810c0bc5b6c6 Mon Sep 17 00:00:00 2001 From: Benjamin Benoist Date: Fri, 8 Nov 2024 16:36:26 +0100 Subject: [PATCH 09/14] PDP-1526 Remove parts of the cookies that are not valid according to RFC 6265 --- .../Rfc6265Cookie.scala | 28 +++++++++++++++ .../Service.scala | 2 +- .../Rfc6265CookieSpec.scala | 35 +++++++++++++++++++ .../ServiceSpec.scala | 2 +- 4 files changed, 65 insertions(+), 2 deletions(-) create mode 100644 core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265Cookie.scala create mode 100644 core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265CookieSpec.scala diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265Cookie.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265Cookie.scala new file mode 100644 index 000000000..983f2087d --- /dev/null +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265Cookie.scala @@ -0,0 +1,28 @@ +/** + * Copyright (c) 2013-present Snowplow Analytics Ltd. + * All rights reserved. + * + * This software is made available by Snowplow Analytics, Ltd., + * under the terms of the Snowplow Limited Use License Agreement, Version 1.0 + * located at https://docs.snowplow.io/limited-use-license-1.0 + * BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION + * OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT. 
+ */ +package com.snowplowanalytics.snowplow.collector.core + +object Rfc6265Cookie { + + // See https://www.ietf.org/rfc/rfc6265.txt + private val allowedChars = Set(0x21.toChar) ++ + Set(0x23.toChar to 0x2b.toChar: _*) ++ + Set(0x2d.toChar to 0x3a.toChar: _*) ++ + Set(0x3c.toChar to 0x5b.toChar: _*) ++ + Set(0x5d.toChar to 0x7e.toChar: _*) + + // Remove all the sub-parts (between two ';') that contain unauthorized characters + def parse(rawCookie: String): Option[String] = + rawCookie.replaceAll(" ", "").split(";").filter(_.forall(allowedChars.contains)).mkString(";") match { + case s if s.nonEmpty => Some(s) + case _ => None + } +} diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala index 9acff5b66..78121d16d 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Service.scala @@ -305,7 +305,7 @@ class Service[F[_]: Sync]( case ci"X-Forwarded-For" | ci"X-Real-Ip" | ci"Cookie" if spAnonymous.isDefined => None // FIXME: This is a temporary backport of old akka behaviour we will remove by // adapting enrich to support a CIString header names as per RFC7230#Section-3.2 - case ci"Cookie" => Some(s"Cookie: ${h.value}") + case ci"Cookie" => Rfc6265Cookie.parse(h.value).map(c => s"Cookie: $c") case _ => Some(h.toString()) } } diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265CookieSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265CookieSpec.scala new file mode 100644 index 000000000..59950e9a5 --- /dev/null +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/Rfc6265CookieSpec.scala @@ -0,0 +1,35 @@ +package com.snowplowanalytics.snowplow.collector.core + +import org.specs2.mutable.Specification + +class Rfc6265CookieSpec extends Specification { + val 
valid1 = "name=value" + val valid2 = "name1=value2" + val bothValid = s"$valid1;$valid2" + val invalid = "{\"key\": \"value\"}" + + "Rfc6265Cookie.parse" should { + "leave a valid cookie as is" in { + Rfc6265Cookie.parse(valid1) must beSome(valid1) + Rfc6265Cookie.parse(bothValid) must beSome(bothValid) + } + + "remove whitespaces" in { + Rfc6265Cookie.parse(s" $valid1 ") must beSome(valid1) + Rfc6265Cookie.parse("name = value") must beSome(valid1) + } + + "remove invalid parts" in { + Rfc6265Cookie.parse(s"$invalid;$valid1;$valid2") must beSome(bothValid) + Rfc6265Cookie.parse(s"$valid1;$invalid;$valid2") must beSome(bothValid) + Rfc6265Cookie.parse(s"$valid1;$valid2;$invalid") must beSome(bothValid) + } + + "return None if no valid part is left" in { + Rfc6265Cookie.parse(invalid) must beNone + Rfc6265Cookie.parse(s";$invalid;") must beNone + Rfc6265Cookie.parse(";") must beNone + Rfc6265Cookie.parse(";;") must beNone + } + } +} diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala index 164b30cf5..a40f1ba52 100644 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/ServiceSpec.scala @@ -249,7 +249,7 @@ class ServiceSpec extends Specification { "Content-Type: application/json", "X-Forwarded-For: 192.0.2.3", "Access-Control-Allow-Credentials: true", - "Cookie: cookie=value; sp=dfdb716e-ecf9-4d00-8b10-44edfbc8a108", + "Cookie: cookie=value;sp=dfdb716e-ecf9-4d00-8b10-44edfbc8a108", "image/gif" ).asJava e.contentType shouldEqual "image/gif" From 9807f4af192177cda46160a09c5060346feea0a6 Mon Sep 17 00:00:00 2001 From: Ian Streeter Date: Wed, 6 Nov 2024 07:33:47 +0000 Subject: [PATCH 10/14] Kafka sink to open fewer threads --- .../sinks/KafkaSink.scala | 74 +++++++++++++------ project/BuildSettings.scala | 2 +- 
project/Dependencies.scala | 4 +- 3 files changed, 53 insertions(+), 27 deletions(-) diff --git a/kafka/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KafkaSink.scala b/kafka/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KafkaSink.scala index 0917bbc4b..c909f8e7d 100644 --- a/kafka/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KafkaSink.scala +++ b/kafka/src/main/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KafkaSink.scala @@ -13,25 +13,24 @@ package sinks import cats.implicits._ import cats.effect._ - -import org.slf4j.LoggerFactory - -import fs2.kafka._ +import org.typelevel.log4cats.Logger +import org.typelevel.log4cats.slf4j.Slf4jLogger +import org.apache.kafka.clients.producer.{Callback, KafkaProducer, ProducerRecord, RecordMetadata} import com.snowplowanalytics.snowplow.collector.core.{Config, Sink} +import scala.jdk.CollectionConverters._ + /** * Kafka Sink for the Scala Stream Collector */ -class KafkaSink[F[_]: Async]( +class KafkaSink[F[_]: Async: Logger]( val maxBytes: Int, isHealthyState: Ref[F, Boolean], - kafkaProducer: KafkaProducer[F, String, Array[Byte]], + kafkaProducer: KafkaProducer[String, Array[Byte]], topicName: String ) extends Sink[F] { - private lazy val log = LoggerFactory.getLogger(getClass()) - override def isHealthy: F[Boolean] = isHealthyState.get /** @@ -40,17 +39,40 @@ class KafkaSink[F[_]: Async]( * @param events The list of events to send * @param key The partition key to use */ - override def storeRawEvents(events: List[Array[Byte]], key: String): F[Unit] = { - log.debug(s"Writing ${events.size} Thrift records to Kafka topic $topicName at key $key") - val records = ProducerRecords(events.map(e => (ProducerRecord(topicName, key, e)))) - kafkaProducer.produce(records).onError { case _: Throwable => isHealthyState.set(false) } *> isHealthyState.set( - true - ) - } + override def storeRawEvents(events: List[Array[Byte]], key: 
String): F[Unit] = + Logger[F].debug(s"Writing ${events.size} Thrift records to Kafka topic $topicName at key $key") *> + events.traverse_ { e => + def go: F[Unit] = + Async[F] + .async_[Unit] { cb => + val record = new ProducerRecord(topicName, key, e) + kafkaProducer.send(record, callback(cb)) + () + } + .handleErrorWith { e => + handlePublishError(e) >> go + } + go + } *> isHealthyState.set(true) + + private def callback(asyncCallback: Either[Throwable, Unit] => Unit): Callback = + new Callback { + def onCompletion(metadata: RecordMetadata, exception: Exception): Unit = + Option(exception) match { + case Some(e) => asyncCallback(Left(e)) + case None => asyncCallback(Right(())) + } + } + + private def handlePublishError(error: Throwable): F[Unit] = + isHealthyState.set(false) *> Logger[F].error(s"Publishing to Kafka failed with message ${error.getMessage}") } object KafkaSink { + implicit private def unsafeLogger[F[_]: Sync]: Logger[F] = + Slf4jLogger.getLogger[F] + def create[F[_]: Async]( sinkConfig: Config.Sink[KafkaSinkConfig], authCallbackClass: String @@ -58,8 +80,12 @@ object KafkaSink { for { isHealthyState <- Resource.eval(Ref.of[F, Boolean](false)) kafkaProducer <- createProducer(sinkConfig.config, sinkConfig.buffer, authCallbackClass) - kafkaSink = new KafkaSink(sinkConfig.config.maxBytes, isHealthyState, kafkaProducer, sinkConfig.name) - } yield kafkaSink + } yield new KafkaSink( + sinkConfig.config.maxBytes, + isHealthyState, + kafkaProducer, + sinkConfig.name + ) /** * Creates a new Kafka Producer with the given @@ -71,20 +97,20 @@ object KafkaSink { kafkaConfig: KafkaSinkConfig, bufferConfig: Config.Buffer, authCallbackClass: String - ): Resource[F, KafkaProducer[F, String, Array[Byte]]] = { + ): Resource[F, KafkaProducer[String, Array[Byte]]] = { val props = Map( + "bootstrap.servers" -> kafkaConfig.brokers, "acks" -> "all", "retries" -> kafkaConfig.retries.toString, - "buffer.memory" -> bufferConfig.byteLimit.toString, "linger.ms" -> 
bufferConfig.timeLimit.toString, "key.serializer" -> "org.apache.kafka.common.serialization.StringSerializer", "value.serializer" -> "org.apache.kafka.common.serialization.ByteArraySerializer", "sasl.login.callback.handler.class" -> authCallbackClass - ) ++ kafkaConfig.producerConf.getOrElse(Map.empty) - - val producerSettings = - ProducerSettings[F, String, Array[Byte]].withBootstrapServers(kafkaConfig.brokers).withProperties(props) + ) ++ kafkaConfig.producerConf.getOrElse(Map.empty) + ("buffer.memory" -> Long.MaxValue.toString) - KafkaProducer.resource(producerSettings) + val make = Sync[F].delay { + new KafkaProducer[String, Array[Byte]]((props: Map[String, AnyRef]).asJava) + } + Resource.make(make)(p => Sync[F].blocking(p.close)) } } diff --git a/project/BuildSettings.scala b/project/BuildSettings.scala index d1aebcb38..73389c784 100644 --- a/project/BuildSettings.scala +++ b/project/BuildSettings.scala @@ -87,7 +87,7 @@ object BuildSettings { moduleName := "snowplow-stream-collector-kafka", Docker / packageName := "scala-stream-collector-kafka", libraryDependencies ++= Seq( - Dependencies.Libraries.fs2Kafka, + Dependencies.Libraries.kafka, Dependencies.Libraries.mskAuth, Dependencies.Libraries.azureIdentity, diff --git a/project/Dependencies.scala b/project/Dependencies.scala index 8505c73a5..77f4a2002 100644 --- a/project/Dependencies.scala +++ b/project/Dependencies.scala @@ -25,7 +25,7 @@ object Dependencies { val fs2PubSub = "0.22.0" val http4s = "0.23.23" val jackson = "2.12.7" // force this version to mitigate security vulnerabilities - val fs2Kafka = "2.6.1" + val kafka = "3.8.1" val log4cats = "2.6.0" val log4j = "2.17.2" // CVE-2021-44228 val mskAuth = "1.1.1" @@ -68,7 +68,7 @@ object Dependencies { //sinks val fs2PubSub = "com.permutive" %% "fs2-google-pubsub-grpc" % V.fs2PubSub val jackson = "com.fasterxml.jackson.core" % "jackson-databind" % V.jackson - val fs2Kafka = "com.github.fd4s" %% "fs2-kafka" % V.fs2Kafka + val kafka = "org.apache.kafka" % 
"kafka-clients" % V.kafka val kinesis = "com.amazonaws" % "aws-java-sdk-kinesis" % V.awsSdk val log4j = "org.apache.logging.log4j" % "log4j-core" % V.log4j val mskAuth = "software.amazon.msk" % "aws-msk-iam-auth" % V.mskAuth % Runtime // Enables AWS MSK IAM authentication https://github.com/snowplow/stream-collector/pull/214 From b5a9d31fa21ec1369af80558374a88888da48b02 Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Fri, 20 Sep 2024 19:11:45 +0200 Subject: [PATCH 11/14] Remove debug http --- core/src/main/resources/reference.conf | 8 ------- .../Config.scala | 10 +------- .../HttpServer.scala | 23 ++++--------------- .../Run.scala | 3 +-- .../HttpServerSpec.scala | 3 +-- .../TestUtils.scala | 3 --- .../KafkaConfigSpec.scala | 5 +--- .../sinks/KinesisConfigSpec.scala | 5 +--- .../NsqConfigSpec.scala | 5 +--- .../ConfigSpec.scala | 5 +--- .../SqsConfigSpec.scala | 5 +--- 11 files changed, 13 insertions(+), 62 deletions(-) diff --git a/core/src/main/resources/reference.conf b/core/src/main/resources/reference.conf index 2704141aa..fb5ef5d1f 100644 --- a/core/src/main/resources/reference.conf +++ b/core/src/main/resources/reference.conf @@ -99,14 +99,6 @@ maxHeadersLength = 40960 } - debug { - http { - enable = false - logHeaders = true - logBody = false - redactHeaders = [] - } - } enableDefaultRedirect = false preTerminationPeriod = 10 seconds diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala index 8f9061018..8e878a454 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Config.scala @@ -41,8 +41,7 @@ case class Config[+SinkConfig]( enableDefaultRedirect: Boolean, redirectDomains: Set[String], preTerminationPeriod: FiniteDuration, - license: Config.License, - debug: Config.Debug.Debug + license: Config.License ) object Config { @@ 
-169,11 +168,6 @@ object Config { accept: Boolean ) - object Debug { - case class Http(enable: Boolean, logHeaders: Boolean, logBody: Boolean, redactHeaders: List[String]) - case class Debug(http: Http) - } - implicit def decoder[SinkConfig: Decoder]: Decoder[Config[SinkConfig]] = { implicit val license: Decoder[License] = { val truthy = Set("true", "yes", "on", "1") @@ -206,8 +200,6 @@ object Config { implicit val hsts = deriveDecoder[HSTS] implicit val telemetry = deriveDecoder[Telemetry] implicit val networking = deriveDecoder[Networking] - implicit val http = deriveDecoder[Debug.Http] - implicit val debug = deriveDecoder[Debug.Debug] implicit val sinkConfig = newDecoder[SinkConfig].or(legacyDecoder[SinkConfig]) implicit val streams = deriveDecoder[Streams[SinkConfig]] diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala index b0d30c424..32f2894d4 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/HttpServer.scala @@ -19,8 +19,7 @@ import org.http4s.{HttpApp, HttpRoutes} import org.http4s.blaze.server.BlazeServerBuilder import org.http4s.headers.`Strict-Transport-Security` import org.http4s.server.Server -import org.http4s.server.middleware.{HSTS, Logger => LoggerMiddleware, Metrics, Timeout} -import org.typelevel.ci.CIString +import org.http4s.server.middleware.{HSTS, Metrics, Timeout} import org.typelevel.log4cats.Logger import org.typelevel.log4cats.slf4j.Slf4jLogger @@ -40,14 +39,13 @@ object HttpServer { secure: Boolean, hsts: Config.HSTS, networking: Config.Networking, - metricsConfig: Config.Metrics, - debugHttp: Config.Debug.Http + metricsConfig: Config.Metrics )( mkServer: ((HttpApp[F], Int, Boolean, Config.Networking) => Resource[F, Server]) ): Resource[F, Server] = for { withMetricsMiddleware <- 
createMetricsMiddleware(routes, metricsConfig) - httpApp <- Resource.pure(httpApp(withMetricsMiddleware, healthRoutes, hsts, networking, debugHttp)) + httpApp <- Resource.pure(httpApp(withMetricsMiddleware, healthRoutes, hsts, networking)) server <- mkServer(httpApp, port, secure, networking) } yield server @@ -75,11 +73,10 @@ object HttpServer { routes: HttpRoutes[F], healthRoutes: HttpRoutes[F], hsts: Config.HSTS, - networking: Config.Networking, - debugHttp: Config.Debug.Http + networking: Config.Networking ): HttpApp[F] = hstsApp( hsts, - loggerMiddleware(timeoutMiddleware(routes, networking) <+> healthRoutes, debugHttp) + timeoutMiddleware(routes, networking) <+> healthRoutes ) private def createMetricsMiddleware[F[_]: Async]( @@ -106,16 +103,6 @@ object HttpServer { HSTS(routes.orNotFound, `Strict-Transport-Security`.unsafeFromDuration(hsts.maxAge)) else routes.orNotFound - private def loggerMiddleware[F[_]: Async](routes: HttpRoutes[F], config: Config.Debug.Http): HttpRoutes[F] = - if (config.enable) { - LoggerMiddleware.httpRoutes[F]( - logHeaders = config.logHeaders, - logBody = config.logBody, - redactHeadersWhen = config.redactHeaders.map(CIString(_)).contains(_), - logAction = Some((msg: String) => Logger[F].debug(msg)) - )(routes) - } else routes - private def timeoutMiddleware[F[_]: Async](routes: HttpRoutes[F], networking: Config.Networking): HttpRoutes[F] = Timeout.httpRoutes[F](timeout = networking.responseHeaderTimeout)(routes).map { case Response(Status.ServiceUnavailable, httpVersion, headers, body, attributes) => diff --git a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala index d422f2c5c..194124168 100644 --- a/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala +++ b/core/src/main/scala/com.snowplowanalytics.snowplow.collector.core/Run.scala @@ -106,8 +106,7 @@ object Run { config.ssl.enable, config.hsts, 
config.networking, - config.monitoring.metrics, - config.debug.http + config.monitoring.metrics )(HttpServer.buildBlazeServer) _ <- withGracefulShutdown(config.preTerminationPeriod)(httpServer) httpClient <- BlazeClientBuilder[F].resource diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala index dc91f0835..44d1939b5 100644 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/HttpServerSpec.scala @@ -33,8 +33,7 @@ class HttpServerSpec extends Specification with CatsEffect { routes, healthRoutes, config.hsts, - config.networking, - config.debug.http + config.networking ) val client: Client[IO] = Client.fromHttpApp(httpApp) val request: Request[IO] = Request(method = Method.GET, uri = uri"/never") diff --git a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala index 11250a127..a79802aae 100644 --- a/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala +++ b/core/src/test/scala/com.snowplowanalytics.snowplow.collector.core/TestUtils.scala @@ -124,9 +124,6 @@ object TestUtils { 20480, 40960 ), - debug = Debug.Debug( - http = Debug.Http(enable = false, logHeaders = true, logBody = false, redactHeaders = List.empty) - ), enableDefaultRedirect = false, redirectDomains = Set.empty[String], preTerminationPeriod = 10.seconds, diff --git a/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala b/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala index 2b5b06611..7ea51fa5b 100644 --- a/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala +++ 
b/kafka/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/KafkaConfigSpec.scala @@ -175,9 +175,6 @@ object KafkaConfigSpec { maxRequestLineLength = 20480, maxHeadersLength = 40960 ), - license = Config.License(accept = true), - debug = Config - .Debug - .Debug(Config.Debug.Http(enable = false, logHeaders = true, logBody = false, redactHeaders = List.empty)) + license = Config.License(accept = true) ) } diff --git a/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala b/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala index 44609c59b..c3c962712 100644 --- a/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala +++ b/kinesis/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/sinks/KinesisConfigSpec.scala @@ -191,10 +191,7 @@ object KinesisConfigSpec { instanceId = None, autoGeneratedId = None ), - license = Config.License(accept = true), - debug = Config - .Debug - .Debug(Config.Debug.Http(enable = false, logHeaders = true, logBody = false, redactHeaders = List.empty)) + license = Config.License(accept = true) ) } diff --git a/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala b/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala index 90acf07b6..f57902f30 100644 --- a/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala +++ b/nsq/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/NsqConfigSpec.scala @@ -162,9 +162,6 @@ object NsqConfigSpec { maxRequestLineLength = 20480, maxHeadersLength = 40960 ), - license = Config.License(accept = true), - debug = Config - .Debug - .Debug(Config.Debug.Http(enable = false, logHeaders = true, logBody = false, redactHeaders = List.empty)) + license = Config.License(accept = true) ) } diff --git 
a/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala b/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala index a0091b626..bfd039b33 100644 --- a/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala +++ b/pubsub/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/ConfigSpec.scala @@ -182,10 +182,7 @@ object ConfigSpec { instanceId = None, autoGeneratedId = None ), - license = Config.License(accept = true), - debug = Config - .Debug - .Debug(Config.Debug.Http(enable = false, logHeaders = true, logBody = false, redactHeaders = List.empty)) + license = Config.License(accept = true) ) } diff --git a/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala b/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala index f02d3665f..df6fe1a08 100644 --- a/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala +++ b/sqs/src/test/scala/com.snowplowanalytics.snowplow.collectors.scalastream/SqsConfigSpec.scala @@ -171,10 +171,7 @@ object SqsConfigSpec { instanceId = None, autoGeneratedId = None ), - license = Config.License(accept = true), - debug = Config - .Debug - .Debug(Config.Debug.Http(enable = false, logHeaders = true, logBody = false, redactHeaders = List.empty)) + license = Config.License(accept = true) ) } From 4f679d90deac7231cef49470bed436a817f9e89c Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Tue, 15 Oct 2024 12:15:52 +0200 Subject: [PATCH 12/14] Update workflows to install sbt --- .github/workflows/cookieless.yml | 2 ++ .github/workflows/deploy.yml | 2 ++ .github/workflows/telemetryIntegTest.yml | 4 +++- .github/workflows/test.yml | 2 ++ 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cookieless.yml b/.github/workflows/cookieless.yml index 42c7007f3..7cbf28720 100644 --- 
a/.github/workflows/cookieless.yml +++ b/.github/workflows/cookieless.yml @@ -12,6 +12,8 @@ jobs: uses: actions/setup-java@v1 with: java-version: 11 + - name: Install sbt + uses: sbt/setup-sbt@v1 - name: Publish Docker image run: sbt 'project stdout; set Docker / version := "0.0.0"' docker:publishLocal - name: Run Docker image diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index dc95026a5..6ef7e18c1 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -15,6 +15,8 @@ jobs: uses: actions/setup-java@v1 with: java-version: 11 + - name: Install sbt + uses: sbt/setup-sbt@v1 - name: Build artifacts run: | sbt 'project kafka' assembly diff --git a/.github/workflows/telemetryIntegTest.yml b/.github/workflows/telemetryIntegTest.yml index 247f52eb1..78d4380ba 100644 --- a/.github/workflows/telemetryIntegTest.yml +++ b/.github/workflows/telemetryIntegTest.yml @@ -12,6 +12,8 @@ jobs: uses: actions/setup-java@v1 with: java-version: 11 + - name: Install sbt + uses: sbt/setup-sbt@v1 - name: Set up python uses: actions/setup-python@v2 with: @@ -41,4 +43,4 @@ jobs: pip install requests python3 .github/workflows/integration_tests/telemetry/verify_micro_content.py - name: clean up - run: docker stop micro \ No newline at end of file + run: docker stop micro diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4edbc7ed8..35847f7e9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -12,6 +12,8 @@ jobs: uses: actions/setup-java@v1 with: java-version: 11 + - name: Install sbt + uses: sbt/setup-sbt@v1 - name: Check formatting run: sbt scalafmtCheckAll - name: Run unit tests From cbe0bfcb3fc6e35009aa55a86132b7bc1fb76066 Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Thu, 21 Nov 2024 14:54:17 +0100 Subject: [PATCH 13/14] Update README --- README.md | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 3c039e16f..1ff7a1bb1 
100644 --- a/README.md +++ b/README.md @@ -6,15 +6,15 @@ ## Introduction -Stream Collector receives raw [Snowplow][snowplow] events sent over HTTP by trackers or webhooks. It serializes them to a [Thrift][thrift] record format, and then writes them to one of supported sinks like [Amazon Kinesis][kinesis], [Google PubSub][pubsub], [Apache Kafka][kafka], [Amazon SQS][sqs], [NSQ][nsq]. -The Stream Collector supports cross-domain Snowplow deployments, setting a user_id (used to identify unique visitors) server side to reliably identify the same user across domains. +Stream Collector receives raw [Snowplow][snowplow] events sent over HTTP by trackers or webhooks. It serializes them to a [Thrift][thrift] record format, and then writes them to one of the supported sinks like [Amazon Kinesis][kinesis], [Google PubSub][pubsub], [Apache Kafka][kafka], [Amazon SQS][sqs], [NSQ][nsq]. +The Stream Collector supports cross-domain Snowplow deployments, setting a `user_id` (used to identify unique visitors) server side to reliably identify the same user across domains. ## Find out more -| Technical Docs | Setup Guide | Roadmap | Contributing | -|----------------------------|----------------------|-----------------------------|---------------------------| -| ![i1][techdocs-image] | ![i2][setup-image] | ![i3][roadmap-image] | ![i4][contributing-image] | -| [Technical Docs][techdocs] | [Setup Guide][setup] | [Snowplow Roadmap][roadmap] | _coming soon_ | +| Technical Docs | Setup Guide | Contributing | +|----------------------------|----------------------|------------------------------| +| ![i1][techdocs-image] | ![i2][setup-image] | ![i4][contributing-image] | +| [Technical Docs][techdocs] | [Setup Guide][setup] | [Contributing][contributing] | ## Copyright and license @@ -22,23 +22,22 @@ Copyright (c) 2023-present Snowplow Analytics Ltd. All rights reserved. Licensed under the [Snowplow Limited Use License Agreement][license]. 
_(If you are uncertain how it applies to your use case, check our answers to [frequently asked questions][faq].)_ -[snowplow]: http://snowplowanalytics.com +[snowplow]: https://snowplow.io/ [thrift]: http://thrift.apache.org [kinesis]: http://aws.amazon.com/kinesis [pubsub]: https://cloud.google.com/pubsub/ +[kafka]: http://kafka.apache.org [sqs]: https://aws.amazon.com/sqs/ [nsq]: http://nsq.io/ [techdocs-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/techdocs.png [setup-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/setup.png -[roadmap-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/roadmap.png [contributing-image]: https://d3i6fms1cm1j0i.cloudfront.net/github/images/contributing.png -[techdocs]: https://docs.snowplowanalytics.com/docs/pipeline-components-and-applications/stream-collector/ -[setup]: https://docs.snowplowanalytics.com/docs/getting-started-on-snowplow-open-source/ -[roadmap]: https://github.com/snowplow/snowplow/projects/7 -[contributing]: https://docs.snowplowanalytics.com/docs/contributing/ +[techdocs]: https://docs.snowplow.io/docs/pipeline-components-and-applications/stream-collector/ +[setup]: https://docs.snowplow.io/docs/getting-started-on-community-edition/ +[contributing]: https://docs.snowplow.io/docs/contributing/ [build-image]: https://github.com/snowplow/stream-collector/workflows/build/badge.svg [build-wf]: https://github.com/snowplow/stream-collector/actions?query=workflow%3Abuild From f37af900280cc40a21c01689dd8c5a12a15c8afd Mon Sep 17 00:00:00 2001 From: Piotr Limanowski Date: Thu, 21 Nov 2024 19:31:02 +0100 Subject: [PATCH 14/14] Prepare for 3.2.1 release --- CHANGELOG | 528 ------------------------------------------- CHANGELOG.md | 622 +++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 622 insertions(+), 528 deletions(-) delete mode 100644 CHANGELOG create mode 100644 CHANGELOG.md diff --git a/CHANGELOG b/CHANGELOG deleted file mode 100644 index 8e99919b4..000000000 --- 
a/CHANGELOG +++ /dev/null @@ -1,528 +0,0 @@ -Release 3.2.0 (2024-03-11) -+-------------------------- -Bump transitive jnr-posix to 3.1.8 (#419) -Add snowman job for tag builds -Prevent Kafka sink from blocking (#418) -Allow setting size limit on line and header length (#417) -Add debug logging and timeout configurations (#417) -Add timeout for body parsing (#417) -Expand default time limit (#417) -Cross compile to scala 2.12 -Log cats-effect warning at debug level (#414) -collector-kafka: authenticate with Event Hubs using OAuth2 (#401) - -Release 3.1.2 (2024-02-22) -+-------------------------- -Improve relative redirect in cookie bounce feature (#413) - -Release 3.1.1 (2024-02-20) -+-------------------------- -Upcase cookie header name (#412) - -Release 3.1.0 (2024-01-25) --------------------------- -Add an option to send HSTS header (#408) - -Release 3.0.1 (2024-01-10) --------------------------- -Remove unnecessary argument (#407) - -Release 3.0.0 (2024-01-08) --------------------------- -Add mandatory SLULA license acceptance flag (close #405) -Remove unused warmup config section -Use shortname for collector name (close #403) -Add statsd metrics reporting (close #404) -Add support for Do Not Track cookie (close #400) -Add crossdomain.xml support (close #399) -Add http root response (close #397) -Deploy 2.13 scala assets to GH on CI (close #392) -Use correct sqs buffer queue name with Kinesis bad sink (close #393) -Sbt project modernization (close #361) -Update the Pubsub UserAgent format (close #362) -Add separate good/bad sink configurations (close #388) -Add Kafka sink healthcheck (close #387) -Make maxConnections and idleTimeout configurable (close #386) -Add support for handling /robots.txt (close #385) -Set installation id (close #384) -Set maxBytes in the NsqSink (close #383) -Add http4s Kafka support (close #382) -Add http4s NSQ support (close #348) -Add telemetry support (close #381) -Use Blaze as default http4s backend (close #380) -Add http4s SQS sink 
(close #378) -Add http4s Kinesis sink (close #379) -Add iglu routes spec (close #377) -Add http4s PubSub sink (close #376) -Add http4s SSL support (close #374) -Add http4s redirect support (close #373) -Load config (close #326) -Add http4s anonymous tracking (close #372) -Add http4s CORS support (close #371) -Add http4s pixel endpoint (close #370) -Add http4s GET and HEAD endpoints (close #369) -Configure set-cookie header (close #368) -Add test for the stdout sink (close #367) -Add http4s POST endpoint (close #366) -Add http4s graceful shutdown (close #365) -Add http4s module (close #364) -Add Snowplow Limited Use License (close #346) - -Release 2.10.0 (2023-11-08) --------------------------- -Update the Pubsub UserAgent format (#362) -Bump sbt-snowplow-release to 0.3.1 (#363) - -Release 2.9.2 (2023-08-25) --------------------------- -Add ability to set custom tags for statsd metrics (#340) - -Release 2.9.1 (2023-08-03) --------------------------- -PubSub: use debug for logging the inserts (#321) -Run background check for Kinesis if it is made unhealthy and SQS buffer is activated (#315) - -Release 2.9.0 (2023-04-11) --------------------------- -Bump protobuf-java to 3.21.7 (#308) -PubSub: add second layer of retry (#304) -Replace set-output in Github Actions (#305) -Make MaxRetries configurable (#295) -Use sbt-snowplow-release to build docker images (#302) -Update /sink-health endpoint logic (#276) -Integration tests should work with both regular and distroless project (#301) -Scan Docker images with Snyk container monitor in deploy.yml (#296) -Add integration tests for Set-Cookie (#287) -Add integration test for doNotTrackCookie (#289) -Add integration test for X-Forwarded-For (#288) -Add integration test for custom paths (#286) -Add integration test for /health endpoint (#285) -Kinesis: add integration tests (#283) -Validate cookie.fallbackDomain config option on startup (#278) -PubSub: add integration tests (#274) -PubSub: make it possible to use PubSub 
emulator (#270) -Put MaxBytes in default application.conf instead of hard coding it (#272) - -Release 2.8.2 (2022-11-03) --------------------------- -Ensure docker images have latest libexpat version (#268) - -Release 2.8.1 (2022-10-28) --------------------------- -Bump aws sdk to 1.12.327 (#266) -Warmup process should iterate until success (#264) -Health endpoint should return 503 during warmup phase (#263) - -Release 2.8.0 (2022-09-27) --------------------------- -Add RabbitMQ asset (#251) - -Release 2.7.1 (2022-09-06) --------------------------- -Ensure docker image has latest zlib1g version (#254) - -Release 2.7.0 (2022-07-27) --------------------------- -Send warmup requests to self on startup (#249) - -Release 2.6.3 (2022-07-21) --------------------------- -Ensure docker image has latest libfreetype6 version (#247) - -Release 2.6.2 (2022-07-07) --------------------------- -Ensure docker image has openssl version >= 1.1.1n-0+deb11u3 (#244) - -Release 2.6.1 (2022-06-13) --------------------------- -Reduce number of error messages in kinesis collector logs (#242) -Bump log4j to 2.17.2 (#241) -Remove sbt-dependency-graph from plugins.sbt (#235) -Bump jackson-databind to 2.12.7 (#240) -Bump aws-java-sdk to 1.12.238 (#239) -Bump google-cloud-pubsub to 1.119.1 (#238) -Bump jnr-unixsocket to 0.38.17 (#237) -Bump akka-http-metrics-datadog to 1.7.1 (#236) - -Release 2.6.0 (2022-04-22) --------------------------- -Replace all metrics implementations with statsd (#223) -Enable AWS MSK IAM Authentication (#232) -Remove warning about missing config option (#222) -Inspect X-Forwarded-Proto for http -> https redirects (#221) -Change docker base image to eclipse-temurin:11-jre-focal (#228) -Publish distroless docker image (#229) - -Release 2.5.0 (2022-01-31) --------------------------- -Use shorter app name (#217) -Update copyright notices to 2022 (#216) -Disable warnings for illegal headers (#178) -Change default telemetry url (#209) -Add configuration option for allow list 
for collector redirects (#131) -Increase default value of pubsub backoffPolicy totalBackoff (#187) -Move logging of thread pool creation out of KinesisSink (#129) -CollectorServiceSpec should test number of events written to sink (#188) -Improve graceful shutdown (#12) - -Release 2.4.5 (2021-12-22) --------------------------- -Fix how headers are stringified in the collector payload (#210) - -Release 2.4.4 (2021-12-19) --------------------------- -Bump log4j-core to 2.17.0 (#206) - -Release 2.4.3 (2021-12-14) --------------------------- -Fix log4j-core version to 2.16.0 (#195) -Bump slf4j to 1.7.32 (#196) -Bump joda-time to 2.10.13 (#198) -Bump aws-java-sdk to 1.12.128 (#199) -Bump google-cloud-pubsub to 1.115.0 (#200) -Bump libthrift to 0.15.0 (#201) -Bump sbt to 1.5.6 (#202) - -Release 2.4.2 (2021-12-08) --------------------------- -Fix docs link in README (#192) -Bump akka-http to 2.4.1 (#193) - -Release 2.4.1 (2021-10-20) --------------------------- -OSS terraform modules unique id not propagated into telemetry event (#182) - -Release 2.4.0 (2021-10-19) --------------------------- -Make it possible to configure the collector without a file (#173) -Add telemetry (#167) -Handle LimitExceededException when testing if stream exists (#174) -Include aws-java-sdk-sts to enable web token authentication (#169) -Use sbt-dynver to set application version (#166) -Publish arm64 and amd64 docker images (#165) -Change docker base image to adoptopenjdk:11-jre-hotspot-focal (#164) -Use JRE defaults for https configuration (#163) -Bump akka-http to 10.2.6 (#162) -Bump akka to 2.6.16 (#161) - -Release 2.3.1 (2021-08-12) --------------------------- -Bump pubsub to 2.113.7 (#158) -Bump jackson-databind to 2.10.5.1 on nsq module (#157) -Add cn-northwest-1 to list of custom endpoints (#152) -Bump jackson-dataformat-cbor to 2.11.4 (#155) -Bump snowplow-badrows to 2.1.1 (#154) -Bump libthrift to 0.14.1 (#153) - -Release 2.3.0 (2021-05-25) --------------------------- -Add outage 
protection features to Kinesis, SQS and Pubsub sinks (#132) -Pass Kinesis partitioning key as SQS message attribute (#146) -Fix typo in PubSub sink useragent string (#147) -Use base image from DockerHub (#107) -Attach jar files to Github releases (#108) -Remove Bintray from resolutionRepos (#144) -Bump scopt to 4.0.1 (#143) -Bump pureconfig to 0.15.0 (#142) -Bump jackson-databind to 2.9.10.8 (#141) -Bump json4s-jackson to 3.6.11 (#140) -Bump specs2-core to 4.11.0 (#139) -Bump sbt-scalafmt to 2.4.2 (#138) -Bump sbt-tpolecat to 0.1.17 (#137) -Bump sbt-buildinfo to 0.10.0 (#136) -Bump sbt-assembly to 0.15.0 (#135) -Bump sbt to 1.5.1 (#134) -Add link to Snowplow's public roadmap in README (#145) - -Release 2.2.1 (2021-03-26) --------------------------- -Respect SQS batch request limit (#125) -Set network_userid to empty UUID in anonymous mode to prevent collector_payload_format_violation (#126) - -Release 2.2.0 (2021-03-08) --------------------------- -Add SQS collector module (#120) -Serve robots.txt file (#109) - -Release 2.1.2 (2021-02-18) --------------------------- -Prevent network_userid being captured when SP-Anonymous header is set (#117) - -Release 2.1.1 (2021-01-28) --------------------------- -Emit valid bad rows for size violation and generic error (#113) -Extend copyright to 2021 (#114) - -Release 2.1.0 (2020-12-11) --------------------------- -Do not set cookie if request has SP-Anonymous header (#90) -Generate BadRow if querystring cannot be parsed (#73) -Don't crash but warn if Kinesis stream and SQS queue don't exist (#100) -Bump sbt to 1.4.4 (#105) -Bump specs2-core to 4.10.5 (#106) -Migrate from Travis to GH actions (#91) -Bump to JDK 11 (#92) -Bump base-debian to 0.2.1 (#72) -Integrate coursier cache action (#93) -Fix assembly merge strategy (#97) -Reorganise imports (#104) -Update copyright to 2020 (#95) - -Release 2.0.1 (2020-11-19) --------------------------- -Increase number of Kinesis put retries when surge protection is on (#75) -Bump scalafmt 
to 2.3.2 (#87) -Bump akka to 2.5.32 (#86) -Bump akka-http to 10.1.12 (#85) -Bump prometheus-simpleclient to 0.9.0 (#82) -Bump config to 1.4.1 (#81) -Bump slf4j to 1.7.30 (#80) -Bump joda-time to 2.10.8 (#79) -Remove scalaz7 dependency (#84) -Remove softwaremill-retry dependency (#83) - -Release 2.0.0 (2020-09-15) --------------------------- -Disable default redirect (#64) -Bump vulnerable libs (#56) -Implement surge protection (#57) -Add test for extracting a URL-encoded schema from the querystring (#60) -Add snyk monitor (#52) -Add DockerHub credentials to .travis.yml (#49) -Add travis integration (#59) - -Release 1.0.1 (snowplow/snowplow: 119 Tycho Magnetic Anomaly Two) (2020-04-30) ------------------------------------------------------------------------------- -Bump to 1.0.1 (snowplow/snowplow#4338) -Add Snowplow Bintray to resolvers (snowplow/snowplow#4326) -Publish Docker image for stdout via Travis (snowplow/snowplow#4333) -Fix config example (snowplow/snowplow#4332) -Fix incompatible jackson dependencies to enable CBOR (snowplow/snowplow#4266) - -Release 1.0.0 (snowplow/snowplow: 118 Morgantina) (2020-01-16) --------------------------------------------------------------- -Extend copyright notice to 2020 (snowplow/snowplow#4261) -Bump to 1.0.0 (snowplow/snowplow#4193) -Introduce sbt-scalafmt (snowplow/snowplow#4192) -Bump sbt-buildinfo to 0.9.0 (snowplow/snowplow#4191) -Use sbt-tpolecat (snowplow/snowplow#4190) -Bump sbt-assembly to 0.14.9 (snowplow/snowplow#4189) -Bump specs2 to 4.5.1 (snowplow/snowplow#4188) -Bump pureconfig to 0.11.1 (snowplow/snowplow#4187) -Bump akka to 2.5.19 (snowplow/snowplow#4186) -Bump prometheus-simpleclient to 0.6.0 (snowplow/snowplow#4184) -Bump config to 1.3.4 (snowplow/snowplow#4183) -Bump slf4j to 1.7.26 (snowplow/snowplow#4182) -Bump joda-time to 2.10.2 (snowplow/snowplow#4181) -Bump kafka-clients to 2.2.1 (snowplow/snowplow#4180) -Bump google-cloud-pubsub to 1.78.0 (snowplow/snowplow#4179) -Bump aws-java-sdk to 1.11.573 
(snowplow/snowplow#4178) -Integrate the size violation bad row type (snowplow/snowplow#4177) -Bump SBT to 1.3.3 (snowplow/snowplow#4176) -Bump Scala to 2.12.10 (snowplow/snowplow#4175) - -Release 0.17.0 (snowplow/snowplow: 117 Biskupin) (2019-12-03) -------------------------------------------------------------- -Publish docker images for scala-stream-collector to DockerHub (#4237) -Allow users to disable the default redirect endpoint (snowplow/snowplow#4211) -Bump Scala version to 2.11.12 (snowplow/snowplow#4206) -Bump akka-http to 10.1.10 (snowplow/snowplow#4185) -Add support for TLS port binding and certificate (snowplow/snowplow#4085) -Remove duplicate section in example hocon config file (snowplow/snowplow#4210) -Bump to 0.17.0 (snowplow/snowplow#4208) - -Release 0.16.0 (snowplow/snowplow: 116 Madara Rider) (2019-09-12) ------------------------------------------------------------------ -Add options to configure secure, same-site and http-only for the cookie (snowplow/snowplow#3753) -Allow multiple cookie domains to be used (snowplow/snowplow#3994) -Provide a way to specify custom path mappings (snowplow/snowplow#4087) -Send back a Cache-Control header (snowplow/snowplow#4017) -Add sbt-native-packager and Docker config (snowplow/snowplow#4128) -Bump Akka HTTP to 10.0.15 (snowplow/snowplow#4131) -Bump version to 0.16.0 (snowplow/snowplow#4134) - -Release 0.15.0 (snowplow/snowplow: 113 Filitosa) (2019-02-27) -------------------------------------------------------------- -Expose Prometheus metrics (snowplow/snowplow#3421) -Bump kafka client to 2.1.1 (snowplow/snowplow#3981) -Provide a way to add arbitrary Kafka configuration settings (snowplow/snowplow#3968) -Add support for an Access-Control-Max-Age header (snowplow/snowplow#3904) -Allow for the do not track cookie value configuration to be a regex (snowplow/snowplow#3966) -Showcase the usage of env variables in the configuration example (snowplow/snowplow#3971) -Extend copyright notice to 2019 
(snowplow/snowplow#3997) - -Release 0.14.0 (snowplow/snowplow: 109 Lambaesis) (2018-08-21) --------------------------------------------------------------- -Respect a do not track cookie (snowplow/snowplow#3825) -Add a way to customize the response from the root path (snowplow/snowplow#3670) -Support HEAD requests (snowplow/snowplow#3827) -Allow for multiple domains in crossdomain.xml (snowplow/snowplow#3740) -Allow overriding of the kinesis endpoint url in the configuration (snowplow/snowplow#3846) -Turn BufferConfig's byteLimit and recordLimit into longs (snowplow/snowplow#3807) - -Release 0.13.0 (snowplow/snowplow: 101 Neapolis) (2018-03-21) -------------------------------------------------------------- -Add Google Cloud PubSub sink (snowplow/snowplow#3047) -Split into multiple artifacts according to targeted platform (snowplow/snowplow#3621) -Expose number of requests over JMX (snowplow/snowplow#3637) -Move cross domain configuration to enabled-style (snowplow/snowplow#3556) -Truncate events exceeding the configured maximum size into a BadRow (snowplow/snowplow#3587) -Remove string interpolation false positive warnings (snowplow/snowplow#3623) -Update config.hocon.sample to support Google Cloud PubSub (snowplow/snowplow#3049) -Customize useragent for GCP API calls (snowplow/snowplow#3658) -Bump kafka-clients to 1.0.1 (snowplow/snowplow#3660) -Bump aws-java-sdk to 1.11.290 (snowplow/snowplow#3665) -Bump scala-common-enrich to 0.31.0 (snowplow/snowplow#3666) -Bump SBT to 1.1.1 (snowplow/snowplow#3629) -Bump sbt-assembly to 0.14.6 (snowplow/snowplow#3667) -Use sbt-buildinfo (snowplow/snowplow#3626) -Extend copyright notice to 2018 (snowplow/snowplow#3687) - -Release 0.12.0 (snowplow/snowplow: 98 Argentomagus) (2018-01-05) ----------------------------------------------------------------- -Make Flash access domains and secure configurable (snowplow/snowplow#2915) -Add URL redirect replacement macro (snowplow/snowplow#3491) -Allow use of the originating scheme during 
cookie bounce (snowplow/snowplow#3512) -Replace Location header with RawHeader to preserve double encoding (snowplow/snowplow#3546) -Bump nsq-java-client to 1.2.0 (snowplow/snowplow#3519) -Document the stdout sink better (snowplow/snowplow#3515) -Fix stdout sink configuration (snowplow/snowplow#3550) -Fix scaladoc for 'ipAndPartitionKey' (snowplow/snowplow#3513) - -Release 0.11.0 (snowplow/snowplow: 96 Zeugma) (2017-11-21) ----------------------------------------------------------- -Update config.hocon.sample to support NSQ (snowplow/snowplow#3294) -Add NSQ sink (snowplow/snowplow#2093) -Make Kinesis, Kafka and NSQ config a coproduct (snowplow/snowplow#3449) -Keep sending records when the Kinesis stream is resharding (snowplow/snowplow#3453) - -Release 0.10.0 (snowplow/snowplow: 93 Virunum) (2017-10-03) ------------------------------------------------------------ -Replace spray by akka-http (snowplow/snowplow#3299) -Replace argot by scopt (snowplow/snowplow#3298) -Add support for cookie bounce (snowplow/snowplow#2697) -Allow raw query params (snowplow/snowplow#3273) -Add support for the Chinese Kinesis endpoint (snowplow/snowplow#3335) -Use the DefaultAWSCredentialsProviderChain for Kinesis Sink (snowplow/snowplow#3245) -Use Kafka callback based API to detect failures to send messages (snowplow/snowplow#3317) -Make Kafka sink more fault tolerant by allowing retries (snowplow/snowplow#3367) -Fix incorrect property used for kafkaProducer.batch.size (snowplow/snowplow#3173) -Configuration decoding with pureconfig (snowplow/snowplow#3318) -Stop making the assembly jar executable (snowplow/snowplow#3410) -Add config dependency (snowplow/snowplow#3326) -Upgrade to Java 8 (snowplow/snowplow#3328) -Bump Scala version to 2.11 (snowplow/snowplow#3311) -Bump SBT to 0.13.16 (snowplow/snowplow#3312) -Bump sbt-assembly to 0.14.5 (snowplow/snowplow#3329) -Bump aws-java-sdk-kinesis to 1.11 (snowplow/snowplow#3310) -Bump kafka-clients to 0.10.2.1 (snowplow/snowplow#3325) -Bump 
scala-common-enrich to 0.26.0 (snowplow/snowplow#3305) -Bump iglu-scala-client to 0.5.0 (snowplow/snowplow#3309) -Bump specs2-core to 3.9.4 (snowplow/snowplow#3308) -Bump scalaz-core to 7.0.9 (snowplow/snowplow#3307) -Bump joda-time to 2.9 (snowplow/snowplow#3323) -Remove commons-codec dependency (snowplow/snowplow#3324) -Remove snowplow-thrift-raw-event dependency (snowplow/snowplow#3306) -Remove joda-convert dependency (snowplow/snowplow#3304) -Remove mimepull dependency (snowplow/snowplow#3302) -Remove scalazon dependency (snowplow/snowplow#3300) -Run the unit tests systematically in Travis (snowplow/snowplow#3409) - -Release 0.9.0 (snowplow/snowplow: 85 Metamorphosis) (2016-11-15) ----------------------------------------------------------------- -Add Kafka sink (snowplow/snowplow#2937) -Update config.hocon.sample to support Kafka (snowplow/snowplow#2943) -Move sink.kinesis.buffer to sink.buffer in config.hocon.sample (snowplow/snowplow#2938) - -Release 0.8.0 (snowplow/snowplow: 84 Steller's Sea Eagle) (2016-10-07) ----------------------------------------------------------------------- -Add scala_ into artifact filename in Bintray (snowplow/snowplow#2843) -Use nuid query parameter value to set the 3rd party network id cookie (snowplow/snowplow#2512) -Configurable cookie path (snowplow/snowplow#2528) -Call Config.resolve() to resolve environment variables in hocon (snowplow/snowplow#2879) - -Release 0.7.0 (snowplow/snowplow: 80 Southern Cassowary) (2016-05-30) ---------------------------------------------------------------------- -Increase tolerance of timings in tests (snowplow/snowplow#2614) -Send nonempty response to POST requests (snowplow/snowplow#2606) -Crash when unable to find stream instead of hanging (snowplow/snowplow#2583) -Stop using deprecated Config.getMilliseconds method (snowplow/snowplow#2570) -Move example configuration file to examples folder (snowplow/snowplow#2566) -Upgrade the log level for reports of stream nonexistence from INFO to ERROR 
(snowplow/snowplow#2384) -Crash rather than hanging when unable to bind to the supplied port (snowplow/snowplow#2551) -Bump Spray version to 1.3.3 (snowplow/snowplow#2522) -Bump Scala version to 2.10.5 (snowplow/snowplow#2565) -Fix omitted string interpolation (snowplow/snowplow#2561) - -Release 0.6.0 (snowplow/snowplow: 78 Great Hornbill) (2016-03-15) ------------------------------------------------------------------ -Added Scala Common Enrich as a library dependency (snowplow/snowplow#2153) -Added click redirect mode (snowplow/snowplow#549) -Configured the ability to use IP address as partition key (snowplow/snowplow#2331) -Converted bad rows to new format (snowplow/snowplow#2006) -Shared a single thread pool for all writes to Kinesis (snowplow/snowplow#2369) -Specified UTF-8 encoding everywhere (snowplow/snowplow#2147) -Made cookie name customizable, thanks @kazjote! (snowplow/snowplow#2474) -Added boolean collector.cookie.enabled setting (snowplow/snowplow#2488) -Made backoffPolicy fields macros (snowplow/snowplow#2518) -Updated AWS credentials to support iam/env/default not cpf (snowplow/snowplow#1518) - -Release 0.5.0 (snowplow/snowplow: 67 Bohemian Waxwing) (2015-07-13) -------------------------------------------------------------------- -Stdout bad sink now prints to stderr (snowplow/snowplow#1799) -Added splitter for large event arrays (snowplow/snowplow#941) -Increased maximum record size from 50kB to 1MB (snowplow/snowplow#1753) -Added tests for splitting large requests (snowplow/snowplow#1683) -Updated bad rows to include timestamp (snowplow/snowplow#1681) -Handled case where IP is not present (snowplow/snowplow#1680) -Did some reorganisation and refactoring of the project (snowplow/snowplow#1678) -Added json4s dependency (snowplow/snowplow#1673) -Added bad stream (snowplow/snowplow#1502) - -Release 0.4.0 (snowplow/snowplow: 65 Scarlet Rosefinch) (2015-05-08) --------------------------------------------------------------------- -Bumped Scalazon to 0.11 
(snowplow/snowplow#1504) -Added support for PutRecords API (snowplow/snowplow#1227) -Added CORS support (snowplow/snowplow#1165) -Added CORS-style support for ActionScript3 Tracker (snowplow/snowplow#1331) -Added ability to disable third-party cookies (snowplow/snowplow#1363) -Removed automatic creation of stream (snowplow/snowplow#1464) -Added macros to config.hocon.sample (snowplow/snowplow#1471) -Logged the name of the stream to which records are written (snowplow/snowplow#1503) -Added shutdown hook to send stored events (snowplow/snowplow#1535) -Added configurable exponential backoff with jitter (snowplow/snowplow#1592) - -Release 0.3.0 (snowplow/snowplow: 60 Bee Hummingbird) (2015-02-03) ------------------------------------------------------------------- -Started sending CollectorPayloads instead of SnowplowRawEvents (snowplow/snowplow#1226) -Added support for POST requests (snowplow/snowplow#187) -Added support for any {api-vendor}/{api-version} for GET and POST (snowplow/snowplow#652) -Stopped decoding URLs (snowplow/snowplow#1217) -Changed 1x1 pixel response to use a stable GIF (snowplow/snowplow#1260) -Renamed default.conf to config.hocon.sample (snowplow/snowplow#1243) -Started using ThreadLocal to handle Thrift serialization, thanks @denismo and @pkallos! (snowplow/snowplow#1254) -Added healthcheck for load balancers, thanks @duncan! (snowplow/snowplow#1360) - -Release 0.2.0 (snowplow/snowplow: 0.9.12) (2014-11-26) ------------------------------------------------------- -Changed organization to "com.snowplowanalytics" (snowplow/snowplow#1168) -Made the --config option mandatory (snowplow/snowplow#1128) -Added ability to set AWS credentials from environment variables (snowplow/snowplow#1116) -Now enforcing Java 7 for compilation (snowplow/snowplow#1068) -Increased request character limit to 32768 (snowplow/snowplow#987) -Improved performance by using Future, thanks @pkallos! 
(snowplow/snowplow#580) -Scala Stream Collector, Scala Kinesis Enrich: made endpoint configurable, thanks @sambo1972! (snowplow/snowplow#978) -Scala Stream Collector, Scala Kinesis Enrich: added support for IAM roles, thanks @pkallos! (snowplow/snowplow#534) -Scala Stream Collector, Scala Kinesis Enrich: replaced stream list with describe to tighten permissions, thanks @pkallos! (snowplow/snowplow#535) - -Release 0.2.0 (snowplow/snowplow: 0.9.12) (2014-11-26) ------------------------------------------------------- -Changed organization to "com.snowplowanalytics" (snowplow/snowplow#1168) -Made the --config option mandatory (snowplow/snowplow#1128) -Added ability to set AWS credentials from environment variables (snowplow/snowplow#1116) -Now enforcing Java 7 for compilation (snowplow/snowplow#1068) -Increased request character limit to 32768 (snowplow/snowplow#987) -Improved performance by using Future, thanks @pkallos! (snowplow/snowplow#580) -Scala Stream Collector, Scala Kinesis Enrich: made endpoint configurable, thanks @sambo1972! (snowplow/snowplow#978) -Scala Stream Collector, Scala Kinesis Enrich: added support for IAM roles, thanks @pkallos! (snowplow/snowplow#534) -Scala Stream Collector, Scala Kinesis Enrich: replaced stream list with describe to tighten permissions, thanks @pkallos! (snowplow/snowplow#535) diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 000000000..f6a8ea688 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,622 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+ +## [Unreleased] +### Added +### Changed +### Fixed +### Deprecated +### Removed +### Security + +## [3.2.1] - 2024-11-25 +### Changed +- Kafka sink to open fewer threads [#431] +- Explicitly return 408 when timeout is hit [#427] +- Extend default timeouts to match upstream defaults [#426] +- Update workflows to install sbt [#434] + +### Fixed +- Remove parts of the cookies that are not valid according to RFC 6265 [#432] +- Disable timeouts on healthcheck calls [#426] + +### Removed +- Remove debug http [#434] +- Remove body read timeout feature [#429] + +### Security +- Bump aws sdk to 1.12.769 [#428] +- Bump azure-identity to 1.13.2 [#428] +- Bump sbt-snowplow-release to 0.3.2 [#428] + +## [3.2.0] - 2024-03-11 +### Added +- collector-kafka: authenticate with Event Hubs using OAuth2 [#401] + +### Changed +- Prevent Kafka sink from blocking [#418] +- Allow setting size limit on line and header length [#417] +- Add debug logging and timeout configurations [#417] +- Add timeout for body parsing [#417] +- Cross compile to scala 2.12 +- Log cats-effect warning at debug level [#414] +- Add snowman job for tag builds + +### Security +- Bump transitive jnr-posix to 3.1.8 [#419] + +## [3.1.2] - 2024-02-22 +### Changed +- Improve relative redirect in cookie bounce feature [#413] + +## [3.1.1] - 2024-02-20 +### Changed +- Upcase cookie header name [#412] + +## [3.1.0] - 2024-01-25 +### Added +- Add an option to send HSTS header [#408] + +## [3.0.1] - 2024-01-10 +### Fixed +- Remove unnecessary argument [#407] + +## [3.0.0] - 2024-01-08 +### Changed +- Add mandatory SLULA license acceptance flag (close #405) +- Remove unused warmup config section +- Use shortname for collector name (close #403) +- Add statsd metrics reporting (close #404) +- Add support for Do Not Track cookie (close #400) +- Add crossdomain.xml support (close #399) +- Add http root response (close #397) +- Deploy 2.13 scala assets to GH on CI (close #392) +- Use correct sqs buffer queue name with
Kinesis bad sink (close #393) +- Sbt project modernization (close #361) +- Update the Pubsub UserAgent format (close #362) +- Add separate good/bad sink configurations (close #388) +- Add Kafka sink healthcheck (close #387) +- Make maxConnections and idleTimeout configurable (close #386) +- Add support for handling /robots.txt (close #385) +- Set installation id (close #384) +- Set maxBytes in the NsqSink (close #383) +- Add http4s Kafka support (close #382) +- Add http4s NSQ support (close #348) +- Add telemetry support (close #381) +- Use Blaze as default http4s backend (close #380) +- Add http4s SQS sink (close #378) +- Add http4s Kinesis sink (close #379) +- Add iglu routes spec (close #377) +- Add http4s PubSub sink (close #376) +- Add http4s SSL support (close #374) +- Add http4s redirect support (close #373) +- Load config (close #326) +- Add http4s anonymous tracking (close #372) +- Add http4s CORS support (close #371) +- Add http4s pixel endpoint (close #370) +- Add http4s GET and HEAD endpoints (close #369) +- Configure set-cookie header (close #368) +- Add test for the stdout sink (close #367) +- Add http4s POST endpoint (close #366) +- Add http4s graceful shutdown (close #365) +- Add http4s module (close #364) +- Add Snowplow Limited Use License (close #346) + +## [2.10.0] - 2023-11-08 +### Changed +- Update the Pubsub UserAgent format [#362] +- Bump sbt-snowplow-release to 0.3.1 [#363] + +## [2.9.2] - 2023-08-25 +### Changed +- Add ability to set custom tags for statsd metrics [#340] + +## [2.9.1] - 2023-08-03 +### Changed +- PubSub: use debug for logging the inserts [#321] +- Run background check for Kinesis if it is made unhealthy and SQS buffer is activated [#315] + +## [2.9.0] - 2023-04-11 +### Changed +- Bump protobuf-java to 3.21.7 [#308] +- PubSub: add second layer of retry [#304] +- Replace set-output in Github Actions [#305] +- Make MaxRetries configurable [#295] +- Use sbt-snowplow-release to build docker images [#302] +- Update /sink-health 
endpoint logic [#276] +- Integration tests should work with both regular and distroless project [#301] +- Scan Docker images with Snyk container monitor in deploy.yml [#296] +- Add integration tests for Set-Cookie [#287] +- Add integration test for doNotTrackCookie [#289] +- Add integration test for X-Forwarded-For [#288] +- Add integration test for custom paths [#286] +- Add integration test for /health endpoint [#285] +- Kinesis: add integration tests [#283] +- Validate cookie.fallbackDomain config option on startup [#278] +- PubSub: add integration tests [#274] +- PubSub: make it possible to use PubSub emulator [#270] +- Put MaxBytes in default application.conf instead of hard coding it [#272] + +## [2.8.2] - 2022-11-03 +### Changed +- Ensure docker images have latest libexpat version [#268] + +## [2.8.1] - 2022-10-28 +### Changed +- Bump aws sdk to 1.12.327 [#266] +- Warmup process should iterate until success [#264] +- Health endpoint should return 503 during warmup phase [#263] + +## [2.8.0] - 2022-09-27 +### Changed +- Add RabbitMQ asset [#251] + +## [2.7.1] - 2022-09-06 +### Changed +- Ensure docker image has latest zlib1g version [#254] + +## [2.7.0] - 2022-07-27 +### Changed +- Send warmup requests to self on startup [#249] + +## [2.6.3] - 2022-07-21 +### Changed +- Ensure docker image has latest libfreetype6 version [#247] + +## [2.6.2] - 2022-07-07 +### Changed +- Ensure docker image has openssl version >= 1.1.1n-0+deb11u3 [#244] + +## [2.6.1] - 2022-06-13 +### Changed +- Reduce number of error messages in kinesis collector logs [#242] +- Bump log4j to 2.17.2 [#241] +- Remove sbt-dependency-graph from plugins.sbt [#235] +- Bump jackson-databind to 2.12.7 [#240] +- Bump aws-java-sdk to 1.12.238 [#239] +- Bump google-cloud-pubsub to 1.119.1 [#238] +- Bump jnr-unixsocket to 0.38.17 [#237] +- Bump akka-http-metrics-datadog to 1.7.1 [#236] + +## [2.6.0] - 2022-04-22 +### Changed +- Replace all metrics implementations with statsd [#223] +- Enable AWS MSK IAM 
Authentication [#232] +- Remove warning about missing config option [#222] +- Inspect X-Forwarded-Proto for http -> https redirects [#221] +- Change docker base image to eclipse-temurin:11-jre-focal [#228] +- Publish distroless docker image [#229] + +## [2.5.0] - 2022-01-31 +### Changed +- Use shorter app name [#217] +- Update copyright notices to 2022 [#216] +- Disable warnings for illegal headers [#178] +- Change default telemetry url [#209] +- Add configuration option for allow list for collector redirects [#131] +- Increase default value of pubsub backoffPolicy totalBackoff [#187] +- Move logging of thread pool creation out of KinesisSink [#129] +- CollectorServiceSpec should test number of events written to sink [#188] +- Improve graceful shutdown [#12] + +## [2.4.5] - 2021-12-22 +### Changed +- Fix how headers are stringified in the collector payload [#210] + +## [2.4.4] - 2021-12-19 +### Changed +- Bump log4j-core to 2.17.0 [#206] + +## [2.4.3] - 2021-12-14 +### Changed +- Fix log4j-core version to 2.16.0 [#195] +- Bump slf4j to 1.7.32 [#196] +- Bump joda-time to 2.10.13 [#198] +- Bump aws-java-sdk to 1.12.128 [#199] +- Bump google-cloud-pubsub to 1.115.0 [#200] +- Bump libthrift to 0.15.0 [#201] +- Bump sbt to 1.5.6 [#202] + +## [2.4.2] - 2021-12-08 +### Changed +- Fix docs link in README [#192] +- Bump akka-http to 2.4.1 [#193] + +## [2.4.1] - 2021-10-20 +### Changed +- OSS terraform modules unique id not propagated into telemetry event [#182] + +## [2.4.0] - 2021-10-19 +### Changed +- Make it possible to configure the collector without a file [#173] +- Add telemetry [#167] +- Handle LimitExceededException when testing if stream exists [#174] +- Include aws-java-sdk-sts to enable web token authentication [#169] +- Use sbt-dynver to set application version [#166] +- Publish arm64 and amd64 docker images [#165] +- Change docker base image to adoptopenjdk:11-jre-hotspot-focal [#164] +- Use JRE defaults for https configuration [#163] +- Bump akka-http to 
10.2.6 [#162] +- Bump akka to 2.6.16 [#161] + +## [2.3.1] - 2021-08-12 +### Changed +- Bump pubsub to 2.113.7 [#158] +- Bump jackson-databind to 2.10.5.1 on nsq module [#157] +- Add cn-northwest-1 to list of custom endpoints [#152] +- Bump jackson-dataformat-cbor to 2.11.4 [#155] +- Bump snowplow-badrows to 2.1.1 [#154] +- Bump libthrift to 0.14.1 [#153] + +## [2.3.0] - 2021-05-25 +### Changed +- Add outage protection features to Kinesis, SQS and Pubsub sinks [#132] +- Pass Kinesis partitioning key as SQS message attribute [#146] +- Fix typo in PubSub sink useragent string [#147] +- Use base image from DockerHub [#107] +- Attach jar files to Github releases [#108] +- Remove Bintray from resolutionRepos [#144] +- Bump scopt to 4.0.1 [#143] +- Bump pureconfig to 0.15.0 [#142] +- Bump jackson-databind to 2.9.10.8 [#141] +- Bump json4s-jackson to 3.6.11 [#140] +- Bump specs2-core to 4.11.0 [#139] +- Bump sbt-scalafmt to 2.4.2 [#138] +- Bump sbt-tpolecat to 0.1.17 [#137] +- Bump sbt-buildinfo to 0.10.0 [#136] +- Bump sbt-assembly to 0.15.0 [#135] +- Bump sbt to 1.5.1 [#134] +- Add link to Snowplow's public roadmap in README [#145] + +## [2.2.1] - 2021-03-26 +### Changed +- Respect SQS batch request limit [#125] +- Set network_userid to empty UUID in anonymous mode to prevent collector_payload_format_violation [#126] + +## [2.2.0] - 2021-03-08 +### Changed +- Add SQS collector module [#120] +- Serve robots.txt file [#109] + +## [2.1.2] - 2021-02-18 +### Changed +- Prevent network_userid being captured when SP-Anonymous header is set [#117] + +## [2.1.1] - 2021-01-28 +### Changed +- Emit valid bad rows for size violation and generic error [#113] +- Extend copyright to 2021 [#114] + +## [2.1.0] - 2020-12-11 +### Changed +- Do not set cookie if request has SP-Anonymous header [#90] +- Generate BadRow if querystring cannot be parsed [#73] +- Don't crash but warn if Kinesis stream and SQS queue don't exist [#100] +- Bump sbt to 1.4.4 [#105] +- Bump specs2-core to 4.10.5 [#106] +- 
Migrate from Travis to GH actions [#91] +- Bump to JDK 11 [#92] +- Bump base-debian to 0.2.1 [#72] +- Integrate coursier cache action [#93] +- Fix assembly merge strategy [#97] +- Reorganise imports [#104] +- Update copyright to 2020 [#95] + +## [2.0.1] - 2020-11-19 +### Changed +- Increase number of Kinesis put retries when surge protection is on [#75] +- Bump scalafmt to 2.3.2 [#87] +- Bump akka to 2.5.32 [#86] +- Bump akka-http to 10.1.12 [#85] +- Bump prometheus-simpleclient to 0.9.0 [#82] +- Bump config to 1.4.1 [#81] +- Bump slf4j to 1.7.30 [#80] +- Bump joda-time to 2.10.8 [#79] +- Remove scalaz7 dependency [#84] +- Remove softwaremill-retry dependency [#83] + +## [2.0.0] - 2020-09-15 +### Changed +- Disable default redirect [#64] +- Bump vulnerable libs [#56] +- Implement surge protection [#57] +- Add test for extracting a URL-encoded schema from the querystring [#60] +- Add snyk monitor [#52] +- Add DockerHub credentials to .travis.yml [#49] +- Add travis integration [#59] + +## [1.0.1 (snowplow/snowplow: 119 Tycho Magnetic Anomaly Two)] - 2020-04-30 +### Changed +- Bump to 1.0.1 [snowplow/snowplow#4338] +- Add Snowplow Bintray to resolvers [snowplow/snowplow#4326] +- Publish Docker image for stdout via Travis [snowplow/snowplow#4333] +- Fix config example [snowplow/snowplow#4332] +- Fix incompatible jackson dependencies to enable CBOR [snowplow/snowplow#4266] + +## [1.0.0 (snowplow/snowplow: 118 Morgantina)] - 2020-01-16 +### Changed +- Extend copyright notice to 2020 [snowplow/snowplow#4261] +- Bump to 1.0.0 [snowplow/snowplow#4193] +- Introduce sbt-scalafmt [snowplow/snowplow#4192] +- Bump sbt-buildinfo to 0.9.0 [snowplow/snowplow#4191] +- Use sbt-tpolecat [snowplow/snowplow#4190] +- Bump sbt-assembly to 0.14.9 [snowplow/snowplow#4189] +- Bump specs2 to 4.5.1 [snowplow/snowplow#4188] +- Bump pureconfig to 0.11.1 [snowplow/snowplow#4187] +- Bump akka to 2.5.19 [snowplow/snowplow#4186] +- Bump prometheus-simpleclient to 0.6.0 [snowplow/snowplow#4184] +- 
Bump config to 1.3.4 [snowplow/snowplow#4183] +- Bump slf4j to 1.7.26 [snowplow/snowplow#4182] +- Bump joda-time to 2.10.2 [snowplow/snowplow#4181] +- Bump kafka-clients to 2.2.1 [snowplow/snowplow#4180] +- Bump google-cloud-pubsub to 1.78.0 [snowplow/snowplow#4179] +- Bump aws-java-sdk to 1.11.573 [snowplow/snowplow#4178] +- Integrate the size violation bad row type [snowplow/snowplow#4177] +- Bump SBT to 1.3.3 [snowplow/snowplow#4176] +- Bump Scala to 2.12.10 [snowplow/snowplow#4175] + +## [0.17.0 (snowplow/snowplow: 117 Biskupin)] - 2019-12-03 +### Changed +- Publish docker images for scala-stream-collector to DockerHub [#4237] +- Allow users to disable the default redirect endpoint [snowplow/snowplow#4211] +- Bump Scala version to 2.11.12 [snowplow/snowplow#4206] +- Bump akka-http to 10.1.10 [snowplow/snowplow#4185] +- Add support for TLS port binding and certificate [snowplow/snowplow#4085] +- Remove duplicate section in example hocon config file [snowplow/snowplow#4210] +- Bump to 0.17.0 [snowplow/snowplow#4208] + +## [0.16.0 (snowplow/snowplow: 116 Madara Rider)] - 2019-09-12 +### Changed +- Add options to configure secure, same-site and http-only for the cookie [snowplow/snowplow#3753] +- Allow multiple cookie domains to be used [snowplow/snowplow#3994] +- Provide a way to specify custom path mappings [snowplow/snowplow#4087] +- Send back a Cache-Control header [snowplow/snowplow#4017] +- Add sbt-native-packager and Docker config [snowplow/snowplow#4128] +- Bump Akka HTTP to 10.0.15 [snowplow/snowplow#4131] +- Bump version to 0.16.0 [snowplow/snowplow#4134] + +## [0.15.0 (snowplow/snowplow: 113 Filitosa)] - 2019-02-27 +### Changed +- Expose Prometheus metrics [snowplow/snowplow#3421] +- Bump kafka client to 2.1.1 [snowplow/snowplow#3981] +- Provide a way to add arbitrary Kafka configuration settings [snowplow/snowplow#3968] +- Add support for an Access-Control-Max-Age header [snowplow/snowplow#3904] +- Allow for the do not track cookie value configuration 
to be a regex [snowplow/snowplow#3966] +- Showcase the usage of env variables in the configuration example [snowplow/snowplow#3971] +- Extend copyright notice to 2019 [snowplow/snowplow#3997] + +## [0.14.0 (snowplow/snowplow: 109 Lambaesis)] - 2018-08-21 +### Changed +- Respect a do not track cookie [snowplow/snowplow#3825] +- Add a way to customize the response from the root path [snowplow/snowplow#3670] +- Support HEAD requests [snowplow/snowplow#3827] +- Allow for multiple domains in crossdomain.xml [snowplow/snowplow#3740] +- Allow overriding of the kinesis endpoint url in the configuration [snowplow/snowplow#3846] +- Turn BufferConfig's byteLimit and recordLimit into longs [snowplow/snowplow#3807] + +## [0.13.0 (snowplow/snowplow: 101 Neapolis)] - 2018-03-21 +### Changed +- Add Google Cloud PubSub sink [snowplow/snowplow#3047] +- Split into multiple artifacts according to targeted platform [snowplow/snowplow#3621] +- Expose number of requests over JMX [snowplow/snowplow#3637] +- Move cross domain configuration to enabled-style [snowplow/snowplow#3556] +- Truncate events exceeding the configured maximum size into a BadRow [snowplow/snowplow#3587] +- Remove string interpolation false positive warnings [snowplow/snowplow#3623] +- Update config.hocon.sample to support Google Cloud PubSub [snowplow/snowplow#3049] +- Customize useragent for GCP API calls [snowplow/snowplow#3658] +- Bump kafka-clients to 1.0.1 [snowplow/snowplow#3660] +- Bump aws-java-sdk to 1.11.290 [snowplow/snowplow#3665] +- Bump scala-common-enrich to 0.31.0 [snowplow/snowplow#3666] +- Bump SBT to 1.1.1 [snowplow/snowplow#3629] +- Bump sbt-assembly to 0.14.6 [snowplow/snowplow#3667] +- Use sbt-buildinfo [snowplow/snowplow#3626] +- Extend copyright notice to 2018 [snowplow/snowplow#3687] + +## [0.12.0 (snowplow/snowplow: 98 Argentomagus)] - 2018-01-05 +### Changed +- Make Flash access domains and secure configurable [snowplow/snowplow#2915] +- Add URL redirect replacement macro 
[snowplow/snowplow#3491] +- Allow use of the originating scheme during cookie bounce [snowplow/snowplow#3512] +- Replace Location header with RawHeader to preserve double encoding [snowplow/snowplow#3546] +- Bump nsq-java-client to 1.2.0 [snowplow/snowplow#3519] +- Document the stdout sink better [snowplow/snowplow#3515] +- Fix stdout sink configuration [snowplow/snowplow#3550] +- Fix scaladoc for 'ipAndPartitionKey' [snowplow/snowplow#3513] + +## [0.11.0 (snowplow/snowplow: 96 Zeugma)] - 2017-11-21 +### Changed +- Update config.hocon.sample to support NSQ [snowplow/snowplow#3294] +- Add NSQ sink [snowplow/snowplow#2093] +- Make Kinesis, Kafka and NSQ config a coproduct [snowplow/snowplow#3449] +- Keep sending records when the Kinesis stream is resharding [snowplow/snowplow#3453] + +## [0.10.0 (snowplow/snowplow: 93 Virunum)] - 2017-10-03 +### Changed +- Replace spray by akka-http [snowplow/snowplow#3299] +- Replace argot by scopt [snowplow/snowplow#3298] +- Add support for cookie bounce [snowplow/snowplow#2697] +- Allow raw query params [snowplow/snowplow#3273] +- Add support for the Chinese Kinesis endpoint [snowplow/snowplow#3335] +- Use the DefaultAWSCredentialsProviderChain for Kinesis Sink [snowplow/snowplow#3245] +- Use Kafka callback based API to detect failures to send messages [snowplow/snowplow#3317] +- Make Kafka sink more fault tolerant by allowing retries [snowplow/snowplow#3367] +- Fix incorrect property used for kafkaProducer.batch.size [snowplow/snowplow#3173] +- Configuration decoding with pureconfig [snowplow/snowplow#3318] +- Stop making the assembly jar executable [snowplow/snowplow#3410] +- Add config dependency [snowplow/snowplow#3326] +- Upgrade to Java 8 [snowplow/snowplow#3328] +- Bump Scala version to 2.11 [snowplow/snowplow#3311] +- Bump SBT to 0.13.16 [snowplow/snowplow#3312] +- Bump sbt-assembly to 0.14.5 [snowplow/snowplow#3329] +- Bump aws-java-sdk-kinesis to 1.11 [snowplow/snowplow#3310] +- Bump kafka-clients to 0.10.2.1 
[snowplow/snowplow#3325] +- Bump scala-common-enrich to 0.26.0 [snowplow/snowplow#3305] +- Bump iglu-scala-client to 0.5.0 [snowplow/snowplow#3309] +- Bump specs2-core to 3.9.4 [snowplow/snowplow#3308] +- Bump scalaz-core to 7.0.9 [snowplow/snowplow#3307] +- Bump joda-time to 2.9 [snowplow/snowplow#3323] +- Remove commons-codec dependency [snowplow/snowplow#3324] +- Remove snowplow-thrift-raw-event dependency [snowplow/snowplow#3306] +- Remove joda-convert dependency [snowplow/snowplow#3304] +- Remove mimepull dependency [snowplow/snowplow#3302] +- Remove scalazon dependency [snowplow/snowplow#3300] +- Run the unit tests systematically in Travis [snowplow/snowplow#3409] + +## [0.9.0 (snowplow/snowplow: 85 Metamorphosis)] - 2016-11-15 +### Changed +- Add Kafka sink [snowplow/snowplow#2937] +- Update config.hocon.sample to support Kafka [snowplow/snowplow#2943] +- Move sink.kinesis.buffer to sink.buffer in config.hocon.sample [snowplow/snowplow#2938] + +## [0.8.0 (snowplow/snowplow: 84 Steller's Sea Eagle)] - 2016-10-07 +### Changed +- Add scala_ into artifact filename in Bintray [snowplow/snowplow#2843] +- Use nuid query parameter value to set the 3rd party network id cookie [snowplow/snowplow#2512] +- Configurable cookie path [snowplow/snowplow#2528] +- Call Config.resolve() to resolve environment variables in hocon [snowplow/snowplow#2879] + +## [0.7.0 (snowplow/snowplow: 80 Southern Cassowary)] - 2016-05-30 +### Changed +- Increase tolerance of timings in tests [snowplow/snowplow#2614] +- Send nonempty response to POST requests [snowplow/snowplow#2606] +- Crash when unable to find stream instead of hanging [snowplow/snowplow#2583] +- Stop using deprecated Config.getMilliseconds method [snowplow/snowplow#2570] +- Move example configuration file to examples folder [snowplow/snowplow#2566] +- Upgrade the log level for reports of stream nonexistence from INFO to ERROR [snowplow/snowplow#2384] +- Crash rather than hanging when unable to bind to the supplied port 
[snowplow/snowplow#2551] +- Bump Spray version to 1.3.3 [snowplow/snowplow#2522] +- Bump Scala version to 2.10.5 [snowplow/snowplow#2565] +- Fix omitted string interpolation [snowplow/snowplow#2561] + +## [0.6.0 (snowplow/snowplow: 78 Great Hornbill)] - 2016-03-15 +### Changed +- Added Scala Common Enrich as a library dependency [snowplow/snowplow#2153] +- Added click redirect mode [snowplow/snowplow#549] +- Configured the ability to use IP address as partition key [snowplow/snowplow#2331] +- Converted bad rows to new format [snowplow/snowplow#2006] +- Shared a single thread pool for all writes to Kinesis [snowplow/snowplow#2369] +- Specified UTF-8 encoding everywhere [snowplow/snowplow#2147] +- Made cookie name customizable, thanks @kazjote! [snowplow/snowplow#2474] +- Added boolean collector.cookie.enabled setting [snowplow/snowplow#2488] +- Made backoffPolicy fields macros [snowplow/snowplow#2518] +- Updated AWS credentials to support iam/env/default not cpf [snowplow/snowplow#1518] + +## [0.5.0 (snowplow/snowplow: 67 Bohemian Waxwing)] - 2015-07-13 +### Changed +- Stdout bad sink now prints to stderr [snowplow/snowplow#1799] +- Added splitter for large event arrays [snowplow/snowplow#941] +- Increased maximum record size from 50kB to 1MB [snowplow/snowplow#1753] +- Added tests for splitting large requests [snowplow/snowplow#1683] +- Updated bad rows to include timestamp [snowplow/snowplow#1681] +- Handled case where IP is not present [snowplow/snowplow#1680] +- Did some reorganisation and refactoring of the project [snowplow/snowplow#1678] +- Added json4s dependency [snowplow/snowplow#1673] +- Added bad stream [snowplow/snowplow#1502] + +## [0.4.0 (snowplow/snowplow: 65 Scarlet Rosefinch)] - 2015-05-08 +### Changed +- Bumped Scalazon to 0.11 [snowplow/snowplow#1504] +- Added support for PutRecords API [snowplow/snowplow#1227] +- Added CORS support [snowplow/snowplow#1165] +- Added CORS-style support for ActionScript3 Tracker [snowplow/snowplow#1331] +- Added 
ability to disable third-party cookies [snowplow/snowplow#1363] +- Removed automatic creation of stream [snowplow/snowplow#1464] +- Added macros to config.hocon.sample [snowplow/snowplow#1471] +- Logged the name of the stream to which records are written [snowplow/snowplow#1503] +- Added shutdown hook to send stored events [snowplow/snowplow#1535] +- Added configurable exponential backoff with jitter [snowplow/snowplow#1592] + +## [0.3.0 (snowplow/snowplow: 60 Bee Hummingbird)] - 2015-02-03 +### Changed +- Started sending CollectorPayloads instead of SnowplowRawEvents [snowplow/snowplow#1226] +- Added support for POST requests [snowplow/snowplow#187] +- Added support for any {api-vendor}/{api-version} for GET and POST [snowplow/snowplow#652] +- Stopped decoding URLs [snowplow/snowplow#1217] +- Changed 1x1 pixel response to use a stable GIF [snowplow/snowplow#1260] +- Renamed default.conf to config.hocon.sample [snowplow/snowplow#1243] +- Started using ThreadLocal to handle Thrift serialization, thanks @denismo and @pkallos! [snowplow/snowplow#1254] +- Added healthcheck for load balancers, thanks @duncan! [snowplow/snowplow#1360] + +## [0.2.0 (snowplow/snowplow: 0.9.12)] - 2014-11-26 +### Changed +- Changed organization to "com.snowplowanalytics" [snowplow/snowplow#1168] +- Made the --config option mandatory [snowplow/snowplow#1128] +- Added ability to set AWS credentials from environment variables [snowplow/snowplow#1116] +- Now enforcing Java 7 for compilation [snowplow/snowplow#1068] +- Increased request character limit to 32768 [snowplow/snowplow#987] +- Improved performance by using Future, thanks @pkallos! [snowplow/snowplow#580] +- Scala Stream Collector, Scala Kinesis Enrich: made endpoint configurable, thanks @sambo1972! [snowplow/snowplow#978] +- Scala Stream Collector, Scala Kinesis Enrich: added support for IAM roles, thanks @pkallos! 
[snowplow/snowplow#534] +- Scala Stream Collector, Scala Kinesis Enrich: replaced stream list with describe to tighten permissions, thanks @pkallos! [snowplow/snowplow#535] + +## [0.2.0 (snowplow/snowplow: 0.9.12)] - 2014-11-26 +- Changed organization to "com.snowplowanalytics" [snowplow/snowplow#1168] +- Made the --config option mandatory [snowplow/snowplow#1128] +- Added ability to set AWS credentials from environment variables [snowplow/snowplow#1116] +- Now enforcing Java 7 for compilation [snowplow/snowplow#1068] +- Increased request character limit to 32768 [snowplow/snowplow#987] +- Improved performance by using Future, thanks @pkallos! [snowplow/snowplow#580] +- Scala Stream Collector, Scala Kinesis Enrich: made endpoint configurable, thanks @sambo1972! [snowplow/snowplow#978] +- Scala Stream Collector, Scala Kinesis Enrich: added support for IAM roles, thanks @pkallos! [snowplow/snowplow#534] +- Scala Stream Collector, Scala Kinesis Enrich: replaced stream list with describe to tighten permissions, thanks @pkallos! 
[snowplow/snowplow#535] + + +[Unreleased]: https://github.com/snowplow/stream-collector/compare/3.2.1...HEAD +[3.2.1]: https://github.com/snowplow/stream-collector/compare/3.2.0...3.2.1 +[3.2.0]: https://github.com/snowplow/stream-collector/compare/3.1.2...3.2.0 +[3.1.2]: https://github.com/snowplow/stream-collector/compare/3.1.1...3.1.2 +[3.1.1]: https://github.com/snowplow/stream-collector/compare/3.1.0...3.1.1 +[3.1.0]: https://github.com/snowplow/stream-collector/compare/3.0.1...3.1.0 +[3.0.1]: https://github.com/snowplow/stream-collector/compare/3.0.0...3.0.1 +[3.0.0]: https://github.com/snowplow/stream-collector/compare/2.10.0...3.0.0 +[2.10.0]: https://github.com/snowplow/stream-collector/compare/2.9.2...2.10.0 +[2.9.2]: https://github.com/snowplow/stream-collector/compare/2.9.1...2.9.2 +[2.9.1]: https://github.com/snowplow/stream-collector/compare/2.9.0...2.9.1 +[2.9.0]: https://github.com/snowplow/stream-collector/compare/2.8.2...2.9.0 +[2.8.2]: https://github.com/snowplow/stream-collector/compare/2.8.1...2.8.2 +[2.8.1]: https://github.com/snowplow/stream-collector/compare/2.8.0...2.8.1 +[2.8.0]: https://github.com/snowplow/stream-collector/compare/2.7.1...2.8.0 +[2.7.1]: https://github.com/snowplow/stream-collector/compare/2.7.0...2.7.1 +[2.7.0]: https://github.com/snowplow/stream-collector/compare/2.6.3...2.7.0 +[2.6.3]: https://github.com/snowplow/stream-collector/compare/2.6.2...2.6.3 +[2.6.2]: https://github.com/snowplow/stream-collector/compare/2.6.1...2.6.2 +[2.6.1]: https://github.com/snowplow/stream-collector/compare/2.6.0...2.6.1 +[2.6.0]: https://github.com/snowplow/stream-collector/compare/2.5.0...2.6.0 +[2.5.0]: https://github.com/snowplow/stream-collector/compare/2.4.5...2.5.0 +[2.4.5]: https://github.com/snowplow/stream-collector/compare/2.4.4...2.4.5 +[2.4.4]: https://github.com/snowplow/stream-collector/compare/2.4.3...2.4.4 +[2.4.3]: https://github.com/snowplow/stream-collector/compare/2.4.2...2.4.3 +[2.4.2]: 
https://github.com/snowplow/stream-collector/compare/2.4.1...2.4.2 +[2.4.1]: https://github.com/snowplow/stream-collector/compare/2.4.0...2.4.1 +[2.4.0]: https://github.com/snowplow/stream-collector/compare/2.3.1...2.4.0 +[2.3.1]: https://github.com/snowplow/stream-collector/compare/2.3.0...2.3.1 +[2.3.0]: https://github.com/snowplow/stream-collector/compare/2.2.1...2.3.0 +[2.2.1]: https://github.com/snowplow/stream-collector/compare/2.2.0...2.2.1 +[2.2.0]: https://github.com/snowplow/stream-collector/compare/2.1.2...2.2.0 +[2.1.2]: https://github.com/snowplow/stream-collector/compare/2.1.1...2.1.2 +[2.1.1]: https://github.com/snowplow/stream-collector/compare/2.1.0...2.1.1 +[2.1.0]: https://github.com/snowplow/stream-collector/compare/2.0.1...2.1.0 +[2.0.1]: https://github.com/snowplow/stream-collector/compare/2.0.0...2.0.1 +[2.0.0]: https://github.com/snowplow/stream-collector/compare/1.0.1...2.0.0 +[1.0.1 (snowplow/snowplow: 119 Tycho Magnetic Anomaly Two)]: https://github.com/snowplow/stream-collector/releases +[1.0.0 (snowplow/snowplow: 118 Morgantina)]: https://github.com/snowplow/stream-collector/releases +[0.17.0 (snowplow/snowplow: 117 Biskupin)]: https://github.com/snowplow/stream-collector/releases +[0.16.0 (snowplow/snowplow: 116 Madara Rider)]: https://github.com/snowplow/stream-collector/releases +[0.15.0 (snowplow/snowplow: 113 Filitosa)]: https://github.com/snowplow/stream-collector/releases +[0.14.0 (snowplow/snowplow: 109 Lambaesis)]: https://github.com/snowplow/stream-collector/releases +[0.13.0 (snowplow/snowplow: 101 Neapolis)]: https://github.com/snowplow/stream-collector/releases +[0.12.0 (snowplow/snowplow: 98 Argentomagus)]: https://github.com/snowplow/stream-collector/releases +[0.11.0 (snowplow/snowplow: 96 Zeugma)]: https://github.com/snowplow/stream-collector/releases +[0.10.0 (snowplow/snowplow: 93 Virunum)]: https://github.com/snowplow/stream-collector/releases +[0.9.0 (snowplow/snowplow: 85 Metamorphosis)]: 
https://github.com/snowplow/stream-collector/releases +[0.8.0 (snowplow/snowplow: 84 Steller's Sea Eagle)]: https://github.com/snowplow/stream-collector/releases +[0.7.0 (snowplow/snowplow: 80 Southern Cassowary)]: https://github.com/snowplow/stream-collector/releases +[0.6.0 (snowplow/snowplow: 78 Great Hornbill)]: https://github.com/snowplow/stream-collector/releases +[0.5.0 (snowplow/snowplow: 67 Bohemian Waxwing)]: https://github.com/snowplow/stream-collector/releases +[0.4.0 (snowplow/snowplow: 65 Scarlet Rosefinch)]: https://github.com/snowplow/stream-collector/releases +[0.3.0 (snowplow/snowplow: 60 Bee Hummingbird)]: https://github.com/snowplow/stream-collector/releases +[0.2.0 (snowplow/snowplow: 0.9.12)]: https://github.com/snowplow/stream-collector/releases +[0.2.0 (snowplow/snowplow: 0.9.12)]: https://github.com/snowplow/stream-collector/releases