diff --git a/.travis.yml b/.travis.yml index 08d3217..a34768d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,8 +10,8 @@ script: env: matrix: - IMG=base - - IMG=scala-stream-collector/0.11.0 - - IMG=stream-enrich/0.12.0 + - IMG=scala-stream-collector/0.12.0 + - IMG=stream-enrich/0.13.0 - IMG=s3-loader/0.6.0 - IMG=elasticsearch-loader/0.10.1 global: diff --git a/CHANGELOG b/CHANGELOG index 2cb1ea8..86458f0 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +Release 3 (2018-01-08) +---------------------- +Stream Enrich 0.13.0 image (#31) +Scala Stream Collector 0.12.0 image (#30) +Use SSC 0.12.0 and SE 0.13.0 in the Docker Compose example (#32) + Release 2 (2017-11-28) ---------------------- Stream Enrich 0.12.0 image (#24) diff --git a/README.md b/README.md index 45ed03f..b1324e1 100644 --- a/README.md +++ b/README.md @@ -20,10 +20,10 @@ You can pull the images from the registry directly: ```bash # Scala Stream Collector image -docker pull snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector:0.11.0 +docker pull snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector:0.12.0 # Stream Enrich image -docker pull snowplow-docker-registry.bintray.io/snowplow/stream-enrich:0.12.0 +docker pull snowplow-docker-registry.bintray.io/snowplow/stream-enrich:0.13.0 # Elasticsearch Loader image docker pull snowplow-docker-registry.bintray.io/snowplow/elasticsearch-loader:0.10.1 @@ -41,10 +41,10 @@ Alternatively, you can build them yourself: docker pull snowplow-docker-registry.bintray.io/snowplow/base:0.1.0 # Scala Stream Collector image -docker build -t snowplow/scala-stream-collector:0.11.0 scala-stream-collector/0.11.0 +docker build -t snowplow/scala-stream-collector:0.12.0 scala-stream-collector/0.12.0 # Stream Enrich image -docker build -t snowplow/stream-enrich:0.12.0 stream-enrich/0.12.0 +docker build -t snowplow/stream-enrich:0.13.0 stream-enrich/0.13.0 # Elasticsearch Loader image docker build -t snowplow/elasticsearch-loader:0.10.1 
elasticsearch-loader/0.10.1 @@ -69,15 +69,15 @@ Next, you can run a container for each component by mounting your configuration # Scala Stream Collector container docker run \ -v $PWD/scala-stream-collector-config:/snowplow/config \ - snowplow/scala-stream-collector:0.11.0 \ # if you have built the image - # snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector:0.11.0 if you have pulled the image + snowplow/scala-stream-collector:0.12.0 \ # if you have built the image + # snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector:0.12.0 if you have pulled the image --config /snowplow/config/config.hocon # Stream Enrich docker run \ -v $PWD/stream-enrich-config:/snowplow/config \ - snowplow/stream-enrich:0.12.0 \ # if you have built the image - # snowplow-docker-registry.bintray.io/snowplow/stream-enrich:0.12.0 if you have pulled the image + snowplow/stream-enrich:0.13.0 \ # if you have built the image + # snowplow-docker-registry.bintray.io/snowplow/stream-enrich:0.13.0 if you have pulled the image --config /snowplow/config/config.hocon \ --resolver file:/snowplow/config/resolver.json \ --enrichments file:/snowplow/config/enrichments/ \ diff --git a/example/README.md b/example/README.md index 8b54c25..b2d3849 100644 --- a/example/README.md +++ b/example/README.md @@ -4,25 +4,22 @@ This folder contains a Docker Compose example for the Snowplow realtime pipeline ## Introduction -This Docker Compose example bundles the following components in two distinct containers: +This Docker Compose example bundles the following Snowplow components in two distinct containers: - [Scala Stream Collector][ssc] - [Stream Enrich][se] -As is, the configuration files make the following assumptions regarding Kinesis streams: +Additionally, they make use of [NSQ][nsq] topics to store events. The NSQ components are running +in three different containers: -- The `snowplow-raw` stream exists and is used to store the collected events -- The `snowplow-enriched` stream 
exists and is used to store the enriched events -- The `snowplow-bad` stream exists and is used to store the events which failed validation - -All those streams being located in `us-east-1`. To authenticate the components, the -[DefaultAWSCredentialsProviderChain][dacpc] is used. - -Feel free to modify those configuration files to suit your needs. +- nsqd: the daemon in charge of receiving, queueing and delivering messages +- nsqlookupd: the daemon taking care of managing who produces and consumes what +- nsqadmin: a web UI to perform administrative tasks as well as giving an overview of the NSQ +topology ## Usage -Once you have configured the configuration files to your liking, you can launch those two components +Once you have modified the configuration files to your liking, you can launch those two components with: ```bash @@ -52,6 +49,6 @@ limitations under the License. [ssc]: https://github.com/snowplow/snowplow-docker/tree/master/scala-stream-collector [se]: https://github.com/snowplow/snowplow-docker/tree/master/stream-enrich -[dacpc]: http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html +[nsq]: http://nsq.io/ [license]: http://www.apache.org/licenses/LICENSE-2.0 \ No newline at end of file diff --git a/example/docker-compose.yml b/example/docker-compose.yml index 503a980..6a40572 100644 --- a/example/docker-compose.yml +++ b/example/docker-compose.yml @@ -26,7 +26,7 @@ services: - "4171:4171" scala-stream-collector: - image: snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector:0.11.0 + image: snowplow-docker-registry.bintray.io/snowplow/scala-stream-collector:0.12.0 command: [ "--config", "/snowplow/config/config.hocon" ] depends_on: - nsqd @@ -51,7 +51,7 @@ services: - "SP_JAVA_OPTS=-Xms512m -Xmx512m" stream-enrich: - image: snowplow-docker-registry.bintray.io/snowplow/stream-enrich:0.12.0 + image: snowplow-docker-registry.bintray.io/snowplow/stream-enrich:0.13.0 command: [ 
"--config", "/snowplow/config/config.hocon", "--resolver", "file:/snowplow/config/resolver.json", diff --git a/example/scala-stream-collector-config/config.hocon b/example/scala-stream-collector-config/config.hocon index 0dcc37e..781780c 100644 --- a/example/scala-stream-collector-config/config.hocon +++ b/example/scala-stream-collector-config/config.hocon @@ -16,8 +16,14 @@ collector { cookieBounce { enabled = false - name = "n3pc" + name = n3pc fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000" + forwardedProtocolHeader = "X-Forwarded-Proto" + } + + redirectMacro { + enabled = false + placeholder = "[TOKEN]" } streams { diff --git a/example/stream-enrich-config/config.hocon b/example/stream-enrich-config/config.hocon index 708c443..73fab12 100644 --- a/example/stream-enrich-config/config.hocon +++ b/example/stream-enrich-config/config.hocon @@ -36,6 +36,7 @@ enrich { rawChannel = enrich host = nsqd port = 4150 + lookupHost = nsqlookupd lookupPort = 4161 } diff --git a/scala-stream-collector/0.12.0/Dockerfile b/scala-stream-collector/0.12.0/Dockerfile new file mode 100644 index 0000000..a602d6d --- /dev/null +++ b/scala-stream-collector/0.12.0/Dockerfile @@ -0,0 +1,26 @@ +FROM snowplow-docker-registry.bintray.io/snowplow/base:0.1.0 +LABEL maintainer="Snowplow Analytics Ltd. " + +# The version of the collector to download. +ENV COLLECTOR_VERSION="0.12.0" + +# The name of the archive to download. +ENV ARCHIVE="snowplow_scala_stream_collector_${COLLECTOR_VERSION}.zip" + +# Install the Scala Stream Collector. +RUN mkdir -p /tmp/build && \ + cd /tmp/build && \ + wget -q http://dl.bintray.com/snowplow/snowplow-generic/${ARCHIVE} && \ + unzip -d ${SNOWPLOW_BIN_PATH} ${ARCHIVE} && \ + cd /tmp && \ + rm -rf /tmp/build + +# Port used by the collector. +EXPOSE 80 + +# Defines an entrypoint script delegating the launching of the collector to the snowplow user. +# The script uses dumb-init as the top-level process. 
+COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh +ENTRYPOINT [ "docker-entrypoint.sh" ] + +CMD [ "--help" ] \ No newline at end of file diff --git a/scala-stream-collector/0.12.0/docker-entrypoint.sh b/scala-stream-collector/0.12.0/docker-entrypoint.sh new file mode 100755 index 0000000..a76cd8d --- /dev/null +++ b/scala-stream-collector/0.12.0/docker-entrypoint.sh @@ -0,0 +1,12 @@ +#!/usr/bin/dumb-init /bin/sh +set -e + +# If the config directory has been mounted through -v, we chown it. +if [ "$(stat -c %u ${SNOWPLOW_CONFIG_PATH})" != "$(id -u snowplow)" ]; then + chown snowplow:snowplow ${SNOWPLOW_CONFIG_PATH} +fi + +# Make sure we run the collector as the snowplow user +exec su-exec snowplow:snowplow /usr/bin/java \ + $SP_JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap \ + -jar ${SNOWPLOW_BIN_PATH}/snowplow-stream-collector-${COLLECTOR_VERSION}.jar "$@" diff --git a/stream-enrich/0.13.0/Dockerfile b/stream-enrich/0.13.0/Dockerfile new file mode 100644 index 0000000..efbda13 --- /dev/null +++ b/stream-enrich/0.13.0/Dockerfile @@ -0,0 +1,23 @@ +FROM snowplow-docker-registry.bintray.io/snowplow/base:0.1.0 +LABEL maintainer="Snowplow Analytics Ltd. " + +# The version of Stream Enrich to download. +ENV ENRICH_VERSION="0.13.0" + +# The name of the archive to download. +ENV ARCHIVE="snowplow_stream_enrich_${ENRICH_VERSION}.zip" + +# Install Stream Enrich. +RUN mkdir -p /tmp/build && \ + cd /tmp/build && \ + wget -q http://dl.bintray.com/snowplow/snowplow-generic/${ARCHIVE} && \ + unzip -d ${SNOWPLOW_BIN_PATH} ${ARCHIVE} && \ + cd /tmp && \ + rm -rf /tmp/build + +# Defines an entrypoint script delegating the launching of Stream Enrich to the snowplow user. +# The script uses dumb-init as the top-level process. 
+COPY docker-entrypoint.sh /usr/local/bin/docker-entrypoint.sh +ENTRYPOINT [ "docker-entrypoint.sh" ] + +CMD [ "--help" ] diff --git a/stream-enrich/0.13.0/docker-entrypoint.sh b/stream-enrich/0.13.0/docker-entrypoint.sh new file mode 100755 index 0000000..ebb606a --- /dev/null +++ b/stream-enrich/0.13.0/docker-entrypoint.sh @@ -0,0 +1,15 @@ +#!/usr/bin/dumb-init /bin/sh +set -e + +# If the config directory has been mounted through -v, we chown it. +if [ "$(stat -c %u ${SNOWPLOW_CONFIG_PATH})" != "$(id -u snowplow)" ]; then + chown snowplow:snowplow ${SNOWPLOW_CONFIG_PATH} +fi + +# Needed because of SCE's ./ip_geo file +cd $(eval echo ~snowplow) + +# Make sure we run Stream Enrich as the snowplow user +exec su-exec snowplow:snowplow /usr/bin/java \ + $SP_JAVA_OPTS -XX:+UnlockExperimentalVMOptions -XX:+UseCGroupMemoryLimitForHeap \ + -jar ${SNOWPLOW_BIN_PATH}/snowplow-stream-enrich-${ENRICH_VERSION}.jar "$@"