From 4e1ef66e5177c309542adb62c9f8055b75b2befa Mon Sep 17 00:00:00 2001 From: Josh Klar Date: Thu, 23 Feb 2023 12:22:21 -0800 Subject: [PATCH] docker: Build AMD64+ARM64 multiarch images and push to GHCR. In light of recent changes to Docker Hub, move our Docker images into GHCR which integrates more tightly with GitHub flows we already use throughout the Zulip org. Since it's near-trivial to do so at the same time, add officially-supported ARM64 builds. Resolves #357. --- .../workflows/build-and-push-ghcr-common.yml | 43 +++++++ .../workflows/build-and-push-ghcr-latest.yml | 14 +++ .github/workflows/build-and-push-ghcr-pr.yml | 19 +++ Dockerfile | 5 +- IMAGE_TAG | 15 +++ README.md | 19 +-- build_and_push_image.sh | 110 ++++++++++++++++++ 7 files changed, 217 insertions(+), 8 deletions(-) create mode 100644 .github/workflows/build-and-push-ghcr-common.yml create mode 100644 .github/workflows/build-and-push-ghcr-latest.yml create mode 100644 .github/workflows/build-and-push-ghcr-pr.yml create mode 100644 IMAGE_TAG create mode 100755 build_and_push_image.sh diff --git a/.github/workflows/build-and-push-ghcr-common.yml b/.github/workflows/build-and-push-ghcr-common.yml new file mode 100644 index 0000000000..2711e9ed10 --- /dev/null +++ b/.github/workflows/build-and-push-ghcr-common.yml @@ -0,0 +1,43 @@ +--- +on: + workflow_call: + inputs: + # N.B.: This behavior is not concurrency-safe and updates the mutable + # :latest image tag + push-latest: + type: boolean + primary-registry-tag: + type: string + skip-pull-check: + type: boolean + +jobs: + multiarch-image: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Set up QEMU + uses: docker/setup-qemu-action@v2 + - name: Set up Docker Buildx + id: buildx + uses: docker/setup-buildx-action@v2 + - name: Login to GitHub Container Registry + uses: docker/login-action@v2 + with: + registry: ghcr.io + username: ${{ github.repository_owner }} + password: ${{ github.token }} + - name: Set 
PUSH_LATEST_TAG in environment + if: inputs.push-latest + run: echo "PUSH_LATEST_TAG=1" >> "$GITHUB_ENV" + - name: Set REGISTRY_TAG in environment + if: inputs.primary-registry-tag != '' + run: echo "REGISTRY_TAG=${{ inputs.primary-registry-tag }}" >> "$GITHUB_ENV" + - name: Set SKIP_PULL_CHECK in environment + if: inputs.skip-pull-check + run: echo "SKIP_PULL_CHECK=1" >> "$GITHUB_ENV" + - name: Build Images + env: + EXTERNAL_QEMU: "1" + run: ./build_and_push_image.sh diff --git a/.github/workflows/build-and-push-ghcr-latest.yml b/.github/workflows/build-and-push-ghcr-latest.yml new file mode 100644 index 0000000000..94754ee557 --- /dev/null +++ b/.github/workflows/build-and-push-ghcr-latest.yml @@ -0,0 +1,14 @@ +--- +name: Publish Docker Image to GHCR (versioned + :latest) + +on: + push: + branches: + - main + +jobs: + multiarch-image: + uses: ./.github/workflows/build-and-push-ghcr-common.yml + secrets: inherit + with: + push-latest: true # must match the input name declared by build-and-push-ghcr-common.yml diff --git a/.github/workflows/build-and-push-ghcr-pr.yml b/.github/workflows/build-and-push-ghcr-pr.yml new file mode 100644 index 0000000000..34c7d9eda6 --- /dev/null +++ b/.github/workflows/build-and-push-ghcr-pr.yml @@ -0,0 +1,19 @@ +--- +name: Publish Docker Image to GHCR (PR) + +on: + pull_request: + branches: + - main + +jobs: + multiarch-image: + uses: ./.github/workflows/build-and-push-ghcr-common.yml + secrets: inherit + with: + # dz prefix to emphasize that the commit does not line up with any commit + # in zulip/zulip + primary-registry-tag: "dz-${{ github.sha }}" + # There's no realistic chance of overwriting an existing SHA tag here, so + # save the pull time + skip-pull-check: true diff --git a/Dockerfile b/Dockerfile index 8d85b83bce..3cfe444a2f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -38,7 +38,10 @@ WORKDIR /home/zulip/zulip ARG CUSTOM_CA_CERTIFICATES -# Finally, we provision the development environment and build a release tarball +# Finally, we provision the development environment and build a release +# 
tarball, after first bumping Yarn's network timeout to 5 minutes to account +# for occasional glitches in QEMU environments (eg. multiarch builds). +RUN echo 'network-timeout 300000' >> ~/.yarnrc RUN SKIP_VENV_SHELL_WARNING=1 ./tools/provision --build-release-tarball-only RUN . /srv/zulip-py3-venv/bin/activate && \ ./tools/build-release-tarball docker && \ diff --git a/IMAGE_TAG b/IMAGE_TAG new file mode 100644 index 0000000000..6617dd2728 --- /dev/null +++ b/IMAGE_TAG @@ -0,0 +1,15 @@ +# This should ~always be updated to match the Zulip Server version targeted, +# with the trailing -N used to denote image versions within the series. Only +# the final line of this file is read during publishing. +# +# Note that changes to the Dockerfile that are not bundled with a bump to this +# number will never be published to GitHub Container Registry, as duplicates +# aren't built. + +# Changelog: +# +# 6.1-1: Add ARM64 support, publish to GHCR +# <--> This file created here <--> +# 6.1-0: final version published exclusively to Docker Hub + +6.1-1 diff --git a/README.md b/README.md index 3a985570a3..54c8d930a0 100644 --- a/README.md +++ b/README.md @@ -10,15 +10,20 @@ This is a container image for running [Zulip](https://zulip.com) Current Zulip version: `6.1` Current Docker image version: `6.1-0` +Current architectures supported: `amd64` -Project status: **Alpha**. While this project works and is -used by many sites in production, configuring is substantially more -error-prone than the [normal Zulip installer][normal-install] (which -Just Works). We recommend this project if you want to host Zulip -using Docker, but both setting up and maintaining a Zulip server is -simpler and less error-prone with the normal installer than with Docker. + +> `arm64` support is experimental, and is not provided in the Docker Hub +> images. To build an `arm64` image yourself, see `make help` locally. 
-[normal-install]: https://zulip.readthedocs.io/en/latest/production/install.html +Project status: **Alpha**. While these images work and are used by many sites +in production, configuring is substantially more error-prone than the [bare +metal Zulip installer][bare-metal-install] (which Just Works, though generally +expects a dedicated node). We're actively working to improve the situation, but +for now recommend these containers and orchestrator recipes primarily to those +comfortable being early adopters, and who are ready to report bugs. + +[bare-metal-install]: https://zulip.readthedocs.io/en/latest/production/install.html ## Overview diff --git a/build_and_push_image.sh b/build_and_push_image.sh new file mode 100755 index 0000000000..255ff04fb8 --- /dev/null +++ b/build_and_push_image.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash + +# This script wraps Docker and Docker BuildX to build multiarch Zulip images. +# Make sure a recent Docker and BuildX are installed on your system - Docker +# Desktop users (on any OS) should be good to go, those using Linux +# distribution's builds of Docker will need to find the correct packages. +# +# To use locally, override the environment variables REGISTRY, REGISTRY_TAG +# (perhaps to 'local'), and optionally BUILDX_PLATFORMS. Additionally, +# PUSH_LATEST_TAG can be set to 1 to additionally tag :latest when pushing to +# the registry. Then, run the script without arguments. For example: +# +# REGISTRY=docker.example.com/myorg/zulip REGISTRY_TAG=local PUSH_LATEST_TAG=1 +# ./build_and_push_image.sh +# +# Note: EXTERNAL_QEMU=1 is required when it's unsafe or undesired to manage +# binfmt helpers, for example within CI systems like GitHub Actions (use +# docker/setup-qemu-action@v2 instead). +# +# By default, REGISTRY:REGISTRY_TAG will be built for linux/amd64 and +# linux/arm64. Adding other platforms to this list is unsupported and will +# almost certainly not work, but the list can be shrunk. 
REGISTRY must be set +# to something the builder has push access to, because BuildX images and +# manifests are not loaded into the host's Docker registry (an upstream +# limitation). +# +# If building for architectures other than that the host runs on, one can expect +# this step to take many multiples of the time it takes to build the Zulip +# image for just the native architecture. If it takes 10 minutes to build the +# amd64 image by itself, expect cross-compiling the arm64 image to take 30-60 +# minutes on most currently-common hardware. Currently, distributing the image +# builds to multiple machines (perhaps to allow the arm64 image to build on a +# native arm64 host for efficiency) is unsupported. +# +# Assuming all goes well, REGISTRY:REGISTRY_TAG will point to a multiarch +# manifest referring to an image for each of BUILDX_PLATFORMS, which can then +# be rolled out to your infrastructure, used in Docker Compose, etc. +# +# Please report bugs with this script or anything it runs, or with running +# Zulip on arm64 in general, at https://github.com/zulip/docker-zulip and/or at +# https://chat.zulip.org + +set -ex + +REGISTRY="${REGISTRY:-ghcr.io/zulip/zulip}" +REGISTRY_TAG="${REGISTRY_TAG:-$(tail -n 1 < "$(git rev-parse --show-toplevel)/IMAGE_TAG")}" +PRIMARY_IMAGE="${REGISTRY}:${REGISTRY_TAG}" + +if [ "${SKIP_PULL_CHECK}" != "1" ]; then + if docker pull "${PRIMARY_IMAGE}"; then + echo "Image ${PRIMARY_IMAGE} already exists, refusing to overwrite!" >&2 + exit 1 + fi +fi + +PUSH_LATEST_TAG="${PUSH_LATEST_TAG:-0}" +PUSH_LATEST_TAG_ARG=() # start empty so an inherited value cannot inject build arguments +if [ "${PUSH_LATEST_TAG}" = "1" ]; then + PUSH_LATEST_TAG_ARG=("-t" "${REGISTRY}:latest") +fi + +# Default to creating our own buildx context, as "default", using the native +# "docker" driver, can result in errors like the following when using Linux +# distros' Docker and not Docker Desktop: +# +# ERROR: multiple platforms feature is currently not supported for docker +# driver. Please switch to a different driver (eg. 
"docker buildx create +# --use") +BUILDX_BUILDER="${BUILDX_BUILDER:-zulip}" +BUILDX_PLATFORMS="${BUILDX_PLATFORMS:-linux/amd64,linux/arm64}" + +if [ "${EXTERNAL_QEMU}" != "1" ]; then + # --credential yes is required to run sudo within qemu, without it the + # effective UID after a call to sudo will not be 0 and sudo in cross-built + # containers (eg. the arm64 build if running on an amd64 host) will fail. + # See also: https://github.com/crazy-max/ghaction-docker-buildx/issues/213. + # + # We're allowing failures here (|| true) for two main reasons: + # + # - BUILDX_PLATFORMS can be overridden to a single, native platform + # (meaning this QEMU reset won't be necessary anyway) + # - On ZFS<2.2 root filesystems, this incantation can fail due to + # Docker-side dataset teardown issues as documented in + # https://github.com/moby/moby/issues/40132. The QEMU reset may have + # succeeded despite the Docker daemon errors, so we'll try to power + # through. + docker run \ + --rm \ + --privileged \ + multiarch/qemu-user-static \ + --reset \ + -p yes \ + --credential yes \ + || true +fi +# Look the builder up by exact name (grepping "buildx ls" also matched substrings) and create it only if missing. +docker buildx inspect "${BUILDX_BUILDER}" >/dev/null 2>&1 || { + docker buildx create \ + --name "${BUILDX_BUILDER}" \ + --platform "${BUILDX_PLATFORMS}" \ + --bootstrap \ + --use +} +# Select the builder explicitly: a builder that already existed was never made current by "--use" above. +docker buildx build --builder "${BUILDX_BUILDER}" \ + --platform "${BUILDX_PLATFORMS}" \ + -t "${PRIMARY_IMAGE}" \ + "${PUSH_LATEST_TAG_ARG[@]}" \ + --push \ + .