Skip to content

Commit

Permalink
Use only one docker image
Browse files Browse the repository at this point in the history
  • Loading branch information
tomaslink committed Jun 14, 2024
1 parent dcee90d commit cecde49
Show file tree
Hide file tree
Showing 13 changed files with 136 additions and 158 deletions.
4 changes: 4 additions & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,7 @@ docker-compose.yaml
example
dev-temp
scripts/samples

examples
.venv
venv
4 changes: 3 additions & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ jobs:
uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python-version }}
# cache: 'pip' # fails if you don´t have requirements.txt or pyproject.toml on root dir.
cache: 'pip'
# cache option makes the step fail if you don't have requirements.txt or pyproject.toml on root.
# https://github.com/actions/setup-python/issues/807.

- name: Install dependencies
run: |
Expand Down
26 changes: 26 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
FROM gcr.io/world-fishing-827/github.com/globalfishingwatch/gfw-bash-pipeline:latest-python3.8

# Install SDK. (needed for Python SDK)
RUN pip install --no-cache-dir apache-beam[gcp]==2.56.0

# Copy files from official SDK image, including script/dependencies.
COPY --from=apache/beam_python3.8_sdk:2.56.0 /opt/apache/beam /opt/apache/beam

# Perform any additional customizations if desired
COPY ./requirements.txt ./
RUN pip install -r requirements.txt

# Temporary. TODO: Use a local test docker image with extra dependencies.
COPY ./requirements/test.txt ./
RUN pip install -r test.txt

# Temporary. TODO: Use a local dev docker image with extra dependencies.
COPY ./requirements/dev.txt ./
RUN pip install -r dev.txt

# Setup local packages
COPY . /opt/project
RUN pip install -e .

# Set the entrypoint to Apache Beam SDK launcher.
ENTRYPOINT ["/opt/apache/beam/boot"]
24 changes: 0 additions & 24 deletions Dockerfile-scheduler

This file was deleted.

6 changes: 0 additions & 6 deletions Dockerfile-worker

This file was deleted.

46 changes: 24 additions & 22 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,45 +1,47 @@
VENV_NAME:=.venv
REQS_WORKER:=requirements/worker
REQS_SCHEDULER:=requirements/scheduler
REQS_PROD_IN:=requirements/prod.in
REQS_PROD_TXT:=requirements.txt
REQS_ALL:=requirements/all.txt

## help: Prints this list of commands.
## gcp: pulls gcloud docker image, authenticates to google cloud and configures the project.
## build: Builds docker image.
## login: Runs google cloud authentication.
## dockershell: Enters to docker container shell.
## requirements: Compiles requirements file with pip-tools.
## upgrade-requirementsde: Upgrades requirements file based on .in constraints.
## venv: creates a virtual environment inside .venv.
## install: Installs development dependencies.
## requirements: Compiles requirement txt files with pip-tools.
## upgrade-requirements: Upgrades requirements txt files based on .in constraints.
## requirements-worker: Compiles only worker requirements with pip-tools.
## requirements-scheduler: Compiles only scheduler requirements with pip-tools.
## test: Run unit tests.
## testdocker: Run unit tests inside docker container.
## testdocker-all: Run unit and integration tests inside docker container.

## venv3.8: creates a virtual environment inside .venv (using python3.8).
## install: Installs all dependencies needed for development.
## test: Runs unit tests.
## testdocker: Runs unit tests inside docker container.
## testdocker-all: Runs unit and integration tests inside docker container.


help:
@echo "\nUsage: \n"
@sed -n 's/^##//p' ${MAKEFILE_LIST} | column -t -s ':' | sed -e 's/^/-/'

gcp:
docker compose pull gcloud
docker volume create --name=gcp
docker compose run gcloud auth application-default login
docker compose run gcloud config set project world-fishing-827
docker compose run gcloud auth application-default set-quota-project world-fishing-827

build:
docker compose build

dockershell:
docker compose run --entrypoint /bin/bash -it dev

requirements-worker:
docker compose run --entrypoint /bin/bash -it dev -c \
'pip-compile -o ${REQS_WORKER}.txt ${REQS_WORKER}.in -v && \
pip-compile -o ${REQS_SCHEDULER}.txt ${REQS_SCHEDULER}.in -v' \

requirements-scheduler:
requirements:
docker compose run --entrypoint /bin/bash -it dev -c \
'pip-compile -o ${REQS_SCHEDULER}.txt ${REQS_SCHEDULER}.in -v'
'pip-compile -o ${REQS_PROD_TXT} ${REQS_PROD_IN} -v'

upgrade-requirements:
docker compose run --entrypoint /bin/bash -it dev -c \
'pip-compile -o ${REQS_WORKER}.txt -U ${REQS_WORKER}.in -v && \
pip-compile -o ${REQS_SCHEDULER}.txt -U ${REQS_SCHEDULER}.in -v'
'pip-compile -o ${REQS_PROD_TXT} -U ${REQS_PROD_IN} -v'

venv:
python3 -m venv ${VENV_NAME}
Expand All @@ -48,7 +50,7 @@ venv3.8:
python3.8 -m venv ${VENV_NAME}

install:
pip install -r requirements/all.txt
pip install -r ${REQS_ALL}

test:
pytest
Expand All @@ -60,4 +62,4 @@ testdocker-all:
docker compose run --entrypoint "pytest --runslow" dev


.PHONY: help install requirements requirements-upgrade requirements-worker requirements-scheduler test testdocker testdocker-all
.PHONY: help gcp build dockershell requirements upgrade-requirements venv venv3.8 install test testdocker testdocker-all
70 changes: 38 additions & 32 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ which are broadcasting using the same MMSI at the same time.
[git workflow documentation]: GIT-WORKFLOW.md
[Makefile]: Makefile
[pip-tools]: https://pip-tools.readthedocs.io/en/stable/
[requirements/scheduler.in]: requirements/scheduler.in
[requirements/worker.in]: requirements/worker.in
[requirements/scheduler.txt]: requirements/scheduler.txt
[requirements/worker.txt]: requirements/worker.txt
[requirements.txt]: requirements.txt
[requirements/prod.in]: requirements/prod.in
[Semantic Versioning]: https://semver.org


If you are going to be contributing, jump directly to the [How to contribute](#how-to-contribute) section.
If you just want to run the pipeline, use the following instructions.

# How to run

First, make sure you have [git installed], and [configure a SSH-key for GitHub].
Expand All @@ -45,28 +46,34 @@ git clone [email protected]:GlobalFishingWatch/pipe-segment.git
Install Docker Engine using the [docker official instructions] (avoid snap packages)
and the [docker compose plugin]. No other dependencies are required.

## Building docker images

To build the docker image, run:
```bash
docker compose build
```

## Google Cloud setup

The pipeline reads its input from (and writes its output to) BigQuery,
so you need to first authenticate with your google cloud account inside the docker images.
To do that, follow these steps:

1. Create external volume to share GCP authentication across containers:
```bash
docker volume create --name=gcp
```

2. Run authentication service
```bash
docker compose run gcloud auth application-default login
```

You also need to configure the project:
3. Configure the project:
```bash
docker compose run gcloud config set project world-fishing-827
docker compose run gcloud auth application-default set-quota-project world-fishing-827
```

## Building docker image

To build the docker image, run:
```bash
docker compose build
```

## CLI

The pipeline includes a CLI that can be used to start both local test runs and
Expand Down Expand Up @@ -97,12 +104,23 @@ docker compose run dev segment --help

The [Makefile] should ease the development process.

## Git Workflow

Please refer to our [git workflow documentation] to know how to manage branches in this repository.

## Setup the environment

Create a virtual environment:
```shell
make venv
. .venv/bin/activate
```

Authenticate to google cloud and set up project (not necessary if you already did it on this machine):
```shell
make gcp
```

Install dependencies:
```shell
make install
Expand All @@ -115,7 +133,7 @@ make test

Alternatively, you can run the unit tests inside the docker container:
```shell
docker compose build
make build
make testdocker
```

Expand All @@ -124,29 +142,17 @@ Run all tests in docker including ones that hit some GCP API (**currently failin
make testdocker-all
```

## Git Workflow

Please refer to our [git workflow documentation] to know how to manage branches in this repository.

## Updating dependencies

We maintain two docker images with their own set of dependencies:
- scheduler (launch environment): [requirements/scheduler.txt].
- worker (runtime environment): [requirements/worker.txt].
The [requirements.txt] contains all transitive dependencies pinned to specific versions.
This file is compiled automatically with [pip-tools], based on [requirements/prod.in].

The are compiled with [pip-tools] inside the docker container,
using previously declared [requirements/scheduler.in] and [requirements/worker.in].
Use [requirements/prod.in] to specify high-level dependencies with restrictions.
Do not modify [requirements.txt] manually.

The scheduler requirements are a superset of the worker requirements.
Thus, if you changed something in [requirements/worker.in],
you must also re-compile [requirements/scheduler.in].
This is enforced using a unique Makefile command:
```shell
make requirements-worker
```
If you only modified something in [requirements/scheduler.in], you can just run
To re-compile dependencies, just run
```shell
make requirements-scheduler
make requirements
```

If you want to upgrade all dependencies to latest available versions
Expand Down
31 changes: 8 additions & 23 deletions cloudbuild.yaml
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
steps:
- name: 'gcr.io/cloud-builders/docker'
id: build-scheduler
id: build
args: [
'build',
'-t', '${_SCHEDULER_IMAGE_NAME}:${TAG_NAME}',
'-t', '${_SCHEDULER_IMAGE_NAME}:latest',
'-f', 'Dockerfile-scheduler',
'-t', '${_IMAGE_NAME}:${TAG_NAME}',
'-t', '${_IMAGE_NAME}:latest',
'-f', 'Dockerfile',
'.',
]

Expand All @@ -15,31 +15,16 @@ steps:
'run',
'--rm',
'--entrypoint', 'py.test',
'${_SCHEDULER_IMAGE_NAME}:latest',
]

- name: 'gcr.io/cloud-builders/docker'
id: build-worker
waitFor: ['test']
args: [
'build',
'-t', '${_WORKER_IMAGE_NAME}:${TAG_NAME}',
'-t', '${_WORKER_IMAGE_NAME}:latest',
'-f', 'Dockerfile-worker',
'.',
'${_IMAGE_NAME}:latest',
]

images:
- '${_SCHEDULER_IMAGE_NAME}:${TAG_NAME}'
- '${_SCHEDULER_IMAGE_NAME}:latest'
- '${_WORKER_IMAGE_NAME}:${TAG_NAME}'
- '${_WORKER_IMAGE_NAME}:latest'
- '${_IMAGE_NAME}:${TAG_NAME}'
- '${_IMAGE_NAME}:latest'

timeout: 600s
substitutions:
_BASE_IMAGE_NAME: 'gcr.io/${PROJECT_ID}/github.com/globalfishingwatch/pipe-segment'
_SCHEDULER_IMAGE_NAME: '${_BASE_IMAGE_NAME}/scheduler'
_WORKER_IMAGE_NAME: '${_BASE_IMAGE_NAME}/worker'
_IMAGE_NAME: 'gcr.io/${PROJECT_ID}/github.com/globalfishingwatch/pipe-segment'
options:
dynamic_substitutions: true

11 changes: 4 additions & 7 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
services:
dev:
image: gfw/pipe-segment
entrypoint: "./scripts/run.sh"
build:
context: .
dockerfile: Dockerfile-scheduler
dockerfile: Dockerfile
volumes:
- ".:/opt/project"
- "gcp:/root/.config/"
gcloud:
image: gfw/pipe-segment
build:
context: .
dockerfile: Dockerfile-scheduler
image: google/cloud-sdk:latest
entrypoint: gcloud
volumes:
- "gcp:/root/.config/"
Expand All @@ -21,5 +19,4 @@ services:
# docker volume create --name=gcp
volumes:
gcp:
external: true

external: true
2 changes: 2 additions & 0 deletions examples/example_segment_dataflow.sh
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ docker compose run dev segment \
--project=world-fishing-827 \
--temp_location=gs://pipe-temp-us-central-ttl7/dataflow_temp \
--staging_location=gs://pipe-temp-us-central-ttl7/dataflow_staging \
--ssvid_filter_query='"226013750","226010660","226014030"' \
--sdk_container_image=gcr.io/world-fishing-827/github.com/globalfishingwatch/pipe-segment:4.2.4-dev-unique-docker-image-2 \
--region=us-central1 \
--max_num_workers=600 \
--worker_machine_type=custom-1-65536-ext \
Expand Down
Loading

0 comments on commit cecde49

Please sign in to comment.