diff --git a/compose-example/Dockerfile b/compose-example/Dockerfile new file mode 100644 index 00000000..b21cb1f2 --- /dev/null +++ b/compose-example/Dockerfile @@ -0,0 +1,22 @@ +# The jupyterhub/jupyterhub instance is quite bulky... A stripped down version +# can be built from the python:3.x-slim images. +#FROM jupyterhub/jupyterhub +FROM python:3.11-slim + +# Recommended minimal set of packages, allowing use of DockerSpawner +# and OAuthenticator classes. +# git is only needed while pip installs traefik-proxy from GitHub, so purge it +# (with the dependencies apt pulled in for it) in the same layer. +# hadolint ignore=DL3008,DL3013 +RUN apt-get update && \ + apt-get install -y git --no-install-recommends && \ + python3 -m pip install -U --no-cache-dir \ + dockerspawner \ + jupyterhub \ + oauthenticator \ + git+https://github.com/jupyterhub/traefik-proxy.git && \ + apt-get purge -y --auto-remove git && \ + rm -rf /var/lib/apt/lists/* + +CMD ["jupyterhub", "-f", "/srv/jupyterhub/jupyterhub_config.py"] +WORKDIR /srv/jupyterhub diff --git a/compose-example/README.md b/compose-example/README.md new file mode 100644 index 00000000..96d420c3 --- /dev/null +++ b/compose-example/README.md @@ -0,0 +1,77 @@ +# Docker Compose + +An example `docker-compose` file and related configuration files are provided +here. + +There are four files included in this directory, which can be used as a +starting point, and should be configured to suit your individual needs:- + +- [`Dockerfile`](#Dockerfile) +- [`docker-compose.yaml`](#docker-compose.yaml) +- [`jupyterhub_config.py`](#jupyterhub_config.py) +- [`traefik.yaml`](#traefik.yaml) + +# Usage + +Configure the files appropriately, and launch the `traefik` and `jupyterhub` +services with the command:- + +``` +docker-compose up -d +``` + +# Requirements + +- `docker` +- [`docker-compose`](https://docs.docker.com/compose/). +- Optionally, a domain name for LetsEncrypt certificates + +## `Dockerfile` + +Defines the docker build rules for the `jupyterhub` container image. 
See +https://jupyterhub-dockerspawner.readthedocs.io/en/latest/docker-image.html for +details on what must be included in this image. This example builds a slimmed +down version of jupyterhub, installing `jupyterhub_traefik_proxy` from +GitHub (not PyPI), along with `dockerspawner` and `oauthenticator` jupyterhub +modules. + +## `docker-compose.yaml` + +Defines the `hub` (jupyterhub) and `traefik` service containers that +will be built and run. + +Also includes rules for how the traefik API will be accessed. Change the +credentials allowed by the `basicauth` middleware, as it is configured by +default with credentials of `admin` and `password`. + +## `jupyterhub_config.py` + +jupyterhub's configuration file. Spend some time working through this file. +This is a minimal, but documented example that works for me. A full jupyterhub +configuration can be obtained by running `jupyterhub --generate-config` in the +jupyterhub container. i.e. + +``` +# Launch the docker-compose project +docker-compose up -d + +# Generate a full configuration file, save to jupyterhub_config-full.py +docker-compose exec hub jupyterhub --generate-config > jupyterhub_config-full.py +``` + +However, a newly generated configuration file won't include configuration +directives for everything you might want to use, e.g. +`jupyterhub_traefik_proxy`, +[`oauthenticator`](https://github.com/jupyterhub/oauthenticator), or +[`dockerspawner`](https://jupyterhub-dockerspawner.readthedocs.io/). The +relevant documentation (or code) for non-default modules should be referred to. + +## `traefik.yaml` + +The static configuration file used by `traefik`. 
This file can be used to +configure various features on traefik, including but not limited to:- + +- [ACME certificate resolvers](https://doc.traefik.io/traefik/https/acme/) +- [traefik entrypoints](https://doc.traefik.io/traefik/routing/entrypoints/) +- [traefik log](https://doc.traefik.io/traefik/observability/logs/) +- [traefik API](https://doc.traefik.io/traefik/operations/api/) diff --git a/compose-example/docker-compose.yaml b/compose-example/docker-compose.yaml new file mode 100644 index 00000000..8b27e605 --- /dev/null +++ b/compose-example/docker-compose.yaml @@ -0,0 +1,92 @@ +version: "3" + +services: + # The JupyterHub service configuration { + hub: + image: jupyterhub-traefik-proxy:example + build: . + container_name: jupyterhub + + # Start traefik first + depends_on: + - traefik + + volumes: + # Jupyterhub configuration file + - ./jupyterhub_config.py:/srv/jupyterhub/jupyterhub_config.py:ro + + # Shared volume for the file provider's dynamic config + - traefik-dynamic-config:/var/run/traefik/ + + # jupyterhub's DockerSpawner needs read access to the docker socket. + - /var/run/docker.sock:/var/run/docker.sock:ro + + # Volume to persist the jupyterhub sqlite database + - data:/srv/jupyterhub + + networks: + - traefik_internal + + # } /JupyterHub + + # The traefik service configuration { + traefik: + image: traefik:latest + restart: unless-stopped + container_name: traefik + + ports: + - "80:80/tcp" + - "443:443/tcp" + + volumes: + # Static configuration file + - ./traefik.yaml:/etc/traefik/traefik.yml:ro + + # Shared dynamic config volume + - traefik-dynamic-config:/var/run/traefik + + # Traefik needs read-only access to the docker API socket + - /var/run/docker.sock:/var/run/docker.sock:ro + + labels: + # Tell traefik to enable the rules defined in the below labels. + - "traefik.enable=true" + + # Dashboard configuration + - "traefik.http.routers.dashboard.entryPoints=websecure" + + # Router rule for requests to the api service. 
The 'Host' rule must match the following in + # jupyterhub_config.py:- + # c.TraefikFileProviderProxy.traefik_api_url = "https://traefik" + - "traefik.http.routers.dashboard.rule=Host(`traefik`) && (PathPrefix(`/api`) || PathPrefix(`/dashboard`))" + - "traefik.http.routers.dashboard.service=api@internal" + + # Connections to the dashboard and api should be encrypted + - "traefik.http.routers.dashboard.tls=true" + + # Users should be authorised to access the dashboard and api + - "traefik.http.routers.dashboard.middlewares=dashboard-auth" + + # User: "admin". Password: "password". (N.B. Each $ char must be escaped, with an extra $) + - "traefik.http.middlewares.dashboard-auth.basicauth.users=admin:$$apr1$$uqxc0z9g$$ukB361ceL17eKK7gBZSkG1" + + networks: + - default + - traefik_internal + + # } /traefik + +volumes: + # Jupyterhub data volume + data: + # traefik's dynamic configuration folder will be in a volume shared between + # both services + traefik-dynamic-config: + +networks: + traefik_internal: + # The default network name will have this folder's name prepended to it. + # Fix its full name here, to match 'c.DockerSpawner.network_name', in + # jupyterhub_config.py. + name: traefik_internal diff --git a/compose-example/jupyterhub_config.py b/compose-example/jupyterhub_config.py new file mode 100644 index 00000000..df35c07f --- /dev/null +++ b/compose-example/jupyterhub_config.py @@ -0,0 +1,149 @@ +# Configuration file for jupyterhub. +import os + +c = get_config() # noqa + +# Class for authenticating users. +# +# One of the benefits of using jupyterhub, is its ability to use a central +# Identity Provider and Authentication service. 
+# +# Currently installed: +# - default: jupyterhub.auth.PAMAuthenticator +# - dummy: jupyterhub.auth.DummyAuthenticator +# - null: jupyterhub.auth.NullAuthenticator +# - pam: jupyterhub.auth.PAMAuthenticator +# Default: 'jupyterhub.auth.PAMAuthenticator' +# +# Also, check the OAuth authenticators, at:- +# https://oauthenticator.readthedocs.io/en/latest/tutorials/provider-specific-setup/index.html +# +# The 'dummy' authenticator will allow any user to login and launch a jupyter +# notebook, so should definitely NOT BE USED in production or on publicly +# accessible servers! +c.JupyterHub.authenticator_class = "dummy" + +# The public facing URL of the whole JupyterHub application. +# +# This is the address on which the proxy will bind. +# Sets protocol, ip, base_url +# Default: 'http://:8000' +# (dev note) This will be copied to c.Proxy.public_url +c.JupyterHub.bind_url = "https://hub.example.com" + +# Whether to clean up the jupyterhub-managed traefik configuration +# when the Hub shuts down. +c.JupyterHub.cleanup_proxy = True + +# The URL on which the Hub will listen. This is a private URL for internal +# communication. Typically set in combination with hub_connect_url. If a unix +# socket, hub_connect_url **must** also be set. +# +# For example: +# +# "http://127.0.0.1:8081" +# "unix+http://%2Fsrv%2Fjupyterhub%2Fjupyterhub.sock" +# +# .. versionadded:: 0.9 +# Default: '' +# +# jupyterhub_traefik_proxy will configure the 'service' url in traefik, so this +# needs to be accessible from traefik. By default, jupyterhub will bind to +# 'localhost', but this will bind jupyterhub to its container name +c.JupyterHub.hub_bind_url = "http://hub:8000" + +# This sets traefik's router rule for routing traffic to the jupyterhub +# instance. 
+# +# Typically, you'll want a traefik Host-based configuration rule, e.g.:- +# traefik.http.routers.jupyterhub.rule=Host(`hub.example.com`) +# +# The corresponding `hub_routespec` for the above would be:- +# c.JupyterHub.hub_routespec = 'hub.example.com/' +# +# The default is to bind to everything, creating a path-based rule. i.e. +# traefik.http.routers.jupyterhub.rule=PathPrefix(`/`) +# +# Default: '/' +# +c.JupyterHub.hub_routespec = "hub.example.com/" + +# Set the log level by value or name. +# Choices: any of [0, 10, 20, 30, 40, 50, 'DEBUG', 'INFO', 'WARN', 'ERROR', 'CRITICAL'] +# Default: 30 +# See also: Application.log_level +c.JupyterHub.log_level = "DEBUG" + +# Use jupyterhub_traefik_proxy's `TraefikFileProviderProxy` class +c.JupyterHub.proxy_class = "traefik_file" + +# JupyterHub shouldn't start the proxy, docker-compose will launch it +c.TraefikFileProviderProxy.should_start = False + +# The configuration file jupyterhub will write to, and traefik will watch +c.TraefikFileProviderProxy.dynamic_config_file = "/var/run/traefik/jupyterhub.yaml" + +# Settings jupyterhub_traefik_proxy will use to access the traefik API +# These must match traefik's dynamic configuration (check the labels in +# docker-compose.yaml) +c.TraefikFileProviderProxy.traefik_api_url = "https://traefik" +c.TraefikFileProviderProxy.traefik_api_validate_cert = False +c.TraefikFileProviderProxy.traefik_api_username = "admin" +c.TraefikFileProviderProxy.traefik_api_password = "password" + +# Traefik can automatically retrieve certificates for each user container from +# an ACME provider (e.g. Let's Encrypt). For an example, read the comments in +# traefik's static configuration file, traefik.yaml, and refer to the +# reference documentation at:- +# https://doc.traefik.io/traefik/https/acme/ +# c.TraefikFileProviderProxy.traefik_cert_resolver = "leresolver" + +# The class to use for spawning single-user servers. 
+# +# Currently installed: +# - default: jupyterhub.spawner.LocalProcessSpawner +# - localprocess: jupyterhub.spawner.LocalProcessSpawner +# - simple: jupyterhub.spawner.SimpleLocalProcessSpawner +# Default: 'jupyterhub.spawner.LocalProcessSpawner' +# +# Launch each user's notebook server in a separate container. +c.JupyterHub.spawner_class = "dockerspawner.DockerSpawner" + +# Base Image to use for user notebook containers. You can build your own, +# or use an image name and tag from hub.docker.com, or another image repository +c.DockerSpawner.image = "jupyterhub/singleuser" + +# Explicitly set notebook directory because we'll be mounting a host volume to +# it. Most jupyter/docker-stacks *-notebook images run the Notebook server as +# user `jovyan`, and set the notebook directory to `/home/jovyan/work`. +# We follow the same convention. +notebook_dir = os.environ.get("DOCKER_NOTEBOOK_DIR") or "/home/jovyan/work" +c.DockerSpawner.notebook_dir = notebook_dir + +# Create per-user docker volumes, mounted to the user's notebook_dir in the +# container +c.DockerSpawner.volumes = {"jupyterhub-user-{username}": notebook_dir} + +# The docker network name that single-user notebook containers should attach to +c.DockerSpawner.network_name = "traefik_internal" + +# For jupyterhub to let traefik manage certificates, 'ssl_cert' needs a +# value. (This gets around a validate rule on 'proxy.bind_url', which +# forces redirects to 'http', unless there is a value in ssl_cert). +# Otherwise, when logging in, there will always be 302 redirects to http:// +c.JupyterHub.ssl_cert = "externally managed" + +# jupyterhub will only configure path-based routing by default. To stop +# traefik from routing all requests to jupyterhub, a subdomain host should be +# configured. +# That is, by default, jupyterhub will create a router rule of just PathPrefix(`/`). +# This could conflict with other traefik router rules, or just be too easily +# accessible. 
+# +# If a subdomain_host is configured, each user container will be accessible at:- +# https://{username}.{subdomain_host} +# +# e.g. A user of "jbloggs", logging into a hub with a subdomain_host of +# "https://hub.example.com", will be redirected to their notebook at +# https://jbloggs.hub.example.com +c.JupyterHub.subdomain_host = "https://hub.example.com" diff --git a/compose-example/traefik.yaml b/compose-example/traefik.yaml new file mode 100644 index 00000000..5947648d --- /dev/null +++ b/compose-example/traefik.yaml @@ -0,0 +1,80 @@ +# Docker configuration backend (absolute socket path: note the three slashes) +providers: + docker: + endpoint: "unix:///var/run/docker.sock" + exposedByDefault: false + file: + directory: /var/run/traefik + watch: true + +# Listen on ports 80 and 443, redirecting all http connections to https +entryPoints: + web: + address: :80 + http: + redirections: + entryPoint: + to: websecure + scheme: https + + websecure: + address: :443 + +# API and dashboard configuration +api: + dashboard: true + +# Set log level to DEBUG, while testing +log: + level: DEBUG +# +# What a Lets Encrypt Certificate Resolver might look like:- +# +#certificatesResolvers: +# leresolver: +# acme: +# email: "hello@example.com" +# +# # The certificates' duration in hours. +# # It defaults to 2160 (90 days) to follow Let's Encrypt certificates' duration. +# +# certificatesDuration: 2160 +# +# # JSON file where keys and issued certificates will be stored. traefik +# # will manage this file, don't touch it! +# +# storage: "/etc/acme/acme.json" +# +# # CA server to use. +# # Uncomment the line to use Let's Encrypt's staging server, +# # leave commented to go to prod. +# #caServer: "https://acme-staging-v02.api.letsencrypt.org/directory" +# +# # By default, traefik will generate RSA4096 keys. EC384 keys are smaller, +# # faster to process, and provide about an equivalent amount of +# # protection as RSA4096 keys, so use these. +# keyType: EC384 +# +# # What Lets Encrypt challenge to request? +# +# # HTTP-01 ACME challenge. 
+# # This is good if the server is publicly accessible from the internet, +# # and resolvable with its public DNS name +# httpChallenge: +# entryPoint: web +# +# # Use a DNS-01 ACME challenge rather than HTTP-01 challenge. +# # Note: mandatory for wildcard certificate generation. +# # +# # traefik can work with various dynamic DNS services, provided with the +# # correct authentication information. +# # See https://doc.traefik.io/traefik/https/acme/ for info on all +# # supported DNS providers +# # e.g. For the cloudflare provider, set the environment variable +# # CF_DNS_API_TOKEN, or CF_DNS_API_TOKEN_FILE to the cloudflare-issued +# # DNS API key. The _FILE variant can be used with docker secrets +# #dnsChallenge: +# # provider: cloudflare +# +# # Use a TLS-ALPN-01 ACME challenge. +# #tlsChallenge: