ARG CUDA_VERSION="12.4.1"
ARG UBUNTU_VERSION="22.04"
ARG UV_VERSION="0.5.4"
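# Example build invocation (a sketch; the image tag and token path are placeholders):
#   DOCKER_BUILDKIT=1 docker build -f Dockerfile.runner \
#     --secret id=HF_TOKEN,src=$HOME/.cache/huggingface/token \
#     --build-arg PULL_DIFFUSERS_MODELS_FLUX=1 \
#     --build-arg PULL_DIFFUSERS_MODELS_STABLEDIFFUSION=1 \
#     -t helix-runner .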
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv
FROM ghcr.io/astral-sh/uv:${UV_VERSION}-bookworm-slim AS diffusers
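# This stage only pulls diffusers model weights; the final image copies them out below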
ENV UV_LINK_MODE=copy
WORKDIR /workspace
COPY scripts/pull_diffusers_models .
ARG PULL_DIFFUSERS_MODELS_FLUX=""
ENV PULL_DIFFUSERS_MODELS_FLUX=$PULL_DIFFUSERS_MODELS_FLUX
ARG PULL_DIFFUSERS_MODELS_STABLEDIFFUSION=""
ENV PULL_DIFFUSERS_MODELS_STABLEDIFFUSION=$PULL_DIFFUSERS_MODELS_STABLEDIFFUSION
RUN mkdir -p /workspace/diffusers
# Split these up to try to help with caching. Put most used/stable models first.
# Save the files to a "hub" subdirectory to match the expected default structure of the diffusers library
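# Each pull below only runs when its PULL_DIFFUSERS_MODELS_* build arg is non-empty,
# so leaving an arg unset skips that (large) download. HF_TOKEN is mounted as a
# BuildKit secret so the token never lands in an image layer.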
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=secret,id=HF_TOKEN \
    if [ ! -z "$PULL_DIFFUSERS_MODELS_FLUX" ]; then \
        export HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && \
        uv run --frozen pull.py --model_name "black-forest-labs/FLUX.1-dev" --save_path "/workspace/diffusers/hub"; \
    fi
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=secret,id=HF_TOKEN \
    if [ ! -z "$PULL_DIFFUSERS_MODELS_STABLEDIFFUSION" ]; then \
        export HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && \
        uv run --frozen pull.py --model_name "stabilityai/sd-turbo" --save_path "/workspace/diffusers/hub"; \
    fi
RUN --mount=type=cache,target=/root/.cache/uv \
    --mount=type=secret,id=HF_TOKEN \
    if [ ! -z "$PULL_DIFFUSERS_MODELS_STABLEDIFFUSION" ]; then \
        export HF_TOKEN=$(cat /run/secrets/HF_TOKEN) && \
        uv run --frozen pull.py --model_name "stabilityai/stable-diffusion-3.5-medium" --save_path "/workspace/diffusers/hub"; \
    fi
FROM winglian/axolotl:main-20241008-py3.11-cu124-2.4.0
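# Changing CACHE_DATE invalidates the Docker build cache for all subsequent layers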
ENV CACHE_DATE=2024-12-04
ENV DEBIAN_FRONTEND=noninteractive
ENV PYTHONUNBUFFERED=1
WORKDIR /workspace
ARG PULL_OLLAMA_MODELS=""
ENV PULL_OLLAMA_MODELS=$PULL_OLLAMA_MODELS
ARG PULL_OLLAMA_MODELS_PHASE_2=""
ENV PULL_OLLAMA_MODELS_PHASE_2=$PULL_OLLAMA_MODELS_PHASE_2
ARG PULL_OLLAMA_MODELS_PHASE_3=""
ENV PULL_OLLAMA_MODELS_PHASE_3=$PULL_OLLAMA_MODELS_PHASE_3
ARG PULL_OLLAMA_MODELS_PHASE_4=""
ENV PULL_OLLAMA_MODELS_PHASE_4=$PULL_OLLAMA_MODELS_PHASE_4
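# The phase args let volatile model lists live in later, smaller layers (see below).
# Example (hypothetical model list; the format is whatever pull_ollama_models.sh
# expects in $PULL_OLLAMA_MODELS):
#   --build-arg PULL_OLLAMA_MODELS_PHASE_2="llama3.2:3b qwen2.5-coder:7b"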
# TODO: verify whether extending LD_LIBRARY_PATH here is actually necessary
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib/x86_64-linux-gnu:/usr/local/nvidia/lib64:/usr/local/nvidia/bin
# Install Ollama
RUN TEMP_DIR=$(mktemp -d /tmp/ollama_install_XXXXXX) && \
    curl --retry 5 -L https://github.com/ollama/ollama/releases/download/v0.5.1/ollama-linux-amd64.tgz -o $TEMP_DIR/ollama.tgz && \
    tar -xzf $TEMP_DIR/ollama.tgz -C $TEMP_DIR && \
    mv $TEMP_DIR/bin/ollama /usr/bin/ollama && \
    chmod +x /usr/bin/ollama && \
    cp -r $TEMP_DIR/lib/ollama /usr/lib/ && \
    rm -rf $TEMP_DIR
# Set up the Ollama models directory; the runner will be able to access it
RUN mkdir -p /workspace/ollama
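# ollama resolves its model store via the OLLAMA_MODELS environment variable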
ENV OLLAMA_MODELS=/workspace/ollama
COPY scripts/pull_ollama_models.sh /workspace/scripts/pull_ollama_models.sh
# ====================================
# We cache all the model weights early on
# ====================================
# High-certainty models go in a base layer
RUN bash /workspace/scripts/pull_ollama_models.sh
# Low-certainty models go in smaller layers that can change more often
RUN if [ ! -z "$PULL_OLLAMA_MODELS_PHASE_2" ]; then \
export PULL_OLLAMA_MODELS="$PULL_OLLAMA_MODELS_PHASE_2" && \
bash /workspace/scripts/pull_ollama_models.sh; \
fi
RUN if [ ! -z "$PULL_OLLAMA_MODELS_PHASE_3" ]; then \
export PULL_OLLAMA_MODELS="$PULL_OLLAMA_MODELS_PHASE_3" && \
bash /workspace/scripts/pull_ollama_models.sh; \
fi
RUN if [ ! -z "$PULL_OLLAMA_MODELS_PHASE_4" ]; then \
export PULL_OLLAMA_MODELS="$PULL_OLLAMA_MODELS_PHASE_4" && \
bash /workspace/scripts/pull_ollama_models.sh; \
fi
# Fake venv: the Helix runner expects one, but axolotl is the "root venv" (actually the default conda env) in the image
RUN mkdir -p /workspace/axolotl/venv/bin
RUN echo "echo \"Pretending to activate virtualenv (actually doing nothing)\"" > /workspace/axolotl/venv/bin/activate
# Accumulate Debian package installs here. Go is installed for the _development_
# workflow of quickly iterating on the runner.
RUN --mount=type=cache,target=/var/cache/apt apt-get update -qq && apt-get install -qqy \
        libgl1-mesa-glx ffmpeg libsm6 libxext6 wget software-properties-common \
        python3 python3-pip git unzip python3-virtualenv && \
    add-apt-repository -y ppa:longsleep/golang-backports && \
    apt-get update -qq && apt-get install -qqy golang-1.21 golang-go && \
    rm -rf /var/lib/apt/lists/*
RUN cd /workspace/axolotl && \
    git remote rm origin && \
    git remote add origin https://github.com/axolotl-ai-cloud/axolotl && \
    git fetch --all && \
    git checkout 1834cdc3645c003e3db02346912cab19a1eb5ca3 && \
    . venv/bin/activate && \
    pip3 install packaging ninja mlflow && \
    pip3 install -e '.[flash-attn,deepspeed]'
# Copy weights over from the diffusers build stage, if any were pulled
COPY --from=diffusers /workspace/diffusers /workspace/diffusers
EXPOSE 5000
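# Example run invocation (a sketch; the image tag is a placeholder, GPU access assumed):
#   docker run --gpus all -p 5000:5000 helix-runner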
RUN mkdir -p /workspace/helix
WORKDIR /workspace/helix