Skip to content

Experiment with dockerfile refactor #436

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 33 additions & 69 deletions docker/1.7-1/base/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ ARG IMAGE_DIGEST=c2d95c9c6ff77da41cf0f2f9e8c5088f5b4db20c16a7566b808762f05b9032e

FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}@sha256:${IMAGE_DIGEST}

# Argument Variables
ARG MINICONDA_VERSION=4.9.2
ARG CONDA_PY_VERSION=39
ARG CONDA_CHECKSUM="b4e46fcc8029e2cfa731b788f25b1d36"
Expand All @@ -13,93 +14,56 @@ ARG PYARROW_VERSION=14.0.1
ARG MLIO_VERSION=0.9.0
ARG XGBOOST_VERSION=1.7.4

# Environment Variables
ENV DEBIAN_FRONTEND=noninteractive
ENV LANG=C.UTF-8
ENV LC_ALL=C.UTF-8

# PYTHONDONTWRITEBYTECODE: Python does not write .pyc files when importing source modules.
# PYTHONUNBUFFERED: force stdout and stderr to be unbuffered, which is good for logging.
ENV PYTHONDONTWRITEBYTECODE=1
ENV PYTHONUNBUFFERED=1
ENV PYTHONIOENCODING='utf-8'
ENV PATH=/miniconda3/bin:${PATH}

RUN apt-key del 7fa2af80 && \
apt-get update && apt-get install -y --no-install-recommends wget && \
wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \
dpkg -i cuda-keyring_1.0-1_all.deb && \
apt-get update && \
apt-get -y upgrade && \
apt-get -y install --no-install-recommends \
# Install system dependencies, clean up, and install Miniconda in a single RUN
RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
git \
jq \
ca-certificates \
libatlas-base-dev \
nginx \
openjdk-8-jdk-headless \
unzip \
wget \
&& \
# MLIO build dependencies
# The official Ubuntu APT repositories do not contain a version of CMake recent enough to build MLIO.
# The Kitware APT repository provides the latest version of CMake.
wget http://es.archive.ubuntu.com/ubuntu/pool/main/libf/libffi/libffi7_3.3-4_amd64.deb && \
dpkg -i libffi7_3.3-4_amd64.deb && \
apt-get -y install --no-install-recommends \
apt-transport-https \
ca-certificates \
gnupg \
software-properties-common \
&& \
wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \
gpg --dearmor - | \
tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \
echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null && \
apt-get update && \
rm /usr/share/keyrings/kitware-archive-keyring.gpg && \
apt-get install -y --no-install-recommends \
autoconf \
automake \
build-essential \
cmake \
cmake-data \
doxygen \
kitware-archive-keyring \
libcurl4-openssl-dev \
libssl-dev \
libtool \
ninja-build \
cmake \
zlib1g-dev \
python3-dev \
python3-distutils \
python3-pip \
zlib1g-dev \
&& \
rm -rf /var/lib/apt/lists/*

# Install conda
RUN cd /tmp && \
curl -L --output /tmp/Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \
echo "${CONDA_CHECKSUM} /tmp/Miniconda3.sh" | md5sum -c - && \
bash /tmp/Miniconda3.sh -bfp /miniconda3 && \
rm /tmp/Miniconda3.sh

ENV PATH=/miniconda3/bin:${PATH}

# Install MLIO with Apache Arrow integration
libssl-dev \
libcurl4-openssl-dev \
doxygen \
libtool \
&& rm -rf /var/lib/apt/lists/* && \
cd /tmp && \
curl -L --output Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \
echo "${CONDA_CHECKSUM} Miniconda3.sh" | md5sum -c - && \
bash Miniconda3.sh -bfp /miniconda3 && \
rm Miniconda3.sh && \
apt-get clean && rm -rf /var/lib/apt/lists/*

# We could install mlio-py from conda, but that package bundles extra features, such as an image reader,
# that increase the image size, which in turn increases training time. We build from source to minimize the image size.
RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \
# Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html
conda config --system --set auto_update_conda false && \
# Install Conda dependencies and Python packages
RUN conda config --system --set auto_update_conda false && \
conda config --system --set show_channel_urls true && \
echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \
conda install -c conda-forge python=${PYTHON_VERSION} && \
pip install requests==2.27.0 && \
conda install conda=${CONDA_PKG_VERSION} && \
conda install -c conda-forge python=${PYTHON_VERSION} requests==2.27.0 conda=${CONDA_PKG_VERSION} pyarrow=${PYARROW_VERSION} && \
conda update -y conda && \
conda install -c conda-forge pyarrow=${PYARROW_VERSION} && \
cd /tmp && \
pip install --upgrade pip && \
apt-get clean && \
rm -rf /var/lib/apt/lists/*

# Build MLIO and clean up build tools afterward
RUN cd /tmp && \
git clone --branch v${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \
cd mlio && \
build-tools/build-dependency build/third-party all && \
Expand All @@ -114,12 +78,12 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \
cmake --build . --target mlio-arrow && \
cd ../../src/mlio-py && \
python3 setup.py bdist_wheel && \
python3 -m pip install typing && \
python3 -m pip install --upgrade pip && \
python3 -m pip install dist/*.whl && \
cp -r /tmp/mlio/build/third-party/lib/libtbb* /usr/local/lib/ && \
ldconfig && \
rm -rf /tmp/mlio
apt-get remove --purge -y cmake ninja-build build-essential && \
apt-get autoremove -y && \
rm -rf /tmp/mlio /tmp/mlio/build /var/lib/apt/lists/*

# Install latest version of XGBoost
# Install XGBoost
RUN python3 -m pip install --no-cache -I xgboost==${XGBOOST_VERSION}
51 changes: 20 additions & 31 deletions docker/1.7-1/final/Dockerfile.cpu
Original file line number Diff line number Diff line change
Expand Up @@ -19,43 +19,33 @@ RUN python3 -m pip install git+https://github.com/awslabs/sagemaker-debugger.git
# Copy wheel to container #
###########################
COPY dist/sagemaker_xgboost_container-2.0-py2.py3-none-any.whl /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl
RUN rm -rf /miniconda3/lib/python3.8/site-packages/numpy-1.21.2.dist-info && \
python3 -m pip install --no-cache /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \
RUN python3 -m pip install --no-cache-dir /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \
python3 -m pip uninstall -y typing && \
rm /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl
rm /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \
rm -rf /miniconda3/lib/python3.8/site-packages/numpy-1.21.2.dist-info

##############
# DMLC PATCH #
##############
# TODO: remove after making contributions back to xgboost for tracker.py
COPY src/sagemaker_xgboost_container/dmlc_patch/tracker.py \
/miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker/dmlc_tracker/tracker.py
/miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker/dmlc_tracker/tracker.py

# Include DMLC python code in PYTHONPATH to use RabitTracker
ENV PYTHONPATH=$PYTHONPATH:/miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker

#######
# MMS #
#######
# Create MMS user directory
RUN useradd -m model-server
RUN mkdir -p /home/model-server/tmp && chown -R model-server /home/model-server

# Copy MMS configs
# Create user, directories, and set permissions
RUN useradd -m model-server && \
mkdir -p /home/model-server/tmp /opt/ml/models /tmp/plugins /etc/dask && \
chown -R model-server /home/model-server && \
chmod +rwx /opt/ml/models && \
chmod +x /tmp/plugins/endpoints-1.0.jar

# Copy MMS configs and setup
COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/config.properties.tmp /home/model-server
ENV XGBOOST_MMS_CONFIG=/home/model-server/config.properties

# Copy execution parameters endpoint plugin for MMS
RUN mkdir -p /tmp/plugins
COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/endpoints-1.0.jar /tmp/plugins
RUN chmod +x /tmp/plugins/endpoints-1.0.jar

# Create directory for models
RUN mkdir -p /opt/ml/models
RUN chmod +rwx /opt/ml/models

# Copy Dask configs
RUN mkdir /etc/dask
COPY docker/configs/dask_configs.yaml /etc/dask/

# Required label for multi-model loading
Expand All @@ -64,22 +54,21 @@ LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
#####################
# Required ENV vars #
#####################
# Set SageMaker training environment variables
# Set SageMaker environment variables
ENV SM_INPUT /opt/ml/input
ENV SM_INPUT_TRAINING_CONFIG_FILE $SM_INPUT/config/hyperparameters.json
ENV SM_INPUT_DATA_CONFIG_FILE $SM_INPUT/config/inputdataconfig.json
ENV SM_CHECKPOINT_CONFIG_FILE $SM_INPUT/config/checkpointconfig.json
# See: https://github.com/dmlc/xgboost/issues/7982#issuecomment-1379390906 https://github.com/dmlc/xgboost/pull/8257
ENV NCCL_SOCKET_IFNAME eth


# Set SageMaker serving environment variables
ENV NCCL_SOCKET_IFNAME eth
ENV SM_MODEL_DIR /opt/ml/model

# Set SageMaker entrypoints
# SageMaker entrypoints
ENV SAGEMAKER_TRAINING_MODULE sagemaker_xgboost_container.training:main
ENV SAGEMAKER_SERVING_MODULE sagemaker_xgboost_container.serving:main

EXPOSE 8080
ENV TEMP=/home/model-server/tmp

# Required for SageMaker
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

# Expose port for the serving container
EXPOSE 8080