Reviewed revision of the 1.7-1 Dockerfile.cpu clean-up patch.

Notes for whoever applies this:
  * The submitted patch arrived with newlines/indentation collapsed; continuation
    lines are reconstructed with 4-space indentation. Regenerate with `git diff`
    before merging (the `index` hashes below are those of the submitted revision).
  * base: the system-dependency RUN and the Miniconda RUN are left untouched. The
    submitted patch removed the CUDA keyring step, the Kitware CMake repository,
    libffi7, nginx and jq; the file's own comment says the Ubuntu CMake is too old
    to build MLIO, and the other removals were unexplained, so they are not applied.
  * base: the conda pin is kept so `conda update` stays on CONDA_PKG_VERSION.
  * base: build tools are not purged after the MLIO build; removing packages in a
    later layer does not shrink the image.
  * final: `chmod +x` on endpoints-1.0.jar runs after the COPY that creates it.
  * final: XGBOOST_MMS_CONFIG and the NCCL_SOCKET_IFNAME reference links are kept.

diff --git a/docker/1.7-1/base/Dockerfile.cpu b/docker/1.7-1/base/Dockerfile.cpu
index 6ce800c4..c44cdd6c 100644
--- a/docker/1.7-1/base/Dockerfile.cpu
+++ b/docker/1.7-1/base/Dockerfile.cpu
@@ -4,6 +4,7 @@ ARG IMAGE_DIGEST=c2d95c9c6ff77da41cf0f2f9e8c5088f5b4db20c16a7566b808762f05b9032e
 
 FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}@sha256:${IMAGE_DIGEST}
 
+# Argument Variables
 ARG MINICONDA_VERSION=4.9.2
 ARG CONDA_PY_VERSION=39
 ARG CONDA_CHECKSUM="b4e46fcc8029e2cfa731b788f25b1d36"
@@ -13,6 +14,7 @@ ARG PYARROW_VERSION=14.0.1
 ARG MLIO_VERSION=0.9.0
 ARG XGBOOST_VERSION=1.7.4
 
+# Environment Variables
 ENV DEBIAN_FRONTEND=noninteractive
 ENV LANG=C.UTF-8
 ENV LC_ALL=C.UTF-8
@@ -85,21 +87,21 @@ RUN cd /tmp && \
 
 ENV PATH=/miniconda3/bin:${PATH}
 
-# Install MLIO with Apache Arrow integration
-
-# We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size
-# which increases training time. We build from source to minimize the image size.
+# Install Conda dependencies and Python packages.
+# The conda pin is written first so that `conda update` below stays on CONDA_PKG_VERSION.
 RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \
     # Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html
     conda config --system --set auto_update_conda false && \
     conda config --system --set show_channel_urls true && \
     echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \
-    conda install -c conda-forge python=${PYTHON_VERSION} && \
-    pip install requests==2.27.0 && \
-    conda install conda=${CONDA_PKG_VERSION} && \
+    conda install -c conda-forge python=${PYTHON_VERSION} requests==2.27.0 conda=${CONDA_PKG_VERSION} pyarrow=${PYARROW_VERSION} && \
     conda update -y conda && \
-    conda install -c conda-forge pyarrow=${PYARROW_VERSION} && \
-    cd /tmp && \
+    python3 -m pip install --no-cache-dir --upgrade pip
+
+# Build MLIO with Apache Arrow integration.
+# We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size
+# which increases training time. We build from source to minimize the image size.
+RUN cd /tmp && \
     git clone --branch v${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \
     cd mlio && \
     build-tools/build-dependency build/third-party all && \
@@ -114,12 +116,10 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \
     cmake --build . --target mlio-arrow && \
     cd ../../src/mlio-py && \
     python3 setup.py bdist_wheel && \
-    python3 -m pip install typing && \
-    python3 -m pip install --upgrade pip && \
     python3 -m pip install dist/*.whl && \
     cp -r /tmp/mlio/build/third-party/lib/libtbb* /usr/local/lib/ && \
     ldconfig && \
     rm -rf /tmp/mlio
 
-# Install latest version of XGBoost
-RUN python3 -m pip install --no-cache -I xgboost==${XGBOOST_VERSION}
+# Install XGBoost
+RUN python3 -m pip install --no-cache-dir -I xgboost==${XGBOOST_VERSION}
diff --git a/docker/1.7-1/final/Dockerfile.cpu b/docker/1.7-1/final/Dockerfile.cpu
index ab36823c..60d0102b 100644
--- a/docker/1.7-1/final/Dockerfile.cpu
+++ b/docker/1.7-1/final/Dockerfile.cpu
@@ -20,7 +20,7 @@ RUN python3 -m pip install git+https://github.com/awslabs/sagemaker-debugger.git
 ###########################
 COPY dist/sagemaker_xgboost_container-2.0-py2.py3-none-any.whl /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl
 RUN rm -rf /miniconda3/lib/python3.8/site-packages/numpy-1.21.2.dist-info && \
-    python3 -m pip install --no-cache /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \
+    python3 -m pip install --no-cache-dir /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \
     python3 -m pip uninstall -y typing && \
     rm /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl
 
@@ -37,25 +37,22 @@ ENV PYTHONPATH=$PYTHONPATH:/miniconda3/lib/python${PYTHON_VERSION}/site-packages
 #######
 # MMS #
 #######
-# Create MMS user directory
-RUN useradd -m model-server
-RUN mkdir -p /home/model-server/tmp && chown -R model-server /home/model-server
+# Create the MMS user and the directories that the COPY steps below write into
+RUN useradd -m model-server && \
+    mkdir -p /home/model-server/tmp /opt/ml/models /tmp/plugins /etc/dask && \
+    chown -R model-server /home/model-server && \
+    chmod +rwx /opt/ml/models
 
 # Copy MMS configs
 COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/config.properties.tmp /home/model-server
 ENV XGBOOST_MMS_CONFIG=/home/model-server/config.properties
 
 # Copy execution parameters endpoint plugin for MMS
-RUN mkdir -p /tmp/plugins
+# The chmod has to stay after the COPY: the jar is not in the image before it.
 COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/endpoints-1.0.jar /tmp/plugins
 RUN chmod +x /tmp/plugins/endpoints-1.0.jar
 
-# Create directory for models
-RUN mkdir -p /opt/ml/models
-RUN chmod +rwx /opt/ml/models
-
 # Copy Dask configs
-RUN mkdir /etc/dask
 COPY docker/configs/dask_configs.yaml /etc/dask/
 
 # Required label for multi-model loading
@@ -64,22 +61,22 @@ LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
 #####################
 # Required ENV vars #
 #####################
-# Set SageMaker training environment variables
+# Set SageMaker environment variables
 ENV SM_INPUT /opt/ml/input
 ENV SM_INPUT_TRAINING_CONFIG_FILE $SM_INPUT/config/hyperparameters.json
 ENV SM_INPUT_DATA_CONFIG_FILE $SM_INPUT/config/inputdataconfig.json
 ENV SM_CHECKPOINT_CONFIG_FILE $SM_INPUT/config/checkpointconfig.json
 # See: https://github.com/dmlc/xgboost/issues/7982#issuecomment-1379390906 https://github.com/dmlc/xgboost/pull/8257
 ENV NCCL_SOCKET_IFNAME eth
-
-
-# Set SageMaker serving environment variables
 ENV SM_MODEL_DIR /opt/ml/model
 
 # Set SageMaker entrypoints
 ENV SAGEMAKER_TRAINING_MODULE sagemaker_xgboost_container.training:main
 ENV SAGEMAKER_SERVING_MODULE sagemaker_xgboost_container.serving:main
-
-EXPOSE 8080
 ENV TEMP=/home/model-server/tmp
+
+# Required for SageMaker
 LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+# Expose port for the serving container
+EXPOSE 8080