From 0bedd7b8774247f71554fcdc6bed7a6f0187680b Mon Sep 17 00:00:00 2001 From: haixiw Date: Tue, 10 Sep 2024 23:48:23 +0000 Subject: [PATCH 1/4] Experiment with dockerfile refactor --- docker/1.7-1/base/Dockerfile.cpu | 89 ++++++++++++-------------------- 1 file changed, 32 insertions(+), 57 deletions(-) diff --git a/docker/1.7-1/base/Dockerfile.cpu b/docker/1.7-1/base/Dockerfile.cpu index 6ce800c4..bbda3565 100644 --- a/docker/1.7-1/base/Dockerfile.cpu +++ b/docker/1.7-1/base/Dockerfile.cpu @@ -23,83 +23,55 @@ ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONUNBUFFERED=1 ENV PYTHONIOENCODING='utf-8' -RUN apt-key del 7fa2af80 && \ - apt-get update && apt-get install -y --no-install-recommends wget && \ - wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \ - dpkg -i cuda-keyring_1.0-1_all.deb && \ - apt-get update && \ - apt-get -y upgrade && \ - apt-get -y install --no-install-recommends \ +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ build-essential \ curl \ git \ - jq \ + ca-certificates \ libatlas-base-dev \ - nginx \ openjdk-8-jdk-headless \ unzip \ wget \ - && \ - # MLIO build dependencies - # Official Ubuntu APT repositories do not contain an up-to-date version of CMake required to build MLIO. - # Kitware contains the latest version of CMake. - wget http://es.archive.ubuntu.com/ubuntu/pool/main/libf/libffi/libffi7_3.3-4_amd64.deb && \ - dpkg -i libffi7_3.3-4_amd64.deb && \ - apt-get -y install --no-install-recommends \ - apt-transport-https \ - ca-certificates \ - gnupg \ - software-properties-common \ - && \ - wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \ - gpg --dearmor - | \ - tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \ - echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | tee /etc/apt/sources.list.d/kitware.list >/dev/null && \ - apt-get update && \ - rm /usr/share/keyrings/kitware-archive-keyring.gpg && \ - apt-get install -y --no-install-recommends \ - autoconf \ - automake \ - build-essential \ - cmake \ - cmake-data \ - doxygen \ - kitware-archive-keyring \ - libcurl4-openssl-dev \ - libssl-dev \ - libtool \ ninja-build \ + cmake \ + zlib1g-dev \ python3-dev \ python3-distutils \ python3-pip \ - zlib1g-dev \ - && \ - rm -rf /var/lib/apt/lists/* + # && \ + # Add Kitware repository for the latest version of CMake + # wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \ + # gpg --dearmor - | \ + # tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \ + # echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | \ + # tee /etc/apt/sources.list.d/kitware.list >/dev/null && \ + # apt-get update && apt-get install -y --no-install-recommends cmake && \ + && rm -rf /var/lib/apt/lists/* -# Install conda + +# Install Miniconda RUN cd /tmp && \ curl -L --output /tmp/Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \ - echo "${CONDA_CHECKSUM} /tmp/Miniconda3.sh" | md5sum -c - && \ - bash /tmp/Miniconda3.sh -bfp /miniconda3 && \ + echo "${CONDA_CHECKSUM} /tmp/Miniconda3.sh" | md5sum -c - && \ + bash Miniconda3.sh -bfp /miniconda3 && \ rm /tmp/Miniconda3.sh ENV PATH=/miniconda3/bin:${PATH} -# Install MLIO with Apache Arrow integration - -# We could install mlio-py from conda, but it comes with extra support such as image reader that increases image size -# which increases training time. We build from source to minimize the image size. +# Install Conda and MLIO dependencies in a single step RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ - # Conda configuration see https://conda.io/projects/conda/en/latest/configuration.html conda config --system --set auto_update_conda false && \ conda config --system --set show_channel_urls true && \ echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \ - conda install -c conda-forge python=${PYTHON_VERSION} && \ - pip install requests==2.27.0 && \ - conda install conda=${CONDA_PKG_VERSION} && \ + conda install -c conda-forge python=${PYTHON_VERSION} requests==2.27.0 conda=${CONDA_PKG_VERSION} pyarrow=${PYARROW_VERSION} && \ conda update -y conda && \ - conda install -c conda-forge pyarrow=${PYARROW_VERSION} && \ - cd /tmp && \ + pip install --upgrade pip && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* + +# Build MLIO from source, clean up build tools afterward to reduce size +RUN cd /tmp && \ git clone --branch v${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \ cd mlio && \ build-tools/build-dependency build/third-party all && \ @@ -114,12 +86,15 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ cmake --build . --target mlio-arrow && \ cd ../../src/mlio-py && \ python3 setup.py bdist_wheel && \ - python3 -m pip install typing && \ - python3 -m pip install --upgrade pip && \ python3 -m pip install dist/*.whl && \ cp -r /tmp/mlio/build/third-party/lib/libtbb* /usr/local/lib/ && \ ldconfig && \ - rm -rf /tmp/mlio + # Remove unnecessary files and dependencies to minimize the image + rm -rf /tmp/mlio /tmp/mlio/build && \ + apt-get remove --purge -y cmake ninja-build build-essential && \ + apt-get autoremove -y && \ + apt-get clean + # Install latest version of XGBoost RUN python3 -m pip install --no-cache -I xgboost==${XGBOOST_VERSION} From b3b0741744943807819f2ae3e5be13c4cf41dc6e Mon Sep 17 00:00:00 2001 From: haixiw Date: Thu, 12 Sep 2024 02:50:40 +0000 Subject: [PATCH 2/4] test --- docker/1.7-1/base/Dockerfile.cpu | 4 +++ docker/1.7-1/final/Dockerfile.cpu | 51 ++++++++++++------------------- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/docker/1.7-1/base/Dockerfile.cpu b/docker/1.7-1/base/Dockerfile.cpu index bbda3565..7b93ea98 100644 --- a/docker/1.7-1/base/Dockerfile.cpu +++ b/docker/1.7-1/base/Dockerfile.cpu @@ -39,6 +39,10 @@ RUN apt-get update && \ python3-dev \ python3-distutils \ python3-pip \ + # This package provides the OpenSSL headers and libraries necessary to build applications that depend on OpenSSL. + libssl-dev \ + # Required for handling compressed files and is often needed alongside OpenSSL. + zlib1g-dev && \ # && \ # Add Kitware repository for the latest version of CMake # wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \ diff --git a/docker/1.7-1/final/Dockerfile.cpu b/docker/1.7-1/final/Dockerfile.cpu index ab36823c..60d0102b 100644 --- a/docker/1.7-1/final/Dockerfile.cpu +++ b/docker/1.7-1/final/Dockerfile.cpu @@ -19,17 +19,16 @@ RUN python3 -m pip install git+https://github.com/awslabs/sagemaker-debugger.git # Copy wheel to container # ########################### COPY dist/sagemaker_xgboost_container-2.0-py2.py3-none-any.whl /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl -RUN rm -rf /miniconda3/lib/python3.8/site-packages/numpy-1.21.2.dist-info && \ - python3 -m pip install --no-cache /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \ +RUN python3 -m pip install --no-cache-dir /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \ python3 -m pip uninstall -y typing && \ - rm /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl + rm /sagemaker_xgboost_container-1.0-py2.py3-none-any.whl && \ + rm -rf /miniconda3/lib/python3.8/site-packages/numpy-1.21.2.dist-info ############## # DMLC PATCH # ############## -# TODO: remove after making contributions back to xgboost for tracker.py COPY src/sagemaker_xgboost_container/dmlc_patch/tracker.py \ - /miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker/dmlc_tracker/tracker.py + /miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker/dmlc_tracker/tracker.py # Include DMLC python code in PYTHONPATH to use RabitTracker ENV PYTHONPATH=$PYTHONPATH:/miniconda3/lib/python${PYTHON_VERSION}/site-packages/xgboost/dmlc-core/tracker @@ -37,25 +36,16 @@ ENV PYTHONPATH=$PYTHONPATH:/miniconda3/lib/python${PYTHON_VERSION}/site-packages ####### # MMS # ####### -# Create MMS user directory -RUN useradd -m model-server -RUN mkdir -p /home/model-server/tmp && chown -R model-server /home/model-server - -# Copy MMS configs +# Create user, directories, and set permissions +RUN useradd -m model-server && \ + mkdir -p /home/model-server/tmp /opt/ml/models /tmp/plugins /etc/dask && \ + chown -R model-server /home/model-server && \ + chmod +rwx /opt/ml/models && \ + chmod +x /tmp/plugins/endpoints-1.0.jar + +# Copy MMS configs and setup COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/config.properties.tmp /home/model-server -ENV XGBOOST_MMS_CONFIG=/home/model-server/config.properties - -# Copy execution parameters endpoint plugin for MMS -RUN mkdir -p /tmp/plugins COPY docker/${SAGEMAKER_XGBOOST_VERSION}/resources/mms/endpoints-1.0.jar /tmp/plugins -RUN chmod +x /tmp/plugins/endpoints-1.0.jar - -# Create directory for models -RUN mkdir -p /opt/ml/models -RUN chmod +rwx /opt/ml/models - -# Copy Dask configs -RUN mkdir /etc/dask COPY docker/configs/dask_configs.yaml /etc/dask/ # Required label for multi-model loading @@ -64,22 +54,21 @@ LABEL com.amazonaws.sagemaker.capabilities.multi-models=true ##################### # Required ENV vars # ##################### -# Set SageMaker training environment variables +# Set SageMaker environment variables ENV SM_INPUT /opt/ml/input ENV SM_INPUT_TRAINING_CONFIG_FILE $SM_INPUT/config/hyperparameters.json ENV SM_INPUT_DATA_CONFIG_FILE $SM_INPUT/config/inputdataconfig.json ENV SM_CHECKPOINT_CONFIG_FILE $SM_INPUT/config/checkpointconfig.json -# See: https://github.com/dmlc/xgboost/issues/7982#issuecomment-1379390906 https://github.com/dmlc/xgboost/pull/8257 -ENV NCCL_SOCKET_IFNAME eth - - -# Set SageMaker serving environment variables +ENV NCCL_SOCKET_IFNAME eth ENV SM_MODEL_DIR /opt/ml/model -# Set SageMaker entrypoints +# Sagemaker entrypoints ENV SAGEMAKER_TRAINING_MODULE sagemaker_xgboost_container.training:main ENV SAGEMAKER_SERVING_MODULE sagemaker_xgboost_container.serving:main - -EXPOSE 8080 ENV TEMP=/home/model-server/tmp + +# Required for SageMaker LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true + +# Expose port for the serving container +EXPOSE 8080 From 9abfd35d11595870bd4937ea41ba25a56cb45419 Mon Sep 17 00:00:00 2001 From: haixiw Date: Thu, 12 Sep 2024 02:57:23 +0000 Subject: [PATCH 3/4] fix typo --- docker/1.7-1/base/Dockerfile.cpu | 48 ++++++++++---------------------- 1 file changed, 15 insertions(+), 33 deletions(-) diff --git a/docker/1.7-1/base/Dockerfile.cpu b/docker/1.7-1/base/Dockerfile.cpu index 7b93ea98..f3254cfa 100644 --- a/docker/1.7-1/base/Dockerfile.cpu +++ b/docker/1.7-1/base/Dockerfile.cpu @@ -4,6 +4,7 @@ ARG IMAGE_DIGEST=c2d95c9c6ff77da41cf0f2f9e8c5088f5b4db20c16a7566b808762f05b9032e FROM nvidia/cuda:${CUDA_VERSION}-base-ubuntu${UBUNTU_VERSION}@sha256:${IMAGE_DIGEST} +# Argument Variables ARG MINICONDA_VERSION=4.9.2 ARG CONDA_PY_VERSION=39 ARG CONDA_CHECKSUM="b4e46fcc8029e2cfa731b788f25b1d36" @@ -13,16 +14,16 @@ ARG PYARROW_VERSION=14.0.1 ARG MLIO_VERSION=0.9.0 ARG XGBOOST_VERSION=1.7.4 +# Environment Variables ENV DEBIAN_FRONTEND=noninteractive ENV LANG=C.UTF-8 ENV LC_ALL=C.UTF-8 - -# Python won’t try to write .pyc or .pyo files on the import of source modules -# Force stdin, stdout and stderr to be totally unbuffered. Good for logging ENV PYTHONDONTWRITEBYTECODE=1 ENV PYTHONUNBUFFERED=1 ENV PYTHONIOENCODING='utf-8' +ENV PATH=/miniconda3/bin:${PATH} +# Install system dependencies, clean up, and install Miniconda in a single RUN RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ @@ -39,33 +40,17 @@ RUN apt-get update && \ python3-dev \ python3-distutils \ python3-pip \ - # This package provides the OpenSSL headers and libraries necessary to build applications that depend on OpenSSL. libssl-dev \ - # Required for handling compressed files and is often needed alongside OpenSSL. - zlib1g-dev && \ - # && \ - # Add Kitware repository for the latest version of CMake - # wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc 2>/dev/null | \ - # gpg --dearmor - | \ - # tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null && \ - # echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ bionic main' | \ - # tee /etc/apt/sources.list.d/kitware.list >/dev/null && \ - # apt-get update && apt-get install -y --no-install-recommends cmake && \ - && rm -rf /var/lib/apt/lists/* - - -# Install Miniconda -RUN cd /tmp && \ - curl -L --output /tmp/Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \ - echo "${CONDA_CHECKSUM} /tmp/Miniconda3.sh" | md5sum -c - && \ + && rm -rf /var/lib/apt/lists/* && \ + cd /tmp && \ + curl -L --output Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \ + echo "${CONDA_CHECKSUM} Miniconda3.sh" | md5sum -c - && \ bash Miniconda3.sh -bfp /miniconda3 && \ - rm /tmp/Miniconda3.sh - -ENV PATH=/miniconda3/bin:${PATH} + rm Miniconda3.sh && \ + apt-get clean && rm -rf /var/lib/apt/lists/* -# Install Conda and MLIO dependencies in a single step -RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ - conda config --system --set auto_update_conda false && \ +# Install Conda dependencies and Python packages +RUN conda config --system --set auto_update_conda false && \ conda config --system --set show_channel_urls true && \ echo "python ${PYTHON_VERSION}.*" >> /miniconda3/conda-meta/pinned && \ conda install -c conda-forge python=${PYTHON_VERSION} requests==2.27.0 conda=${CONDA_PKG_VERSION} pyarrow=${PYARROW_VERSION} && \ @@ -74,7 +59,7 @@ RUN echo "conda ${CONDA_PKG_VERSION}" >> /miniconda3/conda-meta/pinned && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* -# Build MLIO from source, clean up build tools afterward to reduce size +# Build MLIO and clean up build tools afterward RUN cd /tmp && \ git clone --branch v${MLIO_VERSION} https://github.com/awslabs/ml-io.git mlio && \ cd mlio && \ @@ -93,12 +78,9 @@ RUN cd /tmp && \ python3 -m pip install dist/*.whl && \ cp -r /tmp/mlio/build/third-party/lib/libtbb* /usr/local/lib/ && \ ldconfig && \ - # Remove unnecessary files and dependencies to minimize the image - rm -rf /tmp/mlio /tmp/mlio/build && \ apt-get remove --purge -y cmake ninja-build build-essential && \ apt-get autoremove -y && \ - apt-get clean - + rm -rf /tmp/mlio /tmp/mlio/build /var/lib/apt/lists/* -# Install latest version of XGBoost +# Install XGBoost RUN python3 -m pip install --no-cache -I xgboost==${XGBOOST_VERSION} From 0aee902fae0ae6b95e2664f29f72ce75922b16bf Mon Sep 17 00:00:00 2001 From: haixiw Date: Thu, 12 Sep 2024 03:20:13 +0000 Subject: [PATCH 4/4] add missing dependencies for MLIO --- docker/1.7-1/base/Dockerfile.cpu | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker/1.7-1/base/Dockerfile.cpu b/docker/1.7-1/base/Dockerfile.cpu index f3254cfa..c44cdd6c 100644 --- a/docker/1.7-1/base/Dockerfile.cpu +++ b/docker/1.7-1/base/Dockerfile.cpu @@ -41,6 +41,9 @@ RUN apt-get update && \ python3-distutils \ python3-pip \ libssl-dev \ + libcurl4-openssl-dev \ + doxygen \ + libtool \ && rm -rf /var/lib/apt/lists/* && \ cd /tmp && \ curl -L --output Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-py${CONDA_PY_VERSION}_${MINICONDA_VERSION}-Linux-x86_64.sh && \