diff --git a/.github/actions/workflow-build/build-workflow.py b/.github/actions/workflow-build/build-workflow.py index a3121091c70..13fdfec5424 100755 --- a/.github/actions/workflow-build/build-workflow.py +++ b/.github/actions/workflow-build/build-workflow.py @@ -424,16 +424,15 @@ def generate_dispatch_job_runner(matrix_job, job_type): job_info = get_job_type_info(job_type) if not job_info["gpu"]: - # # Use smaller runners for build jobs if we can - # if job_type == "build": - # # ClangCUDA and MSVC should use 16-core runners - # if ("msvc" in matrix_job["cxx"]) or ("clang" in matrix_job["cudacxx"]): - # return f"{runner_os}-{cpu}-cpu16" - # # NVHPC and OneAPI should use 8-core runners - # elif ("intel" in matrix_job["cxx"]) or ("nvhpc" in matrix_job["cxx"]): - # return f"{runner_os}-{cpu}-cpu8" - # # All others 4 - # return f"{runner_os}-{cpu}-cpu4" + # Use smaller 4-core runners for build jobs if we can + if job_type == "build": + # ClangCUDA, MSVC, and NVHPC should use 16-core runners + if ( + ("clang" not in matrix_job["cudacxx"]) and + ("msvc" not in matrix_job["cxx"]) and + ("nvhpc" not in matrix_job["cxx"]) + ): + return f"{runner_os}-{cpu}-cpu4" return f"{runner_os}-{cpu}-cpu16" gpu = get_gpu(matrix_job["gpu"]) diff --git a/.github/actions/workflow-run-job-linux/action.yml b/.github/actions/workflow-run-job-linux/action.yml index c554d5e3486..31997ef00d9 100644 --- a/.github/actions/workflow-run-job-linux/action.yml +++ b/.github/actions/workflow-run-job-linux/action.yml @@ -81,6 +81,8 @@ runs: AWS_SESSION_TOKEN: "${{env.AWS_SESSION_TOKEN}}" AWS_SECRET_ACCESS_KEY: "${{env.AWS_SECRET_ACCESS_KEY}}" run: | + set -x + echo "[host] github.workspace: ${{github.workspace}}" echo "[host] runner.temp: ${{runner.temp}}" echo "[container] GITHUB_WORKSPACE: ${GITHUB_WORKSPACE:-}" @@ -167,10 +169,9 @@ runs: # Everything should be cached for test jobs. not_test_job="$(grep -q '"./ci/test_' <<< "$COMMAND" || echo $?)" - # Temporary: don't use sccache-dist for NVHPC, OneAPI, or clang-cuda + # Temporary: don't use sccache-dist for NVHPC or clang-cuda # until sccache packages up a correct toolchain for the server not_nvhpc="$(grep -q 'nvhpc' <<< "${{inputs.host}}" || echo $?)" - not_oneapi="$(grep -q 'oneapi' <<< "${{inputs.host}}" || echo $?)" not_clang_cuda="$(grep -q '\-cuda "clang' <<< "$COMMAND" || echo $?)" # If a test job, over-subscribe -j to download more cache objects at once @@ -193,7 +194,6 @@ runs: # If this is not a test job and not one of the excluded compilers, use the build cluster if test -n "${not_nvhpc:+x}" \ - && test -n "${not_oneapi:+x}" \ && test -n "${not_test_job:+x}" \ && test -n "${not_clang_cuda:+x}" \ && test -n "${DIST_TOKEN:+x}"; then @@ -209,13 +209,13 @@ runs: extra_launch_args+=( # Uncomment to repopulate the cache - # --env "SCCACHE_RECACHE=1" + --env "SCCACHE_RECACHE=1" # Uncomment to not use the cache at all - --env "SCCACHE_NO_CACHE=1" + # --env "SCCACHE_NO_CACHE=1" # Over-subscribe -j to keep the build cluster busy - # --env "PARALLEL_LEVEL=$((CPUS * 2))" + --env "PARALLEL_LEVEL=$((CPUS * 4))" # --env "PARALLEL_LEVEL=64" # Retry failed builds 1 time before building locally @@ -254,6 +254,8 @@ runs: # Launch this container using the host's docker daemon set -x + echo "extra_launch_args: ${extra_launch_args[*]}" + ${{github.event.repository.name}}/.devcontainer/launch.sh \ --docker \ --cuda ${{inputs.cuda}} \ @@ -275,7 +277,7 @@ runs: --volume "${{runner.temp}}/.aws:/root/.aws" \ --volume "${{runner.temp}}/.config:/root/.config:ro" \ --volume "$(dirname "$(dirname "${{github.workspace}}")"):/__w" \ - "${extra_launch_args[@]}" \ + ${extra_launch_args[@]} \ -- /ci.sh - if: ${{ always() }}