Skip to content

Commit

Permalink
More cleanups.
Browse files Browse the repository at this point in the history
  • Loading branch information
alliepiper committed Apr 23, 2024
1 parent 72ee620 commit a4f0c68
Show file tree
Hide file tree
Showing 2 changed files with 118 additions and 70 deletions.
52 changes: 34 additions & 18 deletions ci/compute-matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
],
"ctk": "11.1",
"gpu": "t4",
"cmake_cuda_arch": "75-real",
"sm": "75-real",
"host_compiler": {
"name": "llvm",
"version": "9",
Expand Down Expand Up @@ -98,8 +98,8 @@ def get_formatted_host_compiler_name(host_compiler):


def get_formatted_job_type(job_type):
    """Return the human-readable display name for a job type.

    Looks up ``job_type`` in the matrix YAML's ``formatted_jobs`` table
    (e.g. ``'nvrtc'`` -> ``'NVRTC'``). When no explicit formatting is
    defined, falls back to the job type with its first letter capitalized.
    """
    # .get() does a single lookup; the capitalized raw name is the default.
    return matrix_yaml['formatted_jobs'].get(job_type, job_type.capitalize())

Expand Down Expand Up @@ -142,6 +142,11 @@ def set_derived_tags(matrix_job):
if matrix_job['device_compiler'] == 'nvcc':
matrix_job['device_compiler'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'}

if 'sm' in matrix_job and matrix_job['sm'] == 'gpu':
if not 'gpu' in matrix_job:
raise Exception(f"sm: 'gpu' requires tag 'gpu' in matrix job: {matrix_job}")
matrix_job['sm'] = matrix_yaml['gpu_sm'][matrix_job['gpu']]


def generate_dispatch_group_name(matrix_job):
project_name = get_formatted_projected_name(matrix_job['project'])
Expand All @@ -163,8 +168,8 @@ def generate_dispatch_group_name(matrix_job):
def generate_dispatch_job_name(matrix_job, job_type):
std_str = ("C++" + str(matrix_job['std']) + " ") if 'std' in matrix_job else ''
cpu_str = matrix_job['cpu']
gpu_str = (', ' + matrix_job['gpu'].upper()) if job_type in matrix_yaml['gpu_required_job_types'] else ""
cuda_compile_arch = (" sm{" + matrix_job['cmake_cuda_arch'] + "}") if 'cmake_cuda_arch' in matrix_job else ""
gpu_str = (', ' + matrix_job['gpu'].upper()) if job_type in matrix_yaml['gpu_required_jobs'] else ""
cuda_compile_arch = (" sm{" + matrix_job['sm'] + "}") if 'sm' in matrix_job else ""
cmake_options = (' ' + matrix_job['cmake_options']) if 'cmake_options' in matrix_job else ""

host_compiler_name = get_formatted_host_compiler_name(matrix_job['host_compiler'])
Expand All @@ -183,7 +188,7 @@ def generate_dispatch_job_runner(matrix_job, job_type):
runner_os = "windows" if is_windows(matrix_job) else "linux"
cpu = matrix_job['cpu']

if not job_type in matrix_yaml['gpu_required_job_types']:
if not job_type in matrix_yaml['gpu_required_jobs']:
return f"{runner_os}-{cpu}-cpu16"

gpu = matrix_job['gpu']
Expand Down Expand Up @@ -218,7 +223,7 @@ def generate_dispatch_job_command(matrix_job, job_type):
device_compiler_name = matrix_job['device_compiler']['name']
device_compiler_exe = matrix_job['device_compiler']['exe']

cuda_compile_arch = matrix_job['cmake_cuda_arch'] if 'cmake_cuda_arch' in matrix_job else ''
cuda_compile_arch = matrix_job['sm'] if 'sm' in matrix_job else ''
cmake_options = matrix_job['cmake_options'] if 'cmake_options' in matrix_job else ''

command = f"\"{script_name}\""
Expand Down Expand Up @@ -264,10 +269,10 @@ def generate_dispatch_group_jobs(matrix_job):
"two_stage": []
}

job_types = set(copy.deepcopy(matrix_job['job_types']))
job_types = set(matrix_job['jobs'])

    # jobs that appear in build_required_jobs:
build_required = set(matrix_yaml['build_required_job_types']) & job_types
build_required = set(matrix_yaml['build_required_jobs']) & job_types
has_build_and_test = len(build_required) > 0
job_types -= build_required

Expand Down Expand Up @@ -304,7 +309,7 @@ def matrix_job_to_dispatch_group(matrix_job):
def explode_tags(matrix_job):
explode_tag = None
for tag in matrix_job:
if tag != "job_types" and isinstance(matrix_job[tag], list):
if not tag in matrix_yaml['non_exploded_tags'] and isinstance(matrix_job[tag], list):
explode_tag = tag
break

Expand Down Expand Up @@ -461,24 +466,35 @@ def natural_sort_key(key):
def pretty_print_workflow(final_workflow, outfile):
print(f"::group::Job list", file=outfile)

def print_job_array(total_jobs, key, group_json):
total_jobs = 0
runner_counts = {}
def print_job_array(key, group_json):
nonlocal total_jobs
nonlocal runner_counts

job_array = group_json[key] if key in group_json else []
key += ":"
for job_json in job_array:
total_jobs += 1
print(f"{total_jobs:4} {key:13} {job_json['name']}", file=outfile)
return total_jobs
runner = job_json['runner']
runner_counts[runner] = runner_counts.get(runner, 0) + 1

total_jobs = 0
for group_name, group_json in final_workflow.items():
print(f"{'':4} {group_name}:", file=outfile)
total_jobs = print_job_array(total_jobs, 'standalone', group_json)
print_job_array('standalone', group_json)
if 'two_stage' in group_json:
for two_stage_json in group_json['two_stage']:
total_jobs = print_job_array(total_jobs, 'producers', two_stage_json)
total_jobs = print_job_array(total_jobs, 'consumers', two_stage_json)
print_job_array('producers', two_stage_json)
print_job_array('consumers', two_stage_json)
print(f"::endgroup::", file=outfile)

print(f"::group::Runner counts", file=outfile)
print(f"::notice:: Total jobs: {total_jobs}", file=outfile)
# Sort by descending counts:
runner_counts = {k : v for k, v in sorted(runner_counts.items(), key=lambda item: item[1], reverse=True)}
for runner, count in runner_counts.items():
print(f"::notice:: {count}x {runner}", file=outfile)
print(f"::endgroup::", file=outfile)
print(f"Total jobs: {total_jobs}", file=outfile)

print("::group::Final Workflow JSON", file=outfile)
print(json.dumps(final_workflow, indent=2), file=outfile)
Expand Down
136 changes: 84 additions & 52 deletions ci/matrix.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,79 +40,107 @@ oneapi: &oneapi { name: 'oneapi', version: '2023.2.0', exe: 'icpc' }
workflows:
pull_request:
# default_projects: nvcc
- {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc6, std: [11, 14] }
- {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: [*gcc7, *gcc8, *gcc9, *llvm9], std: [11, 14, 17] }
- {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: *msvc2017, std: 14 }
- {job_types: ['build'], ctk: *ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], cmake_cuda_arch: '60;70;80;90'}
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc7, *gcc8, *gcc9], std: [11, 14, 17] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc10, *gcc11], std: [11, 14, 17, 20] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm9, *llvm10], std: [11, 14, 17] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm11, *llvm12, *llvm13], std: [11, 14, 17, 20] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm14, *llvm15], std: [11, 14, 17, 20] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 14, 17, 20], cpu: 'arm64'}
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], cmake_cuda_arch: '90a'}
- {job_types: ['test'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] }
- {job_types: ['test'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2019, std: [14, 17] }
- {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] }
- {jobs: ['build'], ctk: *ctk_prev_min, host_compiler: *gcc6, std: [11, 14] }
- {jobs: ['build'], ctk: *ctk_prev_min, host_compiler: [*gcc7, *gcc8, *gcc9, *llvm9], std: [11, 14, 17] }
- {jobs: ['build'], ctk: *ctk_prev_min, host_compiler: *msvc2017, std: 14 }
- {jobs: ['build'], ctk: *ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], sm: '60;70;80;90'}
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: [*gcc7, *gcc8, *gcc9], std: [11, 14, 17] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: [*gcc10, *gcc11], std: [11, 14, 17, 20] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: [*llvm9, *llvm10], std: [11, 14, 17] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: [*llvm11, *llvm12, *llvm13], std: [11, 14, 17, 20] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: [*llvm14, *llvm15], std: [11, 14, 17, 20] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 14, 17, 20], cpu: 'arm64'}
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20], sm: '90a'}
- {jobs: ['test'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] }
- {jobs: ['test'], ctk: *ctk_curr, host_compiler: *llvm16, std: [11, 14, 17, 20] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: *msvc2019, std: [14, 17] }
- {jobs: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] }
# default_projects: clang-cuda
- {job_types: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]}
- {jobs: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]}
# nvrtc:
- {job_types: ['nvrtc'], project: 'libcudacxx', ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]}
- {jobs: ['nvrtc'], project: 'libcudacxx', ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]}
# verify-codegen:
- { job_types: ['verify_codegen'], project: 'libcudacxx'}
- {jobs: ['verify_codegen'], project: 'libcudacxx'}
# cccl-infra:
- {job_types: ['infra'], project: 'cccl', ctk: *ctk_prev_min, host_compiler: [*gcc-oldest, *llvm-oldest]}
- {job_types: ['infra'], project: 'cccl', ctk: *ctk_curr, host_compiler: [*gcc-newest, *llvm-newest]}
- {jobs: ['infra'], project: 'cccl', ctk: *ctk_prev_min, host_compiler: [*gcc-oldest, *llvm-oldest]}
- {jobs: ['infra'], project: 'cccl', ctk: *ctk_curr, host_compiler: [*gcc-newest, *llvm-newest]}
nightly:
- {job_types: ['test'], ctk: *ctk_prev_min, gpu: 'v100', cmake_cuda_arch: '70', host_compiler: *gcc6, std: [11] }
- {job_types: ['test'], ctk: *ctk_prev_min, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *llvm9, std: [17] }
- {job_types: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', cmake_cuda_arch: '75', host_compiler: *gcc11, std: [17] }
- {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc7, std: [14] }
- {job_types: ['test'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20] }
- {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', cmake_cuda_arch: '89', host_compiler: *llvm9, std: [11] }
- {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20] }
- {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *llvm16, std: [17] }
- {jobs: ['test'], ctk: *ctk_prev_min, gpu: 'v100', sm: 'gpu', host_compiler: *gcc6, std: [11] }
- {jobs: ['test'], ctk: *ctk_prev_min, gpu: 't4', sm: 'gpu', host_compiler: *llvm9, std: [17] }
- {jobs: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', sm: 'gpu', host_compiler: *gcc11, std: [17] }
- {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', host_compiler: *gcc7, std: [14] }
- {jobs: ['test'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', host_compiler: *gcc12, std: [11, 14, 17, 20] }
- {jobs: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', sm: 'gpu', host_compiler: *llvm9, std: [11] }
- {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', host_compiler: *gcc12, std: [11, 20] }
- {jobs: ['test'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', host_compiler: *llvm16, std: [17] }
# nvrtc:
- {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *gcc12, std: [20], project: ['libcudacxx']}
- {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc12, std: [20], project: ['libcudacxx']}
- {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20], project: ['libcudacxx']}
- {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20], project: ['libcudacxx']}
- {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', sm: 'gpu', host_compiler: *gcc12, std: [20], project: ['libcudacxx']}
- {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', sm: 'gpu', host_compiler: *gcc12, std: [20], project: ['libcudacxx']}
- {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', sm: 'gpu', host_compiler: *gcc12, std: [11, 14, 17, 20], project: ['libcudacxx']}
- {jobs: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', sm: 'gpu', host_compiler: *gcc12, std: [11, 20], project: ['libcudacxx']}

#
# Resources for compute_matrix.py:
# Resources for compute_matrix.py. These can be modified to add new jobs, etc.
#

# Error if tags are missing:
required_tags: ['job_types']

# Tags that will be added if not specified:
defaulted_tags: ['ctk', 'cpu', 'gpu', 'host_compiler', 'device_compiler', 'project', 'os']

# Tags that may be omitted:
optional_tags: ['std', 'cmake_cuda_arch', 'cmake_options']

# job_types that have an implied prerequisite 'build' job:
build_required_job_types:
# Jobs are executed by running scripts:
# - Linux: 'ci/<job>_<project>.sh`
# - Windows: `ci/windows/<job>_<project>.bat`

# A matrix entry must have the following tag.
required_tags:
- 'jobs' # A list of job types to run (e.g. 'build', 'test', 'nvrtc', 'infra', 'verify_codegen', ...) for
# the specified configuration(s).

# If a matrix entry omits these tags, a default value (defined later in `default_<tag>`) is used.
defaulted_tags:
- 'ctk' # CUDA ToolKit version. Will be exploded if a list.
- 'cpu' # CPU architecture. Will be exploded if a list.
- 'gpu' # GPU model. Will be exploded if a list.
- 'host_compiler' # Host compiler {name, version, exe}. Will be exploded if a list.
- 'device_compiler' # Device compiler as {name, version, exe} or 'nvcc' to use nvcc from the specified `ctk`.
# Will be exploded if a list.
- 'project' # Project name (e.g. libcudacxx, cub, thrust, cccl). Will be exploded if a list.
- 'os' # Operating system. Will be exploded if a list.

# These tags will only exist if needed:
optional_tags:
- 'std' # C++ standard. Passed to script with `-std <std>`. Will be exploded if a list.
  - 'sm'             # `CMAKE_CUDA_ARCHITECTURES`. Passed to script with `-arch <sm>`.
# Defaults to use the settings in the CMakePresets.json file.
# Set to 'gpu' to only target the GPU in the `gpu` tag.
# Can pass multiple architectures via "60;70-real;80-virtual"
# Will be exploded if a list (e.g. `sm: ['60;70;80;90', '90a']` creates two jobs)
- 'cmake_options' # Additional CMake options to pass to the build. Passed to script with `-cmake_options "<cmake_options>"`.
# Will be exploded if a list.

# Tags that aren't exploded:
non_exploded_tags:
- 'jobs' # Keeping jobs as a list allows for dependency schedule for e.g. build->test steps.

# Jobs that have an implied prerequisite 'build' job:
build_required_jobs:
- 'test'

# job_types that require a GPU
gpu_required_job_types:
# Jobs that require a GPU
gpu_required_jobs:
- 'test'
- 'nvrtc'
- 'infra' # cccl infra's example project test launches a kernel

formatted_job_types: # Default: Capitalize first letter.
'nvrtc': 'NVRTC'
# Human readable name for jobs. Default behavior is to capitalize the first letter.
formatted_jobs:
'nvrtc': 'NVRTC'
'verify_codegen': 'VerifyCodegen'

# Human readable name for projects. Default behavior uses the project name as-is.
formatted_project_names:
'libcudacxx': 'libcu++'
'cub': 'CUB'
'thrust': 'Thrust'
'cccl': 'CCCL'

# Human readable name for compilers. Default behavior uses the "compiler.name" tag as-is.
formatted_host_compiler_names:
'llvm': 'clang'
'oneapi': 'intel'
Expand Down Expand Up @@ -155,6 +183,7 @@ default_os_lookup:
'ctk12.4-cl14.39': 'windows2022'
'ctk12.4-oneapi2023.2.0': 'ubuntu22.04'

# All known GPUs
gpus:
- 'v100' # ?? runners
- 't4' # 8 runners
Expand All @@ -164,7 +193,8 @@ gpus:
- 'rtx4090' # 10 runners
- 'h100' # 16 runners

gpus_sm:
# SM versions of GPUs
gpu_sm:
'v100': '70'
't4': '75'
'rtx2080': '75'
Expand All @@ -173,7 +203,8 @@ gpus_sm:
'rtx4090': '89'
'h100': '90'

gpus_mem_gb:
# Memory size of GPUs
gpu_mem_gb:
'v100': '32'
't4': '16'
'rtx2080': '8'
Expand All @@ -182,6 +213,7 @@ gpus_mem_gb:
'rtx4090': '24'
'h100': '80'

# GPUs that require `-testing` at the end of the runner pool name.
testing_pool_gpus:
- 't4'
- 'rtx2080'
Expand Down

0 comments on commit a4f0c68

Please sign in to comment.