From 8b5ae80686e1849066756be3757e8a402d0d1743 Mon Sep 17 00:00:00 2001 From: Allison Piper Date: Tue, 23 Apr 2024 05:48:33 +0000 Subject: [PATCH] Scan workflow for duplicates, emit warnings. --- ci/compute-matrix.py | 184 +++++++++++++++++++++++++++++++------------ ci/matrix.yaml | 81 +++++++++---------- 2 files changed, 174 insertions(+), 91 deletions(-) diff --git a/ci/compute-matrix.py b/ci/compute-matrix.py index 926d175cb4c..539abbae731 100755 --- a/ci/compute-matrix.py +++ b/ci/compute-matrix.py @@ -65,7 +65,6 @@ import yaml matrix_yaml = None -dirty_projects = [] def write_output(key, value): @@ -107,7 +106,7 @@ def is_windows(matrix_job): return matrix_job['os'].startswith('windows') -def validate_matrix_job(matrix_job): +def validate_required_tags(matrix_job): for tag in matrix_yaml['required_tags']: if tag not in matrix_job: raise Exception(f"Missing required tag '{tag}' in matrix job {matrix_job}") @@ -122,7 +121,7 @@ def validate_matrix_job(matrix_job): raise Exception(f"Unknown tag '{tag}' in matrix job {matrix_job}") -def fill_defaults_matrix_job(matrix_job): +def set_default_tags(matrix_job): generic_defaults = set(matrix_yaml['defaulted_tags']) generic_defaults -= set(['os']) # handled specially. @@ -142,24 +141,6 @@ def set_derived_tags(matrix_job): matrix_job['device_compiler'] = {'name': 'nvcc', 'version': matrix_job['ctk'], 'exe': 'nvcc'} -def explode_matrix_job(matrix_job): - new_jobs = [] - for tag in matrix_job: - if tag != "job_types" and isinstance(matrix_job[tag], list): - for value in matrix_job[tag]: - new_job = copy.deepcopy(matrix_job) - new_job[tag] = value - exploded = explode_matrix_job(new_job) - if exploded: - new_jobs.extend(exploded) - else: - new_jobs.append(new_job) - # Only explode the first explodable tag. Recursion handles the others. 
- break - - return new_jobs if len(new_jobs) > 0 else None - - def generate_dispatch_group_name(matrix_job): project_name = get_formatted_projected_name(matrix_job['project']) ctk = matrix_job['ctk'] @@ -315,34 +296,125 @@ def merge_dispatch_groups(accum_dispatch_groups, new_dispatch_groups): def matrix_job_to_dispatch_group(matrix_job): - validate_matrix_job(matrix_job) - fill_defaults_matrix_job(matrix_job) + return {generate_dispatch_group_name(matrix_job): generate_dispatch_group_jobs(matrix_job)} + - # If the job explodes, recurse into the results: - exploded_jobs = explode_matrix_job(matrix_job) - if exploded_jobs is not None: - all_dispatch_groups = {} - for job in exploded_jobs: - dispatch_group = matrix_job_to_dispatch_group(job) - merge_dispatch_groups(all_dispatch_groups, dispatch_group) - return all_dispatch_groups +def explode_tags(matrix_job): + explode_tag = None + for tag in matrix_job: + if tag != "job_types" and isinstance(matrix_job[tag], list): + explode_tag = tag + break - set_derived_tags(matrix_job) + if not explode_tag: + return [matrix_job] - # Filter jobs that don't need to rerun: - if matrix_job['project'] not in dirty_projects: - return {} + result = [] + for value in matrix_job[explode_tag]: + new_job = copy.deepcopy(matrix_job) + new_job[explode_tag] = value + result.extend(explode_tags(new_job)) - # We have a fully specified job, start processing. 
- dispatch_group_name = generate_dispatch_group_name(matrix_job) - dispatch_group_jobs = generate_dispatch_group_jobs(matrix_job) + return result - return {dispatch_group_name: dispatch_group_jobs} + +def preprocess_matrix_jobs(matrix_jobs): + result = [] + for matrix_job in matrix_jobs: + validate_required_tags(matrix_job) + set_default_tags(matrix_job) + for job in explode_tags(matrix_job): + set_derived_tags(job) + result.append(job) + return result + + +def filter_projects(matrix_jobs, projects): + return [job for job in matrix_jobs if job['project'] in projects] def finalize_workflow_dispatch_groups(workflow_dispatch_groups_orig): workflow_dispatch_groups = copy.deepcopy(workflow_dispatch_groups_orig) + # Merge consumers for any two_stage arrays that have the same producer(s). Print a warning. + for group_name, group_json in workflow_dispatch_groups.items(): + if not 'two_stage' in group_json: + continue + two_stage_json = group_json['two_stage'] + merged_producers = [] + merged_consumers = [] + for two_stage in two_stage_json: + producers = two_stage['producers'] + consumers = two_stage['consumers'] + if producers in merged_producers: + producer_index = merged_producers.index(producers) + matching_consumers = merged_consumers[producer_index] + + producer_names = ", ".join([producer['name'] for producer in producers]) + print(f"::warning file=ci/matrix.yaml::Duplicate producer '{producer_names}' in '{group_name}'", + file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in matching_consumers]) + print(f"::warning file=ci/matrix.yaml::Original consumers: {consumer_names}", file=sys.stderr) + consumer_names = ", ".join([consumer['name'] for consumer in consumers]) + print(f"::warning file=ci/matrix.yaml::Duplicate consumers: {consumer_names}", file=sys.stderr) + # Merge if unique: + for consumer in consumers: + if consumer not in matching_consumers: + matching_consumers.append(consumer) + consumer_names = ", ".join([consumer['name'] for 
consumer in matching_consumers])
+                print(f"::warning file=ci/matrix.yaml::Merged consumers: {consumer_names}", file=sys.stderr)
+            else:
+                merged_producers.append(producers)
+                merged_consumers.append(consumers)
+        # Update with the merged lists:
+        two_stage_json = []
+        for producers, consumers in zip(merged_producers, merged_consumers):
+            two_stage_json.append({'producers': producers, 'consumers': consumers})
+        group_json['two_stage'] = two_stage_json
+
+    # Check for any duplicate job names in standalone arrays. Warn and remove duplicates.
+    for group_name, group_json in workflow_dispatch_groups.items():
+        standalone_jobs = group_json['standalone'] if 'standalone' in group_json else []
+        unique_standalone_jobs = []
+        for job_json in standalone_jobs:
+            if job_json in unique_standalone_jobs:
+                print(f"::warning file=ci/matrix.yaml::Removing duplicate standalone job '{job_json['name']}' in '{group_name}'",
+                      file=sys.stderr)
+            else:
+                unique_standalone_jobs.append(job_json)
+
+        # If any producer/consumer jobs exist in standalone arrays, warn and remove the standalones.
+        two_stage_jobs = group_json['two_stage'] if 'two_stage' in group_json else []
+        for two_stage_job in two_stage_jobs:
+            for producer in two_stage_job['producers']:
+                if producer in unique_standalone_jobs:
+                    print(f"::warning file=ci/matrix.yaml::Removing standalone job '{producer['name']}' " +
+                          f"as it appears as a producer in '{group_name}'",
+                          file=sys.stderr)
+                    unique_standalone_jobs.remove(producer)
+            for consumer in two_stage_job['consumers']:
+                if consumer in unique_standalone_jobs:
+                    print(f"::warning file=ci/matrix.yaml::Removing standalone job '{consumer['name']}' " +
+                          f"as it appears as a consumer in '{group_name}'",
+                          file=sys.stderr)
+                    unique_standalone_jobs.remove(consumer)
+        group_json['standalone'] = unique_standalone_jobs
+
+        # If any producer or consumer job appears more than once, warn and leave as-is.
+        all_two_stage_jobs = []
+        duplicate_jobs = {}
+        for two_stage_job in two_stage_jobs:
+            for job in two_stage_job['producers'] + two_stage_job['consumers']:
+                if job in all_two_stage_jobs:
+                    duplicate_jobs[job['name']] = duplicate_jobs.get(job['name'], 1) + 1
+                else:
+                    all_two_stage_jobs.append(job)
+        for job_name, count in duplicate_jobs.items():
+            print(f"::warning file=ci/matrix.yaml::" +
+                  f"Job '{job_name}' appears {count} times in '{group_name}'.",
+                  f"Cannot remove duplicate while resolving dependencies. This job WILL execute {count} times.",
+                  file=sys.stderr)
+
     # Remove all named values that contain an empty list of jobs:
     for group_name, group_json in workflow_dispatch_groups.items():
         if not group_json['standalone'] and not group_json['two_stage']:
@@ -368,6 +440,7 @@ def natural_sort_key(key):
             group_json['two_stage'],
             key=lambda x: natural_sort_key(x['producers'][0]['name']))
 
     # Count the total number of jobs:
+    print(f"::group::Job list", file=sys.stderr)
     total_jobs = 0
     for group_name, group_json in workflow_dispatch_groups.items():
         if 'standalone' in group_json:
@@ -383,6 +456,7 @@ def natural_sort_key(key):
                     total_jobs += 1
                     print(f"{total_jobs} - {group_name}: {job_json['name']}", file=sys.stderr)
 
+    print(f"::endgroup::", file=sys.stderr)
     print(f"Total jobs: {total_jobs}", file=sys.stderr)
 
     # Check to see if any .two_stage.producers arrays have more than 1 job, which is not supported. See ci-dispatch-two-stage.yml for details.
@@ -401,7 +475,6 @@ def natural_sort_key(key): def main(): - global dirty_projects global matrix_yaml parser = argparse.ArgumentParser(description='Compute matrix for workflow') @@ -410,7 +483,6 @@ def main(): parser.add_argument('--dirty-projects', nargs='*', dest='dirty_projects', help='Project(s) to rerun', default=[]) args = parser.parse_args() - dirty_projects = args.dirty_projects # Check if the matrix file exists if not os.path.isfile(args.matrix_file): @@ -421,8 +493,8 @@ def main(): matrix_yaml = yaml.safe_load(f) # Check if the workflow is valid - if args.workflow not in matrix_yaml: - print(f"Error: Workflow '{args.workflow}' does not exist in the matrix YAML.") + if args.workflow not in matrix_yaml['workflows']: + print(f"Error: Workflow 'workflows.{args.workflow}' does not exist in the matrix YAML.") sys.exit(1) # Print usage if no arguments are provided @@ -433,24 +505,38 @@ def main(): # Print the arguments to stderr: print("Arguments:", file=sys.stderr) print(args, file=sys.stderr) - print("Matrix YAML:", file=sys.stderr) - print(matrix_yaml, file=sys.stderr) - matrix_json = matrix_yaml[args.workflow] + # print("::group::Matrix YAML", file=sys.stderr) + # print("Matrix YAML:", file=sys.stderr) + # print(matrix_yaml, file=sys.stderr) + # print("::end-group::", file=sys.stderr) + + matrix_jobs = preprocess_matrix_jobs(matrix_yaml['workflows'][args.workflow]) + + # print("::group::Matrix Jobs", file=sys.stderr) + # print("Matrix Jobs:", file=sys.stderr) + # for matrix_job in matrix_jobs: + # print(json.dumps(matrix_job, indent=None, separators=(',', ':')), file=sys.stderr) + # print("::end-group::", file=sys.stderr) + + if args.dirty_projects: + matrix_jobs = filter_projects(matrix_jobs, args.dirty_projects) workflow_dispatch_groups = {} - for matrix_job in matrix_json: + for matrix_job in matrix_jobs: merge_dispatch_groups(workflow_dispatch_groups, matrix_job_to_dispatch_group(matrix_job)) final_workflow = 
finalize_workflow_dispatch_groups(workflow_dispatch_groups)
 
     # Pretty print the workflow json to stderr:
+    print("::group::Final Workflow", file=sys.stderr)
     print(json.dumps(final_workflow, indent=2), file=sys.stderr)
+    print("::endgroup::", file=sys.stderr)
 
     # Print a single-line, compact version of the workflow json to stdout:
-    write_output("WORKFLOW", json.dumps(final_workflow))
+    write_output("WORKFLOW", json.dumps(final_workflow, indent=None, separators=(',', ':')))
     # Print the list of key (dispatch group) names to stdout in a single line as a json list:
-    write_output("WORKFLOW_KEYS", json.dumps(list(final_workflow.keys())))
+    write_output("WORKFLOW_KEYS", json.dumps(list(final_workflow.keys()), indent=None, separators=(',', ':')))
 
 
 if __name__ == '__main__':
diff --git a/ci/matrix.yaml b/ci/matrix.yaml
index 6a293d47a01..871e02e0fd1 100644
--- a/ci/matrix.yaml
+++ b/ci/matrix.yaml
@@ -130,45 +130,42 @@ testing_pool_gpus:
 #
 # Workflow matrices:
 #
-
-# Configurations that will run for every PR
-pull_request:
-  - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: [*msvc2017, *gcc6], std: [11, 14] }
-  - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: [*gcc7, *gcc8, *gcc9, *llvm9], std: [11, 14, 17] }
-  - {job_types: ['build'], ctk: *ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], cmake_cuda_arch: '60;70;80;90'}
-  - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] }
-  - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc7, *gcc8, *gcc9], std: [11, 14, 17] }
-  - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc10, *gcc11], std: [11, 14, 17, 20] }
-  - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm9, *llvm10], std: [11, 14, 17] }
-  - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm11, *llvm12, *llvm13], std: [11, 14, 17, 20] }
-  - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm14, *llvm15], std: [11, 14, 17, 20] }
-  - {job_types: ['build'], ctk: 
*ctk_curr, host_compiler: *msvc2019, std: [14, 17] } - - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] } - - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] } - - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 14, 17, 20], cpu: 'arm64'} - - {job_types: ['test'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 14, 17, 20], cmake_cuda_arch: '60;70;80;90'} - # clang-cuda: - - {job_types: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]} - # nvrtc: - - {job_types: ['nvrtc'], project: 'libcudacxx', ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]} - # cccl-infra: - - {job_types: ['infra'], project: 'cccl', ctk: *ctk_prev_min, host_compiler: [*gcc-oldest, *llvm-oldest]} - - {job_types: ['infra'], project: 'cccl', ctk: *ctk_curr, host_compiler: [*gcc-newest, *llvm-newest]} - # verify-codegen: - - { job_types: ['verify_codegen'], project: 'libcudacxx'} - -# Run each night: -nightly: - - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 'v100', cmake_cuda_arch: '70', host_compiler: *gcc6, std: [11] } - - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *llvm9, std: [17] } - - {job_types: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', cmake_cuda_arch: '75', host_compiler: *gcc11, std: [17] } - - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc7, std: [14] } - - {job_types: ['test'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20] } - - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', cmake_cuda_arch: '89', host_compiler: *llvm9, std: [11] } - - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20] } - - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *llvm16, std: [17] } - # 
nvrtc: - - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} - - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} - - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20], project: ['libcudacxx']} - - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20], project: ['libcudacxx']} +workflows: + pull_request: + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: [*msvc2017, *gcc6], std: [11, 14] } + - {job_types: ['build'], ctk: *ctk_prev_min, host_compiler: [*gcc7, *gcc8, *gcc9, *llvm9], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_prev_max, host_compiler: *gcc11, std: [11, 14, 17], cmake_cuda_arch: '60;70;80;90'} + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc7, *gcc8, *gcc9], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc10, *gcc11], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm9, *llvm10], std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm11, *llvm12, *llvm13], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*llvm14, *llvm15], std: [11, 14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2019, std: [14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *msvc2022, std: [14, 17, 20] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: *oneapi, std: [11, 14, 17] } + - {job_types: ['build'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 14, 17, 20], cpu: 'arm64'} + - {job_types: ['test'], ctk: *ctk_curr, host_compiler: [*gcc12, *llvm16], std: [11, 
14, 17, 20], cmake_cuda_arch: '60;70;80;90'} + # clang-cuda: + - {job_types: ['build'], device_compiler: *llvm-newest, host_compiler: *llvm-newest, std: [17, 20]} + # nvrtc: + - {job_types: ['nvrtc'], project: 'libcudacxx', ctk: *ctk_curr, host_compiler: *gcc12, std: [11, 14, 17, 20]} + # cccl-infra: + - {job_types: ['infra'], project: 'cccl', ctk: *ctk_prev_min, host_compiler: [*gcc-oldest, *llvm-oldest]} + - {job_types: ['infra'], project: 'cccl', ctk: *ctk_curr, host_compiler: [*gcc-newest, *llvm-newest]} + # verify-codegen: + - { job_types: ['verify_codegen'], project: 'libcudacxx'} + nightly: + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 'v100', cmake_cuda_arch: '70', host_compiler: *gcc6, std: [11] } + - {job_types: ['test'], ctk: *ctk_prev_min, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *llvm9, std: [17] } + - {job_types: ['test'], ctk: *ctk_prev_max, gpu: 'rtx2080', cmake_cuda_arch: '75', host_compiler: *gcc11, std: [17] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc7, std: [14] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'rtx4090', cmake_cuda_arch: '89', host_compiler: *llvm9, std: [11] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20] } + - {job_types: ['test'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *llvm16, std: [17] } + # nvrtc: + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 't4', cmake_cuda_arch: '75', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'rtxa6000', cmake_cuda_arch: '86', host_compiler: *gcc12, std: [20], project: ['libcudacxx']} + - {job_types: ['nvrtc'], ctk: *ctk_curr, gpu: 'l4', cmake_cuda_arch: '89', host_compiler: *gcc12, std: [11, 14, 17, 20], project: ['libcudacxx']} + - {job_types: 
['nvrtc'], ctk: *ctk_curr, gpu: 'h100', cmake_cuda_arch: '90', host_compiler: *gcc12, std: [11, 20], project: ['libcudacxx']}