From a2fb7e66f810905375c8a5bde9aede183e0ac502 Mon Sep 17 00:00:00 2001 From: Michael Cayanan <42812746+mcayanan@users.noreply.github.com> Date: Wed, 9 Oct 2024 11:11:53 -0700 Subject: [PATCH] HC-550: Update job retry to allow specifying new time limits and a new job queue (#33) * HC-550: Update retry job to allow updating of soft time limits and sending to another queue * add commas * cast to int * re-order --------- Co-authored-by: Mike Cayanan --- docker/hysds-io.json.lw-mozart-retry | 21 +++++++++++++++++++++ docker/job-spec.json.lw-mozart-retry | 12 ++++++++++++ retry.py | 19 +++++++++++++++++-- 3 files changed, 50 insertions(+), 2 deletions(-) diff --git a/docker/hysds-io.json.lw-mozart-retry b/docker/hysds-io.json.lw-mozart-retry index 9de9917..86f1165 100644 --- a/docker/hysds-io.json.lw-mozart-retry +++ b/docker/hysds-io.json.lw-mozart-retry @@ -27,6 +27,27 @@ ], "default": "0", "lambda": "lambda x: int(x)" + }, + { + "name": "soft_time_limit", + "placeholder": "specify a new soft time limit for the job being retried", + "type": "number", + "from": "submitter", + "optional": true + }, + { + "name": "time_limit", + "placeholder": "specify a new time limit for the job being retried", + "type": "number", + "from": "submitter", + "optional": true + }, + { + "name": "job_queue", + "placeholder": "specify a new job queue for the job being retried", + "type": "text", + "from": "submitter", + "optional": true } ] } \ No newline at end of file diff --git a/docker/job-spec.json.lw-mozart-retry b/docker/job-spec.json.lw-mozart-retry index 2616441..900d5f0 100644 --- a/docker/job-spec.json.lw-mozart-retry +++ b/docker/job-spec.json.lw-mozart-retry @@ -18,6 +18,18 @@ { "name": "job_priority_increment", "destination": "context" + }, + { + "name": "soft_time_limit", + "destination": "context" + }, + { + "name": "time_limit", + "destination": "context" + }, + { + "name": "job_queue", + "destination": "context" } ] } \ No newline at end of file diff --git a/retry.py b/retry.py index bd6734a..c301f1c 100644 --- a/retry.py +++ b/retry.py @@ -145,6 +145,22 @@ def resubmit_jobs(context): # delete old job status delete_by_id(index, _id) + # check if new queues, soft time limit, and time limit values were set + new_job_queue = context.get("job_queue", "") + if new_job_queue: + print(f"new job queue specified. Sending retry job to {new_job_queue}") + job_json['job_info']['job_queue'] = new_job_queue + + new_soft_time_limit = context.get("soft_time_limit", "") + if new_soft_time_limit: + print(f"new soft time limit specified. Setting new soft time limit to {int(new_soft_time_limit)}") + job_json['job_info']['soft_time_limit'] = int(new_soft_time_limit) + + new_time_limit = context.get("time_limit", "") + if new_time_limit: + print(f"new time limit specified. Setting new time limit to {int(new_time_limit)}") + job_json['job_info']['time_limit'] = int(new_time_limit) + # Before re-queueing, check to see if the job was under the job_failed index. If so, need to # move it back to job_status if index.startswith("job_failed"): @@ -162,8 +178,7 @@ def resubmit_jobs(context): log_job_status(job_status_json) # submit job - queue = job_json['job_info']['job_queue'] - run_job.apply_async((job_json,), queue=queue, + run_job.apply_async((job_json,), queue=job_json['job_info']['job_queue'], time_limit=job_json['job_info']['time_limit'], soft_time_limit=job_json['job_info']['soft_time_limit'], priority=job_json['priority'],