Skip to content

Commit

Permalink
HC-550: Update job retry to allow specifying new time limits and a ne…
Browse files Browse the repository at this point in the history
…w job queue (#33)

* HC-550: Update retry job to allow updating of soft time limits and sending to another queue

* add commas

* cast to int

* re-order

---------

Co-authored-by: Mike Cayanan <[email protected]>
  • Loading branch information
mcayanan and Mike Cayanan authored Oct 9, 2024
1 parent 51d703f commit a2fb7e6
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 2 deletions.
21 changes: 21 additions & 0 deletions docker/hysds-io.json.lw-mozart-retry
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,27 @@
],
"default": "0",
"lambda": "lambda x: int(x)"
},
{
"name": "soft_time_limit",
"placeholder": "specify a new soft time limit for the job being retried",
"type": "number",
"from": "submitter",
"optional": true
},
{
"name": "time_limit",
"placeholder": "specify a new time limit for the job being retried",
"type": "number",
"from": "submitter",
"optional": true
},
{
"name": "job_queue",
"placeholder": "specify a new job queue for the job being retried",
"type": "text",
"from": "submitter",
"optional": true
}
]
}
12 changes: 12 additions & 0 deletions docker/job-spec.json.lw-mozart-retry
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,18 @@
{
"name": "job_priority_increment",
"destination": "context"
},
{
"name": "soft_time_limit",
"destination": "context"
},
{
"name": "time_limit",
"destination": "context"
},
{
"name": "job_queue",
"destination": "context"
}
]
}
19 changes: 17 additions & 2 deletions retry.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,22 @@ def resubmit_jobs(context):
# delete old job status
delete_by_id(index, _id)

# check if new queues, soft time limit, and time limit values were set
new_job_queue = context.get("job_queue", "")
if new_job_queue:
print(f"new job queue specified. Sending retry job to {new_job_queue}")
job_json['job_info']['job_queue'] = new_job_queue

new_soft_time_limit = context.get("soft_time_limit", "")
if new_soft_time_limit:
print(f"new soft time limit specified. Setting new soft time limit to {int(new_soft_time_limit)}")
job_json['job_info']['soft_time_limit'] = int(new_soft_time_limit)

new_time_limit = context.get("time_limit", "")
if new_time_limit:
print(f"new time limit specified. Setting new time limit to {int(new_time_limit)}")
job_json['job_info']['time_limit'] = int(new_time_limit)

# Before re-queueing, check to see if the job was under the job_failed index. If so, need to
# move it back to job_status
if index.startswith("job_failed"):
Expand All @@ -162,8 +178,7 @@ def resubmit_jobs(context):
log_job_status(job_status_json)

# submit job
queue = job_json['job_info']['job_queue']
run_job.apply_async((job_json,), queue=queue,
run_job.apply_async((job_json,), queue=job_json['job_info']['job_queue'],
time_limit=job_json['job_info']['time_limit'],
soft_time_limit=job_json['job_info']['soft_time_limit'],
priority=job_json['priority'],
Expand Down

0 comments on commit a2fb7e6

Please sign in to comment.