Skip to content

Commit

Permalink
always use an agent heartbeat timeout of 600 in CI/CD
Browse files Browse the repository at this point in the history
Summary:
The current logic breaks down if you deploy to serverless and then redeploy with a code change while the initial deploy is still in progress. That scenario seems more likely, and more worth designing around, than the (hopefully unlikely) case of the serverless agent going down, where the only cost of the longer timeout is waiting a few more minutes to find out.

Test Plan: Point a pex deploy at this image hash
  • Loading branch information
gibsondan committed Feb 27, 2024
1 parent 7c2f65f commit a505c00
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 16 deletions.
9 changes: 1 addition & 8 deletions src/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -116,21 +116,14 @@ if [[ -z $PR_STATUS || "$PR_STATUS" == "OPEN" ]]; then
echo "Deploying location ${INPUT_LOCATION_NAME} to deployment ${DEPLOYMENT_NAME}..."
echo "deployment=${DEPLOYMENT_NAME}" >> ${GITHUB_OUTPUT}

# Extend timeout in case the agent is still spinning up
if [[ $CI_RUN_NUMBER -eq 1 ]]; then
AGENT_HEARTBEAT_TIMEOUT=600
else
AGENT_HEARTBEAT_TIMEOUT=90
fi

dagster-cloud workspace add-location \
--url "${DAGSTER_CLOUD_URL}/${DEPLOYMENT_NAME}" \
--api-token "$DAGSTER_CLOUD_API_TOKEN" \
--location-file "${INPUT_LOCATION_FILE}" \
--location-name "${INPUT_LOCATION_NAME}" \
--image "${INPUT_REGISTRY}:${INPUT_IMAGE_TAG}" \
--location-load-timeout 3600 \
--agent-heartbeat-timeout $AGENT_HEARTBEAT_TIMEOUT \
--agent-heartbeat-timeout 600 \
--git-url "$COMMIT_URL" \
--commit-hash "$COMMIT_HASH"

Expand Down
16 changes: 8 additions & 8 deletions src/deploy_pex.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@

import yaml

DAGSTER_CLOUD_PEX_PATH = Path(__file__).parent.parent / "generated/gha/dagster-cloud.pex"
DAGSTER_CLOUD_PEX_PATH = (
Path(__file__).parent.parent / "generated/gha/dagster-cloud.pex"
)
UPDATE_COMMENT_SCRIPT_PATH = Path(__file__).parent / "create_or_update_comment.py"


Expand Down Expand Up @@ -70,7 +72,9 @@ def get_locations(dagster_cloud_file) -> List[str]:
workspace_contents = f.read()
workspace_contents_yaml = yaml.safe_load(workspace_contents)

return [location["location_name"] for location in workspace_contents_yaml["locations"]]
return [
location["location_name"] for location in workspace_contents_yaml["locations"]
]


def run(args):
Expand Down Expand Up @@ -112,17 +116,13 @@ def deploy_pex(args, branch_deployment_name: Optional[str], build_method: str):
args.insert(0, os.path.dirname(dagster_cloud_yaml))
args = args + [f"--build-method={build_method}"]
commit_hash = os.getenv("GITHUB_SHA")
git_url = (
f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/tree/{commit_hash}"
)
git_url = f"{os.getenv('GITHUB_SERVER_URL')}/{os.getenv('GITHUB_REPOSITORY')}/tree/{commit_hash}"
deployment_name = branch_deployment_name if branch_deployment_name else "prod"
deployment_flag = f"--url={os.getenv('DAGSTER_CLOUD_URL')}/{deployment_name}"
locations = get_locations(dagster_cloud_yaml)
# give first deploy extra time to spin up agent
agent_heartbeat_timeout = 600 if (os.getenv("GITHUB_RUN_NUMBER") == "1") else 90
timeout_args = [
"--location-load-timeout=3600",
f"--agent-heartbeat-timeout={agent_heartbeat_timeout}",
"--agent-heartbeat-timeout=600",
]
notify(branch_deployment_name, locations, "pending")

Expand Down

0 comments on commit a505c00

Please sign in to comment.