Skip to content

Commit

Permalink
feat: add prefect deploy that calls imap cli
Browse files Browse the repository at this point in the history
  • Loading branch information
alastairtree committed Feb 4, 2025
1 parent 8212dcd commit e5d40cf
Show file tree
Hide file tree
Showing 8 changed files with 2,293 additions and 147 deletions.
1 change: 1 addition & 0 deletions .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
!/src/matlab
!/deploy
#!/dist/file.txt
!/src/prefect_server/**

# Ignore unnecessary files inside allowed directories
# This should go after the allowed directories
Expand Down
29 changes: 29 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,32 @@ chown -R appuser:appuser /mnt/imap-data
```

You can also build a compiled linux executable with `./build-linux.sh` and a docker image with `./build-docker.sh`

### Using the CLI inside the docker container

```bash
# Using the imap-mag CLI:
docker run --entrypoint /bin/sh ghcr.io/imperialcollegelondon/imap-pipeline-core:local-dev -c "imap-mag hello world"
# Using the prefect CLI:
docker run --entrypoint /bin/bash -it --rm -e PREFECT_API_URL=http://prefect:4200/api --network mag-lab-data-platform ghcr.io/imperialcollegelondon/imap-pipeline-core:local-dev -c "prefect --version"
```

### Deploy to a full Prefect server using a docker container (e.g. from WSL)
From a linux host or WSL (i.e. not in a dev container) you can use the container image to run a deployment:
```bash

./pack.sh
./build-docker.sh

docker run -it --rm \
--network mag-lab-data-platform \
-e PREFECT_API_URL=http://prefect:4200/api \
-e IMAP_IMAGE_TAG=local-dev \
-e IMAP_VOLUMES=/mnt/imap-data/dev:/data \
--entrypoint /bin/bash \
ghcr.io/imperialcollegelondon/imap-pipeline-core:local-dev \
-c "python -c 'import prefect_server.workflow; prefect_server.workflow.deploy_flows()'"
```
2 changes: 1 addition & 1 deletion build-docker.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -e
CLI_TOOL="imap_mag"
TOOL_PYTHON_VERSION="${TOOL_PYTHON_VERSION:-python3.12}"
TOOL_PACKAGE="${TOOL_PACKAGE:-$CLI_TOOL-*.tar.gz}"
IMAGE_NAME="${IMAGE_NAME:-ghcr.io/imperialcollegelondon/imap-pipeline-core:latest-local-dev}"
IMAGE_NAME="${IMAGE_NAME:-ghcr.io/imperialcollegelondon/imap-pipeline-core:local-dev}"

if [ ! -f dist/$TOOL_PYTHON_VERSION/$TOOL_PACKAGE ]
then
Expand Down
11 changes: 8 additions & 3 deletions deploy/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ USER appuser

# first restore the python dependencies in a single cache layer - should get faster builds as this changes rarely
COPY dist/python${PYTHON_VERSION}/requirements.txt .
RUN python3 -m pip install --user -r requirements.txt
RUN python3 -m pip install --user --no-deps -r requirements.txt

# now install the imap-mag package as the user
COPY dist/python${PYTHON_VERSION}/${TOOL_PACKAGE} python${PYTHON_VERSION}/
Expand All @@ -39,10 +39,15 @@ RUN adduser -u 5678 --disabled-password --gecos "" appuser && \
chmod +x /app/entrypoint.sh

WORKDIR /app
USER appuser

# Make sure scripts in .local are usable:
USER appuser
ENV PATH="$PATH:/home/appuser/.local/bin"
ENV PREFECT_HOME=/app/.prefect
ENV PREFECT_PROFILES_PATH=/app/.prefect/profiles.toml

#include the prefect workflows
COPY src/ /app/


# Now the imap-mag CLI is available on the path and
# the 3.12 python package for imap-mag is installed at /home/appuser/.local/bin
Expand Down
32 changes: 15 additions & 17 deletions deploy/entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,37 +2,35 @@
set -e

echo "Starting IMAP MAG pipeline..."
sleep 20

imap-db create-db

imap-db upgrade-db

echo "DB admin complete"

while :
do
# delete all data
rm -rf /data/*
# delete all data
echo "Deleting all data - reset the datastore as this is just a test"
rm -rf /data/hk_l0
rm -rf /data/hk_l1
rm -rf /data/science
rm -rf /data/output

START_DATE='2025-05-02'
END_DATE='2025-05-03'
START_DATE='2025-05-02'
END_DATE='2025-05-03'

imap-mag fetch-binary --config config-hk-download.yaml --apid 1063 --start-date $START_DATE --end-date $END_DATE
imap-mag fetch-binary --config config-hk-download.yaml --apid 1063 --start-date $START_DATE --end-date $END_DATE

imap-mag process --config config-hk-process.yaml power.pkts
imap-mag process --config config-hk-process.yaml power.pkts

imap-mag fetch-science --level l1b --start-date $START_DATE --end-date $END_DATE --config config-sci.yaml
imap-mag fetch-science --level l1b --start-date $START_DATE --end-date $END_DATE --config config-sci.yaml

imap-db query-db
imap-db query-db

imap-mag calibrate --config calibration_config.yaml --method SpinAxisCalibrator imap_mag_l1b_norm-mago_20250511_v000.cdf
imap-mag calibrate --config calibration_config.yaml --method SpinAxisCalibrator imap_mag_l1b_norm-mago_20250511_v000.cdf

imap-mag apply --config calibration_application_config.yaml --calibration calibration.json imap_mag_l1b_norm-mago_20250511_v000.cdf
imap-mag apply --config calibration_application_config.yaml --calibration calibration.json imap_mag_l1b_norm-mago_20250511_v000.cdf

ls -l /data

sleep 3600 # 1 Hour
done
ls -l /data


2,217 changes: 2,091 additions & 126 deletions poetry.lock

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ pandas = "^2.2.2"
imap-data-access = "^0.7.0"
cdflib = "^1.3.1"
psycopg = {extras = ["binary"], version = "^3.2.1"}
prefect = {extras = ["shell"], version = "^3.1.7"}
prefect-docker = "^0.6.1"

[tool.poetry.group.dev.dependencies]
pytest = "^8.3.1"
Expand Down
146 changes: 146 additions & 0 deletions src/prefect_server/workflow.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import asyncio
import os
import sys
from datetime import datetime

import prefect
import prefect.blocks
import prefect.deployments
from prefect import deploy, flow, get_client, serve
from prefect.client.schemas.objects import (
ConcurrencyLimitConfig,
ConcurrencyLimitStrategy,
)
from prefect_shell import ShellOperation


class CONSTANTS:
    """Fixed names shared by the Prefect deployments defined in this module."""

    # Tag attached to the deployments created by deploy_flows().
    DEPLOYMENT_TAG = "NASA_IMAP"
    # Work pool the deployments are registered against.
    DEFAULT_WORKPOOL = "default-pool"


@flow(log_prints=True)
def run_imap_pipeline():
    """Prefect flow that runs ``./entrypoint.sh`` as a shell operation.

    The script is started with an extra ``today`` environment variable holding
    the current local date formatted as ``YYYYMMDD``. A message is printed
    before and after the script runs.
    """
    print("Starting IMAP pipeline")

    date_stamp = datetime.today().strftime("%Y%m%d")
    pipeline_script = ShellOperation(
        commands=["./entrypoint.sh"],
        env={"today": date_stamp},
    )
    pipeline_script.run()

    print("Finished IMAP pipeline")


async def setupOtherServerConfig():
    """Look up a global concurrency limit on the Prefect server and print it.

    Opens a Prefect client and reads the global concurrency limit named
    ``"not-a-name"``. A missing limit (``ObjectNotFound``) is treated as
    ``None``. Nothing is created or modified on the server by this function.
    """
    limit_name = "not-a-name"
    found_limit = None

    async with get_client() as api:
        try:
            found_limit = await api.read_global_concurrency_limit_by_name(
                limit_name
            )
        except prefect.exceptions.ObjectNotFound:
            # No limit is registered under that name; keep the None default.
            pass

        print(f"config: {found_limit}")


def get_cron_from_env(env_var_name: str, default: str | None = None) -> str | None:
cron = os.getenv(env_var_name, default)

if cron is None or cron == "":
return None
else:
cron = cron.strip(" '\"")
print(f"Using cron schedule: {env_var_name}={cron}")
return cron


def _split_csv(value: str) -> list[str]:
    """Split a comma separated string, trimming whitespace and dropping empty entries."""
    return [item.strip() for item in value.split(",") if item.strip()]


def deploy_flows(local_debug: bool = False):
    """Register the IMAP pipeline flow with Prefect.

    Args:
        local_debug: When true, hand the flow to ``prefect.serve`` without
            schedule or job variables. Otherwise create a deployment on the
            ``CONSTANTS.DEFAULT_WORKPOOL`` work pool using a prebuilt docker
            image (nothing is built or pushed).

    Environment variables read for the full deployment:
        IMAP_IMAGE: Docker image name without tag
            (default ``ghcr.io/imperialcollegelondon/imap-pipeline-core``).
        IMAP_IMAGE_TAG: Docker image tag (default ``main``).
        IMAP_VOLUMES: Comma separated docker volumes,
            e.g. ``/mnt/imap-data/dev:/data`` (default: none).
        DOCKER_NETWORK: Comma separated docker networks
            (default ``mag-lab-data-platform``).
        IMAP_CRON_HEALTHCHECK: Optional cron schedule for the deployment.
        WEBPODA_AUTH_CODE, SDC_AUTH_CODE, SQLALCHEMY_URL: Copied into the job
            environment as-is (``None`` when unset).

    Exits the process with status 1 when ``deploy`` returns a different number
    of ids than the number of deployments submitted.
    """
    # asyncio.run creates and closes its own event loop; relying on
    # get_event_loop() to create one implicitly is deprecated.
    asyncio.run(setupOtherServerConfig())

    imap_flow_name = "imappipeline"

    if local_debug:
        # Just serve the flow from this process, without params and schedules.
        serve(
            run_imap_pipeline.to_deployment(
                name=imap_flow_name,
            ),
        )
        return

    # Full prefect deployment with containers, work-pools, schedules etc.

    # Image name, may include a registry,
    # e.g. ghcr.io/imperialcollegelondon/imap-pipeline-core
    docker_image = os.getenv(
        "IMAP_IMAGE",
        "ghcr.io/imperialcollegelondon/imap-pipeline-core",
    )
    docker_tag = os.getenv(
        "IMAP_IMAGE_TAG",
        "main",
    )
    # Comma separated docker volumes, e.g. /mnt/imap-data/dev:/data
    docker_volumes = _split_csv(os.getenv("IMAP_VOLUMES", ""))
    # Comma separated docker networks, e.g. mag-lab-data-platform,some-other-network
    docker_networks = _split_csv(
        os.getenv(
            "DOCKER_NETWORK",
            "mag-lab-data-platform",
        )
    )

    shared_job_env_variables = dict(
        WEBPODA_AUTH_CODE=os.getenv("WEBPODA_AUTH_CODE"),
        SDC_AUTH_CODE=os.getenv("SDC_AUTH_CODE"),
        SQLALCHEMY_URL=os.getenv("SQLALCHEMY_URL"),
        PREFECT_LOGGING_EXTRA_LOGGERS="imap_mag,imap_db,mag_toolkit",
    )
    shared_job_variables = dict(
        env=shared_job_env_variables,
        image_pull_policy="IfNotPresent",
        networks=docker_networks,
        volumes=docker_volumes,
    )

    print(
        f"Deploying IMAP Pipeline to Prefect with docker {docker_image}:{docker_tag}\n Networks: {docker_networks}\n Volumes: {docker_volumes}"
    )

    imap_pipeline_deployable = run_imap_pipeline.to_deployment(
        name=imap_flow_name,
        cron=get_cron_from_env("IMAP_CRON_HEALTHCHECK"),
        job_variables=shared_job_variables,
        # Only one run at a time; a run that collides with an active one is cancelled.
        concurrency_limit=ConcurrencyLimitConfig(
            limit=1, collision_strategy=ConcurrencyLimitStrategy.CANCEL_NEW
        ),
        tags=[CONSTANTS.DEPLOYMENT_TAG],
    )

    deployables = (imap_pipeline_deployable,)

    deploy_ids = deploy(
        *deployables,
        work_pool_name=CONSTANTS.DEFAULT_WORKPOOL,
        build=False,
        push=False,
        image=f"{docker_image}:{docker_tag}",
    )

    if len(deploy_ids) != len(deployables):
        print(f"Incomplete deployment: {deploy_ids}")
        sys.exit(1)


if __name__ == "__main__":
    # `--local` as the first command line argument selects local debug mode;
    # anything else (or no argument) performs the full deployment.
    local_debug = "--local" in sys.argv[1:2]

    deploy_flows(local_debug=local_debug)

0 comments on commit e5d40cf

Please sign in to comment.