# Meltano Pipeline
#
# Workflow file for run "Meltano Pipeline #1900".

name: Meltano Pipeline

# Triggers: pull requests and pushes targeting main (limited to the paths
# listed below), manual dispatch, and a daily schedule.
on:
  pull_request:
    branches:
      - main
    paths:
      - 'logging/**'
      - 'meltano.yml'
      - 'pyproject.toml'
      - 'plugins/**/*.lock'
      - 'transform/models/**/*.sql'
      - 'transform/models/**/*.yml'
      - '.github/workflows/melty.yaml'
      - 'evidence/sources/**/*.sql'
      - 'evidence/pages/**/*.md'
      - 'evidence/package.json'
      - 'evidence/package-lock.json'
  push:
    branches:
      - main
    paths:
      - 'logging/**'
      - 'meltano.yml'
      - 'pyproject.toml'
      - 'plugins/**/*.lock'
      - 'transform/models/**/*.sql'
      - 'transform/models/**/*.yml'
      - '.github/workflows/melty.yaml'
      - 'evidence/sources/**/*.sql'
      - 'evidence/pages/**/*.md'
      - 'evidence/package.json'
      - 'evidence/package-lock.json'
  workflow_dispatch:
    inputs:
      # Feeds MELTANO_ENVIRONMENT for manual runs.
      environment:
        description: Meltano Environment
        required: true
        default: prod
      logConfig:
        description: Logging configuration file
        required: true
        default: logging/dev.yaml
        type: choice
        options:
          - logging/dev.yaml
      # Appends `--force` to `meltano run` when enabled.
      forceRun:
        description: Force run the pipeline
        required: false
        default: false
        type: boolean
  schedule:
    # Once a day at 12:00 UTC (around 6am CST).
    - cron: '0 12 * * *'
# Environment variables shared by every job in this workflow.
env:
  DUCKDB_PATH: ${{ secrets.DUCKDB_PATH }}
  FORCE_COLOR: "1"
  # Manual runs take the logging config from the `logConfig` input; all other
  # triggers carry no inputs and fall back to the CI logging configuration.
  # (Previously this read the `environment` input, which holds a Meltano
  # environment name such as `prod`, not a logging config path.)
  MELTANO_CLI_LOG_CONFIG: ${{ github.event.inputs.logConfig || 'logging/ci.yaml' }}
  MELTANO_DATABASE_URI: ${{ secrets.MELTANO_DATABASE_URI }}
  # Manual runs may choose the environment; everything else targets `prod`.
  MELTANO_ENVIRONMENT: ${{ github.event.inputs.environment || 'prod' }}
  MOTHERDUCK_TOKEN: ${{ secrets.MOTHERDUCK_TOKEN }}
  # Same secret as MOTHERDUCK_TOKEN, exposed under a second variable name.
  EVIDENCE_SOURCE__motherduck_personal__token: ${{ secrets.MOTHERDUCK_TOKEN }}
  # Quoted so the date stays a string instead of being parsed as a YAML date.
  UV_EXCLUDE_NEWER: "2024-12-16"
# All runs share the single `meltano` group, so starting a new run cancels
# whichever run of this workflow is still in progress.
concurrency:
  group: meltano
  cancel-in-progress: true
# Job graph:
#   pkl -> ping-start -> (meltano-run, lint) -> transform
#       -> build-evidence -> deploy-evidence -> ping-end
jobs:
  # Regenerates meltano.yml from pkl/main.pkl and fails if the result differs
  # from the committed file.
  pkl:
    name: Check stale Pkl output
    runs-on: ubuntu-24.04
    steps:
      - uses: actions/checkout@v4
      - name: Install Pkl
        uses: pkl-community/setup-pkl@v0
        with:
          pkl-version: 0.27.1
      - name: Check stale pkl output
        run: pkl eval pkl/main.pkl --format=yaml > meltano.yml
      - name: Check if meltano.yml is different
        # `--exit-code` makes git return non-zero when there is a diff.
        run: git diff --exit-code meltano.yml

  ping-start:
    name: Ping the start of the pipeline
    runs-on: ubuntu-latest
    needs: [pkl]
    steps:
      - name: Ping
        # Only ping the start of the pipeline if the event is a scheduled run
        # or a push to main. The condition sits on the step, not the job, so
        # the job still completes and the jobs that `needs` it are not skipped.
        if: github.event_name == 'schedule' || github.event_name == 'push'
        run: |
          curl https://hc-ping.com/${{ secrets.HEALTHCHECK_ID }}/start

  # One matrix entry per Meltano job; each runs inside the Meltano image.
  meltano-run:
    name: "Meltano Job: ${{ matrix.job }}"
    runs-on: ubuntu-latest
    needs: [ping-start]
    container:
      # Matrix entries without `python-version` use the 3.13 image.
      image: ghcr.io/meltano/meltano:latest-python${{ matrix.python-version || '3.13' }}
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GHCR_IO_TOKEN }}
    # NOTE(review): the original nesting of this `env` block (job-level vs.
    # `container.env`) could not be recovered - confirm the intended placement.
    env:
      TAP_GETPOCKET_CONSUMER_KEY: ${{ secrets.TAP_GETPOCKET_CONSUMER_KEY }}
      TAP_GETPOCKET_ACCESS_TOKEN: ${{ secrets.TAP_GETPOCKET_ACCESS_TOKEN }}
      TAP_GITHUB_AUTH_TOKEN: ${{ secrets.TAP_GITHUB_AUTH_TOKEN }}
      TAP_READTHEDOCS_TOKEN: ${{ secrets.TAP_READTHEDOCS_TOKEN }}
      TAP_STACKEXCHANGE_KEY: ${{ secrets.TAP_STACKEXCHANGE_KEY }}
      TARGET_DUCKDB_PATH: ${{ secrets.TARGET_DUCKDB_PATH }}
    strategy:
      # Let the remaining matrix entries finish when one of them fails.
      fail-fast: false
      matrix:
        job:
          - "pocket-to-duckdb"
          - "stackoverflow-to-duckdb"
          - "rtd-to-duckdb"
        ok_to_skip: [false]
        include:
          - job: "pypistats-to-duckdb"
            ok_to_skip: true
          - job: "gh-to-duckdb"
            python-version: "3.12"
    steps:
      - uses: actions/checkout@v4
      - name: Check Meltano
        run: |
          meltano --version
      - name: Run
        # The `gh-to-duckdb` include entry does not define `ok_to_skip`;
        # fall back to `false` so this always evaluates to a boolean.
        continue-on-error: ${{ matrix.ok_to_skip || false }}
        # `github.event.inputs` delivers booleans as the strings 'true' and
        # 'false', both of which are truthy, so compare against 'true'
        # explicitly instead of testing the value directly.
        run: |
          meltano run ${{ matrix.job }} ${{ github.event.inputs.forceRun == 'true' && '--force' || '' }}
      - name: Upload metrics log
        uses: actions/upload-artifact@v4
        with:
          name: singer-metrics-${{ matrix.job }}
          path: logs/singer_metrics.log

  lint:
    name: Lint models
    runs-on: ubuntu-latest
    # A failing lint job does not fail the workflow run.
    continue-on-error: true
    needs: [ping-start]
    container:
      image: ghcr.io/meltano/meltano:latest-python3.13
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GHCR_IO_TOKEN }}
    # NOTE(review): the original nesting of this `env` block (job-level vs.
    # `container.env`) could not be recovered - confirm the intended placement.
    env:
      DUCKDB_PATH: ${{ secrets.DUCKDB_PATH }}
    steps:
      - uses: actions/checkout@v4
      - name: Check Meltano
        run: |
          meltano --version
      - name: Lint
        env:
          MELTANO_CLI_LOG_LEVEL: info
        run: |
          meltano invoke sqlfluff:lint --format github-annotation-native -v transform/models evidence/sources

  transform:
    name: Data Transformation with dbt-duckdb
    runs-on: ubuntu-latest
    needs: [meltano-run, lint]
    container:
      image: ghcr.io/meltano/meltano:latest-python3.13
      credentials:
        username: ${{ github.actor }}
        password: ${{ secrets.GHCR_IO_TOKEN }}
    steps:
      - uses: actions/checkout@v4
      - name: Check Meltano
        run: |
          meltano --version
      - name: Transform and Test
        run: |
          meltano run dbt-duckdb:build

  # TODO: Uncomment this block to build the Evidence site with Meltano
  # (it reuses the `build-evidence` key, so it would replace the Node-based
  # job defined below rather than run alongside it).
  # build-evidence:
  #   name: Build Evidence site locally
  #   needs: [transform]
  #   runs-on: ubuntu-latest
  #   container:
  #     image: ghcr.io/meltano/meltano:latest-python3.13
  #     credentials:
  #       username: ${{ github.actor }}
  #       password: ${{ secrets.GHCR_IO_TOKEN }}
  #   steps:
  #     - uses: actions/checkout@v4
  #     - name: Check Meltano
  #       run: |
  #         meltano --version
  #     - name: Install evidence
  #       run: |
  #         meltano install transform dbt-duckdb
  #         meltano install utility evidence
  #     - name: Build
  #       run: |
  #         meltano invoke evidence npm install
  #         meltano invoke evidence npm run sources
  #         meltano invoke evidence build --strict
  #     - name: Upload Evidence build
  #       uses: actions/upload-artifact@v4
  #       with:
  #         name: evidence-build
  #         path: evidence/build

  build-evidence:
    name: Build Evidence site locally using Node
    needs: [transform]
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-node@v4
        with:
          node-version: '18'
          cache: npm
          cache-dependency-path: evidence/package-lock.json
      - name: Build Evidence site
        run: |
          cd evidence
          npm install
          npm run sources
          npm run build:strict
      - name: Upload Evidence build
        uses: actions/upload-artifact@v4
        with:
          name: evidence-build
          path: evidence/build

  deploy-evidence:
    name: Deploy Evidence site
    needs: [build-evidence]
    runs-on: ubuntu-latest
    permissions:
      pull-requests: write
    steps:
      - uses: actions/checkout@v4
      - name: Set Git config
        # NOTE(review): this job declares no `container`; confirm the
        # `/__w/...` safe.directory entry is still needed here.
        run: |
          git config --global --add safe.directory /__w/meltano-dataops/meltano-dataops
      - name: Download Evidence build
        uses: actions/download-artifact@v4
        with:
          name: evidence-build
          path: evidence/build
      - name: Deploy
        # NOTE(review): the version ref was obfuscated in the captured source
        # ("[email protected]"); `@v25` is a best-guess restoration - confirm
        # the exact tag against the repository history.
        uses: amondnet/vercel-action@v25
        with:
          # Comment on the pull request only for pull_request events.
          github-comment: ${{ github.event_name == 'pull_request' && 'true' || 'false' }}
          github-token: ${{ secrets.GITHUB_TOKEN }}
          vercel-token: ${{ secrets.VERCEL_TOKEN }}
          vercel-org-id: ${{ secrets.VERCEL_ORG_ID }}
          vercel-project-id: ${{ secrets.VERCEL_PROJECT_ID }}
          # Pull requests deploy without `--prod`; every other event deploys
          # with it. The non-empty value must be the `&&` branch: with
          # `cond && '' || '--prod'` the empty string is falsy, so the
          # expression would always evaluate to '--prod'.
          vercel-args: ${{ github.event_name != 'pull_request' && '--prod' || '' }}

  ping-end:
    name: Ping the end of the pipeline
    runs-on: ubuntu-latest
    needs: [deploy-evidence]
    # Same events as the start ping: scheduled runs and pushes to main.
    if: github.event_name == 'schedule' || github.event_name == 'push'
    steps:
      - name: Ping
        run: |
          curl https://hc-ping.com/${{ secrets.HEALTHCHECK_ID }}