Skip to content

Commit

Permalink
add github actions and cleanup pipeline scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
C00ldudeNoonan committed Nov 3, 2024
1 parent 8a6c51c commit e4a7213
Show file tree
Hide file tree
Showing 7 changed files with 165 additions and 9 deletions.
76 changes: 76 additions & 0 deletions .github/workflows/run_pipelines_on_command.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
name: Run Pipelines on Command

# Runs the Python pipeline scripts under python_scripts/ when triggered by hand.
# Every *.py file except utils.py is executed, unless the script_name input
# selects a single script.
on:
  # Only trigger workflow manually from GitHub UI
  workflow_dispatch:
    # Optional input parameters that can be set when triggering the workflow
    inputs:
      script_name:
        description: 'Specific script to run (leave empty to run all)'
        required: false
        type: string
      # NOTE(review): logLevel and tags are accepted but no step below reads them.
      logLevel:
        description: 'Log level'
        required: true
        default: 'warning'
        type: choice
        options:
          - info
          - warning
          - debug
      tags:
        description: 'Runs Data Pipelines on command'
        required: false
        type: boolean
      python_version:
        description: 'Python version to use'
        required: false
        default: '3.11'
        type: string

jobs:
  run-scripts:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Fall back to the documented default if the input arrives empty.
          python-version: ${{ inputs.python_version || '3.11' }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Run Python scripts
        env:
          # Handed over via the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell code.
          SCRIPT_NAME: ${{ inputs.script_name }}
          # python_scripts/bls_data.py reads CENSUS_API_KEY with os.getenv.
          # Assumes a repository secret of the same name exists - TODO confirm.
          CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }}
        run: |
          # Names of scripts that finish successfully; read by the report step.
          run_log="$RUNNER_TEMP/scripts_run.txt"
          : > "$run_log"

          if [ -n "$SCRIPT_NAME" ]; then
            # Accept the name with or without ".py"; basename drops any directory part.
            target="python_scripts/$(basename -- "$SCRIPT_NAME" .py).py"
            if [ ! -f "$target" ]; then
              echo "::error::Script not found: $target"
              exit 1
            fi
            scripts=("$target")
          else
            # nullglob: an unmatched pattern expands to nothing instead of itself.
            shopt -s nullglob
            scripts=(python_scripts/*.py)
          fi

          if [ "${#scripts[@]}" -eq 0 ]; then
            echo "::error::No Python scripts found in python_scripts/"
            exit 1
          fi

          echo "Starting to run Python scripts (excluding utils.py)..."
          for script in "${scripts[@]}"; do
            filename=$(basename "$script")
            if [ "$filename" != "utils.py" ]; then
              echo "----------------------------------------"
              echo "Running $script..."
              python "$script"
              echo "Finished running $script"
              echo "$filename" >> "$run_log"
            else
              echo "Skipping utils.py"
            fi
          done
          echo "----------------------------------------"
          echo "All scripts completed"

      - name: Report execution status
        if: always()
        run: |
          echo "Workflow execution completed"
          # List the scripts that actually finished (recorded by the run step)
          echo "Scripts processed:"
          run_log="$RUNNER_TEMP/scripts_run.txt"
          if [ -s "$run_log" ]; then
            while IFS= read -r filename; do
              echo "- $filename"
            done < "$run_log"
          else
            echo "(none)"
          fi
Empty file.
78 changes: 78 additions & 0 deletions .github/workflows/run_piplines_on_schedule.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
name: Run Pipelines on Schedule

# Runs the Python pipeline scripts under python_scripts/ on a monthly schedule,
# and can also be started by hand. Every *.py file except utils.py is executed,
# unless the script_name input selects a single script.
on:
  # Scheduled run: 00:00 UTC on the first day of every month
  schedule:
    - cron: '0 0 1 * *'
  # Manual run from the GitHub UI
  workflow_dispatch:
    # Optional input parameters that can be set when triggering the workflow.
    # They are empty on scheduled runs, so every use below needs a fallback.
    inputs:
      script_name:
        description: 'Specific script to run (leave empty to run all)'
        required: false
        type: string
      # NOTE(review): logLevel and tags are accepted but no step below reads them.
      logLevel:
        description: 'Log level'
        required: true
        default: 'warning'
        type: choice
        options:
          - info
          - warning
          - debug
      tags:
        description: 'Runs Data Pipelines on command'
        required: false
        type: boolean
      python_version:
        description: 'Python version to use'
        required: false
        default: '3.11'
        type: string

jobs:
  run-scripts:
    runs-on: ubuntu-latest

    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Scheduled runs carry no inputs, so supply the default explicitly.
          python-version: ${{ inputs.python_version || '3.11' }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi

      - name: Run Python scripts
        env:
          # Handed over via the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell code.
          # Empty on scheduled runs, which means "run everything".
          SCRIPT_NAME: ${{ inputs.script_name }}
          # python_scripts/bls_data.py reads CENSUS_API_KEY with os.getenv.
          # Assumes a repository secret of the same name exists - TODO confirm.
          CENSUS_API_KEY: ${{ secrets.CENSUS_API_KEY }}
        run: |
          # Names of scripts that finish successfully; read by the report step.
          run_log="$RUNNER_TEMP/scripts_run.txt"
          : > "$run_log"

          if [ -n "$SCRIPT_NAME" ]; then
            # Accept the name with or without ".py"; basename drops any directory part.
            target="python_scripts/$(basename -- "$SCRIPT_NAME" .py).py"
            if [ ! -f "$target" ]; then
              echo "::error::Script not found: $target"
              exit 1
            fi
            scripts=("$target")
          else
            # nullglob: an unmatched pattern expands to nothing instead of itself.
            shopt -s nullglob
            scripts=(python_scripts/*.py)
          fi

          if [ "${#scripts[@]}" -eq 0 ]; then
            echo "::error::No Python scripts found in python_scripts/"
            exit 1
          fi

          echo "Starting to run Python scripts (excluding utils.py)..."
          for script in "${scripts[@]}"; do
            filename=$(basename "$script")
            if [ "$filename" != "utils.py" ]; then
              echo "----------------------------------------"
              echo "Running $script..."
              python "$script"
              echo "Finished running $script"
              echo "$filename" >> "$run_log"
            else
              echo "Skipping utils.py"
            fi
          done
          echo "----------------------------------------"
          echo "All scripts completed"

      - name: Report execution status
        if: always()
        run: |
          echo "Workflow execution completed"
          # List the scripts that actually finished (recorded by the run step)
          echo "Scripts processed:"
          run_log="$RUNNER_TEMP/scripts_run.txt"
          if [ -s "$run_log" ]; then
            while IFS= read -r filename; do
              echo "- $filename"
            done < "$run_log"
          else
            echo "(none)"
          fi
13 changes: 7 additions & 6 deletions python_scripts/bls-data.py → python_scripts/bls_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def get_housing_inventory(census_api_key):
rows = response.json()[1:]

# Create DataFrame
df = pl.DataFrame(rows, schema=columns)
df = pl.DataFrame(rows, schema=columns, orient="row")
# appending the df to the main_df
main_df = pl.concat([main_df, df])

Expand Down Expand Up @@ -106,20 +106,21 @@ def get_household_pulse(census_api_key):
df = pl.DataFrame(data)
main_df = pl.concat([main_df, df])
except Exception as e:
print("series doesnt exist")
print('series doesnt exist')
print(cycle)
print(e)
break

break
break
print(f'fetched {len(main_df)} rows for housing pulse')
drop_create_duck_db_table('housing_pulse', main_df)

return "housing pulse added to database"
return 'housing pulse added to database'

def main():
census_api_key = os.getenv("CENSUS_API_KEY") # Replace with your actual API key
census_api_key = os.getenv('CENSUS_API_KEY') # Replace with your actual API key
print(get_housing_inventory(census_api_key))
print(get_household_pulse(census_api_key))

if __name__ == "__main__":
if __name__ == '__main__':
main()
File renamed without changes.
File renamed without changes.
7 changes: 4 additions & 3 deletions python_scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,11 @@ def drop_create_duck_db_table(table_name, df):
conn.commit()
except Exception as e:
print(e)
raise
finally:


conn.close()
# Only try to close if conn was successfully created
if conn is not None:
conn.close()

return db_path

Expand Down

0 comments on commit e4a7213

Please sign in to comment.