diff --git a/.github/workflows/run_pipelines_on_command.yml b/.github/workflows/run_pipelines_on_command.yml new file mode 100644 index 0000000..2051bf8 --- /dev/null +++ b/.github/workflows/run_pipelines_on_command.yml @@ -0,0 +1,76 @@ +name: Run Pipelines on Command + +on: + # Only trigger workflow manually from GitHub UI + workflow_dispatch: + # Optional: Add input parameters that can be set when triggering the workflow + inputs: + script_name: + description: 'Specific script to run (leave empty to run all)' + required: false + type: string + logLevel: + description: 'Log level' + required: true + default: 'warning' + type: choice + options: + - info + - warning + - debug + tags: + description: 'Runs Data Pipelines on command' + required: false + type: boolean + python_version: + description: 'Python version to use' + required: false + default: '3.11' + type: string + +jobs: + run-scripts: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python_version }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Run Python scripts + run: | + echo "Starting to run Python scripts (excluding utils.py)..." + for script in python_scripts/*.py; do + filename=$(basename "$script") + if [ "$filename" != "utils.py" ]; then + echo "----------------------------------------" + echo "Running $script..." 
+ python "$script" + echo "Finished running $script" + else + echo "Skipping utils.py" + fi + done + echo "----------------------------------------" + echo "All scripts completed" + + - name: Report execution status + if: always() + run: | + echo "Workflow execution completed" + # List every script this workflow targets (an earlier failure may have stopped the run before some executed) + echo "Scripts processed:" + for script in python_scripts/*.py; do + filename=$(basename "$script") + if [ "$filename" != "utils.py" ]; then + echo "- $filename" + fi + done \ No newline at end of file diff --git a/.github/workflows/run_pipelines_on_pr.yml b/.github/workflows/run_pipelines_on_pr.yml new file mode 100644 index 0000000..e69de29 diff --git a/.github/workflows/run_piplines_on_schedule.yml b/.github/workflows/run_piplines_on_schedule.yml new file mode 100644 index 0000000..4fa1bef --- /dev/null +++ b/.github/workflows/run_piplines_on_schedule.yml @@ -0,0 +1,78 @@ +name: Run Pipelines on Schedule + +on: + # Run at 00:00 UTC on the 1st of every month; can also be triggered manually from the GitHub UI + schedule: + - cron: '0 0 1 * *' + workflow_dispatch: + # Optional: Add input parameters that can be set when triggering the workflow + inputs: + script_name: + description: 'Specific script to run (leave empty to run all)' + required: false + type: string + logLevel: + description: 'Log level' + required: true + default: 'warning' + type: choice + options: + - info + - warning + - debug + tags: + description: 'Runs Data Pipelines on command' + required: false + type: boolean + python_version: + description: 'Python version to use' + required: false + default: '3.11' + type: string + +jobs: + run-scripts: + runs-on: ubuntu-latest + + steps: + - name: Check out repository + uses: actions/checkout@v3 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: ${{ inputs.python_version || '3.11' }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Run Python scripts + run: | + 
echo "Starting to run Python scripts (excluding utils.py)..." + for script in python_scripts/*.py; do + filename=$(basename "$script") + if [ "$filename" != "utils.py" ]; then + echo "----------------------------------------" + echo "Running $script..." + python "$script" + echo "Finished running $script" + else + echo "Skipping utils.py" + fi + done + echo "----------------------------------------" + echo "All scripts completed" + + - name: Report execution status + if: always() + run: | + echo "Workflow execution completed" + # List every script this workflow targets (an earlier failure may have stopped the run before some executed) + echo "Scripts processed:" + for script in python_scripts/*.py; do + filename=$(basename "$script") + if [ "$filename" != "utils.py" ]; then + echo "- $filename" + fi + done \ No newline at end of file diff --git a/python_scripts/bls-data.py b/python_scripts/bls_data.py similarity index 93% rename from python_scripts/bls-data.py rename to python_scripts/bls_data.py index 5aaf232..f9b6703 100644 --- a/python_scripts/bls-data.py +++ b/python_scripts/bls_data.py @@ -73,7 +73,7 @@ def get_housing_inventory(census_api_key): rows = response.json()[1:] # Create DataFrame - df = pl.DataFrame(rows, schema=columns) + df = pl.DataFrame(rows, schema=columns, orient="row") # appending the df to the main_df main_df = pl.concat([main_df, df]) @@ -106,20 +106,21 @@ def get_household_pulse(census_api_key): df = pl.DataFrame(data) main_df = pl.concat([main_df, df]) except Exception as e: - print("series doesnt exist") + print('series doesnt exist') print(cycle) print(e) break - break + break + print(f'fetched {len(main_df)} rows for housing pulse') drop_create_duck_db_table('housing_pulse', main_df) - return "housing pulse added to database" + return 'housing pulse added to database' def main(): - census_api_key = os.getenv("CENSUS_API_KEY") # Replace with your actual API key + census_api_key = os.getenv('CENSUS_API_KEY') # Read the Census API key from the environment print(get_housing_inventory(census_api_key)) 
print(get_household_pulse(census_api_key)) -if __name__ == "__main__": +if __name__ == '__main__': main() \ No newline at end of file diff --git a/python_scripts/fred-data.py b/python_scripts/fred_data.py similarity index 100% rename from python_scripts/fred-data.py rename to python_scripts/fred_data.py diff --git a/python_scripts/realtor-data.py b/python_scripts/realtor_data.py similarity index 100% rename from python_scripts/realtor-data.py rename to python_scripts/realtor_data.py diff --git a/python_scripts/utils.py b/python_scripts/utils.py index 747a48a..b775889 100644 --- a/python_scripts/utils.py +++ b/python_scripts/utils.py @@ -18,10 +18,11 @@ def drop_create_duck_db_table(table_name, df): conn.commit() except Exception as e: print(e) + raise finally: - - - conn.close() + # Only try to close if conn was successfully created + if conn is not None: + conn.close() return db_path