From c422e176dc5b27c7b4c2742b0891075e3b43a641 Mon Sep 17 00:00:00 2001 From: vincent Date: Sun, 31 Jan 2021 11:04:31 +0100 Subject: [PATCH 1/3] call command securely --- README.md | 2 +- datajob/package/wheel.py | 2 +- examples/data_pipeline_simple/README.md | 2 +- examples/data_pipeline_with_packaged_project/README.md | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index e824cd4..bf6f70f 100644 --- a/README.md +++ b/README.md @@ -67,8 +67,8 @@ The definition of our pipeline can be found in `examples/data_pipeline_simple/da Set the aws account number and the profile that contains your aws credentials (`~/.aws/credentials`) as environment variables: - export AWS_DEFAULT_ACCOUNT=my-account-number export AWS_PROFILE=my-profile + export AWS_DEFAULT_REGION=your-region # e.g. eu-west-1 Point to the configuration of the pipeline using `--config` and deploy diff --git a/datajob/package/wheel.py b/datajob/package/wheel.py index a3e8264..a607679 100644 --- a/datajob/package/wheel.py +++ b/datajob/package/wheel.py @@ -18,7 +18,7 @@ def create(project_root): if setup_py_file.is_file(): logger.debug(f"found a setup.py file in {project_root}") logger.debug("creating wheel for glue job") - cmd = f"cd {project_root}; python setup.py bdist_wheel" + cmd = f"cd {project_root} && python setup.py bdist_wheel" print(f"wheel command: {cmd}") # todo - shell=True is not secure subprocess.check_call(shlex.split(cmd)) diff --git a/examples/data_pipeline_simple/README.md b/examples/data_pipeline_simple/README.md index a92bbf0..354ae7b 100644 --- a/examples/data_pipeline_simple/README.md +++ b/examples/data_pipeline_simple/README.md @@ -8,7 +8,7 @@ The definition of the datajob can be found in `datajob_stack.py` # Deployment - export AWS_DEFAULT_ACCOUNT=my-account-number export AWS_PROFILE=my-profile + export AWS_DEFAULT_REGION=eu-west-1 cd examples/data_pipeline_simple datajob deploy --config datajob_stack.py diff --git a/examples/data_pipeline_with_packaged_project/README.md b/examples/data_pipeline_with_packaged_project/README.md index e490d6c..0abf819 100644 --- a/examples/data_pipeline_with_packaged_project/README.md +++ b/examples/data_pipeline_with_packaged_project/README.md @@ -17,7 +17,7 @@ Make sure you have configured a `setup.py` in the root of your poject. ## Deployment - export AWS_DEFAULT_ACCOUNT=my-account-number export AWS_PROFILE=my-profile + export AWS_DEFAULT_REGION=eu-west-1 cd examples/data_pipeline_with_packaged_project datajob deploy --stage dev --config datajob_stack.py --package From 7a687530348961eda73d4a3e4a0ab9b14143f77e Mon Sep 17 00:00:00 2001 From: vincent Date: Sun, 31 Jan 2021 16:50:07 +0100 Subject: [PATCH 2/3] call subprocess securely --- datajob/__init__.py | 7 +++ datajob/datajob.py | 2 +- datajob/glue/glue_job.py | 2 +- datajob/package/wheel.py | 54 +++++++++---------- .../stepfunctions/stepfunctions_workflow.py | 6 +-- datajob_tests/datajob_context_test.py | 1 + datajob_tests/datajob_stack_test.py | 1 + datajob_tests/datajob_test.py | 2 +- datajob_tests/glue/glue_job_test.py | 1 + .../stepfunctions_workflow_test.py | 2 +- 10 files changed, 41 insertions(+), 37 deletions(-) diff --git a/datajob/__init__.py b/datajob/__init__.py index d290541..930f27b 100644 --- a/datajob/__init__.py +++ b/datajob/__init__.py @@ -1,6 +1,8 @@ import logging import os import pathlib +import shlex +import subprocess from pathlib import Path ROOT_DIR = pathlib.Path(__file__).parent.absolute() @@ -16,3 +18,8 @@ logging.basicConfig(level=logging.getLevelName(log_level)) project_name = Path(__file__).parent.stem logger = logging.getLogger(project_name) + + +def call_subprocess(cmd: str) -> None: + print(f"datajob subprocess command: " f"{cmd}") + subprocess.check_call(shlex.split(cmd)) diff --git a/datajob/datajob.py b/datajob/datajob.py index 381b85b..37c57b6 100644 --- a/datajob/datajob.py +++ b/datajob/datajob.py @@ -1,10 +1,10 @@ import os import pathlib +import shlex import subprocess from pathlib import Path import typer -import shlex from datajob.package import wheel diff --git a/datajob/glue/glue_job.py b/datajob/glue/glue_job.py index 2ec5a30..5486476 100644 --- a/datajob/glue/glue_job.py +++ b/datajob/glue/glue_job.py @@ -1,5 +1,5 @@ -from pathlib import Path from enum import Enum +from pathlib import Path from aws_cdk import aws_glue as glue, core, aws_s3_deployment from aws_cdk import aws_iam as iam diff --git a/datajob/package/wheel.py b/datajob/package/wheel.py index e326a2a..63cbcc7 100644 --- a/datajob/package/wheel.py +++ b/datajob/package/wheel.py @@ -1,8 +1,6 @@ -import subprocess from pathlib import Path -import shlex -from datajob import logger +from datajob import logger, call_subprocess class DatajobPackageWheelError(Exception): @@ -27,16 +25,11 @@ def _setuppy_wheel(project_root: str) -> None: :param project_root: the path to the root of your project. :return: None """ - setup_py_file = Path(project_root, "setup.py") - if setup_py_file.is_file(): - logger.debug(f"found a setup.py file in {project_root}") - cmd = f"cd {project_root}; python setup.py bdist_wheel" - _call_create_wheel_command(cmd=cmd) - else: - raise DatajobPackageWheelError( - f"no setup.py file detected in project root {project_root}. " - f"Hence we cannot create a python wheel for this project" - ) + _execute_packaging_logic( + project_root=project_root, + config_file="setup.py", + cmd="python setup.py bdist_wheel", + ) def _poetry_wheel(project_root: str) -> None: @@ -45,25 +38,26 @@ def _poetry_wheel(project_root: str) -> None: :param project_root: the path to the root of your project. :return: None """ - poetry_file = Path(project_root, "pyproject.toml") - if poetry_file.is_file(): - logger.debug(f"found a pyproject.toml file in {project_root}") - cmd = f"cd {project_root}; poetry build" - _call_create_wheel_command(cmd=cmd) - else: - raise DatajobPackageWheelError( - f"no pyproject.toml file detected in project root {project_root}. " - f"Hence we cannot create a python wheel for this project" - ) + _execute_packaging_logic( + project_root=project_root, config_file="pyproject.toml", cmd="poetry build" + ) -def _call_create_wheel_command(cmd: str) -> None: +def _execute_packaging_logic(project_root: str, config_file: str, cmd: str) -> None: """ - shell out and call the command to create the wheel. - :param cmd: the command to create a wheel + + :param project_root: the path to the root of your project. + :param config_file: the confgi file to package the project as a wheel (setup.py or pyproject.toml) + :param cmd: the command to execute to create a wheel. :return: None """ - logger.debug("creating wheel") - print(f"wheel command: {cmd}") - # todo - shell=True is not secure - subprocess.call(cmd, shell=True) + config_file_full_path = Path(project_root, config_file) + logger.info(f"expecting {config_file_full_path}") + if config_file_full_path.is_file(): + logger.debug(f"found a {config_file} file in {project_root}") + call_subprocess(cmd=cmd) + else: + raise DatajobPackageWheelError( + f"no {config_file} file detected in project root {project_root}. " + f"Hence we cannot create a python wheel for this project" + ) diff --git a/datajob/stepfunctions/stepfunctions_workflow.py b/datajob/stepfunctions/stepfunctions_workflow.py index 3667f33..1512c8e 100644 --- a/datajob/stepfunctions/stepfunctions_workflow.py +++ b/datajob/stepfunctions/stepfunctions_workflow.py @@ -1,10 +1,10 @@ -import contextvars +import os import tempfile import uuid -import os -import boto3 from pathlib import Path +import boto3 +import contextvars from aws_cdk import aws_iam as iam from aws_cdk import cloudformation_include as cfn_inc from aws_cdk import core diff --git a/datajob_tests/datajob_context_test.py b/datajob_tests/datajob_context_test.py index 5077fa0..c0a6d0f 100644 --- a/datajob_tests/datajob_context_test.py +++ b/datajob_tests/datajob_context_test.py @@ -1,4 +1,5 @@ import unittest + from datajob.datajob_stack import DataJobStack, DatajobContext diff --git a/datajob_tests/datajob_stack_test.py b/datajob_tests/datajob_stack_test.py index 06d7196..84e997c 100644 --- a/datajob_tests/datajob_stack_test.py +++ b/datajob_tests/datajob_stack_test.py @@ -1,4 +1,5 @@ import unittest + from datajob.datajob_stack import DataJobStack diff --git a/datajob_tests/datajob_test.py b/datajob_tests/datajob_test.py index 328a3af..d633d4d 100644 --- a/datajob_tests/datajob_test.py +++ b/datajob_tests/datajob_test.py @@ -1,6 +1,6 @@ import unittest - from unittest.mock import patch + from typer.testing import CliRunner from datajob import datajob diff --git a/datajob_tests/glue/glue_job_test.py b/datajob_tests/glue/glue_job_test.py index a56999d..5dda290 100644 --- a/datajob_tests/glue/glue_job_test.py +++ b/datajob_tests/glue/glue_job_test.py @@ -1,4 +1,5 @@ import unittest + from datajob.datajob_stack import DataJobStack from datajob.glue.glue_job import GlueJob, GlueJobType diff --git a/datajob_tests/stepfunctions/stepfunctions_workflow_test.py b/datajob_tests/stepfunctions/stepfunctions_workflow_test.py index 463f144..6e0e035 100644 --- a/datajob_tests/stepfunctions/stepfunctions_workflow_test.py +++ b/datajob_tests/stepfunctions/stepfunctions_workflow_test.py @@ -1,5 +1,5 @@ -import unittest import os +import unittest from moto import mock_stepfunctions from stepfunctions.steps.compute import GlueStartJobRunStep From 8f5e70540e39e62984e79fb7116b76788048ab9c Mon Sep 17 00:00:00 2001 From: vincent Date: Sun, 31 Jan 2021 17:02:14 +0100 Subject: [PATCH 3/3] update documentation --- datajob/__init__.py | 6 ++++++ datajob/package/wheel.py | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/datajob/__init__.py b/datajob/__init__.py index 930f27b..4dbdbb2 100644 --- a/datajob/__init__.py +++ b/datajob/__init__.py @@ -21,5 +21,11 @@ def call_subprocess(cmd: str) -> None: + """ + call a command as a subprocess in a secure way. + https://stackoverflow.com/a/59090212/1771155 + :param cmd: the command to execute + :return: None + """ print(f"datajob subprocess command: " f"{cmd}") subprocess.check_call(shlex.split(cmd)) diff --git a/datajob/package/wheel.py b/datajob/package/wheel.py index 63cbcc7..554a4e1 100644 --- a/datajob/package/wheel.py +++ b/datajob/package/wheel.py @@ -45,7 +45,8 @@ def _poetry_wheel(project_root: str) -> None: def _execute_packaging_logic(project_root: str, config_file: str, cmd: str) -> None: """ - + check if the config file exists in the project root and execute the command to + create a wheel. :param project_root: the path to the root of your project. :param config_file: the confgi file to package the project as a wheel (setup.py or pyproject.toml) :param cmd: the command to execute to create a wheel.