diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
new file mode 100644
index 0000000..7826d49
--- /dev/null
+++ b/.github/CODEOWNERS
@@ -0,0 +1 @@
+jlloyd@widen.com
\ No newline at end of file
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..b320028
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,32 @@
+name: CI (Testing/Linting) Workflow
+
+on: [push]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.7", "3.8", "3.9"]
+    steps:
+      - uses: actions/checkout@v2
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v2
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install pipx
+          python -m pipx ensurepath
+          python -m pipx install poetry
+          # Force update PATH to include pipx executables
+          export PATH=$PATH:/root/.local/bin
+          # Create virtual environment and install dependencies
+          poetry env use python
+          poetry install --no-root
+      - name: Test and lint the code
+        run: |
+          poetry run tox -e py
+
+
+
diff --git a/.github/workflows/releases.yml b/.github/workflows/releases.yml
new file mode 100644
index 0000000..da9a164
--- /dev/null
+++ b/.github/workflows/releases.yml
@@ -0,0 +1,34 @@
+name: Publish New Releases to Pypi
+
+on:
+  release:
+    types: [published]
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v2
+    - name: Set up Python
+      uses: actions/setup-python@v2
+      with:
+        python-version: '3.x'
+    - name: Install dependencies
+      run: |
+        python -m pip install pipx
+        python -m pipx ensurepath
+        python -m pipx install poetry
+        # Force update PATH to include pipx executables
+        export PATH=$PATH:/root/.local/bin
+        # Create virtual environment and install dependencies
+        poetry env use python
+        poetry install --no-root
+    - name: Build package
+      run: poetry build
+    - name: Publish package
+      uses: pypa/gh-action-pypi-publish@release/v1
+      with:
+        user: __token__
+        password: ${{ secrets.PYPI_API_TOKEN_PROD }}
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
deleted file mode 100644
index 3a0fc27..0000000
--- a/.gitlab-ci.yml
+++ /dev/null
@@ -1,28 +0,0 @@
----
-variables:
-  PYTHON_IMAGE_TAG: 3.9.7
-stages:
-  - build
-  - deploy
-
-build_package:
-  stage: build
-  image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/python:$PYTHON_IMAGE_TAG
-  artifacts:
-    paths:
-      - dist
-  script:
-    - curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python3 -
-    - export PATH=$HOME/.local/bin:$PATH
-    - poetry build
-
-deploy_package:
-  stage: deploy
-  image: ${CI_DEPENDENCY_PROXY_GROUP_IMAGE_PREFIX}/python:$PYTHON_IMAGE_TAG
-  rules:
-    - if: '$CI_COMMIT_TAG =~ /^v\d+\.\d+\.\d+$/'
-  script:
-    - curl -sSL https://raw.githubusercontent.com/python-poetry/poetry/master/install-poetry.py | python3 -
-    - export PATH=$HOME/.local/bin:$PATH
-    - poetry config repositories.privatepypi ${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/packages/pypi
-    - poetry publish --repository privatepypi --username gitlab-ci-token --password ${CI_JOB_TOKEN}
diff --git a/README.md b/README.md
index ac7ed30..cd99541 100644
--- a/README.md
+++ b/README.md
@@ -138,7 +138,7 @@ poetry install
 
 ### Create and Run Tests
 
-Create tests within the `tap_rest_api_msdk/tests` subfolder and
+Create tests within the `tests/` directory and
 then run:
 
 ```bash
@@ -151,6 +151,15 @@ You can also test the `tap-rest-api-msdk` CLI interface directly using `poetry r
 poetry run tap-rest-api-msdk --help
 ```
 
+### Continuous Integration
+Run through the full suite of tests and linters by running
+
+```bash
+poetry run tox -e py
+```
+
+These must pass in order for PR's to be merged.
+
 ### Testing with [Meltano](https://www.meltano.com)
 
 _**Note:** This tap will work in any Singer environment and does not require Meltano.
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..0311381
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,35 @@
+[mypy]
+python_version = 3.8
+warn_unused_configs = True
+warn_return_any = True
+exclude = tests
+
+[mypy-singer.*]
+# Library 'pipelinewise-singer-tools' does not have type hints:
+# - https://github.com/transferwise/pipelinewise-singer-python/issues/25
+ignore_missing_imports = True
+
+[mypy-backoff.*]
+# Frozen due to pipelinewise-singer-tools dependency
+ignore_missing_imports = True
+
+[mypy-bcrypt.*]
+ignore_missing_imports = True
+
+[mypy-joblib.*]
+ignore_missing_imports = True
+
+[mypy-pyarrow.*]
+ignore_missing_imports = True
+
+[mypy-pandas.*]
+ignore_missing_imports = True
+
+[mypy-jsonschema.*]
+ignore_missing_imports = True
+
+[mypy-jsonpath_ng.*]
+ignore_missing_imports = True
+
+[mypy-genson.*]
+ignore_missing_imports = True
diff --git a/poetry.lock b/poetry.lock
index ada2aa3..48c50bd 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -260,6 +260,20 @@ category = "dev"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "isort"
+version = "5.10.1"
+description = "A Python utility / library to sort Python imports."
+category = "dev"
+optional = false
+python-versions = ">=3.6.1,<4.0"
+
+[package.extras]
+pipfile_deprecated_finder = ["pipreqs", "requirementslib"]
+requirements_deprecated_finder = ["pipreqs", "pip-api"]
+colors = ["colorama (>=0.4.3,<0.5.0)"]
+plugins = ["setuptools"]
+
 [[package]]
 name = "joblib"
 version = "1.1.0"
@@ -576,6 +590,22 @@ urllib3 = ">=1.21.1,<1.27"
 socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"]
 use_chardet_on_py3 = ["chardet (>=3.0.2,<5)"]
 
+[[package]]
+name = "requests-mock"
+version = "1.9.3"
+description = "Mock out responses from the requests package"
+category = "dev"
+optional = false
+python-versions = "*"
+
+[package.dependencies]
+requests = ">=2.3,<3"
+six = "*"
+
+[package.extras]
+fixture = ["fixtures"]
+test = ["fixtures", "mock", "purl", "pytest", "sphinx", "testrepository (>=0.0.18)", "testtools"]
+
 [[package]]
 name = "simplejson"
 version = "3.11.1"
@@ -737,7 +767,7 @@ testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytes
 
 [metadata]
 lock-version = "1.1"
 python-versions = "<3.10,>=3.6.2"
-content-hash = "1ddae9949f4c16e7c79795332a427278ca90edced2e870e0ce585557d8c550c7"
+content-hash = "d197dcfa8f5d0b9bd7b7837f3c0b4d96eaf058f6090bef11119b3304906be4d5"
 
 [metadata.files]
 atomicwrites = [
@@ -895,6 +925,10 @@ iniconfig = [
     {file = "iniconfig-1.1.1-py2.py3-none-any.whl", hash = "sha256:011e24c64b7f47f6ebd835bb12a743f2fbe9a26d4cecaa7f53bc4f35ee9da8b3"},
     {file = "iniconfig-1.1.1.tar.gz", hash = "sha256:bc3af051d7d14b2ee5ef9969666def0cd1a000e121eaea580d4a313df4b37f32"},
 ]
+isort = [
+    {file = "isort-5.10.1-py3-none-any.whl", hash = "sha256:6f62d78e2f89b4500b080fe3a81690850cd254227f27f75c3a0c491a1f351ba7"},
+    {file = "isort-5.10.1.tar.gz", hash = "sha256:e8443a5e7a020e9d7f97f1d7d9cd17c88bcb3bc7e218bf9cf5095fe550be2951"},
+]
 joblib = [
     {file = "joblib-1.1.0-py2.py3-none-any.whl", hash = "sha256:f21f109b3c7ff9d95f8387f752d0d9c34a02aa2f7060c2135f465da0e5160ff6"},
     {file = "joblib-1.1.0.tar.gz", hash = "sha256:4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35"},
@@ -1112,6 +1146,10 @@ requests = [
     {file = "requests-2.26.0-py2.py3-none-any.whl", hash = "sha256:6c1246513ecd5ecd4528a0906f910e8f0f9c6b8ec72030dc9fd154dc1a6efd24"},
     {file = "requests-2.26.0.tar.gz", hash = "sha256:b8aa58f8cf793ffd8782d3d8cb19e66ef36f7aba4353eec859e74678b01b07a7"},
 ]
+requests-mock = [
+    {file = "requests-mock-1.9.3.tar.gz", hash = "sha256:8d72abe54546c1fc9696fa1516672f1031d72a55a1d66c85184f972a24ba0eba"},
+    {file = "requests_mock-1.9.3-py2.py3-none-any.whl", hash = "sha256:0a2d38a117c08bb78939ec163522976ad59a6b7fdd82b709e23bb98004a44970"},
+]
 simplejson = [
     {file = "simplejson-3.11.1-cp27-cp27m-win32.whl", hash = "sha256:38c2b563cd03363e7cb2bbba6c20ae4eaafd853a83954c8c8dd345ee391787bf"},
     {file = "simplejson-3.11.1-cp27-cp27m-win_amd64.whl", hash = "sha256:8d73b96a6ee7c81fd49dac7225e3846fd60b54a0b5b93a0aaea04c5a5d2e7bf2"},
diff --git a/pyproject.toml b/pyproject.toml
index bd8e818..96b5c71 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -31,6 +31,17 @@ black = "^21.9b0"
 pydocstyle = "^6.1.1"
 mypy = "^0.910"
 types-requests = "^2.25.8"
+requests-mock = "^1.9.3"
+isort = "^5.10.1"
+
+[tool.black]
+exclude = ".*simpleeval.*"
+
+[tool.isort]
+profile = "black"
+multi_line_output = 3  # Vertical Hanging Indent
+src_paths = "singer_sdk"
+known_first_party = ["tests", "samples"]
 
 [build-system]
 requires = ["poetry-core>=1.0.0"]
diff --git a/tap_rest_api_msdk/__init__.py b/tap_rest_api_msdk/__init__.py
index e69de29..40c6d25 100644
--- a/tap_rest_api_msdk/__init__.py
+++ b/tap_rest_api_msdk/__init__.py
@@ -0,0 +1 @@
+"""Package initialization."""
diff --git a/tap_rest_api_msdk/client.py b/tap_rest_api_msdk/client.py
index 6da4a9e..90ea41b 100644
--- a/tap_rest_api_msdk/client.py
+++ b/tap_rest_api_msdk/client.py
@@ -1,6 +1,7 @@
 """REST client handling, including RestApiStream base class."""
 
 from pathlib import Path
+from typing import Any
 
 from singer_sdk.streams import RESTStream
 
@@ -11,6 +12,11 @@ class RestApiStream(RESTStream):
     """rest-api stream class."""
 
     @property
-    def url_base(self) -> str:
-        """Return the API URL root, configurable via tap settings."""
+    def url_base(self) -> Any:
+        """Return the API URL root, configurable via tap settings.
+
+        Returns:
+            The base url for the api call.
+
+        """
         return self.config["api_url"]
diff --git a/tap_rest_api_msdk/streams.py b/tap_rest_api_msdk/streams.py
index 7a3a545..a6e0895 100644
--- a/tap_rest_api_msdk/streams.py
+++ b/tap_rest_api_msdk/streams.py
@@ -1,33 +1,52 @@
 """Stream type classes for tap-rest-api-msdk."""
 
-import requests
-from typing import Any, Dict, Optional, Iterable
+from typing import Any, Dict, Iterable, Optional
 
+import requests
 from singer_sdk.helpers.jsonpath import extract_jsonpath
-
 from tap_rest_api_msdk.client import RestApiStream
 from tap_rest_api_msdk.utils import flatten_json
 
 
 class DynamicStream(RestApiStream):
     """Define custom stream."""
+
     def __init__(
         self,
-        tap,
-        name,
-        path=None,
-        params=None,
-        headers=None,
-        primary_keys=None,
-        replication_key=None,
-        except_keys=None,
-        records_path=None,
-        next_page_token_path=None,
-        schema=None,
-        pagination_request_style='default',
-        pagination_response_style='default',
-        pagination_page_size=None,
-    ):
+        tap: Any,
+        name: str,
+        records_path: str,
+        path: str,
+        params: dict = None,
+        headers: dict = None,
+        primary_keys: list = None,
+        replication_key: str = None,
+        except_keys: list = None,
+        next_page_token_path: str = None,
+        schema: dict = None,
+        pagination_request_style: str = "default",
+        pagination_response_style: str = "default",
+        pagination_page_size: int = None,
+    ) -> None:
+        """Class initialization.
+
+        Args:
+            tap: see tap.py
+            name: see tap.py
+            path: see tap.py
+            params: see tap.py
+            headers: see tap.py
+            primary_keys: see tap.py
+            replication_key: see tap.py
+            except_keys: see tap.py
+            records_path: see tap.py
+            next_page_token_path: see tap.py
+            schema: the json schema for the stream.
+            pagination_request_style: see tap.py
+            pagination_response_style: see tap.py
+            pagination_page_size: see tap.py
+
+        """
         super().__init__(tap=tap, name=tap.name, schema=schema)
 
         if primary_keys is None:
@@ -41,17 +60,27 @@ def __init__(
         self.replication_key = replication_key
         self.except_keys = except_keys
         self.records_path = records_path
-        self.next_page_token_jsonpath = next_page_token_path  # Or override `get_next_page_token`.
+        self.next_page_token_jsonpath = (
+            next_page_token_path  # Or override `get_next_page_token`.
+        )
         self.pagination_page_size = pagination_page_size
-        get_url_params_styles = {'style1': self._get_url_params_style1}
-        self.get_url_params = get_url_params_styles.get(pagination_response_style, self._get_url_params_default)
-        get_next_page_token_styles = {'style1': self._get_next_page_token_style1}
-        self.get_next_page_token = get_next_page_token_styles.get(pagination_response_style,
-                                                                  self._get_next_page_token_default)
+        get_url_params_styles = {"style1": self._get_url_params_style1}
+        self.get_url_params = get_url_params_styles.get(  # type: ignore
+            pagination_response_style, self._get_url_params_default
+        )
+        get_next_page_token_styles = {"style1": self._get_next_page_token_style1}
+        self.get_next_page_token = get_next_page_token_styles.get(  # type: ignore
+            pagination_response_style, self._get_next_page_token_default
+        )
 
     @property
     def http_headers(self) -> dict:
-        """Return the http headers needed."""
+        """Return the http headers needed.
+
+        Returns:
+            A dictionary of the headers to be included in the request.
+
+        """
         headers = {}
         if "user_agent" in self.config:
             headers["User-Agent"] = self.config.get("user_agent")
@@ -64,10 +93,27 @@ def http_headers(self) -> dict:
 
         return headers
 
-    def _get_next_page_token_default(self, response: requests.Response, previous_token: Optional[Any]) -> Optional[Any]:
-        """Return a token for identifying next page or None if no more pages."""
+    def _get_next_page_token_default(
+        self, response: requests.Response, previous_token: Optional[Any]
+    ) -> Optional[str]:
+        """Return a token for identifying next page or None if no more pages.
+
+        This method follows the default style of getting the next page token from the
+        default path provided in the config or, if that doesn't exist, the header.
+
+        Args:
+            response: the requests.Response given by the api call.
+            previous_token: optional - the token representing the current/previous page
+                of results.
+
+        Returns:
+            A str representing the next page to be queried or `None`.
+
+        """
         if self.next_page_token_jsonpath:
-            all_matches = extract_jsonpath(self.next_page_token_jsonpath, response.json())
+            all_matches = extract_jsonpath(
+                self.next_page_token_jsonpath, response.json()
+            )
             first_match = next(iter(all_matches), None)
             next_page_token = first_match
         else:
@@ -75,16 +121,43 @@ def _get_next_page_token_default(self, response: requests.Response, previous_tok
 
         return next_page_token
 
-    def _get_next_page_token_style1(self, response: requests.Response, previous_token: Optional[Any]) -> Optional[Any]:
-        pagination = response.json().get('pagination', {})
-        if pagination and all(x in pagination for x in ['offset', 'limit', 'total']):
-            next_page_token = pagination['offset'] + pagination['limit']
-            if next_page_token <= pagination['total']:
+    def _get_next_page_token_style1(
+        self, response: requests.Response, previous_token: Optional[Any]
+    ) -> Any:
+        """Return a token for identifying next page or None if no more pages.
+
+        This method follows method of calculating the next page token from the
+        offsets, limits, and totals provided by the API.
+
+        Args:
+            response: required - the requests.Response given by the api call.
+            previous_token: optional - the token representing the current/previous page
+                of results.
+
+        Returns:
+            A str representing the next page to be queried or `None`.
+
+        """
+        pagination = response.json().get("pagination", {})
+        if pagination and all(x in pagination for x in ["offset", "limit", "total"]):
+            next_page_token = pagination["offset"] + pagination["limit"]
+            if next_page_token <= pagination["total"]:
                 return next_page_token
         return None
 
-    def _get_url_params_default(self, context: Optional[dict], next_page_token: Optional[Any]) -> Dict[str, Any]:
-        """Return a dictionary of values to be used in URL parameterization."""
+    def _get_url_params_default(
+        self, context: Optional[dict], next_page_token: Optional[Any]
+    ) -> Dict[str, Any]:
+        """Return a dictionary of values to be used in URL parameterization.
+
+        Args:
+            context: optional - the singer context object.
+            next_page_token: optional - the token for the next page of results.
+
+        Returns:
+            An object containing the parameters to add to the request.
+
+        """
         params: dict = {}
         if self.params:
             for k, v in self.params.items():
@@ -96,8 +169,19 @@ def _get_url_params_default(self, context: Optional[dict], next_page_token: Opti
             params["order_by"] = self.replication_key
         return params
 
-    def _get_url_params_style1(self, context: Optional[dict], next_page_token: Optional[Any]) -> Dict[str, Any]:
-        """Return a dictionary of values to be used in URL parameterization."""
+    def _get_url_params_style1(
+        self, context: Optional[dict], next_page_token: Optional[Any]
+    ) -> Dict[str, Any]:
+        """Return a dictionary of values to be used in URL parameterization.
+
+        Args:
+            context: optional - the singer context object.
+            next_page_token: optional - the token for the next page of results.
+
+        Returns:
+            An object containing the parameters to add to the request.
+
+        """
         params: dict = {}
         if self.params:
             for k, v in self.params.items():
@@ -112,9 +196,26 @@ def _get_url_params_style1(self, context: Optional[dict], next_page_token: Optio
         return params
 
     def parse_response(self, response: requests.Response) -> Iterable[dict]:
-        """Parse the response and return an iterator of result rows."""
+        """Parse the response and return an iterator of result rows.
+
+        Args:
+            response: required - the requests.Response given by the api call.
+
+        Yields:
+            Parsed records.
+
+        """
         yield from extract_jsonpath(self.records_path, input=response.json())
 
     def post_process(self, row: dict, context: Optional[dict] = None) -> dict:
-        """As needed, append or transform raw data to match expected structure."""
+        """As needed, append or transform raw data to match expected structure.
+
+        Args:
+            row: required - the record for processing.
+            context: optional - the singer context object.
+
+        Returns:
+            A record that has been processed.
+
+        """
         return flatten_json(row, self.except_keys)
diff --git a/tap_rest_api_msdk/tap.py b/tap_rest_api_msdk/tap.py
index 7037e18..b4e28da 100644
--- a/tap_rest_api_msdk/tap.py
+++ b/tap_rest_api_msdk/tap.py
@@ -1,128 +1,195 @@
 """rest-api tap class."""
 
-import requests
-from typing import List
+from typing import Any, List
 
+import requests
 from genson import SchemaBuilder
-from singer_sdk import Tap, Stream
+from singer_sdk import Stream, Tap
 from singer_sdk import typing as th
 from singer_sdk.helpers.jsonpath import extract_jsonpath
-
 from tap_rest_api_msdk.streams import DynamicStream
 from tap_rest_api_msdk.utils import flatten_json
 
 
 class TapRestApiMsdk(Tap):
     """rest-api tap class."""
+
     name = "tap-rest-api-msdk"
 
     config_jsonschema = th.PropertiesList(
-        th.Property("api_url", th.StringType, required=True, description="the base url/endpoint for the desired api"),
+        th.Property(
+            "api_url",
+            th.StringType,
+            required=True,
+            description="the base url/endpoint for the desired api",
+        ),
         # th.Property("auth_method", th.StringType, default='no_auth', required=False),
         # th.Property("auth_token", th.StringType, required=False),
-        th.Property('name', th.StringType, required=True, description="name of the stream"),
-        th.Property('path',
-                    th.StringType,
-                    default="",
-                    required=False,
-                    description="the path appeneded to the `api_url`."),
-        th.Property('params',
-                    th.ObjectType(),
-                    required=False,
-                    description="an object of objects that provide the `params` in a `requests.get` method."),
-        th.Property('headers',
-                    th.ObjectType(),
-                    required=False,
-                    description="an object of headers to pass into the api calls."),
-        th.Property("records_path",
-                    th.StringType,
-                    default="$[*]",
-                    required=False,
-                    description="a jsonpath string representing the path in the requests response that contains the "
-                                "records to process. Defaults to `$[*]`."),
-        th.Property("next_page_token_path",
-                    th.StringType,
-                    default="$.next_page",
-                    required=False,
-                    description="a jsonpath string representing the path to the 'next page' token. "
-                                "Defaults to `$.next_page`"),
-        th.Property("pagination_request_style",
-                    th.StringType,
-                    default="default",
-                    required=False,
-                    description="the pagination style to use for requests. "
-                                "Defaults to `default`"),
-        th.Property("pagination_response_style",
-                    th.StringType,
-                    default="default",
-                    required=False,
-                    description="the pagination style to use for response. "
-                                "Defaults to `default`"),
-        th.Property("pagination_page_size",
-                    th.IntegerType,
-                    default=None,
-                    required=False,
-                    description="the size of each page in records. "
-                                "Defaults to None"),
-        th.Property('primary_keys',
-                    th.ArrayType(th.StringType),
-                    required=True,
-                    description="a list of the json keys of the primary key for the stream."),
-        th.Property('replication_key',
-                    th.StringType,
-                    required=False,
-                    description="the json key of the replication key. Note that this should be an incrementing "
-                                "integer or datetime object."),
-        th.Property('except_keys',
-                    th.ArrayType(th.StringType),
-                    default=[],
-                    required=False,
-                    description="This tap automatically flattens the entire json structure and builds keys based on "
-                                "the corresponding paths.; Keys, whether composite or otherwise, listed in this "
-                                "dictionary will not be recursively flattened, but instead their values will be; "
-                                "turned into a json string and processed in that format. This is also automatically "
-                                "done for any lists within the records; therefore,; records are not duplicated for "
-                                "each item in lists."),
-        th.Property('num_inference_records',
-                    th.NumberType,
-                    default=50,
-                    required=False,
-                    description="number of records used to infer the stream's schema. Defaults to 50."),
+        th.Property(
+            "name", th.StringType, required=True, description="name of the stream"
+        ),
+        th.Property(
+            "path",
+            th.StringType,
+            default="",
+            required=False,
+            description="the path appeneded to the `api_url`.",
+        ),
+        th.Property(
+            "params",
+            th.ObjectType(),
+            required=False,
+            description="an object of objects that provide the `params` in a "
+            "`requests.get` method.",
+        ),
+        th.Property(
+            "headers",
+            th.ObjectType(),
+            required=False,
+            description="an object of headers to pass into the api calls.",
+        ),
+        th.Property(
+            "records_path",
+            th.StringType,
+            default="$[*]",
+            required=False,
+            description="a jsonpath string representing the path in the requests "
+            "response that contains the "
+            "records to process. Defaults to `$[*]`.",
+        ),
+        th.Property(
+            "next_page_token_path",
+            th.StringType,
+            default="$.next_page",
+            required=False,
+            description="a jsonpath string representing the path to the 'next page' "
+            "token. Defaults to `$.next_page`",
+        ),
+        th.Property(
+            "pagination_request_style",
+            th.StringType,
+            default="default",
+            required=False,
+            description="the pagination style to use for requests. "
+            "Defaults to `default`",
+        ),
+        th.Property(
+            "pagination_response_style",
+            th.StringType,
+            default="default",
+            required=False,
+            description="the pagination style to use for response. "
+            "Defaults to `default`",
+        ),
+        th.Property(
+            "pagination_page_size",
+            th.IntegerType,
+            default=None,
+            required=False,
+            description="the size of each page in records. " "Defaults to None",
+        ),
+        th.Property(
+            "primary_keys",
+            th.ArrayType(th.StringType),
+            required=True,
+            description="a list of the json keys of the primary key for the stream.",
+        ),
+        th.Property(
+            "replication_key",
+            th.StringType,
+            required=False,
+            description="the json key of the replication key. Note that this should "
+            "be an incrementing integer or datetime object.",
+        ),
+        th.Property(
+            "except_keys",
+            th.ArrayType(th.StringType),
+            default=[],
+            required=False,
+            description="This tap automatically flattens the entire json structure "
+            "and builds keys based on the corresponding paths.; Keys, "
+            "whether composite or otherwise, listed in this dictionary "
+            "will not be recursively flattened, but instead their values "
+            "will be; turned into a json string and processed in that "
+            "format. This is also automatically done for any lists within "
+            "the records; therefore, records are not duplicated for each "
+            "item in lists.",
+        ),
+        th.Property(
+            "num_inference_records",
+            th.NumberType,
+            default=50,
+            required=False,
+            description="number of records used to infer the stream's schema. "
+            "Defaults to 50.",
+        ),
     ).to_dict()
 
     def discover_streams(self) -> List[Stream]:
-        """Return a list of discovered streams."""
+        """Return a list of discovered streams.
+
+        Returns:
+            A list of streams.
+
+        """
         return [
             DynamicStream(
                 tap=self,
-                name=self.config['name'],
-                path=self.config['path'],
-                params=self.config.get('params'),
-                headers=self.config.get('headers'),
-                records_path=self.config['records_path'],
-                next_page_token_path=self.config['next_page_token_path'],
-                primary_keys=self.config['primary_keys'],
-                replication_key=self.config.get('replication_key'),
-                except_keys=self.config.get('except_keys'),
+                name=self.config["name"],
+                path=self.config["path"],
+                params=self.config.get("params"),
+                headers=self.config.get("headers"),
+                records_path=self.config["records_path"],
+                next_page_token_path=self.config["next_page_token_path"],
+                primary_keys=self.config["primary_keys"],
+                replication_key=self.config.get("replication_key"),
+                except_keys=self.config.get("except_keys"),
                 schema=self.get_schema(
-                    self.config['records_path'],
-                    self.config.get('except_keys'),
-                    self.config.get('num_inference_records'),
-                    self.config['path'],
-                    self.config.get('params'),
-                    self.config.get('headers'),
+                    self.config["records_path"],
+                    self.config.get("except_keys"),  # type: ignore
+                    self.config.get("num_inference_records"),  # type: ignore
+                    self.config["path"],
+                    self.config.get("params"),  # type: ignore
+                    self.config.get("headers"),  # type: ignore
                 ),
-                pagination_request_style=self.config.get('pagination_request_style', 'default'),
-                pagination_response_style=self.config.get('pagination_response_style', 'default'),
-                pagination_page_size=self.config.get('pagination_page_size'),
+                pagination_request_style=self.config.get(  # type: ignore
+                    "pagination_request_style"
+                ),
+                pagination_response_style=self.config.get(  # type: ignore
+                    "pagination_response_style"
+                ),
+                pagination_page_size=self.config.get("pagination_page_size"),
             )
         ]
 
-    def get_schema(self, records_path, except_keys, inference_records, path, params, headers) -> dict:
-        """Infer schema from the first records returned by api. Creates a Stream object."""
+    def get_schema(
+        self,
+        records_path: str,
+        except_keys: list,
+        inference_records: int,
+        path: str,
+        params: dict,
+        headers: dict,
+    ) -> Any:
+        """Infer schema from the first records returned by api. Creates a Stream object.
+
+        Args:
+            records_path: required - see config_jsonschema.
+            except_keys: required - see config_jsonschema.
+            inference_records: required - see config_jsonschema.
+            path: required - see config_jsonschema.
+            params: required - see config_jsonschema.
+            headers: required - see config_jsonschema.
+
+        Raises:
+            ValueError: if the response is not valid or a record is not valid json.
+
+        Returns:
+            A schema for the stream.
+        """
         # todo: this request format is not very robust
-        r = requests.get(self.config['api_url'] + path, params=params, headers=headers)
+        r = requests.get(self.config["api_url"] + path, params=params, headers=headers)
         if r.ok:
             records = extract_jsonpath(records_path, input=r.json())
         else:
diff --git a/tap_rest_api_msdk/tests/test_tap.py b/tap_rest_api_msdk/tests/test_tap.py
deleted file mode 100644
index f7408ba..0000000
--- a/tap_rest_api_msdk/tests/test_tap.py
+++ /dev/null
@@ -1,20 +0,0 @@
-from tap_rest_api_msdk.tap import TapRestApiMsdk
-from tap_rest_api_msdk.tests.test_streams import setup_api, config
-
-
-def test_schema_inference(requests_mock):
-    resp = setup_api(requests_mock)
-
-    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
-    assert stream0.schema == {
-        '$schema': 'http://json-schema.org/schema#',
-        'required': ['key1', 'key2', 'key3'],
-        'type': 'object',
-        'properties': {
-            'field1': {'type': 'string'},
-            'field2': {'type': 'integer'},
-            'key1': {'type': 'string'},
-            'key2': {'type': 'string'},
-            'key3': {'type': 'string'},
-        }
-    }
diff --git a/tap_rest_api_msdk/tests/test_utils.py b/tap_rest_api_msdk/tests/test_utils.py
deleted file mode 100644
index a7b8d19..0000000
--- a/tap_rest_api_msdk/tests/test_utils.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import json
-
-from tap_rest_api_msdk.utils import flatten_json
-
-
-def test_flatten_json():
-    d = {
-        'a': 1,
-        'b': {
-            'a': 2,
-            'b': {'a': 3},
-            'c': {'a': "bacon", 'b': "yum"}
-        },
-        'c': [{'foo': 'bar'}, {'eggs': 'spam'}],
-        'd': [4, 5],
-        'e.-f': 6,
-    }
-    ret = flatten_json(d, except_keys=['b_c'])
-    assert ret['a'] == 1
-    assert ret['b_a'] == 2
-    assert ret['b_b_a'] == 3
-    assert ret['b_c'] == json.dumps({'a': "bacon", 'b': "yum"})
-    assert ret['c'] == json.dumps([{'foo': 'bar'}, {'eggs': 'spam'}])
-    assert ret['d'] == json.dumps([4, 5])
-    assert ret['e__f'] == 6
diff --git a/tap_rest_api_msdk/utils.py b/tap_rest_api_msdk/utils.py
index b3c4731..23a8e13 100644
--- a/tap_rest_api_msdk/utils.py
+++ b/tap_rest_api_msdk/utils.py
@@ -1,33 +1,50 @@
+"""Basic utility functions."""
+
 import json
+from typing import Any
 
 
-def flatten_json(obj, except_keys=None):
+def flatten_json(obj: dict, except_keys: list = None) -> dict:
     """Flattens a json object by appending the patch as a key in the returned object.
+
     Automatically converts arrays and any provided keys into json strings to prevent
     flattening further into those branches.
 
     Args:
-        obj (dict): the json object to be flattened.
-        except_keys (Optional[list(str)]): list of the keys of the nodes that should
-            be converted to json strings.
+        obj: the json object to be flattened.
+        except_keys: list of the keys of the nodes that should be converted to json
+            strings.
+
+    Returns:
+        A flattened json object.
+
     """
     out = {}
    if not except_keys:
         except_keys = []
 
-    def t(s):
-        """Returns a translation table for converting strings in database friendly column names"""
-        translation_table = s.maketrans('-.', '__')
+    def t(s: str) -> str:
+        """Translate a string to db friendly column names.
+
+        Args:
+            s: required - string to make a translation table from.
+
+        Returns:
+            Translation table.
+
+        """
+        translation_table = s.maketrans("-.", "__")
         return s.translate(translation_table)
 
-    def flatten(o, exception_keys, name=''):
-        """Performs the recursive flattening of the json object
+    def flatten(o: Any, exception_keys: list, name: str = "") -> None:
+        """Recursive flattening of the json object in place.
 
         Args:
-            o (dict): the json object to be flattened.
-            exception_keys (list(str)): list of the keys of the nodes that should
-                be converted to json strings.
-            name (str): the prefix for the exception_keys
+            o: the json object to be flattened.
+            exception_keys: list of the keys of the nodes that should
+                be converted to json strings.
+            name: the prefix for the exception_keys
+
         """
         if type(o) is dict:
             for k in o:
@@ -35,7 +52,7 @@ def flatten(o, exception_keys, name=''):
                 if name + k in exception_keys:
                     out[t(name + k)] = json.dumps(o[k])
                 else:
-                    flatten(o[k], exception_keys, name + k + '_')
+                    flatten(o[k], exception_keys, name + k + "_")
 
         # if the object is an array, convert to a json string
         elif type(o) is list:
diff --git a/tap_rest_api_msdk/tests/__init__.py b/tests/__init__.py
similarity index 100%
rename from tap_rest_api_msdk/tests/__init__.py
rename to tests/__init__.py
diff --git a/tap_rest_api_msdk/tests/test_core.py b/tests/test_core.py
similarity index 81%
rename from tap_rest_api_msdk/tests/test_core.py
rename to tests/test_core.py
index c070aec..2e5108c 100644
--- a/tap_rest_api_msdk/tests/test_core.py
+++ b/tests/test_core.py
@@ -1,12 +1,9 @@
 """Tests standard tap features using the built-in SDK tests library."""
 
-import pytest
-import requests
-
+from singer_sdk.testing import get_standard_tap_tests
 from tap_rest_api_msdk.tap import TapRestApiMsdk
-from tap_rest_api_msdk.tests.test_streams import url_path, json_resp, config
 
-from singer_sdk.testing import get_standard_tap_tests
+from tests.test_streams import config, json_resp, url_path
 
 
 # Run standard built-in tap tests from the SDK:
@@ -16,4 +13,3 @@ def test_standard_tap_tests(requests_mock):
     tests = get_standard_tap_tests(TapRestApiMsdk, config=config())
     for test in tests:
         test()
-
diff --git a/tap_rest_api_msdk/tests/test_streams.py b/tests/test_streams.py
similarity index 50%
rename from tap_rest_api_msdk/tests/test_streams.py
rename to tests/test_streams.py
index 6f82b92..17a1dd3 100644
--- a/tap_rest_api_msdk/tests/test_streams.py
+++ b/tests/test_streams.py
@@ -1,19 +1,28 @@
 """Tests stream.py features."""
 
-import requests
+from typing import Any
 
+import requests
 from tap_rest_api_msdk.tap import TapRestApiMsdk
 
 
-def config(extras: dict=None):
+def config(extras: dict = None) -> dict:
+    """Utility function giving a basic/common config for streams.
+
+    Args:
+        extras: items to add to the basic config.
+
+    Returns:
+        A complete tap config.
+    """
     contents = {
-        "api_url": "http://example.com",
+        "api_url": "https://example.com",
         "name": "stream_name",
         "auth_method": "no_auth",
        "auth_token": "",
-        'path': '/path_test',
-        'primary_keys': ['key1', 'key2'],
-        'replication_key': 'key3',
+        "path": "/path_test",
+        "primary_keys": ["key1", "key2"],
+        "replication_key": "key3",
         "records_path": "$.records[*]",
     }
     if extras:
@@ -22,21 +31,24 @@ def config(extras: dict=None):
     return contents
 
 
-def json_resp(extras: dict=None):
+def json_resp(extras: dict = None) -> dict:
+    """Utility function returning a common response for mocked API calls.
+
+    Args:
+        extras: items to be added to the contents of the json response.
+
+    Returns:
+        A json object that mocks the results of an API call.
+    """
     contents = {
         "records": [
             {
-                'key1': 'this',
-                'key2': 'that',
-                'key3': 'foo',
+                "key1": "this",
+                "key2": "that",
+                "key3": "foo",
                 "field1": "I",
             },
-            {
-                'key1': 'foo',
-                'key2': 'bar',
-                'key3': 'spam',
-                "field2": 8
-            },
+            {"key1": "foo", "key2": "bar", "key3": "spam", "field2": 8},
         ],
     }
     if extras:
@@ -45,54 +57,129 @@ def json_resp(extras: dict=None):
     return contents
 
 
-def url_path(path: str= '/path_test'):
-    return 'http://example.com' + path
+def url_path(path: str = "/path_test") -> str:
+    """Utility function returning a common url for mocked API calls.
+
+    Args:
+        path: a path to add to the end of the base url.
+
+    Returns:
+        A full url.
+    """
+    return "https://example.com" + path
 
 
-def setup_api(requests_mock, json_extras: dict=None, headers_extras: dict=None, matcher=None) -> requests.Response:
+def setup_api(
+    requests_mock: Any,
+    json_extras: dict = None,
+    headers_extras: dict = None,
+    matcher: Any = None,
+) -> requests.Response:
+    """Utility function for mocking API calls.
+
+    Args:
+        requests_mock: mock object for requests.
+        json_extras: extra items to add to the response's results.
+        headers_extras: extra items to add to the API call's header.
+        matcher: a function that checks a request's input for the appropriate
+            configuration.
+
+    Returns:
+        A mocked requests.Response object.
+    """
     headers_resp = {}
     if headers_extras:
         for k, v in headers_extras.items():
             headers_resp[k] = v
 
-    adapter = requests_mock.get(url_path(), headers=headers_resp, json=json_resp(json_extras), additional_matcher=matcher)
+    requests_mock.get(
+        url_path(),
+        headers=headers_resp,
+        json=json_resp(json_extras),
+        additional_matcher=matcher,
+    )
 
     return requests.Session().get(url_path())
 
 
-def test_get_next_page_token_default_jsonpath(requests_mock):
-    resp = setup_api(requests_mock, json_extras={"next_page": "next_page_token_example"})
+def test_get_next_page_token_default_jsonpath(requests_mock: Any):
+    """Testing the _get_next_page_token_default method using a provided jsonpath."""
+    resp = setup_api(
+        requests_mock, json_extras={"next_page": "next_page_token_example"}
+    )
 
-    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
-    assert stream0._get_next_page_token_default(resp, "previous_token_example") == "next_page_token_example"
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[
+        0
+    ]
+    assert (
+        stream0._get_next_page_token_default(resp, "previous_token_example")
+        == "next_page_token_example"
+    )
     assert stream0.get_next_page_token == stream0._get_next_page_token_default
 
-def test_get_next_page_token_default_header(requests_mock):
+
+def test_get_next_page_token_default_header(requests_mock: Any):
+    """Testing the _get_next_page_token_default method using a provided header."""
     resp = setup_api(requests_mock, headers_extras={"X-Next-Page": "header_page_token"})
 
-    stream0 = TapRestApiMsdk(config=config({"next_page_token_path": None}), parse_env_config=True).discover_streams()[0]
-    assert stream0._get_next_page_token_default(resp, "previous_token_example") == "header_page_token"
+    stream0 = TapRestApiMsdk(
+        config=config({"next_page_token_path": None}), parse_env_config=True
+    ).discover_streams()[0]
+    assert (
+        stream0._get_next_page_token_default(resp, "previous_token_example")
+        == "header_page_token"
+    )
     assert stream0.get_next_page_token == stream0._get_next_page_token_default
 
-def test_get_next_page_token_style1_last_page(requests_mock):
-    resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 1, "limit": 1, "total": 2,}})
+
+def test_get_next_page_token_style1_last_page(requests_mock: Any):
+    """Test get_next_page_token_style1 if it's almost the last page."""
+    resp = setup_api(
+        requests_mock,
+        json_extras={
+            "pagination": {
+                "offset": 1,
+                "limit": 1,
+                "total": 2,
+            }
+        },
+    )
 
     configs = config({"pagination_response_style": "style1"})
-    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[
+        0
+    ]
     assert stream0._get_next_page_token_style1(resp, "previous_token_example") == 2
     assert stream0.get_next_page_token == stream0._get_next_page_token_style1
 
+
 def test_get_next_page_token_style1_end(requests_mock):
-    resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 5, "limit": 1, "total": 2,}})
+    """Test get_next_page_token_style1 if it's the last page."""
+    resp = setup_api(
+        requests_mock,
+        json_extras={
+            "pagination": {
+                "offset": 5,
+                "limit": 1,
+                "total": 2,
+            }
+        },
+    )
 
     configs = config({"pagination_response_style": "style1"})
-    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[
+        0
+    ]
     assert not stream0._get_next_page_token_style1(resp, "previous_token_example")
     assert stream0.get_next_page_token == stream0._get_next_page_token_style1
 
+
 def test_get_url_params_default(requests_mock):
-    resp = setup_api(requests_mock)
+    """Test _get_url_params using the default method."""
+    setup_api(requests_mock)
 
-    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[
+        0
+    ]
     assert stream0._get_url_params_default({}, "next_page_token_sample") == {
         "page": "next_page_token_sample",
@@ -101,11 +188,15 @@ def test_get_url_params_default(requests_mock):
     }
     assert stream0.get_url_params == stream0._get_url_params_default
 
+
 def test_get_url_params_style1(requests_mock):
-    resp = setup_api(requests_mock)
+    """Test _get_url_params using the style1 method."""
+    setup_api(requests_mock)
 
     configs = config({"pagination_page_size": 1, "pagination_response_style": "style1"})
-    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[
+        0
+    ]
     assert stream0._get_url_params_style1({}, "next_page_token_sample") == {
         "offset": "next_page_token_sample",
         "limit": 1,
@@ -114,6 +205,7 @@ def test_get_url_params_style1(requests_mock):
     }
     assert stream0.get_url_params == stream0._get_url_params_style1
 
+
 def test_pagination_style_default(requests_mock):
     def first_matcher(request):
         return "page" not in request.url
@@ -121,10 +213,16 @@ def first_matcher(request):
     def second_matcher(request):
         return "page=next_page_token" in request.url
 
-    requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp({"next_page": "next_page_token"}))
+    requests_mock.get(
+        url_path(),
+        additional_matcher=first_matcher,
+        json=json_resp({"next_page": "next_page_token"}),
+    )
     requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp())
 
-    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[
+        0
+    ]
     records_gen = stream0.get_records({})
     records = []
     for record in records_gen:
@@ -137,6 +235,7 @@ def second_matcher(request):
         json_resp()["records"][1],
     ]
 
+
 def test_pagination_style_style1(requests_mock):
     def first_matcher(request):
         return "offset" not in request.url
@@ -147,10 +246,14 @@ def second_matcher(request):
     configs = config({"pagination_page_size": 2, "pagination_response_style": "style1"})
     json_extras = {"pagination": {"offset": 1, "limit": 1, "total": 2}}
 
-    requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp(json_extras))
+    requests_mock.get(
+        url_path(), additional_matcher=first_matcher, json=json_resp(json_extras)
+    )
    requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp())
 
-    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[
+        0
+    ]
     records_gen = stream0.get_records({})
     records = []
     for record in records_gen:
diff --git a/tests/test_tap.py b/tests/test_tap.py
new file mode 100644
index 0000000..b83e160
--- /dev/null
+++ b/tests/test_tap.py
@@ -0,0 +1,24 @@
+from tap_rest_api_msdk.tap import TapRestApiMsdk
+
+from tests.test_streams import config, setup_api
+
+
+def test_schema_inference(requests_mock):
+    setup_api(requests_mock)
+
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[
+        0
+    ]
+
+    assert stream0.schema == {
+        "$schema": "http://json-schema.org/schema#",
+        "required": ["key1", "key2", "key3"],
+        "type": "object",
+        "properties": {
+            "field1": {"type": "string"},
+            "field2": {"type": "integer"},
+            "key1": {"type": "string"},
+            "key2": {"type": "string"},
+            "key3": {"type": "string"},
+        },
+    }
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..1bd891f
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,21 @@
+import json
+
+from tap_rest_api_msdk.utils import flatten_json
+
+
+def test_flatten_json():
+    d = {
+        "a": 1,
+        "b": {"a": 2, "b": {"a": 3}, "c": {"a": "bacon", "b": "yum"}},
+        "c": [{"foo": "bar"}, {"eggs": "spam"}],
+        "d": [4, 5],
+        "e.-f": 6,
+    }
+    ret = flatten_json(d, except_keys=["b_c"])
+    assert ret["a"] == 1
+    assert ret["b_a"] == 2
+    assert ret["b_b_a"] == 3
+    assert ret["b_c"] == json.dumps({"a": "bacon", "b": "yum"})
+    assert ret["c"] == json.dumps([{"foo": "bar"}, {"eggs": "spam"}])
+    assert ret["d"] == json.dumps([4, 5])
+    assert ret["e__f"] == 6
diff --git a/tox.ini b/tox.ini
index 717036f..1a23146 100644
--- a/tox.ini
+++ b/tox.ini
@@ -11,16 +11,21 @@ whitelist_externals = poetry
 commands =
     poetry install -v
     poetry run pytest
-    poetry run black --check tap_rest_api_msdk/
-    poetry run flake8 tap_rest_api_msdk
-    poetry run pydocstyle tap_rest_api_msdk
-    poetry run mypy tap_rest_api_msdk --exclude='tap_rest_api_msdk/tests'
+    poetry run black --check tap_rest_api_msdk/ tests/
+    poetry run isort --check tap_rest_api_msdk/ tests/
+    poetry run flake8 tap_rest_api_msdk/ tests/
+    poetry run mypy tap_rest_api_msdk/
+    poetry run pydocstyle tap_rest_api_msdk/ tests/
 
 [flake8]
-ignore = W503
+ignore = W503, C901, ANN101
 max-line-length = 88
+per-file-ignores =
+    # Don't require docstrings or type annotations in tests
+    tests/*:D100,D102,D103,DAR,ANN
 max-complexity = 10
+docstring-convention = google
 
 [pydocstyle]
-ignore = D105,D203,D213
+ignore = D105,D203,D213,D406,D407