From 4b98c89d20b4d8f18ade183030641933343a7e66 Mon Sep 17 00:00:00 2001 From: jlloyd-widen <82222659+jlloyd-widen@users.noreply.github.com> Date: Thu, 17 Feb 2022 12:43:57 -0700 Subject: [PATCH] Issue/1 pagination tests (#11) * Implement different styles of pagination support * Add Gitlab CI * fix paging logic * added tests for old functionality as well as proposed pagination * updated docs Co-authored-by: Fred Reimer Co-authored-by: Fred Reimer Co-authored-by: Josh Lloyd Co-authored-by: Josh Lloyd --- README.md | 8 +- meltano.yml | 4 + tap_rest_api_msdk/streams.py | 4 +- tap_rest_api_msdk/tests/test_core.py | 51 ++------ tap_rest_api_msdk/tests/test_streams.py | 164 ++++++++++++++++++++++++ tap_rest_api_msdk/tests/test_tap.py | 20 +++ tap_rest_api_msdk/tests/test_utils.py | 25 ++++ 7 files changed, 232 insertions(+), 44 deletions(-) create mode 100644 tap_rest_api_msdk/tests/test_streams.py create mode 100644 tap_rest_api_msdk/tests/test_tap.py create mode 100644 tap_rest_api_msdk/tests/test_utils.py diff --git a/README.md b/README.md index 3db0c9f..ac7ed30 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,10 @@ plugins: - name: params - name: headers - name: records_path + - name: next_page_token_path + - name: pagination_request_style + - name: pagination_response_style + - name: pagination_page_size - name: primary_keys - name: replication_key - name: except_keys @@ -70,7 +74,7 @@ Config Options: - `headers`: optional: an object of headers to pass into the api calls. - `records_path`: optional: a jsonpath string representing the path in the requests response that contains the records to process. Defaults to `$[*]`. - `pagination_request_style`: optional: style for requesting pagination, defaults to `default`, see Pagination below. -- `pagination_result_style`: optional: style of pagination results, defaults to `default`, see Pagination below. +- `pagination_response_style`: optional: style of pagination results, defaults to `default`, see Pagination below. - `pagination_page_size`: optional: limit for size of page, defaults to None. - `next_page_token_path`: optional: a jsonpath string representing the path to the "next page" token. Defaults to `$.next_page`. - `primary_keys`: required: a list of the json keys of the primary key for the stream. @@ -82,8 +86,8 @@ Config Options: - `num_inference_keys`: optional: number of records used to infer the stream's schema. Defaults to 50. ## Pagination - Pagination is a complex topic as there is no real single standard, and many different implementations. Unless options are provided, both the request and results stype default to the `default`, which is the pagination style originally implemented. + ### Default Request Style The default request style for pagination is described below: - Use next_page_token_path if provided to extract the token from response if found; otherwise diff --git a/meltano.yml b/meltano.yml index 94c0cfc..b61303c 100644 --- a/meltano.yml +++ b/meltano.yml @@ -16,6 +16,10 @@ plugins: - name: params - name: headers - name: records_path + - name: next_page_token_path + - name: pagination_request_style + - name: pagination_response_style + - name: pagination_page_size - name: primary_keys - name: replication_key - name: except_keys diff --git a/tap_rest_api_msdk/streams.py b/tap_rest_api_msdk/streams.py index c8a24e4..7a3a545 100644 --- a/tap_rest_api_msdk/streams.py +++ b/tap_rest_api_msdk/streams.py @@ -67,7 +67,7 @@ def http_headers(self) -> dict: def _get_next_page_token_default(self, response: requests.Response, previous_token: Optional[Any]) -> Optional[Any]: """Return a token for identifying next page or None if no more pages.""" if self.next_page_token_jsonpath: - all_matches = extract_jsonpath("self.next_page_token_jsonpath", response.json()) + all_matches = extract_jsonpath(self.next_page_token_jsonpath, response.json()) first_match = next(iter(all_matches), None) next_page_token = first_match else: @@ -79,7 +79,7 @@ def _get_next_page_token_style1(self, response: requests.Response, previous_toke pagination = response.json().get('pagination', {}) if pagination and all(x in pagination for x in ['offset', 'limit', 'total']): next_page_token = pagination['offset'] + pagination['limit'] - if next_page_token < pagination['total']: + if next_page_token <= pagination['total']: return next_page_token return None diff --git a/tap_rest_api_msdk/tests/test_core.py b/tap_rest_api_msdk/tests/test_core.py index 9841fd8..c070aec 100644 --- a/tap_rest_api_msdk/tests/test_core.py +++ b/tap_rest_api_msdk/tests/test_core.py @@ -1,48 +1,19 @@ """Tests standard tap features using the built-in SDK tests library.""" -import json +import pytest +import requests -from tap_rest_api_msdk.utils import flatten_json +from tap_rest_api_msdk.tap import TapRestApiMsdk +from tap_rest_api_msdk.tests.test_streams import url_path, json_resp, config -SAMPLE_CONFIG = { - "api_url": "https://url.test", - "auth_method": "no_auth", - "auth_token": "", - "streams_config": [{ - 'name': 'test_name', - 'path': 'path_test', - 'primary_keys': ['key1', 'key2'], - 'replication_key': 'key3' - }], - "records_path": "$.jsonpath[*]", -} +from singer_sdk.testing import get_standard_tap_tests # Run standard built-in tap tests from the SDK: -# def test_standard_tap_tests(): -# """Run standard tap tests from the SDK.""" -# tests = get_standard_tap_tests(TapRestApi, config=SAMPLE_CONFIG) -# for test in tests: -# test() +def test_standard_tap_tests(requests_mock): + """Run standard tap tests from the SDK.""" + requests_mock.get(url_path(), json=json_resp()) + tests = get_standard_tap_tests(TapRestApiMsdk, config=config()) + for test in tests: + test() - -def test_flatten_json(): - d = { - 'a': 1, - 'b': { - 'a': 2, - 'b': {'a': 3}, - 'c': {'a': "bacon", 'b': "yum"} - }, - 'c': [{'foo': 'bar'}, {'eggs': 'spam'}], - 'd': [4, 5], - 'e.-f': 6, - } - ret = flatten_json(d, except_keys=['b_c']) - assert ret['a'] == 1 - assert ret['b_a'] == 2 - assert ret['b_b_a'] == 3 - assert ret['b_c'] == json.dumps({'a': "bacon", 'b': "yum"}) - assert ret['c'] == json.dumps([{'foo': 'bar'}, {'eggs': 'spam'}]) - assert ret['d'] == json.dumps([4, 5]) - assert ret['e__f'] == 6 diff --git a/tap_rest_api_msdk/tests/test_streams.py b/tap_rest_api_msdk/tests/test_streams.py new file mode 100644 index 0000000..6f82b92 --- /dev/null +++ b/tap_rest_api_msdk/tests/test_streams.py @@ -0,0 +1,164 @@ +"""Tests stream.py features.""" + +import requests + +from tap_rest_api_msdk.tap import TapRestApiMsdk + + +def config(extras: dict=None): + contents = { + "api_url": "http://example.com", + "name": "stream_name", + "auth_method": "no_auth", + "auth_token": "", + 'path': '/path_test', + 'primary_keys': ['key1', 'key2'], + 'replication_key': 'key3', + "records_path": "$.records[*]", + } + if extras: + for k, v in extras.items(): + contents[k] = v + return contents + + +def json_resp(extras: dict=None): + contents = { + "records": [ + { + 'key1': 'this', + 'key2': 'that', + 'key3': 'foo', + "field1": "I", + }, + { + 'key1': 'foo', + 'key2': 'bar', + 'key3': 'spam', + "field2": 8 + }, + ], + } + if extras: + for k, v in extras.items(): + contents[k] = v + return contents + + +def url_path(path: str= '/path_test'): + return 'http://example.com' + path + + +def setup_api(requests_mock, json_extras: dict=None, headers_extras: dict=None, matcher=None) -> requests.Response: + headers_resp = {} + if headers_extras: + for k, v in headers_extras.items(): + headers_resp[k] = v + + adapter = requests_mock.get(url_path(), headers=headers_resp, json=json_resp(json_extras), additional_matcher=matcher) + return requests.Session().get(url_path()) + + +def test_get_next_page_token_default_jsonpath(requests_mock): + resp = setup_api(requests_mock, json_extras={"next_page": "next_page_token_example"}) + + stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0] + assert stream0._get_next_page_token_default(resp, "previous_token_example") == "next_page_token_example" + assert stream0.get_next_page_token == stream0._get_next_page_token_default + +def test_get_next_page_token_default_header(requests_mock): + resp = setup_api(requests_mock, headers_extras={"X-Next-Page": "header_page_token"}) + + stream0 = TapRestApiMsdk(config=config({"next_page_token_path": None}), parse_env_config=True).discover_streams()[0] + assert stream0._get_next_page_token_default(resp, "previous_token_example") == "header_page_token" + assert stream0.get_next_page_token == stream0._get_next_page_token_default + +def test_get_next_page_token_style1_last_page(requests_mock): + resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 1, "limit": 1, "total": 2,}}) + configs = config({"pagination_response_style": "style1"}) + + stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0] + assert stream0._get_next_page_token_style1(resp, "previous_token_example") == 2 + assert stream0.get_next_page_token == stream0._get_next_page_token_style1 + +def test_get_next_page_token_style1_end(requests_mock): + resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 5, "limit": 1, "total": 2,}}) + configs = config({"pagination_response_style": "style1"}) + + stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0] + assert not stream0._get_next_page_token_style1(resp, "previous_token_example") + assert stream0.get_next_page_token == stream0._get_next_page_token_style1 + +def test_get_url_params_default(requests_mock): + resp = setup_api(requests_mock) + + stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0] + + assert stream0._get_url_params_default({}, "next_page_token_sample") == { + "page": "next_page_token_sample", + "sort": "asc", + "order_by": "key3", + } + assert stream0.get_url_params == stream0._get_url_params_default + +def test_get_url_params_style1(requests_mock): + resp = setup_api(requests_mock) + configs = config({"pagination_page_size": 1, "pagination_response_style": "style1"}) + + stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0] + assert stream0._get_url_params_style1({}, "next_page_token_sample") == { + "offset": "next_page_token_sample", + "limit": 1, + "sort": "asc", + "order_by": "key3", + } + assert stream0.get_url_params == stream0._get_url_params_style1 + +def test_pagination_style_default(requests_mock): + def first_matcher(request): + return "page" not in request.url + + def second_matcher(request): + return "page=next_page_token" in request.url + + requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp({"next_page": "next_page_token"})) + requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp()) + + stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0] + records_gen = stream0.get_records({}) + records = [] + for record in records_gen: + records.append(record) + + assert records == [ + json_resp()["records"][0], + json_resp()["records"][1], + json_resp()["records"][0], + json_resp()["records"][1], + ] + +def test_pagination_style_style1(requests_mock): + def first_matcher(request): + return "offset" not in request.url + + def second_matcher(request): + return "offset=2" in request.url + + configs = config({"pagination_page_size": 2, "pagination_response_style": "style1"}) + json_extras = {"pagination": {"offset": 1, "limit": 1, "total": 2}} + + requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp(json_extras)) + requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp()) + + stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0] + records_gen = stream0.get_records({}) + records = [] + for record in records_gen: + records.append(record) + + assert records == [ + json_resp()["records"][0], + json_resp()["records"][1], + json_resp()["records"][0], + json_resp()["records"][1], + ] diff --git a/tap_rest_api_msdk/tests/test_tap.py b/tap_rest_api_msdk/tests/test_tap.py new file mode 100644 index 0000000..f7408ba --- /dev/null +++ b/tap_rest_api_msdk/tests/test_tap.py @@ -0,0 +1,20 @@ +from tap_rest_api_msdk.tap import TapRestApiMsdk +from tap_rest_api_msdk.tests.test_streams import setup_api, config + + +def test_schema_inference(requests_mock): + resp = setup_api(requests_mock) + + stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0] + assert stream0.schema == { + '$schema': 'http://json-schema.org/schema#', + 'required': ['key1', 'key2', 'key3'], + 'type': 'object', + 'properties': { + 'field1': {'type': 'string'}, + 'field2': {'type': 'integer'}, + 'key1': {'type': 'string'}, + 'key2': {'type': 'string'}, + 'key3': {'type': 'string'}, + } + } diff --git a/tap_rest_api_msdk/tests/test_utils.py b/tap_rest_api_msdk/tests/test_utils.py new file mode 100644 index 0000000..a7b8d19 --- /dev/null +++ b/tap_rest_api_msdk/tests/test_utils.py @@ -0,0 +1,25 @@ +import json + +from tap_rest_api_msdk.utils import flatten_json + + +def test_flatten_json(): + d = { + 'a': 1, + 'b': { + 'a': 2, + 'b': {'a': 3}, + 'c': {'a': "bacon", 'b': "yum"} + }, + 'c': [{'foo': 'bar'}, {'eggs': 'spam'}], + 'd': [4, 5], + 'e.-f': 6, + } + ret = flatten_json(d, except_keys=['b_c']) + assert ret['a'] == 1 + assert ret['b_a'] == 2 + assert ret['b_b_a'] == 3 + assert ret['b_c'] == json.dumps({'a': "bacon", 'b': "yum"}) + assert ret['c'] == json.dumps([{'foo': 'bar'}, {'eggs': 'spam'}]) + assert ret['d'] == json.dumps([4, 5]) + assert ret['e__f'] == 6