Issue/1 pagination tests (#11)

* Implement different styles of pagination support * Add Gitlab CI * fix paging logic * added tests for old functionality as well as proposed pagination * updated docs Co-authored-by: Fred Reimer <[email protected]> Co-authored-by: Fred Reimer <[email protected]> Co-authored-by: Josh Lloyd <[email protected]> Co-authored-by: Josh Lloyd <[email protected]>
Widen · Feb 17, 2022 · 4b98c89 · 4b98c89
1 parent 947c7fd
commit 4b98c89
Show file tree

Hide file tree

Showing 7 changed files with 232 additions and 44 deletions.
diff --git a/README.md b/README.md
@@ -36,6 +36,10 @@ plugins:
         - name: params
         - name: headers
         - name: records_path
+        - name: next_page_token_path
+        - name: pagination_request_style
+        - name: pagination_response_style
+        - name: pagination_page_size
         - name: primary_keys
         - name: replication_key
         - name: except_keys
@@ -70,7 +74,7 @@ Config Options:
 - `headers`: optional: an object of headers to pass into the api calls.
 - `records_path`: optional: a jsonpath string representing the path in the requests response that contains the records to process. Defaults to `$[*]`.
 - `pagination_request_style`: optional: style for requesting pagination, defaults to `default`, see Pagination below.
-- `pagination_result_style`: optional: style of pagination results, defaults to `default`, see Pagination below.
+- `pagination_response_style`: optional: style of pagination results, defaults to `default`, see Pagination below.
 - `pagination_page_size`: optional: limit for size of page, defaults to None.
 - `next_page_token_path`: optional: a jsonpath string representing the path to the "next page" token. Defaults to `$.next_page`.
 - `primary_keys`: required: a list of the json keys of the primary key for the stream.
@@ -82,8 +86,8 @@ Config Options:
 - `num_inference_keys`: optional: number of records used to infer the stream's schema. Defaults to 50.
 
 ## Pagination
-
 Pagination is a complex topic as there is no real single standard, and many different implementations.  Unless options are provided, both the request and results stype default to the `default`, which is the pagination style originally implemented.
+
 ### Default Request Style
 The default request style for pagination is described below:
 - Use next_page_token_path if provided to extract the token from response if found; otherwise

diff --git a/meltano.yml b/meltano.yml
@@ -16,6 +16,10 @@ plugins:
         - name: params
         - name: headers
         - name: records_path
+        - name: next_page_token_path
+        - name: pagination_request_style
+        - name: pagination_response_style
+        - name: pagination_page_size
         - name: primary_keys
         - name: replication_key
         - name: except_keys

diff --git a/tap_rest_api_msdk/streams.py b/tap_rest_api_msdk/streams.py
@@ -67,7 +67,7 @@ def http_headers(self) -> dict:
     def _get_next_page_token_default(self, response: requests.Response, previous_token: Optional[Any]) -> Optional[Any]:
         """Return a token for identifying next page or None if no more pages."""
         if self.next_page_token_jsonpath:
-            all_matches = extract_jsonpath("self.next_page_token_jsonpath", response.json())
+            all_matches = extract_jsonpath(self.next_page_token_jsonpath, response.json())
             first_match = next(iter(all_matches), None)
             next_page_token = first_match
         else:
@@ -79,7 +79,7 @@ def _get_next_page_token_style1(self, response: requests.Response, previous_toke
         pagination = response.json().get('pagination', {})
         if pagination and all(x in pagination for x in ['offset', 'limit', 'total']):
             next_page_token = pagination['offset'] + pagination['limit']
-            if next_page_token < pagination['total']:
+            if next_page_token <= pagination['total']:
                 return next_page_token
         return None
 

diff --git a/tap_rest_api_msdk/tests/test_core.py b/tap_rest_api_msdk/tests/test_core.py
@@ -1,48 +1,19 @@
 """Tests standard tap features using the built-in SDK tests library."""
 
-import json
+import pytest
+import requests
 
-from tap_rest_api_msdk.utils import flatten_json
+from tap_rest_api_msdk.tap import TapRestApiMsdk
+from tap_rest_api_msdk.tests.test_streams import url_path, json_resp, config
 
-SAMPLE_CONFIG = {
-    "api_url": "https://url.test",
-    "auth_method": "no_auth",
-    "auth_token": "",
-    "streams_config": [{
-        'name': 'test_name',
-        'path': 'path_test',
-        'primary_keys': ['key1', 'key2'],
-        'replication_key': 'key3'
-    }],
-    "records_path": "$.jsonpath[*]",
-}
+from singer_sdk.testing import get_standard_tap_tests
 
 
 # Run standard built-in tap tests from the SDK:
-# def test_standard_tap_tests():
-#     """Run standard tap tests from the SDK."""
-#     tests = get_standard_tap_tests(TapRestApi, config=SAMPLE_CONFIG)
-#     for test in tests:
-#         test()
+def test_standard_tap_tests(requests_mock):
+    """Run standard tap tests from the SDK."""
+    requests_mock.get(url_path(), json=json_resp())
+    tests = get_standard_tap_tests(TapRestApiMsdk, config=config())
+    for test in tests:
+        test()
 
-
-def test_flatten_json():
-    d = {
-        'a': 1,
-        'b': {
-            'a': 2,
-            'b': {'a': 3},
-            'c': {'a': "bacon", 'b': "yum"}
-        },
-        'c': [{'foo': 'bar'}, {'eggs': 'spam'}],
-        'd': [4, 5],
-        'e.-f': 6,
-    }
-    ret = flatten_json(d, except_keys=['b_c'])
-    assert ret['a'] == 1
-    assert ret['b_a'] == 2
-    assert ret['b_b_a'] == 3
-    assert ret['b_c'] == json.dumps({'a': "bacon", 'b': "yum"})
-    assert ret['c'] == json.dumps([{'foo': 'bar'}, {'eggs': 'spam'}])
-    assert ret['d'] == json.dumps([4, 5])
-    assert ret['e__f'] == 6
diff --git a/tap_rest_api_msdk/tests/test_streams.py b/tap_rest_api_msdk/tests/test_streams.py
@@ -0,0 +1,164 @@
+"""Tests stream.py features."""
+
+import requests
+
+from tap_rest_api_msdk.tap import TapRestApiMsdk
+
+
+def config(extras: dict=None):
+    contents = {
+        "api_url": "http://example.com",
+        "name": "stream_name",
+        "auth_method": "no_auth",
+        "auth_token": "",
+        'path': '/path_test',
+        'primary_keys': ['key1', 'key2'],
+        'replication_key': 'key3',
+        "records_path": "$.records[*]",
+    }
+    if extras:
+        for k, v in extras.items():
+            contents[k] = v
+    return contents
+
+
+def json_resp(extras: dict=None):
+    contents = {
+        "records": [
+            {
+                'key1': 'this',
+                'key2': 'that',
+                'key3': 'foo',
+                "field1": "I",
+            },
+            {
+                'key1': 'foo',
+                'key2': 'bar',
+                'key3': 'spam',
+                "field2": 8
+            },
+        ],
+    }
+    if extras:
+        for k, v in extras.items():
+            contents[k] = v
+    return contents
+
+
+def url_path(path: str= '/path_test'):
+    return 'http://example.com' + path
+
+
+def setup_api(requests_mock, json_extras: dict=None, headers_extras: dict=None, matcher=None) -> requests.Response:
+    headers_resp = {}
+    if headers_extras:
+        for k, v in headers_extras.items():
+            headers_resp[k] = v
+
+    adapter = requests_mock.get(url_path(), headers=headers_resp, json=json_resp(json_extras), additional_matcher=matcher)
+    return requests.Session().get(url_path())
+
+
+def test_get_next_page_token_default_jsonpath(requests_mock):
+    resp = setup_api(requests_mock, json_extras={"next_page": "next_page_token_example"})
+
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
+    assert stream0._get_next_page_token_default(resp, "previous_token_example") == "next_page_token_example"
+    assert stream0.get_next_page_token == stream0._get_next_page_token_default
+
+def test_get_next_page_token_default_header(requests_mock):
+    resp = setup_api(requests_mock, headers_extras={"X-Next-Page": "header_page_token"})
+
+    stream0 = TapRestApiMsdk(config=config({"next_page_token_path": None}), parse_env_config=True).discover_streams()[0]
+    assert stream0._get_next_page_token_default(resp, "previous_token_example") == "header_page_token"
+    assert stream0.get_next_page_token == stream0._get_next_page_token_default
+
+def test_get_next_page_token_style1_last_page(requests_mock):
+    resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 1, "limit": 1, "total": 2,}})
+    configs = config({"pagination_response_style": "style1"})
+
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    assert stream0._get_next_page_token_style1(resp, "previous_token_example") == 2
+    assert stream0.get_next_page_token == stream0._get_next_page_token_style1
+
+def test_get_next_page_token_style1_end(requests_mock):
+    resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 5, "limit": 1, "total": 2,}})
+    configs = config({"pagination_response_style": "style1"})
+
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    assert not stream0._get_next_page_token_style1(resp, "previous_token_example")
+    assert stream0.get_next_page_token == stream0._get_next_page_token_style1
+
+def test_get_url_params_default(requests_mock):
+    resp = setup_api(requests_mock)
+
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
+
+    assert stream0._get_url_params_default({}, "next_page_token_sample") == {
+        "page": "next_page_token_sample",
+        "sort": "asc",
+        "order_by": "key3",
+    }
+    assert stream0.get_url_params == stream0._get_url_params_default
+
+def test_get_url_params_style1(requests_mock):
+    resp = setup_api(requests_mock)
+    configs = config({"pagination_page_size": 1, "pagination_response_style": "style1"})
+
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    assert stream0._get_url_params_style1({}, "next_page_token_sample") == {
+        "offset": "next_page_token_sample",
+        "limit": 1,
+        "sort": "asc",
+        "order_by": "key3",
+    }
+    assert stream0.get_url_params == stream0._get_url_params_style1
+
+def test_pagination_style_default(requests_mock):
+    def first_matcher(request):
+        return "page" not in request.url
+
+    def second_matcher(request):
+        return "page=next_page_token" in request.url
+
+    requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp({"next_page": "next_page_token"}))
+    requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp())
+
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
+    records_gen = stream0.get_records({})
+    records = []
+    for record in records_gen:
+        records.append(record)
+
+    assert records == [
+        json_resp()["records"][0],
+        json_resp()["records"][1],
+        json_resp()["records"][0],
+        json_resp()["records"][1],
+    ]
+
+def test_pagination_style_style1(requests_mock):
+    def first_matcher(request):
+        return "offset" not in request.url
+
+    def second_matcher(request):
+        return "offset=2" in request.url
+
+    configs = config({"pagination_page_size": 2, "pagination_response_style": "style1"})
+    json_extras = {"pagination": {"offset": 1, "limit": 1, "total": 2}}
+
+    requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp(json_extras))
+    requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp())
+
+    stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
+    records_gen = stream0.get_records({})
+    records = []
+    for record in records_gen:
+        records.append(record)
+
+    assert records == [
+        json_resp()["records"][0],
+        json_resp()["records"][1],
+        json_resp()["records"][0],
+        json_resp()["records"][1],
+    ]
diff --git a/tap_rest_api_msdk/tests/test_tap.py b/tap_rest_api_msdk/tests/test_tap.py
@@ -0,0 +1,20 @@
+from tap_rest_api_msdk.tap import TapRestApiMsdk
+from tap_rest_api_msdk.tests.test_streams import setup_api, config
+
+
+def test_schema_inference(requests_mock):
+    resp = setup_api(requests_mock)
+
+    stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
+    assert stream0.schema == {
+        '$schema': 'http://json-schema.org/schema#',
+        'required': ['key1', 'key2', 'key3'],
+        'type': 'object',
+        'properties': {
+            'field1': {'type': 'string'},
+            'field2': {'type': 'integer'},
+            'key1': {'type': 'string'},
+            'key2': {'type': 'string'},
+            'key3': {'type': 'string'},
+        }
+    }
diff --git a/tap_rest_api_msdk/tests/test_utils.py b/tap_rest_api_msdk/tests/test_utils.py
@@ -0,0 +1,25 @@
+import json
+
+from tap_rest_api_msdk.utils import flatten_json
+
+
+def test_flatten_json():
+    d = {
+        'a': 1,
+        'b': {
+            'a': 2,
+            'b': {'a': 3},
+            'c': {'a': "bacon", 'b': "yum"}
+        },
+        'c': [{'foo': 'bar'}, {'eggs': 'spam'}],
+        'd': [4, 5],
+        'e.-f': 6,
+    }
+    ret = flatten_json(d, except_keys=['b_c'])
+    assert ret['a'] == 1
+    assert ret['b_a'] == 2
+    assert ret['b_b_a'] == 3
+    assert ret['b_c'] == json.dumps({'a': "bacon", 'b': "yum"})
+    assert ret['c'] == json.dumps([{'foo': 'bar'}, {'eggs': 'spam'}])
+    assert ret['d'] == json.dumps([4, 5])
+    assert ret['e__f'] == 6