Skip to content

Commit

Permalink
Issue/1 pagination tests (#11)
Browse files Browse the repository at this point in the history
* Implement different styles of pagination support

* Add Gitlab CI

* fix paging logic

* added tests for old functionality as well as proposed pagination

* updated docs

Co-authored-by: Fred Reimer <[email protected]>
Co-authored-by: Fred Reimer <[email protected]>
Co-authored-by: Josh Lloyd <[email protected]>
Co-authored-by: Josh Lloyd <[email protected]>
  • Loading branch information
5 people authored Feb 17, 2022
1 parent 947c7fd commit 4b98c89
Show file tree
Hide file tree
Showing 7 changed files with 232 additions and 44 deletions.
8 changes: 6 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ plugins:
- name: params
- name: headers
- name: records_path
- name: next_page_token_path
- name: pagination_request_style
- name: pagination_response_style
- name: pagination_page_size
- name: primary_keys
- name: replication_key
- name: except_keys
Expand Down Expand Up @@ -70,7 +74,7 @@ Config Options:
- `headers`: optional: an object of headers to pass into the api calls.
- `records_path`: optional: a jsonpath string representing the path in the requests response that contains the records to process. Defaults to `$[*]`.
- `pagination_request_style`: optional: style for requesting pagination, defaults to `default`, see Pagination below.
- `pagination_result_style`: optional: style of pagination results, defaults to `default`, see Pagination below.
- `pagination_response_style`: optional: style of pagination results, defaults to `default`, see Pagination below.
- `pagination_page_size`: optional: limit for size of page, defaults to None.
- `next_page_token_path`: optional: a jsonpath string representing the path to the "next page" token. Defaults to `$.next_page`.
- `primary_keys`: required: a list of the json keys of the primary key for the stream.
Expand All @@ -82,8 +86,8 @@ Config Options:
- `num_inference_keys`: optional: number of records used to infer the stream's schema. Defaults to 50.

## Pagination

Pagination is a complex topic as there is no real single standard, and many different implementations. Unless options are provided, both the request and results stype default to the `default`, which is the pagination style originally implemented.

### Default Request Style
The default request style for pagination is described below:
- Use next_page_token_path if provided to extract the token from response if found; otherwise
Expand Down
4 changes: 4 additions & 0 deletions meltano.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@ plugins:
- name: params
- name: headers
- name: records_path
- name: next_page_token_path
- name: pagination_request_style
- name: pagination_response_style
- name: pagination_page_size
- name: primary_keys
- name: replication_key
- name: except_keys
Expand Down
4 changes: 2 additions & 2 deletions tap_rest_api_msdk/streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def http_headers(self) -> dict:
def _get_next_page_token_default(self, response: requests.Response, previous_token: Optional[Any]) -> Optional[Any]:
"""Return a token for identifying next page or None if no more pages."""
if self.next_page_token_jsonpath:
all_matches = extract_jsonpath("self.next_page_token_jsonpath", response.json())
all_matches = extract_jsonpath(self.next_page_token_jsonpath, response.json())
first_match = next(iter(all_matches), None)
next_page_token = first_match
else:
Expand All @@ -79,7 +79,7 @@ def _get_next_page_token_style1(self, response: requests.Response, previous_toke
pagination = response.json().get('pagination', {})
if pagination and all(x in pagination for x in ['offset', 'limit', 'total']):
next_page_token = pagination['offset'] + pagination['limit']
if next_page_token < pagination['total']:
if next_page_token <= pagination['total']:
return next_page_token
return None

Expand Down
51 changes: 11 additions & 40 deletions tap_rest_api_msdk/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,48 +1,19 @@
"""Tests standard tap features using the built-in SDK tests library."""

import json
import pytest
import requests

from tap_rest_api_msdk.utils import flatten_json
from tap_rest_api_msdk.tap import TapRestApiMsdk
from tap_rest_api_msdk.tests.test_streams import url_path, json_resp, config

SAMPLE_CONFIG = {
"api_url": "https://url.test",
"auth_method": "no_auth",
"auth_token": "",
"streams_config": [{
'name': 'test_name',
'path': 'path_test',
'primary_keys': ['key1', 'key2'],
'replication_key': 'key3'
}],
"records_path": "$.jsonpath[*]",
}
from singer_sdk.testing import get_standard_tap_tests


# Run standard built-in tap tests from the SDK:
# def test_standard_tap_tests():
# """Run standard tap tests from the SDK."""
# tests = get_standard_tap_tests(TapRestApi, config=SAMPLE_CONFIG)
# for test in tests:
# test()
def test_standard_tap_tests(requests_mock):
"""Run standard tap tests from the SDK."""
requests_mock.get(url_path(), json=json_resp())
tests = get_standard_tap_tests(TapRestApiMsdk, config=config())
for test in tests:
test()


def test_flatten_json():
d = {
'a': 1,
'b': {
'a': 2,
'b': {'a': 3},
'c': {'a': "bacon", 'b': "yum"}
},
'c': [{'foo': 'bar'}, {'eggs': 'spam'}],
'd': [4, 5],
'e.-f': 6,
}
ret = flatten_json(d, except_keys=['b_c'])
assert ret['a'] == 1
assert ret['b_a'] == 2
assert ret['b_b_a'] == 3
assert ret['b_c'] == json.dumps({'a': "bacon", 'b': "yum"})
assert ret['c'] == json.dumps([{'foo': 'bar'}, {'eggs': 'spam'}])
assert ret['d'] == json.dumps([4, 5])
assert ret['e__f'] == 6
164 changes: 164 additions & 0 deletions tap_rest_api_msdk/tests/test_streams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""Tests stream.py features."""

import requests

from tap_rest_api_msdk.tap import TapRestApiMsdk


def config(extras: dict=None):
contents = {
"api_url": "http://example.com",
"name": "stream_name",
"auth_method": "no_auth",
"auth_token": "",
'path': '/path_test',
'primary_keys': ['key1', 'key2'],
'replication_key': 'key3',
"records_path": "$.records[*]",
}
if extras:
for k, v in extras.items():
contents[k] = v
return contents


def json_resp(extras: dict=None):
contents = {
"records": [
{
'key1': 'this',
'key2': 'that',
'key3': 'foo',
"field1": "I",
},
{
'key1': 'foo',
'key2': 'bar',
'key3': 'spam',
"field2": 8
},
],
}
if extras:
for k, v in extras.items():
contents[k] = v
return contents


def url_path(path: str= '/path_test'):
return 'http://example.com' + path


def setup_api(requests_mock, json_extras: dict=None, headers_extras: dict=None, matcher=None) -> requests.Response:
headers_resp = {}
if headers_extras:
for k, v in headers_extras.items():
headers_resp[k] = v

adapter = requests_mock.get(url_path(), headers=headers_resp, json=json_resp(json_extras), additional_matcher=matcher)
return requests.Session().get(url_path())


def test_get_next_page_token_default_jsonpath(requests_mock):
resp = setup_api(requests_mock, json_extras={"next_page": "next_page_token_example"})

stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
assert stream0._get_next_page_token_default(resp, "previous_token_example") == "next_page_token_example"
assert stream0.get_next_page_token == stream0._get_next_page_token_default

def test_get_next_page_token_default_header(requests_mock):
resp = setup_api(requests_mock, headers_extras={"X-Next-Page": "header_page_token"})

stream0 = TapRestApiMsdk(config=config({"next_page_token_path": None}), parse_env_config=True).discover_streams()[0]
assert stream0._get_next_page_token_default(resp, "previous_token_example") == "header_page_token"
assert stream0.get_next_page_token == stream0._get_next_page_token_default

def test_get_next_page_token_style1_last_page(requests_mock):
resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 1, "limit": 1, "total": 2,}})
configs = config({"pagination_response_style": "style1"})

stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
assert stream0._get_next_page_token_style1(resp, "previous_token_example") == 2
assert stream0.get_next_page_token == stream0._get_next_page_token_style1

def test_get_next_page_token_style1_end(requests_mock):
resp = setup_api(requests_mock, json_extras={"pagination": {"offset": 5, "limit": 1, "total": 2,}})
configs = config({"pagination_response_style": "style1"})

stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
assert not stream0._get_next_page_token_style1(resp, "previous_token_example")
assert stream0.get_next_page_token == stream0._get_next_page_token_style1

def test_get_url_params_default(requests_mock):
resp = setup_api(requests_mock)

stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]

assert stream0._get_url_params_default({}, "next_page_token_sample") == {
"page": "next_page_token_sample",
"sort": "asc",
"order_by": "key3",
}
assert stream0.get_url_params == stream0._get_url_params_default

def test_get_url_params_style1(requests_mock):
resp = setup_api(requests_mock)
configs = config({"pagination_page_size": 1, "pagination_response_style": "style1"})

stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
assert stream0._get_url_params_style1({}, "next_page_token_sample") == {
"offset": "next_page_token_sample",
"limit": 1,
"sort": "asc",
"order_by": "key3",
}
assert stream0.get_url_params == stream0._get_url_params_style1

def test_pagination_style_default(requests_mock):
def first_matcher(request):
return "page" not in request.url

def second_matcher(request):
return "page=next_page_token" in request.url

requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp({"next_page": "next_page_token"}))
requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp())

stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
records_gen = stream0.get_records({})
records = []
for record in records_gen:
records.append(record)

assert records == [
json_resp()["records"][0],
json_resp()["records"][1],
json_resp()["records"][0],
json_resp()["records"][1],
]

def test_pagination_style_style1(requests_mock):
def first_matcher(request):
return "offset" not in request.url

def second_matcher(request):
return "offset=2" in request.url

configs = config({"pagination_page_size": 2, "pagination_response_style": "style1"})
json_extras = {"pagination": {"offset": 1, "limit": 1, "total": 2}}

requests_mock.get(url_path(), additional_matcher=first_matcher, json=json_resp(json_extras))
requests_mock.get(url_path(), additional_matcher=second_matcher, json=json_resp())

stream0 = TapRestApiMsdk(config=configs, parse_env_config=True).discover_streams()[0]
records_gen = stream0.get_records({})
records = []
for record in records_gen:
records.append(record)

assert records == [
json_resp()["records"][0],
json_resp()["records"][1],
json_resp()["records"][0],
json_resp()["records"][1],
]
20 changes: 20 additions & 0 deletions tap_rest_api_msdk/tests/test_tap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
from tap_rest_api_msdk.tap import TapRestApiMsdk
from tap_rest_api_msdk.tests.test_streams import setup_api, config


def test_schema_inference(requests_mock):
resp = setup_api(requests_mock)

stream0 = TapRestApiMsdk(config=config(), parse_env_config=True).discover_streams()[0]
assert stream0.schema == {
'$schema': 'http://json-schema.org/schema#',
'required': ['key1', 'key2', 'key3'],
'type': 'object',
'properties': {
'field1': {'type': 'string'},
'field2': {'type': 'integer'},
'key1': {'type': 'string'},
'key2': {'type': 'string'},
'key3': {'type': 'string'},
}
}
25 changes: 25 additions & 0 deletions tap_rest_api_msdk/tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import json

from tap_rest_api_msdk.utils import flatten_json


def test_flatten_json():
d = {
'a': 1,
'b': {
'a': 2,
'b': {'a': 3},
'c': {'a': "bacon", 'b': "yum"}
},
'c': [{'foo': 'bar'}, {'eggs': 'spam'}],
'd': [4, 5],
'e.-f': 6,
}
ret = flatten_json(d, except_keys=['b_c'])
assert ret['a'] == 1
assert ret['b_a'] == 2
assert ret['b_b_a'] == 3
assert ret['b_c'] == json.dumps({'a': "bacon", 'b': "yum"})
assert ret['c'] == json.dumps([{'foo': 'bar'}, {'eggs': 'spam'}])
assert ret['d'] == json.dumps([4, 5])
assert ret['e__f'] == 6

0 comments on commit 4b98c89

Please sign in to comment.