From 9608e70805ff548a394b9c46386da36bfac43d66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= <16805946+edgarrmondragon@users.noreply.github.com> Date: Tue, 26 Mar 2024 09:20:10 -0600 Subject: [PATCH] Add `pagination_initial_offset` setting (#43) --- README.md | 4 ++++ meltano.yml | 2 ++ tap_rest_api_msdk/pagination.py | 1 + tap_rest_api_msdk/streams.py | 5 ++++- tap_rest_api_msdk/tap.py | 11 +++++++++++ 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 844e01e..2ef3f28 100644 --- a/README.md +++ b/README.md @@ -64,6 +64,8 @@ plugins: kind: string - name: pagination_total_limit_param kind: string + - name: pagination_initial_offset + kind: integer - name: streams kind: array - name: name @@ -155,6 +157,7 @@ provided at the top-level will be the default values for each stream.: - `pagination_next_page_param`: optional: The name of the param that indicates the page/offset. Defaults to None. - `pagination_limit_per_page_param`: optional: The name of the param that indicates the limit/per_page. Defaults to None. - `pagination_total_limit_param`: optional: The name of the param that indicates the total limit e.g. total, count. Defaults to total +- `pagination_initial_offset`: optional: The initial offset for the first request. Defaults to 1. - `next_page_token_path`: optional: a jsonpath string representing the path to the "next page" token. Defaults to `'$.next_page'` for the `jsonpath_paginator` paginator only otherwise None. - `streams`: required: a list of objects that contain the configuration of each stream. See stream-level params below. - `path`: optional: see stream-level params below. @@ -309,6 +312,7 @@ There are additional request styles supported as follows for pagination. - `pagination_limit_per_page_param` - the name of the API parameter to limit number of records per page. Default parameter name `limit`. - `pagination_total_limit_param` - The name of the param that indicates the total limit e.g. total, count. Defaults to total - `next_page_token_path` - Used to locate an appropriate link in the response. Default None - but looks in the `pagination` section of the JSON response by default. Example, jsonpath to get the offset from the NOAA API `'$.metadata.resultset'`. + - `pagination_initial_offset` - The initial offset for the first request. Defaults to 1. - `simple_header_paginator` - This style uses links in the Header Response to locate the next page. Example the `x-next-page` link used by the Gitlab API. - `header_link_paginator` - This style uses the default header link paginator from the Meltano SDK. - `restapi_header_link_paginator` - This style is a variant on the header_link_paginator. It supports the ability to read from GitHub API. diff --git a/meltano.yml b/meltano.yml index 7e71b27..737c0e0 100644 --- a/meltano.yml +++ b/meltano.yml @@ -38,6 +38,8 @@ plugins: kind: string - name: pagination_total_limit_param kind: string + - name: pagination_initial_offset + kind: integer - name: streams kind: array - name: path diff --git a/tap_rest_api_msdk/pagination.py b/tap_rest_api_msdk/pagination.py index 3c15d27..a2f52af 100644 --- a/tap_rest_api_msdk/pagination.py +++ b/tap_rest_api_msdk/pagination.py @@ -1,4 +1,5 @@ """REST API pagination handling.""" + from typing import Any, Optional, cast from urllib.parse import parse_qs, urlparse diff --git a/tap_rest_api_msdk/streams.py b/tap_rest_api_msdk/streams.py index 0accf71..d06017e 100644 --- a/tap_rest_api_msdk/streams.py +++ b/tap_rest_api_msdk/streams.py @@ -60,6 +60,7 @@ def __init__( pagination_next_page_param: Optional[str] = None, pagination_limit_per_page_param: Optional[str] = None, pagination_total_limit_param: Optional[str] = None, + pagination_initial_offset: int = 1, start_date: Optional[datetime] = None, source_search_field: Optional[str] = None, source_search_query: Optional[str] = None, @@ -91,6 +92,7 @@ def __init__( pagination_next_page_param: see tap.py pagination_limit_per_page_param: see tap.py pagination_total_limit_param: see tap.py + pagination_initial_offset: see tap.py start_date: see tap.py source_search_field: see tap.py source_search_query: see tap.py @@ -163,6 +165,7 @@ def __init__( self.source_search_field = source_search_field self.source_search_query = source_search_query self.pagination_page_size: Optional[int] + self.pagination_initial_offset = pagination_initial_offset # Setting Pagination Limits if self.pagination_request_style == "restapi_header_link_paginator": @@ -303,7 +306,7 @@ def get_new_paginator(self): or self.pagination_request_style == "offset_paginator" ): return RestAPIOffsetPaginator( - start_value=1, + start_value=self.pagination_initial_offset, page_size=self.pagination_page_size, jsonpath=self.next_page_token_jsonpath, pagination_total_limit_param=self.pagination_total_limit_param, diff --git a/tap_rest_api_msdk/tap.py b/tap_rest_api_msdk/tap.py index 484384e..af6e861 100644 --- a/tap_rest_api_msdk/tap.py +++ b/tap_rest_api_msdk/tap.py @@ -385,6 +385,13 @@ class TapRestApiMsdk(Tap): description="The name of the param that indicates the total limit e.g. " "total, count. Defaults to total", ), + th.Property( + "pagination_initial_offset", + th.IntegerType, + default=1, + required=False, + description="The initial offset to start pagination from. Defaults to 1", + ), ) # add common properties to top-level properties @@ -512,6 +519,10 @@ def discover_streams(self) -> List[DynamicStream]: # type: ignore pagination_total_limit_param=self.config.get( "pagination_total_limit_param" ), + pagination_initial_offset=self.config.get( + "pagination_initial_offset", + 1, + ), schema=schema, start_date=start_date, source_search_field=source_search_field,