From dde874e3a60792c487059da092ff82a7d2f2f185 Mon Sep 17 00:00:00 2001 From: Hunter Kuffel Date: Tue, 1 Nov 2022 17:16:37 -0700 Subject: [PATCH 1/3] handle error in validate_response --- tap_instagram/client.py | 41 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/tap_instagram/client.py b/tap_instagram/client.py index 835ad13..9a8e6ef 100644 --- a/tap_instagram/client.py +++ b/tap_instagram/client.py @@ -65,7 +65,17 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: yield from extract_jsonpath(self.records_jsonpath, input=response.json()) def validate_response(self, response: requests.Response) -> None: - if 400 <= response.status_code < 500: + if ( + response.status_code == 400 + and 'Unsupported get request' in str(response.json().get("error", {}).get("message")) + ): + msg = ( + f"{response.status_code} Client Error: " + f"{response.reason} - {response.json()['error']['message']} for path: {self.path}" + ) + raise UnsupportedGetRequestError(msg) + + elif 401 <= response.status_code < 500: msg = ( f"{response.status_code} Client Error: " f"{response.reason} - {response.json()['error']['message']} for path: {self.path}" @@ -78,3 +88,32 @@ def validate_response(self, response: requests.Response) -> None: f"{response.reason} for path: {self.path}" ) raise RetriableAPIError(msg) + + def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: + """Return a generator of row-type dictionary objects. + + Each row emitted should be a dictionary of property names to their values. + + Args: + context: Stream partition or context dictionary. + + Yields: + One item per (possibly processed) record in the API. + """ + try: + for record in self.request_records(context): + transformed_record = self.post_process(record, context) + if transformed_record is None: + # Record filtered out during post_process() + continue + yield transformed_record + except UnsupportedGetRequestError as e: + self.logger.warning(e) + + +class UnsupportedGetRequestError(Exception): + """ + Error object to facilitate skipping IDs that cause trouble + with the API but aren't themselves grounds for ending the + entire ingestion process. + """ From d909b4ebf625cf70a52a28eac8911700722dbf8f Mon Sep 17 00:00:00 2001 From: Hunter Kuffel Date: Wed, 2 Nov 2022 09:47:26 -0700 Subject: [PATCH 2/3] linting changes --- tap_instagram/client.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/tap_instagram/client.py b/tap_instagram/client.py index 9a8e6ef..a28046d 100644 --- a/tap_instagram/client.py +++ b/tap_instagram/client.py @@ -67,18 +67,22 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: def validate_response(self, response: requests.Response) -> None: if ( response.status_code == 400 - and 'Unsupported get request' in str(response.json().get("error", {}).get("message")) + and 'Unsupported get request' in str( + response.json().get("error", {}).get("message") + ) ): msg = ( f"{response.status_code} Client Error: " - f"{response.reason} - {response.json()['error']['message']} for path: {self.path}" + f"{response.reason} - {response.json()['error']['message']}" + f" for path: {self.path}" ) raise UnsupportedGetRequestError(msg) - elif 401 <= response.status_code < 500: + elif 400 <= response.status_code < 500: msg = ( f"{response.status_code} Client Error: " - f"{response.reason} - {response.json()['error']['message']} for path: {self.path}" + f"{response.reason} - {response.json()['error']['message']}" + f" for path: {self.path}" ) raise FatalAPIError(msg) @@ -113,7 +117,7 @@ def get_records(self, context: Optional[dict]) -> Iterable[Dict[str, Any]]: class UnsupportedGetRequestError(Exception): """ - Error object to facilitate skipping IDs that cause trouble - with the API but aren't themselves grounds for ending the + Error object to facilitate skipping IDs that cause trouble + with the API but aren't themselves grounds for ending the entire ingestion process. """ From 636b7f18ebad73ecfc823cf8d7aa6a3137e90353 Mon Sep 17 00:00:00 2001 From: Hunter Kuffel Date: Wed, 2 Nov 2022 13:55:32 -0700 Subject: [PATCH 3/3] code formatting changes --- tap_instagram/client.py | 7 ++----- tap_instagram/streams.py | 40 ++++++++++++++++------------------------ 2 files changed, 18 insertions(+), 29 deletions(-) diff --git a/tap_instagram/client.py b/tap_instagram/client.py index a28046d..d1880f1 100644 --- a/tap_instagram/client.py +++ b/tap_instagram/client.py @@ -65,11 +65,8 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: yield from extract_jsonpath(self.records_jsonpath, input=response.json()) def validate_response(self, response: requests.Response) -> None: - if ( - response.status_code == 400 - and 'Unsupported get request' in str( - response.json().get("error", {}).get("message") - ) + if response.status_code == 400 and "Unsupported get request" in str( + response.json().get("error", {}).get("message") ): msg = ( f"{response.status_code} Client Error: " diff --git a/tap_instagram/streams.py b/tap_instagram/streams.py index b403fa4..f3ead37 100644 --- a/tap_instagram/streams.py +++ b/tap_instagram/streams.py @@ -504,12 +504,10 @@ def get_url_params( return params def validate_response(self, response: requests.Response) -> None: - if ( - response.json().get("error", {}).get("error_user_title") - == "Media posted before business account conversion" - or - "(#10) Not enough viewers for the media to show insights" - in str(response.json().get("error", {}).get("message")) + if response.json().get("error", {}).get( + "error_user_title" + ) == "Media posted before business account conversion" or "(#10) Not enough viewers for the media to show insights" in str( + response.json().get("error", {}).get("message") ): self.logger.warning(f"Skipping: {response.json()['error']}") return @@ -519,12 +517,10 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: resp_json = response.json() # Handle the specific case where FB returns error because media was posted before business acct creation # TODO: Refactor to raise a specific error in validate_response and handle that instead - if ( - resp_json.get("error", {}).get("error_user_title") - == "Media posted before business account conversion" - or - "(#10) Not enough viewers for the media to show insights" - in str(resp_json.get("error", {}).get("message")) + if resp_json.get("error", {}).get( + "error_user_title" + ) == "Media posted before business account conversion" or "(#10) Not enough viewers for the media to show insights" in str( + resp_json.get("error", {}).get("message") ): return for row in resp_json["data"]: @@ -674,12 +670,10 @@ def get_url_params( return params def validate_response(self, response: requests.Response) -> None: - if ( - response.json().get("error", {}).get("error_user_title") - == "Media posted before business account conversion" - or - "(#10) Not enough viewers for the media to show insights" - in str(response.json().get("error", {}).get("message")) + if response.json().get("error", {}).get( + "error_user_title" + ) == "Media posted before business account conversion" or "(#10) Not enough viewers for the media to show insights" in str( + response.json().get("error", {}).get("message") ): self.logger.warning(f"Skipping: {response.json()['error']}") return @@ -689,12 +683,10 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]: resp_json = response.json() # Handle the specific case where FB returns error because media was posted before business acct creation # TODO: Refactor to raise a specific error in validate_response and handle that instead - if ( - resp_json.get("error", {}).get("error_user_title") - == "Media posted before business account conversion" - or - "(#10) Not enough viewers for the media to show insights" - in str(resp_json.get("error", {}).get("message")) + if resp_json.get("error", {}).get( + "error_user_title" + ) == "Media posted before business account conversion" or "(#10) Not enough viewers for the media to show insights" in str( + resp_json.get("error", {}).get("message") ): return for row in resp_json["data"]: