Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add CommunityProfile stream and enhance error handling #20

Merged
merged 13 commits into from
Sep 23, 2021
55 changes: 53 additions & 2 deletions tap_github/client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""REST client handling, including GitHubStream base class."""

import requests
from typing import Any, Dict, Optional, Iterable, cast
from typing import Any, Dict, List, Optional, Iterable, cast

from singer_sdk.streams import RESTStream

Expand All @@ -20,6 +20,7 @@ def url_base(self) -> str:

primary_keys = ["id"]
replication_key: Optional[str] = None
tolerated_http_errors: List[int] = []

@property
def http_headers(self) -> dict:
Expand Down Expand Up @@ -71,13 +72,63 @@ def get_url_params(
params["since"] = since
return params

def _request_with_backoff(
    self, prepared_request, context: Optional[dict]
) -> requests.Response:
    """Send a prepared request and vet the HTTP status of the reply.

    Overrides the private method ``_request_with_backoff`` so that status
    codes listed in ``tolerated_http_errors`` (for example an expected
    404 Not Found) are handed back to the caller instead of raising.

    Args:
        prepared_request: The prepared request sent through
            ``self.requests_session``.
        context: Stream context, forwarded to the request-duration metric log.

    Returns:
        The response, when its status is tolerated or below 400.

    Raises:
        RuntimeError: On a 401 or 403 response, or on any other status of
            400 and above that is not tolerated.
    """
    # NOTE(review): despite the name, nothing in this body retries or backs
    # off - confirm that dropping retries is intended.
    # TODO - Adapt Singer
    response = self.requests_session.send(prepared_request)

    if self._LOG_REQUEST_METRICS:
        # The request path is only attached as a tag when URL logging is on.
        extra_tags = (
            {"url": cast(str, prepared_request.path_url)}
            if self._LOG_REQUEST_METRIC_URLS
            else {}
        )
        self._write_request_duration_log(
            endpoint=self.path,
            response=response,
            context=context,
            extra_tags=extra_tags,
        )

    status = response.status_code

    # Tolerated statuses are reported and returned so the caller can decide
    # what to do with the response.
    if status in self.tolerated_http_errors:
        self.logger.info(
            "Request returned a tolerated error for {}".format(prepared_request.url)
        )
        self.logger.info(f"Reason: {status} - {str(response.content)}")
        return response

    if status in (401, 403):
        self.logger.info("Failed request for {}".format(prepared_request.url))
        self.logger.info(f"Reason: {status} - {str(response.content)}")
        raise RuntimeError(
            "Requested resource was unauthorized, forbidden, or not found."
        )

    if status >= 400:
        # str(bytes) renders newlines as a literal backslash-n; turn them back
        # into real line breaks so the error stays readable.
        message = (
            f"Error making request to API: {prepared_request.url} "
            f"[{status} - {str(response.content)}]"
        ).replace("\\n", "\n")
        raise RuntimeError(message)

    self.logger.debug("Response received successfully.")
    return response

def parse_response(self, response: requests.Response) -> Iterable[dict]:
    """Parse the response and yield its result rows one at a time.

    Three payload shapes are handled:

    * a JSON list - each element is a row;
    * a JSON object with a non-null ``items`` entry - each item is a row;
    * any other JSON object - the object itself is the single row.

    Args:
        response: The HTTP response to decode.

    Yields:
        One dict per result row. Nothing is yielded when the response status
        is listed in ``tolerated_http_errors``.
    """
    # TODO - Split into handle_response and parse_response.
    if response.status_code in self.tolerated_http_errors:
        # This function is a generator, so a bare return simply ends the
        # iteration without producing rows.
        return

    resp_json = response.json()

    if isinstance(resp_json, list):
        results = resp_json
    else:
        items = resp_json.get("items")
        # A missing or null "items" means the object itself is the record.
        results = items if items is not None else [resp_json]

    yield from results
97 changes: 84 additions & 13 deletions tap_github/streams.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Stream type classes for tap-github."""

import requests
from typing import Any, Dict, Iterable, List, Optional
from singer_sdk import typing as th # JSON Schema typing helpers

Expand Down Expand Up @@ -57,15 +56,6 @@ def partitions(self) -> Optional[List[Dict]]:
return [{"org": r[0], "repo": r[1]} for r in split_repo_names]
return None

def parse_response(self, response: requests.Response) -> Iterable[dict]:
"""
Parse the response, whose handling depends on the mode this stream runs in.

When the config contains a "searches" key, parsing is delegated to the
parse_response implementation that follows GitHubStream in the method
resolution order; otherwise the decoded JSON body is returned as a
single-element list.
"""
if "searches" in self.config:
# NOTE(review): super(GitHubStream, self) starts the attribute lookup *after*
# GitHubStream, so GitHubStream.parse_response itself is skipped - confirm this
# is intended rather than a plain super().
return super(GitHubStream, self).parse_response(response)
else:
return [response.json()]

def get_child_context(self, record: dict, context: Optional[dict]) -> dict:
"""Return a child context object from the record and optional provided context.

Expand Down Expand Up @@ -165,9 +155,6 @@ class ReadmeStream(GitHubStream):
ignore_parent_replication_key = False
state_partitioning_keys = ["repo", "org"]

def parse_response(self, response: requests.Response) -> Iterable[dict]:
    """Return the decoded JSON body of the response as a one-element list."""
    payload = response.json()
    return [payload]

schema = th.PropertiesList(
# Parent Keys
th.Property("repo", th.StringType),
Expand Down Expand Up @@ -195,6 +182,90 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
).to_dict()


def _url_pair_property(name):
    """Build an object property called *name* holding ``url`` and ``html_url`` strings."""
    return th.Property(
        name,
        th.ObjectType(
            th.Property("url", th.StringType),
            th.Property("html_url", th.StringType),
        ),
    )


class CommunityProfileStream(GitHubStream):
    """Defines 'CommunityProfile' stream."""

    name = "community_profile"
    path = "/repos/{org}/{repo}/community/profile"
    # The record is keyed by the parent repository's identifiers.
    primary_keys = ["repo", "org"]
    parent_stream_type = RepositoryStream
    ignore_parent_replication_key = False
    state_partitioning_keys = ["repo", "org"]
    # 404 is listed as a tolerated status for this stream.
    tolerated_http_errors = [404]

    schema = th.PropertiesList(
        # Parent Keys
        th.Property("repo", th.StringType),
        th.Property("org", th.StringType),
        # Community Profile
        th.Property("health_percentage", th.IntegerType),
        th.Property("description", th.StringType),
        th.Property("documentation", th.StringType),
        th.Property("updated_at", th.DateTimeType),
        th.Property("content_reports_enabled", th.BooleanType),
        th.Property(
            "files",
            th.ObjectType(
                th.Property(
                    "code_of_conduct",
                    th.ObjectType(
                        th.Property("key", th.StringType),
                        th.Property("name", th.StringType),
                        th.Property("html_url", th.StringType),
                        th.Property("url", th.StringType),
                    ),
                ),
                # The next four entries share the same url/html_url shape.
                _url_pair_property("code_of_conduct_file"),
                _url_pair_property("contributing"),
                _url_pair_property("issue_template"),
                _url_pair_property("pull_request_template"),
                th.Property(
                    "license",
                    th.ObjectType(
                        th.Property("key", th.StringType),
                        th.Property("name", th.StringType),
                        th.Property("spdx_id", th.StringType),
                        th.Property("node_id", th.StringType),
                        th.Property("html_url", th.StringType),
                        th.Property("url", th.StringType),
                    ),
                ),
                _url_pair_property("readme"),
            ),
        ),
    ).to_dict()


class IssuesStream(GitHubStream):
"""Defines 'Issues' stream."""

Expand Down
2 changes: 2 additions & 0 deletions tap_github/tap.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
IssuesStream,
IssueCommentsStream,
ReadmeStream,
CommunityProfileStream,
)


Expand Down Expand Up @@ -44,6 +45,7 @@ def discover_streams(self) -> List[Stream]:
IssuesStream(tap=self),
IssueCommentsStream(tap=self),
ReadmeStream(tap=self),
CommunityProfileStream(tap=self),
]


Expand Down