Skip to content

Commit

Permalink
Add missing endpoints available in singer-io/tap-github (#93)
Browse files Browse the repository at this point in the history
* Add assignees stream

* Fix issues with assignee stream

* Add collaborators stream

* Add review comments and reviews stream

* Fix review comments stream and use repo parent instead

* Fix mypy issue

* Fix tests

* add milestone and commit comment streams

* Fix mypy

* Fix tests

* commit wip todo streams

* fix formatting

* [ci skip] format todo file and fix arraytype usage

* [ci skip] more regex magic to convert everything to classes

* Add paths [ci skip]

* Move all streams to main file

* Add replication keys

* fix tests (change type to datetime)

* introduce streams enum

* Fix up organization stream

* Reverse order of testing versions

* remove unsupported types from class

* Fix format

* Try use capital types to pass ci

* Fix tap not including org streams on organization given

* Add test for org stream

* Add rest of org streams

* [ci skip] Temp changes for testing

* Fix parent context being missing

* Set ignore parent replication to true for project

* fix mypy issue

* fix mistyped params

* Add parent keys

* Fix mistyped params

* Fix mistyped ids in events

* [ci skip] Remove pointless comment

* Change ignore parent key to true

* update ignore_parent_replication and remove unneeded import

* Simple comment [ci skip]

* Work on comments [ci skip]

* Work on comments [ci skip]

* Fix mistyped stuff (good catch Laurent) and more comment addressing

* Update fixture comment [ci skip]

* Add bunch of meltano lab sample projects

* update state partitioning keys

* Fix merge

* Add ORG_LEVEL_TOKEN to be used only for specific streams

* Add docstring to alternative_sync_chidren

Co-authored-by: Eric Boucher <[email protected]>
  • Loading branch information
Ry-DS and ericboucher authored Apr 7, 2022
1 parent 0f4606e commit 96c5ad0
Show file tree
Hide file tree
Showing 11 changed files with 989 additions and 126 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/test_tap.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,10 @@ jobs:
runs-on: ubuntu-latest
env:
GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
ORG_LEVEL_TOKEN: ${{secrets.ORG_LEVEL_TOKEN}}
strategy:
matrix:
python-version: [3.7, 3.8, 3.9, "3.10"]
python-version: ["3.10", 3.9, 3.8, 3.7]
# run the matrix jobs one after the other so they can benefit from caching
max-parallel: 1

Expand All @@ -28,7 +29,9 @@ jobs:
path: '**/api_calls_tests_cache.sqlite'
# github cache expires after 1wk, and we expire the content after 24h
# this key should not need to change unless we need to clear the cache
key: api-cache-v2
key: api-cache-v3
restore-keys: |
api-cache-v2
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ tap-github --config CONFIG --discover > ./catalog.json
```

## Contributing
This project uses parent-child streams. Learn more about them [here](https://gitlab.com/meltano/sdk/-/blob/main/docs/parent_streams.md).

### Initialize your Development Environment

Expand Down
4 changes: 2 additions & 2 deletions tap_github/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def get_next_page_token(
return (previous_token or 1) + 1

def get_url_params(
self, context: Optional[dict], next_page_token: Optional[Any]
self, context: Optional[Dict], next_page_token: Optional[Any]
) -> Dict[str, Any]:
"""Return a dictionary of values to be used in URL parameterization."""
params: dict = {"per_page": self.MAX_PER_PAGE}
Expand Down Expand Up @@ -261,7 +261,7 @@ def parse_response(self, response: requests.Response) -> Iterable[dict]:
yield from extract_jsonpath(self.query_jsonpath, input=resp_json)

def get_url_params(
self, context: Optional[dict], next_page_token: Optional[Any]
self, context: Optional[Dict], next_page_token: Optional[Any]
) -> Dict[str, Any]:
"""Return a dictionary of values to be used in URL parameterization."""
params = context or dict()
Expand Down
164 changes: 164 additions & 0 deletions tap_github/organization_streams.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
"""User Stream types classes for tap-github."""

from typing import Dict, List, Optional, Iterable, Any

from singer_sdk import typing as th # JSON Schema typing helpers

from tap_github.client import GitHubRestStream


class OrganizationStream(GitHubRestStream):
    """Defines a GitHub Organization Stream.

    One partition is created per entry of the ``organizations`` config value,
    and each record hands its ``login`` to child streams as the ``org``
    context key.

    API Reference: https://docs.github.com/en/rest/reference/orgs#get-an-organization
    """

    name = "organizations"
    path = "/orgs/{org}"

    @property
    def partitions(self) -> Optional[List[Dict]]:
        """Return one ``{"org": ...}`` context per configured organization."""
        return [{"org": org} for org in self.config["organizations"]]

    def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict:
        """Build the context passed to child streams for ``record``.

        Args:
            record: An organization record; its ``login`` key is required.
            context: The current partition context (not used here).

        Returns:
            A dict with a single ``org`` key set to the record's login.
        """
        return {
            "org": record["login"],
        }

    def get_records(self, context: Optional[Dict]) -> Iterable[Dict[str, Any]]:
        """
        Override the parent method to allow skipping API calls
        if the stream is deselected and skip_parent_streams is True in config.
        This allows running the tap with fewer API calls and preserving
        quota when only syncing a child stream. Without this,
        the API call is sent but data is discarded.
        """
        if (
            not self.selected
            and self.config.get("skip_parent_streams")
            and context is not None
        ):
            # build a minimal mock record so that self._sync_records
            # can proceed with child streams.
            # "login" must be present because get_child_context() reads
            # record["login"]; without it the skip path raises KeyError.
            yield {
                "org": context["org"],
                "login": context["org"],
            }
        else:
            yield from super().get_records(context)

    schema = th.PropertiesList(
        th.Property("login", th.StringType),
        th.Property("id", th.IntegerType),
        th.Property("node_id", th.StringType),
        th.Property("url", th.StringType),
        th.Property("repos_url", th.StringType),
        th.Property("events_url", th.StringType),
        th.Property("hooks_url", th.StringType),
        th.Property("issues_url", th.StringType),
        th.Property("members_url", th.StringType),
        th.Property("public_members_url", th.StringType),
        th.Property("avatar_url", th.StringType),
        th.Property("description", th.StringType),
    ).to_dict()


class TeamsStream(GitHubRestStream):
    """Defines the teams of a GitHub organization.

    Child stream of ``OrganizationStream``: the ``org`` value in the path
    comes from the parent context. Each team record passes its ``slug`` on
    to child streams as ``team_slug``.

    API Reference: https://docs.github.com/en/rest/reference/teams#list-teams
    """

    name = "teams"
    primary_keys = ["id"]
    path = "/orgs/{org}/teams"
    ignore_parent_replication_key = True
    parent_stream_type = OrganizationStream
    state_partitioning_keys = ["org"]

    def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict:
        """Extend the incoming context with this team's slug.

        Args:
            record: A team record; its ``slug`` key is required.
            context: The parent context, if any. Its keys are preserved.

        Returns:
            A new dict containing every key of ``context`` plus ``team_slug``.
        """
        return {**(context or {}), "team_slug": record["slug"]}

    schema = th.PropertiesList(
        # Parent Keys
        th.Property("org", th.StringType),
        # Rest
        th.Property("id", th.IntegerType),
        th.Property("node_id", th.StringType),
        th.Property("url", th.StringType),
        th.Property("html_url", th.StringType),
        th.Property("name", th.StringType),
        th.Property("slug", th.StringType),
        th.Property("description", th.StringType),
        th.Property("privacy", th.StringType),
        th.Property("permission", th.StringType),
        th.Property("members_url", th.StringType),
        th.Property("repositories_url", th.StringType),
        # The API returns the parent team as a nested object (or null for
        # top-level teams), not as a string.
        th.Property(
            "parent",
            th.ObjectType(
                th.Property("id", th.IntegerType),
                th.Property("node_id", th.StringType),
                th.Property("url", th.StringType),
                th.Property("html_url", th.StringType),
                th.Property("name", th.StringType),
                th.Property("slug", th.StringType),
                th.Property("description", th.StringType),
                th.Property("privacy", th.StringType),
                th.Property("permission", th.StringType),
                th.Property("members_url", th.StringType),
                th.Property("repositories_url", th.StringType),
            ),
        ),
    ).to_dict()


class TeamMembersStream(GitHubRestStream):
    """Defines the members of a GitHub team.

    Child stream of ``TeamsStream``: ``org`` and ``team_slug`` in the path
    come from the parent context. Each member's ``login`` is passed on to
    child streams as ``username``.

    API Reference: https://docs.github.com/en/rest/reference/teams#list-team-members
    """

    # Stream identity and endpoint.
    name = "team_members"
    path = "/orgs/{org}/teams/{team_slug}/members"
    primary_keys = ["id"]

    # Parent/child wiring and state partitioning.
    parent_stream_type = TeamsStream
    ignore_parent_replication_key = True
    state_partitioning_keys = ["team_slug", "org"]

    def get_child_context(self, record: Dict, context: Optional[Dict]) -> dict:
        """Return a copy of ``context`` with the member's login as ``username``.

        Args:
            record: A team member record; its ``login`` key is required.
            context: The parent context, if any. It is not mutated.

        Returns:
            A new dict with the parent keys (when given) plus ``username``.
        """
        child_context = dict(context) if context else {}
        child_context["username"] = record["login"]
        return child_context

    schema = th.PropertiesList(
        # Parent keys
        th.Property("org", th.StringType),
        th.Property("team_slug", th.StringType),
        # Rest
        th.Property("login", th.StringType),
        th.Property("id", th.IntegerType),
        th.Property("node_id", th.StringType),
        th.Property("avatar_url", th.StringType),
        th.Property("gravatar_id", th.StringType),
        th.Property("url", th.StringType),
        th.Property("html_url", th.StringType),
        th.Property("type", th.StringType),
        th.Property("site_admin", th.BooleanType),
    ).to_dict()


class TeamRolesStream(GitHubRestStream):
    """Defines the membership (role and state) of a user within a GitHub team.

    Child stream of ``TeamMembersStream``: ``org``, ``team_slug`` and
    ``username`` in the path come from the parent context.

    API Reference: https://docs.github.com/en/rest/reference/teams#get-team-membership-for-a-user
    """

    # Stream identity and endpoint.
    name = "team_roles"
    path = "/orgs/{org}/teams/{team_slug}/memberships/{username}"
    primary_keys = ["url"]

    # Parent/child wiring and state partitioning.
    parent_stream_type = TeamMembersStream
    ignore_parent_replication_key = True
    state_partitioning_keys = ["username", "team_slug", "org"]

    schema = th.PropertiesList(
        # Parent keys
        th.Property("org", th.StringType),
        th.Property("team_slug", th.StringType),
        th.Property("username", th.StringType),
        # Rest
        th.Property("url", th.StringType),
        th.Property("role", th.StringType),
        th.Property("state", th.StringType),
    ).to_dict()
Loading

0 comments on commit 96c5ad0

Please sign in to comment.