From 53ee6141961a68e7cf0ea6b9df5fa907d1c291db Mon Sep 17 00:00:00 2001 From: Koen de Laat Date: Fri, 7 Jul 2023 14:52:14 +0200 Subject: [PATCH 1/4] Retrieve email independent of scopes with a GitHubApp --- oauthenticator/github.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/oauthenticator/github.py b/oauthenticator/github.py index 29535e81..ff5d20b4 100644 --- a/oauthenticator/github.py +++ b/oauthenticator/github.py @@ -205,23 +205,30 @@ async def update_auth_model(self, auth_model): # - about /user/emails: https://docs.github.com/en/rest/reference/users#list-email-addresses-for-the-authenticated-user # # Note that the read:user scope does not imply the user:emails scope! + # Note that the retrieved scopes will be empty for GitHub Apps https://docs.github.com/en/developers/apps access_token = auth_model["auth_state"]["token_response"]["access_token"] token_type = auth_model["auth_state"]["token_response"]["token_type"] granted_scopes = auth_model["auth_state"].get("scope", []) if not user_info["email"] and ( - "user" in granted_scopes or "user:email" in granted_scopes + "user" in granted_scopes + or "user:email" in granted_scopes + or not granted_scopes ): - resp_json = await self.httpfetch( + resp = await self.httpfetch( f"{self.github_api}/user/emails", "fetching user emails", method="GET", + parse_json=False, + raise_error=False, headers=self.build_userdata_request_headers(access_token, token_type), validate_cert=self.validate_server_cert, ) - for val in resp_json: - if val["primary"]: - user_info["email"] = val["email"] - break + if resp.code == 200: + resp_json = json.loads((resp.body or b'').decode('utf8', 'replace')) + for val in resp_json: + if val["primary"]: + user_info["email"] = val["email"] + break if self.populate_teams_in_auth_state: if "read:org" not in self.scope: From 4a8a7c69af45eb413a1e8abb43df41686988d45e Mon Sep 17 00:00:00 2001 From: Koen de Laat Date: Fri, 7 Jul 2023 15:19:54 +0200 Subject: [PATCH 2/4] Add `allowed_repositories` to GitHub authenticator Allows for authentication based on repository access, which is required when using outside-collaborators. --- oauthenticator/github.py | 59 +++++++++++++++++++++++++++ oauthenticator/tests/mocks.py | 33 +++++++++------ oauthenticator/tests/test_github.py | 62 ++++++++++++++++++++++++++++- 3 files changed, 140 insertions(+), 14 deletions(-) diff --git a/oauthenticator/github.py b/oauthenticator/github.py index ff5d20b4..4019adb0 100644 --- a/oauthenticator/github.py +++ b/oauthenticator/github.py @@ -114,6 +114,16 @@ def _userdata_url_default(self): """, ) + allowed_repositories = Set( + config=True, + help=""" + Allow users with read access to specified repositories by specifying + repositories like `org-a/repo-1`. + + Requires `repo` to be set in `scope`. + """, + ) + populate_teams_in_auth_state = Bool( False, config=True, @@ -184,6 +194,17 @@ async def check_allowed(self, username, auth_model): message = f"User {username} is not part of allowed_organizations" self.log.warning(message) + if self.allowed_repositories: + access_token = auth_model["auth_state"]["token_response"]["access_token"] + token_type = auth_model["auth_state"]["token_response"]["token_type"] + for repo in self.allowed_repositories: + if await self._check_membership_allowed_repository( + repo, username, access_token, token_type + ): + return True + message = f"User {username} does not have access to allowed_repositories" + self.log.warning(message) + # users should be explicitly allowed via config, otherwise they aren't return False @@ -339,6 +360,44 @@ async def _check_membership_allowed_organizations( ) return False + async def _check_membership_allowed_repository( + self, owner_repo, username, access_token, token_type + ): + """ + Checks if a user is allowed to read the repo via + GitHub's REST API. The `repo` scope is required to check access. + + The `owner_repo` parameter accepts values like `OWNER/REPO`. + """ + headers = self.build_userdata_request_headers(access_token, token_type) + + # https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#get-a-repository + owner, repo = owner_repo.split("/") + api_url = f"{self.github_api}/repos/{owner}/{repo}" + + self.log.debug(f"Checking GitHub repo access: {username} for {owner}/{repo}?") + resp = await self.httpfetch( + api_url, + parse_json=False, + raise_error=False, + method="GET", + headers=headers, + validate_cert=self.validate_server_cert, + ) + if resp.code == 200: + self.log.debug(f"Allowing {username} as access of {owner}/{repo}") + return True + else: + try: + resp_json = json.loads((resp.body or b'').decode('utf8', 'replace')) + message = resp_json.get('message', '') + except ValueError: + message = '' + self.log.debug( + f"{username} does not appear to have access to {owner}/{repo} (status={resp.code}): {message}", + ) + return False + class LocalGitHubOAuthenticator(LocalAuthenticator, GitHubOAuthenticator): """A version that mixes in local system user creation""" diff --git a/oauthenticator/tests/mocks.py b/oauthenticator/tests/mocks.py index b20af39e..5226a56a 100644 --- a/oauthenticator/tests/mocks.py +++ b/oauthenticator/tests/mocks.py @@ -97,6 +97,19 @@ def fetch_impl(self, request, response_callback): response_callback(response) +def extract_token(request): + auth_header = request.headers.get('Authorization') + if auth_header: + token = auth_header.split(None, 1)[1] + else: + query = parse_qs(urlparse(request.url).query) + if 'access_token' in query: + token = query['access_token'][0] + else: + token = None + return token + + def setup_oauth_mock( client, host, @@ -170,19 +183,13 @@ def access_token(request): def get_user(request): assert request.method == 'GET', request.method - auth_header = request.headers.get('Authorization') - if auth_header: - token = auth_header.split(None, 1)[1] - else: - query = parse_qs(urlparse(request.url).query) - if 'access_token' in query: - token = query['access_token'][0] - else: - return HTTPResponse( - request=request, - code=403, - reason='Missing Authorization header', - ) + token = extract_token(request) + if token is None: + return HTTPResponse( + request=request, + code=403, + reason='Missing Authorization header', + ) if token not in access_tokens: return HTTPResponse( request=request, diff --git a/oauthenticator/tests/test_github.py b/oauthenticator/tests/test_github.py index 55b91f48..8594580b 100644 --- a/oauthenticator/tests/test_github.py +++ b/oauthenticator/tests/test_github.py @@ -11,7 +11,7 @@ from traitlets.config import Config from ..github import GitHubOAuthenticator -from .mocks import setup_oauth_mock +from .mocks import extract_token, setup_oauth_mock def user_model(username): @@ -117,6 +117,7 @@ async def test_allowed_org_membership(github_client): "team1": ["user1"], }, } + allowed_repo_members = {"org1": {"repo1": ["user2"]}} member_regex = re.compile(r'/orgs/(.*)/members') @@ -192,17 +193,43 @@ def team_membership(request): return HTTPResponse(request, 404) return HTTPResponse(request, 204) + repo_membership_regex = re.compile(r'/repos/(.*)/(.*)') + + def repo_membership(request): + token = extract_token(request) + user = github_client.access_tokens.get(token) + urlinfo = urlparse(request.url) + urlmatch = repo_membership_regex.match(urlinfo.path) + owner = urlmatch.group(1) + repo = urlmatch.group(2) + username = user.get('login') + print(f"Request owner = {owner}, repo = {repo} username = {username}") + if owner not in allowed_repo_members: + print(f"Owner not found: owner = {owner}") + return HTTPResponse(request, 404) + if repo not in allowed_repo_members[owner]: + print(f"Repo not found in owner: repo = {repo}, owner = {owner}") + return HTTPResponse(request, 404) + if username not in allowed_repo_members[owner][repo]: + print( + f"Member not found: owner = {owner}, repo = {repo}, username = {username}" + ) + return HTTPResponse(request, 404) + return HTTPResponse(request, 200) + ## Perform tests client_hosts = github_client.hosts['api.github.com'] client_hosts.append((team_membership_regex, team_membership)) client_hosts.append((org_membership_regex, org_membership)) + client_hosts.append((repo_membership_regex, repo_membership)) # Run tests twice, once with paginate and once without for paginate in (False, True): client_hosts.append((member_regex, functools.partial(org_members, paginate))) # test org membership + authenticator.allowed_repositories = [] authenticator.allowed_organizations = ["org1"] handled_user_model = user_model("user1") @@ -228,6 +255,39 @@ def team_membership(request): auth_model = await authenticator.get_authenticated_user(handler, None) assert auth_model is None + # test repo membership + authenticator.allowed_organizations = [] + authenticator.allowed_repositories = ["org1/repo1"] + + handled_user_model = user_model("user2") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model + + handled_user_model = user_model("user-not-in-repo") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model is None + + # test repo membership + authenticator.allowed_organizations = ["org1:team1"] + authenticator.allowed_repositories = ["org1/repo1"] + + handled_user_model = user_model("user1") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model + + handled_user_model = user_model("user2") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model + + handled_user_model = user_model("user-not-in-repo") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model is None + client_hosts.pop() From 3e61aa5ed08ac239acda4482e49ad94e8e1701ec Mon Sep 17 00:00:00 2001 From: Koen de Laat Date: Fri, 7 Jul 2023 22:10:31 +0200 Subject: [PATCH 3/4] Fix list of empty string scope --- oauthenticator/github.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/oauthenticator/github.py b/oauthenticator/github.py index 4019adb0..298cb47f 100644 --- a/oauthenticator/github.py +++ b/oauthenticator/github.py @@ -229,7 +229,9 @@ async def update_auth_model(self, auth_model): # Note that the retrieved scopes will be empty for GitHub Apps https://docs.github.com/en/developers/apps access_token = auth_model["auth_state"]["token_response"]["access_token"] token_type = auth_model["auth_state"]["token_response"]["token_type"] - granted_scopes = auth_model["auth_state"].get("scope", []) + granted_scopes = [ + scope for scope in auth_model["auth_state"].get("scope", []) if scope + ] if not user_info["email"] and ( "user" in granted_scopes or "user:email" in granted_scopes From 38595f821d4d0a972aa51168bb46ba87a28984be Mon Sep 17 00:00:00 2001 From: Koen de Laat Date: Mon, 17 Jul 2023 17:36:48 +0200 Subject: [PATCH 4/4] Error handling of users/email fetch --- oauthenticator/github.py | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/oauthenticator/github.py b/oauthenticator/github.py index 298cb47f..e4c5c31f 100644 --- a/oauthenticator/github.py +++ b/oauthenticator/github.py @@ -7,6 +7,7 @@ from jupyterhub.auth import LocalAuthenticator from requests.utils import parse_header_links +from tornado.httpclient import HTTPClientError from traitlets import Bool, Set, Unicode, default from .oauth2 import OAuthenticator @@ -237,21 +238,27 @@ async def update_auth_model(self, auth_model): or "user:email" in granted_scopes or not granted_scopes ): - resp = await self.httpfetch( - f"{self.github_api}/user/emails", - "fetching user emails", - method="GET", - parse_json=False, - raise_error=False, - headers=self.build_userdata_request_headers(access_token, token_type), - validate_cert=self.validate_server_cert, - ) - if resp.code == 200: - resp_json = json.loads((resp.body or b'').decode('utf8', 'replace')) + try: + resp_json = await self.httpfetch( + f"{self.github_api}/user/emails", + "fetching user emails", + method="GET", + headers=self.build_userdata_request_headers( + access_token, token_type + ), + validate_cert=self.validate_server_cert, + ) for val in resp_json: if val["primary"]: user_info["email"] = val["email"] break + except HTTPClientError as e: + if e.code == 403 and not granted_scopes: + # This means granted_scopes is empty (GitHub App) but we don't have permission to + # read users email + pass + else: + raise e if self.populate_teams_in_auth_state: if "read:org" not in self.scope: