diff --git a/oauthenticator/github.py b/oauthenticator/github.py index fcc63b3b..79013676 100644 --- a/oauthenticator/github.py +++ b/oauthenticator/github.py @@ -8,6 +8,7 @@ from jupyterhub.auth import LocalAuthenticator from requests.utils import parse_header_links +from tornado.httpclient import HTTPClientError from traitlets import Bool, Set, Unicode, default from .oauth2 import OAuthenticator @@ -115,6 +116,16 @@ def _userdata_url_default(self): """, ) + allowed_repositories = Set( + config=True, + help=""" + Allow users with read access to specified repositories by specifying + repositories like `org-a/repo-1`. + + Requires `repo` to be set in `scope`. + """, + ) + populate_teams_in_auth_state = Bool( False, config=True, @@ -185,6 +196,17 @@ async def check_allowed(self, username, auth_model): message = f"User {username} is not part of allowed_organizations" self.log.warning(message) + if self.allowed_repositories: + access_token = auth_model["auth_state"]["token_response"]["access_token"] + token_type = auth_model["auth_state"]["token_response"]["token_type"] + for repo in self.allowed_repositories: + if await self._check_membership_allowed_repository( + repo, username, access_token, token_type + ): + return True + message = f"User {username} does not have access to allowed_repositories" + self.log.warning(message) + # users should be explicitly allowed via config, otherwise they aren't return False @@ -206,23 +228,38 @@ async def update_auth_model(self, auth_model): # - about /user/emails: https://docs.github.com/en/rest/reference/users#list-email-addresses-for-the-authenticated-user # # Note that the read:user scope does not imply the user:emails scope! + # Note that the retrieved scopes will be empty for GitHub Apps https://docs.github.com/en/developers/apps access_token = auth_model["auth_state"]["token_response"]["access_token"] token_type = auth_model["auth_state"]["token_response"]["token_type"] - granted_scopes = auth_model["auth_state"].get("scope", []) + granted_scopes = [ + scope for scope in auth_model["auth_state"].get("scope", []) if scope + ] if not user_info["email"] and ( - "user" in granted_scopes or "user:email" in granted_scopes + "user" in granted_scopes + or "user:email" in granted_scopes + or not granted_scopes ): - resp_json = await self.httpfetch( - f"{self.github_api}/user/emails", - "fetching user emails", - method="GET", - headers=self.build_userdata_request_headers(access_token, token_type), - validate_cert=self.validate_server_cert, - ) - for val in resp_json: - if val["primary"]: - user_info["email"] = val["email"] - break + try: + resp_json = await self.httpfetch( + f"{self.github_api}/user/emails", + "fetching user emails", + method="GET", + headers=self.build_userdata_request_headers( + access_token, token_type + ), + validate_cert=self.validate_server_cert, + ) + for val in resp_json: + if val["primary"]: + user_info["email"] = val["email"] + break + except HTTPClientError as e: + if e.code == 403 and not granted_scopes: + # This means granted_scopes is empty (GitHub App) but we don't have permission to + # read users email + pass + else: + raise e if self.populate_teams_in_auth_state: if "read:org" not in self.scope: @@ -333,6 +370,44 @@ async def _check_membership_allowed_organizations( ) return False + async def _check_membership_allowed_repository( + self, owner_repo, username, access_token, token_type + ): + """ + Checks if a user is allowed to read the repo via + GitHub's REST API. The `repo` scope is required to check access. + + The `owner_repo` parameter accepts values like `OWNER/REPO`. + """ + headers = self.build_userdata_request_headers(access_token, token_type) + + # https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#get-a-repository + owner, repo = owner_repo.split("/") + api_url = f"{self.github_api}/repos/{owner}/{repo}" + + self.log.debug(f"Checking GitHub repo access: {username} for {owner}/{repo}?") + resp = await self.httpfetch( + api_url, + parse_json=False, + raise_error=False, + method="GET", + headers=headers, + validate_cert=self.validate_server_cert, + ) + if resp.code == 200: + self.log.debug(f"Allowing {username} as access of {owner}/{repo}") + return True + else: + try: + resp_json = json.loads((resp.body or b'').decode('utf8', 'replace')) + message = resp_json.get('message', '') + except ValueError: + message = '' + self.log.debug( + f"{username} does not appear to have access to {owner}/{repo} (status={resp.code}): {message}", + ) + return False + class LocalGitHubOAuthenticator(LocalAuthenticator, GitHubOAuthenticator): """A version that mixes in local system user creation""" diff --git a/oauthenticator/tests/mocks.py b/oauthenticator/tests/mocks.py index f59c537a..a2e674f0 100644 --- a/oauthenticator/tests/mocks.py +++ b/oauthenticator/tests/mocks.py @@ -98,6 +98,19 @@ def fetch_impl(self, request, response_callback): response_callback(response) +def extract_token(request): + auth_header = request.headers.get('Authorization') + if auth_header: + token = auth_header.split(None, 1)[1] + else: + query = parse_qs(urlparse(request.url).query) + if 'access_token' in query: + token = query['access_token'][0] + else: + token = None + return token + + def setup_oauth_mock( client, host, @@ -171,19 +184,13 @@ def access_token(request): def get_user(request): assert request.method == 'GET', request.method - auth_header = request.headers.get('Authorization') - if auth_header: - token = auth_header.split(None, 1)[1] - else: - query = parse_qs(urlparse(request.url).query) - if 'access_token' in query: - token = query['access_token'][0] - else: - return HTTPResponse( - request=request, - code=403, - reason='Missing Authorization header', - ) + token = extract_token(request) + if token is None: + return HTTPResponse( + request=request, + code=403, + reason='Missing Authorization header', + ) if token not in access_tokens: return HTTPResponse( request=request, diff --git a/oauthenticator/tests/test_github.py b/oauthenticator/tests/test_github.py index 35b64a46..4ef24c4a 100644 --- a/oauthenticator/tests/test_github.py +++ b/oauthenticator/tests/test_github.py @@ -11,7 +11,7 @@ from traitlets.config import Config from ..github import GitHubOAuthenticator -from .mocks import setup_oauth_mock +from .mocks import extract_token, setup_oauth_mock def user_model(username): @@ -118,6 +118,7 @@ async def test_allowed_org_membership(github_client): "team1": ["user1"], }, } + allowed_repo_members = {"org1": {"repo1": ["user2"]}} member_regex = re.compile(r'/orgs/(.*)/members') @@ -193,17 +194,43 @@ def team_membership(request): return HTTPResponse(request, 404) return HTTPResponse(request, 204) + repo_membership_regex = re.compile(r'/repos/(.*)/(.*)') + + def repo_membership(request): + token = extract_token(request) + user = github_client.access_tokens.get(token) + urlinfo = urlparse(request.url) + urlmatch = repo_membership_regex.match(urlinfo.path) + owner = urlmatch.group(1) + repo = urlmatch.group(2) + username = user.get('login') + print(f"Request owner = {owner}, repo = {repo} username = {username}") + if owner not in allowed_repo_members: + print(f"Owner not found: owner = {owner}") + return HTTPResponse(request, 404) + if repo not in allowed_repo_members[owner]: + print(f"Repo not found in owner: repo = {repo}, owner = {owner}") + return HTTPResponse(request, 404) + if username not in allowed_repo_members[owner][repo]: + print( + f"Member not found: owner = {owner}, repo = {repo}, username = {username}" + ) + return HTTPResponse(request, 404) + return HTTPResponse(request, 200) + ## Perform tests client_hosts = github_client.hosts['api.github.com'] client_hosts.append((team_membership_regex, team_membership)) client_hosts.append((org_membership_regex, org_membership)) + client_hosts.append((repo_membership_regex, repo_membership)) # Run tests twice, once with paginate and once without for paginate in (False, True): client_hosts.append((member_regex, functools.partial(org_members, paginate))) # test org membership + authenticator.allowed_repositories = [] authenticator.allowed_organizations = ["org1"] handled_user_model = user_model("user1") @@ -229,6 +256,39 @@ def team_membership(request): auth_model = await authenticator.get_authenticated_user(handler, None) assert auth_model is None + # test repo membership + authenticator.allowed_organizations = [] + authenticator.allowed_repositories = ["org1/repo1"] + + handled_user_model = user_model("user2") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model + + handled_user_model = user_model("user-not-in-repo") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model is None + + # test repo membership + authenticator.allowed_organizations = ["org1:team1"] + authenticator.allowed_repositories = ["org1/repo1"] + + handled_user_model = user_model("user1") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model + + handled_user_model = user_model("user2") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model + + handled_user_model = user_model("user-not-in-repo") + handler = github_client.handler_for_user(handled_user_model) + auth_model = await authenticator.get_authenticated_user(handler, None) + assert auth_model is None + client_hosts.pop()