diff --git a/README.md b/README.md index e141a40f..f315cb04 100644 --- a/README.md +++ b/README.md @@ -25,13 +25,13 @@ A list of release versions is available at https://github.com/MeltanoLabs/tap-gi This tap accepts the following configuration options: - Required: One and only one of the following modes: - 1. `repositories`: an array of strings specifying the GitHub repositories to be included. Each element of the array should be of the form `/`, e.g. `MeltanoLabs/tap-github`. - 2. `organizations`: an array of strings containing the github organizations to be included - 3. `searches`: an array of search descriptor objects with the following properties: - - `name`: a human readable name for the search query - - `query`: a github search string (generally the same as would come after `?q=` in the URL) - 4. `user_usernames`: a list of github usernames - 5. `user_ids`: a list of github user ids [int] + 1. `repositories`: An array of strings specifying the GitHub repositories to be included. Each element of the array should be of the form `/`, e.g. `MeltanoLabs/tap-github`. + 2. `organizations`: An array of strings containing the github organizations to be included + 3. `searches`: An array of search descriptor objects with the following properties: + - `name`: A human readable name for the search query + - `query`: A github search string (generally the same as would come after `?q=` in the URL) + 4. `user_usernames`: A list of github usernames + 5. `user_ids`: A list of github user ids [int] - Highly recommended: - `auth_token` - GitHub token to authenticate with. - `additional_auth_tokens` - List of GitHub tokens to authenticate with. Streams will loop through them when hitting rate limits.. @@ -43,7 +43,8 @@ This tap accepts the following configuration options: - `metrics_log_level` - `stream_maps` - `stream_maps_config` - - `rate_limit_buffer` - A buffer to avoid consuming all query points for the auth_token at hand. Defaults to 1000.", + - `rate_limit_buffer` - A buffer to avoid consuming all query points for the auth_token at hand. Defaults to 1000. + - `expiry_time_buffer` - A buffer used when determining when to refresh Github app tokens. Only relevant when authenticating as a GitHub app. Defaults to 10 minutes. Tokens generated by GitHub apps expire 1 hour after creation, and will be refreshed once fewer than `expiry_time_buffer` minutes remain until the anticipated expiry time. Note that modes 1-3 are `repository` modes and 4-5 are `user` modes and will not run the same set of streams. diff --git a/meltano.yml b/meltano.yml index 8118d91d..105386ca 100644 --- a/meltano.yml +++ b/meltano.yml @@ -21,6 +21,8 @@ plugins: kind: array - name: rate_limit_buffer kind: integer + - name: expiry_time_buffer + kind: integer - name: searches kind: array - name: organizations diff --git a/tap_github/authenticator.py b/tap_github/authenticator.py index 6fe1aefc..dcc169ae 100644 --- a/tap_github/authenticator.py +++ b/tap_github/authenticator.py @@ -3,7 +3,7 @@ import logging import time from copy import deepcopy -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from os import environ from random import choice, shuffle from typing import Any, Dict, List, Optional, Set, Tuple @@ -36,7 +36,7 @@ def __init__( self.logger = logger self.rate_limit = self.DEFAULT_RATE_LIMIT self.rate_limit_remaining = self.DEFAULT_RATE_LIMIT - self.rate_limit_reset: Optional[int] = None + self.rate_limit_reset: Optional[datetime] = None self.rate_limit_used = 0 self.rate_limit_buffer = ( rate_limit_buffer @@ -47,7 +47,9 @@ def __init__( def update_rate_limit(self, response_headers: Any) -> None: self.rate_limit = int(response_headers["X-RateLimit-Limit"]) self.rate_limit_remaining = int(response_headers["X-RateLimit-Remaining"]) - self.rate_limit_reset = int(response_headers["X-RateLimit-Reset"]) + self.rate_limit_reset = datetime.utcfromtimestamp( + int(response_headers["X-RateLimit-Reset"]) + ) self.rate_limit_used = int(response_headers["X-RateLimit-Used"]) def is_valid_token(self) -> bool: @@ -84,7 +86,7 @@ def has_calls_remaining(self) -> bool: return True if ( self.rate_limit_used > (self.rate_limit - self.rate_limit_buffer) - and self.rate_limit_reset > datetime.now().timestamp() + and self.rate_limit_reset > datetime.now() ): return False return True @@ -159,7 +161,13 @@ class AppTokenManager(TokenManager): DEFAULT_RATE_LIMIT = 15000 DEFAULT_EXPIRY_BUFFER_MINS = 10 - def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwargs): + def __init__( + self, + env_key: str, + rate_limit_buffer: Optional[int] = None, + expiry_time_buffer: Optional[int] = None, + **kwargs, + ): if rate_limit_buffer is None: rate_limit_buffer = self.DEFAULT_RATE_LIMIT_BUFFER super().__init__(None, rate_limit_buffer=rate_limit_buffer, **kwargs) @@ -171,6 +179,10 @@ def __init__(self, env_key: str, rate_limit_buffer: Optional[int] = None, **kwar parts[2] if len(parts) >= 3 else None ) + if expiry_time_buffer is None: + expiry_time_buffer = self.DEFAULT_EXPIRY_BUFFER_MINS + self.expiry_time_buffer = expiry_time_buffer + self.token_expires_at: Optional[datetime] = None self.claim_token() @@ -204,6 +216,29 @@ def claim_token(self): self.token = None self.token_expires_at = None + def has_calls_remaining(self) -> bool: + """Check if a token has capacity to make more calls. + + Returns: + True if the token is valid and has enough api calls remaining. + """ + if self.token_expires_at is not None: + close_to_expiry = datetime.now() > self.token_expires_at - timedelta( + minutes=self.expiry_time_buffer + ) + + if close_to_expiry: + self.claim_token() + if self.token is None: + if self.logger: + self.logger.warn("GitHub app token refresh failed.") + return False + else: + if self.logger: + self.logger.info("GitHub app token refresh succeeded.") + + return super().has_calls_remaining() + class GitHubTokenAuthenticator(APIAuthenticatorBase): """Base class for offloading API auth.""" @@ -217,6 +252,7 @@ def prepare_tokens(self) -> List[TokenManager]: env_dict = self.get_env() rate_limit_buffer = self._config.get("rate_limit_buffer", None) + expiry_time_buffer = self._config.get("expiry_time_buffer", None) personal_tokens: Set[str] = set() if "auth_token" in self._config: @@ -255,7 +291,10 @@ def prepare_tokens(self) -> List[TokenManager]: env_key = env_dict["GITHUB_APP_PRIVATE_KEY"] try: app_token_manager = AppTokenManager( - env_key, rate_limit_buffer=rate_limit_buffer, logger=self.logger + env_key, + rate_limit_buffer=rate_limit_buffer, + expiry_time_buffer=expiry_time_buffer, + logger=self.logger, ) if app_token_manager.is_valid_token(): token_managers.append(app_token_manager) diff --git a/tap_github/tests/test_authenticator.py b/tap_github/tests/test_authenticator.py index 2a325bb1..516a6eea 100644 --- a/tap_github/tests/test_authenticator.py +++ b/tap_github/tests/test_authenticator.py @@ -1,5 +1,5 @@ import re -from datetime import datetime, timedelta +from datetime import datetime, timedelta, timezone from unittest.mock import MagicMock, patch import pytest @@ -41,7 +41,7 @@ def test_update_rate_limit(self): assert token_manager.rate_limit == 5000 assert token_manager.rate_limit_remaining == 4999 - assert token_manager.rate_limit_reset == 1372700873 + assert token_manager.rate_limit_reset == datetime(2013, 7, 1, 17, 47, 53) assert token_manager.rate_limit_used == 1 def test_is_valid_token_successful(self): @@ -165,6 +165,135 @@ def test_successful_token_generation(self): assert token_manager.token == "valid_token" assert token_manager.token_expires_at == token_time + def test_has_calls_remaining_regenerates_a_token_if_close_to_expiry(self): + unexpired_time = datetime.now() + timedelta(days=1) + expired_time = datetime.now() - timedelta(days=1) + with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", unexpired_time), + ): + mock_response_headers = { + "X-RateLimit-Limit": "5000", + "X-RateLimit-Remaining": "4999", + "X-RateLimit-Reset": "1372700873", + "X-RateLimit-Used": "1", + } + + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + token_manager.logger = MagicMock() + token_manager.token_expires_at = expired_time + token_manager.update_rate_limit(mock_response_headers) + + assert token_manager.has_calls_remaining() + # calling has_calls_remaining() will trigger the token generation function to be called again, + # so token_expires_at should have been reset back to the mocked unexpired_time + assert token_manager.token_expires_at == unexpired_time + token_manager.logger.info.assert_called_once() + assert ( + "GitHub app token refresh succeeded." + in token_manager.logger.info.call_args[0][0] + ) + + def test_has_calls_remaining_logs_warning_if_token_regeneration_fails(self): + unexpired_time = datetime.now() + timedelta(days=1) + expired_time = datetime.now() - timedelta(days=1) + with patch.object( + AppTokenManager, "is_valid_token", return_value=True + ) as mock_is_valid, patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", unexpired_time), + ): + mock_response_headers = { + "X-RateLimit-Limit": "5000", + "X-RateLimit-Remaining": "4999", + "X-RateLimit-Reset": "1372700873", + "X-RateLimit-Used": "1", + } + + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + token_manager.logger = MagicMock() + token_manager.token_expires_at = expired_time + token_manager.update_rate_limit(mock_response_headers) + + mock_is_valid.return_value = False + assert not token_manager.has_calls_remaining() + token_manager.logger.warn.assert_called_once() + assert ( + "GitHub app token refresh failed." + in token_manager.logger.warn.call_args[0][0] + ) + + def test_has_calls_remaining_succeeds_if_token_new_and_never_used(self): + unexpired_time = datetime.now() + timedelta(days=1) + with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", unexpired_time), + ): + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + assert token_manager.has_calls_remaining() + + def test_has_calls_remaining_succeeds_if_time_and_requests_left(self): + unexpired_time = datetime.now() + timedelta(days=1) + with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", unexpired_time), + ): + mock_response_headers = { + "X-RateLimit-Limit": "5000", + "X-RateLimit-Remaining": "4999", + "X-RateLimit-Reset": "1372700873", + "X-RateLimit-Used": "1", + } + + token_manager = AppTokenManager("12345;;key\\ncontent;;67890") + token_manager.update_rate_limit(mock_response_headers) + + assert token_manager.has_calls_remaining() + + def test_has_calls_remaining_succeeds_if_time_left_and_reset_time_reached(self): + unexpired_time = datetime.now() + timedelta(days=1) + with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", unexpired_time), + ): + mock_response_headers = { + "X-RateLimit-Limit": "5000", + "X-RateLimit-Remaining": "1", + "X-RateLimit-Reset": "1372700873", + "X-RateLimit-Used": "4999", + } + + token_manager = AppTokenManager( + "12345;;key\\ncontent;;67890", rate_limit_buffer=1000 + ) + token_manager.update_rate_limit(mock_response_headers) + + assert token_manager.has_calls_remaining() + + def test_has_calls_remaining_fails_if_time_left_and_few_calls_remaining_and_reset_time_not_reached( + self, + ): + unexpired_time = datetime.now() + timedelta(days=1) + with patch.object(AppTokenManager, "is_valid_token", return_value=True), patch( + "tap_github.authenticator.generate_app_access_token", + return_value=("valid_token", unexpired_time), + ): + mock_response_headers = { + "X-RateLimit-Limit": "5000", + "X-RateLimit-Remaining": "1", + "X-RateLimit-Reset": str( + int((datetime.now() + timedelta(days=100)).timestamp()) + ), + "X-RateLimit-Used": "4999", + } + + token_manager = AppTokenManager( + "12345;;key\\ncontent;;67890", rate_limit_buffer=1000 + ) + token_manager.update_rate_limit(mock_response_headers) + + assert not token_manager.has_calls_remaining() + @pytest.fixture def mock_stream():