Skip to content

Commit dbb02a1

Browse files
authored
Initial commit for new runtime and base class (#360)
The idea here is to split the responsibilities of the current base class into two: a runtime and an extractor. The runtime is responsible for parsing command-line arguments, loading config files, and so on, before spawning the extractor in a separate process. The runtime will automatically restart the extractor if it crashes, but the extractor can also ask the runtime to restart it - for example, after a config change. The extractor class is then only responsible for running the extractor application itself, making it much cleaner. Also drops Python 3.9 support.
1 parent f969745 commit dbb02a1

File tree

11 files changed

+457
-64
lines changed

11 files changed

+457
-64
lines changed

.github/workflows/release.yaml

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -2,43 +2,43 @@ name: release
22

33
on:
44
push:
5-
branches: [ master ]
5+
branches: [master]
66

77
jobs:
88
test:
9-
uses: ./.github/workflows/test_and_build.yml
10-
secrets: inherit
9+
uses: ./.github/workflows/test_and_build.yml
10+
secrets: inherit
1111

1212
build:
1313
runs-on: ubuntu-latest
1414
environment: CD
1515

1616
needs:
17-
- test
17+
- test
1818

1919
steps:
20-
- uses: actions/checkout@v4
21-
22-
- name: Set up Python
23-
uses: actions/setup-python@v5
24-
with:
25-
python-version: 3.9
26-
27-
- name: Install dependencies
28-
run: |
29-
python3 -m pip install --upgrade pip poetry
30-
poetry config virtualenvs.create false
31-
poetry lock
32-
poetry install
33-
34-
- name: Build package
35-
run: poetry build
36-
37-
- name: Build docs
38-
run: cd docs && make html
39-
40-
- name: Release to PyPI
41-
env:
42-
TWINE_USERNAME: __token__
43-
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
44-
run: twine upload --verbose dist/* || echo 'Version exists'
20+
- uses: actions/checkout@v4
21+
22+
- name: Set up Python
23+
uses: actions/setup-python@v5
24+
with:
25+
python-version: "3.10"
26+
27+
- name: Install dependencies
28+
run: |
29+
python3 -m pip install --upgrade pip poetry
30+
poetry config virtualenvs.create false
31+
poetry lock
32+
poetry install
33+
34+
- name: Build package
35+
run: poetry build
36+
37+
- name: Build docs
38+
run: cd docs && make html
39+
40+
- name: Release to PyPI
41+
env:
42+
TWINE_USERNAME: __token__
43+
TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
44+
run: twine upload --verbose dist/* || echo 'Version exists'

.github/workflows/test_and_build.yml

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ name: test_and_build
22

33
on:
44
pull_request:
5-
branches: [ master ]
5+
branches: [master]
66
workflow_call:
77

88
jobs:
@@ -12,28 +12,28 @@ jobs:
1212
strategy:
1313
fail-fast: false
1414
matrix:
15-
python-version: [3.9, "3.10", "3.11", "3.12"]
15+
python-version: ["3.10", "3.11", "3.12"]
1616

1717
steps:
18-
- uses: actions/checkout@v4
18+
- uses: actions/checkout@v4
1919

20-
- name: Set up Python ${{ matrix.python-version }}
21-
uses: actions/setup-python@v5
22-
with:
23-
python-version: ${{ matrix.python-version }}
20+
- name: Set up Python ${{ matrix.python-version }}
21+
uses: actions/setup-python@v5
22+
with:
23+
python-version: ${{ matrix.python-version }}
2424

25-
- name: Install dependencies
26-
run: |
27-
python3 -m pip install --upgrade pip poetry
28-
poetry config virtualenvs.create false
29-
poetry lock
30-
poetry install
25+
- name: Install dependencies
26+
run: |
27+
python3 -m pip install --upgrade pip poetry
28+
poetry config virtualenvs.create false
29+
poetry lock
30+
poetry install
3131
32-
- name: Check codestyle
33-
run: pre-commit run --all
32+
- name: Check codestyle
33+
run: pre-commit run --all
3434

35-
- name: Run tests
36-
env:
35+
- name: Run tests
36+
env:
3737
COGNITE_CLIENT_ID: ${{ secrets.COGNITE_PROJECT_CLIENT_ID }}
3838
COGNITE_CLIENT_SECRET: ${{ secrets.COGNITE_PROJECT_CLIENT_SECRET }}
3939
COGNITE_TOKEN_SCOPES: ${{ secrets.COGNITE_PROJECT_SCOPES }}
@@ -46,17 +46,17 @@ jobs:
4646
COGNITE_DEV_PROJECT: extractor-aws-dub-dev-testing
4747
COGNITE_DEV_BASE_URL: https://aws-dub-dev.cognitedata.com/
4848
COGNITE_DEV_TOKEN_SCOPES: https://aws-dub-dev.cognitedata.com/.default
49-
run: |
50-
coverage run --source cognite.extractorutils -m pytest -v tests
51-
coverage xml
49+
run: |
50+
coverage run --source cognite.extractorutils -m pytest -v tests
51+
coverage xml
5252
53-
- uses: codecov/codecov-action@v4
54-
with:
55-
token: ${{ secrets.CODECOV_TOKEN }}
56-
file: ./coverage.xml
53+
- uses: codecov/codecov-action@v4
54+
with:
55+
token: ${{ secrets.CODECOV_TOKEN }}
56+
file: ./coverage.xml
5757

58-
- name: Build package
59-
run: poetry build
58+
- name: Build package
59+
run: poetry build
6060

61-
- name: Build docs
62-
run: cd docs && make html
61+
- name: Build docs
62+
run: cd docs && make html

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ __pycache__/
66
# Local test files
77
test.py
88
test.yaml
9+
local-test.yaml
910
pyrightconfig.json
1011

1112
# Tokens, etc

cognite/extractorutils/configtools/_util.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,10 @@ def translate_camel(key: str) -> str:
8181
raise ValueError(f"Invalid case style: {case_style}")
8282

8383

84-
def _load_certificate_data(cert_path: str, password: Optional[str]) -> Union[Tuple[str, str], Tuple[bytes, bytes]]:
85-
path = Path(cert_path)
84+
def _load_certificate_data(
85+
cert_path: str | Path, password: Optional[str]
86+
) -> Union[Tuple[str, str], Tuple[bytes, bytes]]:
87+
path = Path(cert_path) if isinstance(cert_path, str) else cert_path
8688
cert_data = Path(path).read_bytes()
8789

8890
if path.suffix == ".pem":

cognite/extractorutils/unstable/configuration/loaders.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from enum import Enum
33
from io import StringIO
44
from pathlib import Path
5-
from typing import Dict, Optional, TextIO, Type, TypeVar, Union
5+
from typing import Dict, Optional, TextIO, Tuple, Type, TypeVar, Union
66

77
from pydantic import ValidationError
88

@@ -33,7 +33,7 @@ def load_file(path: Path, schema: Type[_T]) -> _T:
3333

3434
def load_from_cdf(
3535
cognite_client: CogniteClient, external_id: str, schema: Type[_T], revision: Optional[int] = None
36-
) -> _T:
36+
) -> Tuple[_T, int]:
3737
params: Dict[str, Union[str, int]] = {"externalId": external_id}
3838
if revision:
3939
params["revision"] = revision
@@ -44,7 +44,7 @@ def load_from_cdf(
4444
)
4545
response.raise_for_status()
4646
data = response.json()
47-
return load_io(StringIO(data["config"]), ConfigFormat.YAML, schema)
47+
return load_io(StringIO(data["config"]), ConfigFormat.YAML, schema), data["revision"]
4848

4949

5050
def load_io(stream: TextIO, format: ConfigFormat, schema: Type[_T]) -> _T:

cognite/extractorutils/unstable/configuration/models.py

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,16 @@
77
from humps import kebabize
88
from pydantic import BaseModel, ConfigDict, Field, GetCoreSchemaHandler
99
from pydantic_core import CoreSchema, core_schema
10-
10+
from typing_extensions import assert_never
11+
12+
from cognite.client import CogniteClient
13+
from cognite.client.config import ClientConfig
14+
from cognite.client.credentials import (
15+
CredentialProvider,
16+
OAuthClientCertificate,
17+
OAuthClientCredentials,
18+
)
19+
from cognite.extractorutils.configtools._util import _load_certificate_data
1120
from cognite.extractorutils.exceptions import InvalidConfigError
1221

1322

@@ -33,7 +42,9 @@ class _ClientCredentialsConfig(ConfigModel):
3342
class _ClientCertificateConfig(ConfigModel):
3443
type: Literal["client-certificate"]
3544
client_id: str
36-
certificate_path: Path
45+
path: Path
46+
password: Optional[str] = None
47+
authority_url: str
3748
scopes: List[str]
3849

3950

@@ -121,6 +132,7 @@ class _ConnectionParameters(ConfigModel):
121132
max_connection_pool_size: int = 50
122133
ssl_verify: bool = True
123134
proxies: Dict[str, str] = Field(default_factory=dict)
135+
timeout: TimeIntervalConfig = Field(default_factory=lambda: TimeIntervalConfig("30s"))
124136

125137

126138
class ConnectionConfig(ConfigModel):
@@ -133,6 +145,61 @@ class ConnectionConfig(ConfigModel):
133145

134146
connection: _ConnectionParameters = Field(default_factory=_ConnectionParameters)
135147

148+
def get_cognite_client(self, client_name: str) -> CogniteClient:
149+
from cognite.client.config import global_config
150+
151+
global_config.disable_pypi_version_check = True
152+
global_config.disable_gzip = not self.connection.gzip_compression
153+
global_config.status_forcelist = set(self.connection.status_forcelist)
154+
global_config.max_retries = self.connection.max_retries
155+
global_config.max_retries_connect = self.connection.max_retries_connect
156+
global_config.max_retry_backoff = self.connection.max_retry_backoff.seconds
157+
global_config.max_connection_pool_size = self.connection.max_connection_pool_size
158+
global_config.disable_ssl = not self.connection.ssl_verify
159+
global_config.proxies = self.connection.proxies
160+
161+
credential_provider: CredentialProvider
162+
match self.authentication:
163+
case _ClientCredentialsConfig() as client_credentials:
164+
kwargs = {
165+
"token_url": client_credentials.token_url,
166+
"client_id": client_credentials.client_id,
167+
"client_secret": client_credentials.client_secret,
168+
"scopes": client_credentials.scopes,
169+
}
170+
if client_credentials.audience is not None:
171+
kwargs["audience"] = client_credentials.audience
172+
if client_credentials.resource is not None:
173+
kwargs["resource"] = client_credentials.resource
174+
175+
credential_provider = OAuthClientCredentials(**kwargs) # type: ignore # I know what I'm doing
176+
177+
case _ClientCertificateConfig() as client_certificate:
178+
thumbprint, key = _load_certificate_data(
179+
client_certificate.path,
180+
client_certificate.password,
181+
)
182+
credential_provider = OAuthClientCertificate(
183+
authority_url=client_certificate.authority_url,
184+
client_id=client_certificate.client_id,
185+
cert_thumbprint=str(thumbprint),
186+
certificate=str(key),
187+
scopes=client_certificate.scopes,
188+
)
189+
190+
case _:
191+
assert_never(self.authentication)
192+
193+
client_config = ClientConfig(
194+
project=self.project,
195+
base_url=self.base_url,
196+
client_name=client_name,
197+
timeout=self.connection.timeout.seconds,
198+
credentials=credential_provider,
199+
)
200+
201+
return CogniteClient(client_config)
202+
136203

137204
class LogLevel(Enum):
138205
CRITICAL = "CRITICAL"

cognite/extractorutils/unstable/core/__init__.py

Whitespace-only changes.
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""
2+
Example of how you would build an extractor with the new base class
3+
"""
4+
5+
from cognite.extractorutils.unstable.configuration.models import ExtractorConfig
6+
7+
from .base import Extractor
8+
from .runtime import Runtime
9+
10+
11+
class MyConfig(ExtractorConfig):
12+
parameter_one: int
13+
parameter_two: str
14+
15+
16+
class MyExtractor(Extractor[MyConfig]):
17+
NAME = "Test extractor"
18+
EXTERNAL_ID = "test-extractor"
19+
DESCRIPTION = "Test of the new runtime"
20+
VERSION = "1.0.0"
21+
CONFIG_TYPE = MyConfig
22+
23+
def run(self) -> None:
24+
self.logger.info("Started!")
25+
if not self.cancellation_token.wait(10):
26+
raise ValueError("Oops")
27+
28+
29+
if __name__ == "__main__":
30+
runtime = Runtime(MyExtractor)
31+
runtime.run()
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
from enum import Enum
2+
3+
4+
class RuntimeMessage(Enum):
5+
RESTART = 1

0 commit comments

Comments
 (0)