From ec1cc8520b48b4d52d861922d7f322f46ac132f7 Mon Sep 17 00:00:00 2001 From: christophercarlon Date: Wed, 10 Jul 2024 23:20:06 +0100 Subject: [PATCH] Updates 2024-07-10 - Added in unit tests and basic search functionality to Cats Explorer library --- .../workflows/herding_cats_explorer_tests.yml | 20 ++++ ...{lambda_zipper.yml => pipeline_deploy.yml} | 0 herding_cats_explorer/__init__.py | 0 herding_cats_explorer/cats_errors.py | 8 ++ herding_cats_explorer/exploring_cats.py | 67 ----------- herding_cats_explorer/herding_cats.py | 110 ++++++++++++++++++ herding_cats_pipelines/lambda_jobs/main.py | 3 +- pyproject.toml | 5 +- tests/endpoints_still_active.py | 20 ++++ 9 files changed, 164 insertions(+), 69 deletions(-) create mode 100644 .github/workflows/herding_cats_explorer_tests.yml rename .github/workflows/{lambda_zipper.yml => pipeline_deploy.yml} (100%) create mode 100644 herding_cats_explorer/__init__.py create mode 100644 herding_cats_explorer/cats_errors.py delete mode 100644 herding_cats_explorer/exploring_cats.py create mode 100644 herding_cats_explorer/herding_cats.py create mode 100644 tests/endpoints_still_active.py diff --git a/.github/workflows/herding_cats_explorer_tests.yml b/.github/workflows/herding_cats_explorer_tests.yml new file mode 100644 index 0000000..9a66b29 --- /dev/null +++ b/.github/workflows/herding_cats_explorer_tests.yml @@ -0,0 +1,20 @@ +name: Herding Cats Lambda Deploy +on: + push: + branches: + - main + paths: + - 'herding_cats_explorer/**' +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: '3.10' + - name: Install pytest + run: pip install pytest + - name: Run tests + run: pytest tests/ \ No newline at end of file diff --git a/.github/workflows/lambda_zipper.yml b/.github/workflows/pipeline_deploy.yml similarity index 100% rename from .github/workflows/lambda_zipper.yml rename to .github/workflows/pipeline_deploy.yml diff --git a/herding_cats_explorer/__init__.py b/herding_cats_explorer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/herding_cats_explorer/cats_errors.py b/herding_cats_explorer/cats_errors.py new file mode 100644 index 0000000..34cf3c2 --- /dev/null +++ b/herding_cats_explorer/cats_errors.py @@ -0,0 +1,8 @@ +class CATExploreError(Exception): + """Base exception for CATExplore""" + +class CKANFetchError(CATExploreError): + """Raised when CKAN fetch fails""" + +class DCATFetchError(CATExploreError): + """Raised when DCAT fetch fails""" \ No newline at end of file diff --git a/herding_cats_explorer/exploring_cats.py b/herding_cats_explorer/exploring_cats.py deleted file mode 100644 index a5f8b3a..0000000 --- a/herding_cats_explorer/exploring_cats.py +++ /dev/null @@ -1,67 +0,0 @@ -import requests -from typing import Any, Dict - -from loguru import logger - -class CATExplore: - def __init__(self, domain: str) -> None: - self.domain = domain - - def fetch_sample(self) -> Dict[str, Any]: - try: - try: - return self.fetch_ckan_sample() - except Exception as ckan_error: - logger.error(f"CKAN fetch failed: {ckan_error} - Attempting DCAT") - try: - return self.fetch_dcat_sample() - except Exception as dcat_error: - logger.error(f"DCAT fetch failed: {dcat_error}") - raise Exception("Both CKAN and DCAT fetches failed") from dcat_error - except requests.exceptions.RequestException as error: - logger.error(f"An error occurred during the request: {error}") - raise - - def fetch_ckan_sample(self) -> dict: - url = f"https://{self.domain}/api/3/action/package_search" - response = requests.get(url, timeout=15) - response.raise_for_status() - data = response.json() - - if 'result' in data: - if 'results' in data['result'] and data['result']['results']: - return data['result']['results'][0] - elif 'result' in data['result'] and data['result']['result']: - return data['result']['result'][0] - raise Exception("Expected data structure not found in CKAN response") - - def fetch_dcat_sample(self) -> dict: - url = f"https://{self.domain}/api/feed/dcat-ap/2.1.1.json" - response = requests.get(url, timeout=15) - response.raise_for_status() - data = response.json() - - if 'dcat:dataset' in data and isinstance(data['dcat:dataset'], list): - return data['dcat:dataset'][0] - raise Exception("Expected data structure not found in DCAT response") - - @staticmethod - def print_structure(data: Dict[str, Any], indent: int = 0, key: str = "root"): - if isinstance(data, dict): - print(f"{' ' * indent}{key}:") - for k, v in data.items(): - CATExplore.print_structure(v, indent + 1, k) - elif isinstance(data, list): - print(f"{' ' * indent}{key}: (list of {len(data)} items)") - if data: - CATExplore.print_structure(data[0], indent + 1, f"{key}[0]") - else: - value_type = type(data).__name__ - value_preview = str(data)[:50] + "..." if len(str(data)) > 50 else str(data) - print(f"{' ' * indent}{key}: ({value_type}) {value_preview}") - -# Example usage -if __name__ == "__main__": - explorer = CATExplore("data.london.gov.uk") - result = explorer.fetch_sample() - explorer.print_structure(result) diff --git a/herding_cats_explorer/herding_cats.py b/herding_cats_explorer/herding_cats.py new file mode 100644 index 0000000..a196361 --- /dev/null +++ b/herding_cats_explorer/herding_cats.py @@ -0,0 +1,110 @@ +import requests + +from typing import Any, Dict +from loguru import logger +from pprint import pprint + +from herding_cats_explorer.cats_errors import CATExploreError, CKANFetchError, DCATFetchError + + + +class CATExplore: + + CKAN_API_PATH = "/api/3/action/{}" + DCAT_API_PATH = "/api/feed/dcat-ap/2.1.1.json" + REQUEST_TIMEOUT = 15 + + def __init__(self, domain: str) -> None: + """Initialise CATExplore with a domain.""" + self.domain = domain + + # DATA SAMPLES + def fetch_sample(self) -> Dict[str, Any]: + """Fetch a sample from either CKAN or DCAT API.""" + try: + return self.fetch_ckan_sample() + except CKANFetchError as ckan_error: + logger.error(f"CKAN fetch failed: {ckan_error} - Attempting DCAT") + try: + return self.fetch_dcat_sample() + except DCATFetchError as dcat_error: + logger.error(f"DCAT fetch failed: {dcat_error}") + raise CATExploreError("Both CKAN and DCAT fetches failed") from dcat_error + + def fetch_ckan_sample(self, endpoint: str = "package_search") -> Dict[str, Any]: + """Fetch a sample from CKAN API.""" + url = f"https://{self.domain}{self.CKAN_API_PATH.format(endpoint)}" + data = self._make_request(url) + return self._extract_ckan_result_sample(data) + + def fetch_dcat_sample(self) -> Dict[str, Any]: + """Fetch a sample from DCAT API.""" + url = f"https://{self.domain}{self.DCAT_API_PATH}" + data = self._make_request(url) + return self._extract_dcat_result_sample(data) + + # SEARCH DATA + def basic_search_ckan_data(self, user_input: str, endpoint: str = "package_search") -> Dict[str, Any]: + try: + url = f"https://{self.domain}{self.CKAN_API_PATH.format(endpoint)}" + params = { + "q": user_input + } + return self._make_request(url, params) + except requests.exceptions.RequestException as error: + logger.error(f"An error occurred during the request: {error}") + raise + + # UTILITY FUNCTIONS + def _make_request(self, url: str, params: Dict[str, Any] = None) -> Dict[str, Any]: + """Make a GET request to the specified URL with optional parameters.""" + try: + response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT) + response.raise_for_status() + return response.json() + except requests.exceptions.RequestException as error: + logger.error(f"An error occurred during the request: {error}") + raise + + @staticmethod + def _extract_ckan_result_sample(data: Dict[str, Any]) -> Dict[str, Any]: + """Extract the first result from CKAN API response.""" + if 'result' in data: + if 'results' in data['result'] and data['result']['results']: + return data['result']['results'][0] + elif 'result' in data['result'] and data['result']['result']: + return data['result']['result'][0] + raise CKANFetchError("Expected data structure not found in CKAN response") + + @staticmethod + def _extract_dcat_result_sample(data: Dict[str, Any]) -> Dict[str, Any]: + """Extract the first result from DCAT API response.""" + if 'dcat:dataset' in data and isinstance(data['dcat:dataset'], list): + return data['dcat:dataset'][0] + raise DCATFetchError("Expected data structure not found in DCAT response") + + @staticmethod + def print_structure(data: Any, indent: int = 0, key: str = "root") -> None: + """Print the structure of any data type.""" + if isinstance(data, dict): + print(f"{' ' * indent}{key}:") + for k, v in data.items(): + CATExplore.print_structure(v, indent + 1, k) + elif isinstance(data, list): + print(f"{' ' * indent}{key}: (list of {len(data)} items)") + if data: + CATExplore.print_structure(data[0], indent + 1, f"{key}[0]") + else: + value_type = type(data).__name__ + value_preview = str(data)[:50] + "..." if len(str(data)) > 50 else str(data) + print(f"{' ' * indent}{key}: ({value_type}) {value_preview}") + + @staticmethod + def pretty_print_helper(data: Any) -> None: + return pprint(data) + +# Example usage +if __name__ == "__main__": + explorer = CATExplore("data.london.gov.uk") + result = explorer.basic_search_ckan_data("climate") + explorer.pretty_print_helper(result) diff --git a/herding_cats_pipelines/lambda_jobs/main.py b/herding_cats_pipelines/lambda_jobs/main.py index 9f6a3cc..dc8278e 100644 --- a/herding_cats_pipelines/lambda_jobs/main.py +++ b/herding_cats_pipelines/lambda_jobs/main.py @@ -18,7 +18,8 @@ def lambda_handler(event, context) -> json: catalogues_list = [ "https://data.london.gov.uk/api/action/package_search", - "https://opendata.bristol.gov.uk/api/feed/dcat-ap/2.1.1.json" + "https://opendata.bristol.gov.uk/api/feed/dcat-ap/2.1.1.json", + "https://www.data.gov.uk/api/action/package_search" ] try: diff --git a/pyproject.toml b/pyproject.toml index 82ba4ed..6903f27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,11 @@ openpyxl = "^3.1.5" ruff = "^0.5.1" tabulate = "^0.9.0" boto3 = "^1.34.140" - +pytest = "^8.2.2" [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" + +[tool.poetry.dev-dependencies] +pytest = "^8.2.2" \ No newline at end of file diff --git a/tests/endpoints_still_active.py b/tests/endpoints_still_active.py new file mode 100644 index 0000000..4639766 --- /dev/null +++ b/tests/endpoints_still_active.py @@ -0,0 +1,20 @@ +import pytest +import requests +from herding_cats_explorer.herding_cats import CATExplore + +@pytest.fixture +def cat_explore(): + return CATExplore("data.london.gov.uk") + +@pytest.mark.parametrize("endpoint", [ + "package_search" +]) +def test_fetch_ckan_sample_endpoint_active(cat_explore, endpoint): + try: + # Attempt to fetch data from the endpoint + result = cat_explore.fetch_ckan_sample(endpoint) + + # If we get here, the request was successful + assert True, f"Endpoint {endpoint} is active" + except requests.exceptions.RequestException as e: + pytest.fail(f"Endpoint {endpoint} is not active: {str(e)}") \ No newline at end of file