Skip to content

Commit

Permalink
Updates 2024-07-10 - Added in unit tests and basic search functionali…
Browse files Browse the repository at this point in the history
…ty to Cats Explorer library
  • Loading branch information
CHRISCARLON committed Jul 10, 2024
1 parent 4549ebd commit ec1cc85
Show file tree
Hide file tree
Showing 9 changed files with 164 additions and 69 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/herding_cats_explorer_tests.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Runs the Cats Explorer test suite whenever the library changes on main.
name: Herding Cats Explorer Tests
on:
  push:
    branches:
      - main
    paths:
      - 'herding_cats_explorer/**'
jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.10'
      - name: Install test dependencies
        # herding_cats.py imports requests and loguru at module import time,
        # so pytest alone is not enough to import the code under test.
        run: pip install pytest requests loguru
      - name: Run tests
        # The test module is passed explicitly because its file name does not
        # match pytest's default test_*.py / *_test.py discovery patterns.
        # "python -m pytest" also puts the repository root on sys.path so the
        # herding_cats_explorer package can be imported without installing it.
        run: python -m pytest tests/endpoints_still_active.py
File renamed without changes.
Empty file.
8 changes: 8 additions & 0 deletions herding_cats_explorer/cats_errors.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class CATExploreError(Exception):
    """Root of the CATExplore exception hierarchy.

    Catch this type to handle every error defined by this module.
    """

class CKANFetchError(CATExploreError):
    """Signals that fetching data from a CKAN API did not succeed."""

class DCATFetchError(CATExploreError):
    """Signals that fetching data from a DCAT API did not succeed."""
67 changes: 0 additions & 67 deletions herding_cats_explorer/exploring_cats.py

This file was deleted.

110 changes: 110 additions & 0 deletions herding_cats_explorer/herding_cats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
from pprint import pprint
from typing import Any, Dict, Optional

import requests
from loguru import logger

from herding_cats_explorer.cats_errors import CATExploreError, CKANFetchError, DCATFetchError



class CATExplore:
    """Explore an open-data catalogue through its CKAN or DCAT HTTP API.

    All URLs are built as ``https://<domain><api path>``, so ``domain`` must be
    a bare host name without a scheme.
    """

    # Template for CKAN action endpoints; "{}" is replaced by the action name.
    CKAN_API_PATH = "/api/3/action/{}"
    # Path of the DCAT-AP JSON feed.
    DCAT_API_PATH = "/api/feed/dcat-ap/2.1.1.json"
    # Timeout passed to every ``requests.get`` call (seconds).
    REQUEST_TIMEOUT = 15

    def __init__(self, domain: str) -> None:
        """Initialise CATExplore with a domain.

        Args:
            domain: Host name of the catalogue, e.g. ``"data.london.gov.uk"``.
        """
        self.domain = domain

    # DATA SAMPLES
    def fetch_sample(self) -> Dict[str, Any]:
        """Fetch a sample record, trying the CKAN API first and then DCAT.

        Returns:
            The first record found by whichever API answers successfully.

        Raises:
            CATExploreError: If both the CKAN and the DCAT attempts fail,
                whether because the HTTP request failed or because the
                response did not have the expected structure.
        """
        try:
            return self.fetch_ckan_sample()
        # A failed HTTP request surfaces as RequestException, not as
        # CKANFetchError, so both must trigger the DCAT fallback.
        except (CKANFetchError, requests.exceptions.RequestException) as ckan_error:
            logger.error(f"CKAN fetch failed: {ckan_error} - Attempting DCAT")
            try:
                return self.fetch_dcat_sample()
            except (DCATFetchError, requests.exceptions.RequestException) as dcat_error:
                logger.error(f"DCAT fetch failed: {dcat_error}")
                raise CATExploreError("Both CKAN and DCAT fetches failed") from dcat_error

    def fetch_ckan_sample(self, endpoint: str = "package_search") -> Dict[str, Any]:
        """Fetch a sample from CKAN API.

        Args:
            endpoint: Name of the CKAN action to call.

        Returns:
            The first record of the CKAN response.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails.
            CKANFetchError: If the response has no usable result list.
        """
        url = f"https://{self.domain}{self.CKAN_API_PATH.format(endpoint)}"
        data = self._make_request(url)
        return self._extract_ckan_result_sample(data)

    def fetch_dcat_sample(self) -> Dict[str, Any]:
        """Fetch a sample from DCAT API.

        Returns:
            The first dataset of the DCAT feed.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails.
            DCATFetchError: If the response has no usable dataset list.
        """
        url = f"https://{self.domain}{self.DCAT_API_PATH}"
        data = self._make_request(url)
        return self._extract_dcat_result_sample(data)

    # SEARCH DATA
    def basic_search_ckan_data(self, user_input: str, endpoint: str = "package_search") -> Dict[str, Any]:
        """Run a free-text search against a CKAN action endpoint.

        Args:
            user_input: Search text, sent as the ``q`` query parameter.
            endpoint: Name of the CKAN action to call.

        Returns:
            The decoded JSON response, unmodified.

        Raises:
            requests.exceptions.RequestException: If the HTTP request fails.
        """
        url = f"https://{self.domain}{self.CKAN_API_PATH.format(endpoint)}"
        # _make_request already logs and re-raises request failures, so no
        # extra try/except is needed here (it would log every error twice).
        return self._make_request(url, {"q": user_input})

    # UTILITY FUNCTIONS
    def _make_request(self, url: str, params: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Make a GET request to the specified URL with optional parameters.

        Args:
            url: Fully qualified URL to request.
            params: Optional query-string parameters.

        Returns:
            The decoded JSON body of the response.

        Raises:
            requests.exceptions.RequestException: On connection problems,
                timeouts or HTTP error status codes. The error is logged
                before being re-raised.
        """
        try:
            response = requests.get(url, params=params, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
            return response.json()
        except requests.exceptions.RequestException as error:
            logger.error(f"An error occurred during the request: {error}")
            raise

    @staticmethod
    def _extract_ckan_result_sample(data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the first result from CKAN API response.

        Looks for a non-empty list under ``data["result"]["results"]`` and,
        failing that, under ``data["result"]["result"]``.

        Raises:
            CKANFetchError: If neither location holds a non-empty list.
        """
        result = data.get('result') if isinstance(data, dict) else None
        if isinstance(result, dict):
            for key in ('results', 'result'):
                records = result.get(key)
                # Require a real, non-empty list so malformed payloads end in
                # CKANFetchError rather than a stray KeyError/TypeError.
                if isinstance(records, list) and records:
                    return records[0]
        raise CKANFetchError("Expected data structure not found in CKAN response")

    @staticmethod
    def _extract_dcat_result_sample(data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the first result from DCAT API response.

        Raises:
            DCATFetchError: If ``data["dcat:dataset"]`` is missing, is not a
                list, or is an empty list.
        """
        datasets = data.get('dcat:dataset') if isinstance(data, dict) else None
        # The emptiness check keeps an empty feed from raising IndexError.
        if isinstance(datasets, list) and datasets:
            return datasets[0]
        raise DCATFetchError("Expected data structure not found in DCAT response")

    @staticmethod
    def print_structure(data: Any, indent: int = 0, key: str = "root") -> None:
        """Print the structure of any data type.

        Dicts are walked key by key. For lists only the length and the first
        element are shown. Any other value is printed with its type name and
        a preview truncated to 50 characters.

        Args:
            data: Value to describe.
            indent: Current nesting depth, used to pad the output.
            key: Label printed for ``data``.
        """
        pad = ' ' * indent
        if isinstance(data, dict):
            print(f"{pad}{key}:")
            for child_key, child_value in data.items():
                CATExplore.print_structure(child_value, indent + 1, child_key)
        elif isinstance(data, list):
            print(f"{pad}{key}: (list of {len(data)} items)")
            if data:
                CATExplore.print_structure(data[0], indent + 1, f"{key}[0]")
        else:
            text = str(data)
            preview = text[:50] + "..." if len(text) > 50 else text
            print(f"{pad}{key}: ({type(data).__name__}) {preview}")

    @staticmethod
    def pretty_print_helper(data: Any) -> None:
        """Pretty-print ``data`` to standard output using ``pprint``."""
        pprint(data)

# Example usage: search one catalogue for "climate" and pretty-print the
# raw JSON response.
if __name__ == "__main__":
    london_catalogue = CATExplore("data.london.gov.uk")
    search_response = london_catalogue.basic_search_ckan_data("climate")
    CATExplore.pretty_print_helper(search_response)
3 changes: 2 additions & 1 deletion herding_cats_pipelines/lambda_jobs/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def lambda_handler(event, context) -> json:

catalogues_list = [
"https://data.london.gov.uk/api/action/package_search",
"https://opendata.bristol.gov.uk/api/feed/dcat-ap/2.1.1.json"
"https://opendata.bristol.gov.uk/api/feed/dcat-ap/2.1.1.json",
"https://www.data.gov.uk/api/action/package_search"
]

try:
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@ openpyxl = "^3.1.5"
ruff = "^0.5.1"
tabulate = "^0.9.0"
boto3 = "^1.34.140"

pytest = "^8.2.2"

[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

[tool.poetry.dev-dependencies]
pytest = "^8.2.2"
20 changes: 20 additions & 0 deletions tests/endpoints_still_active.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import pytest
import requests
from herding_cats_explorer.herding_cats import CATExplore

@pytest.fixture
def cat_explore():
    """Provide a CATExplore instance bound to the data.london.gov.uk domain."""
    explorer = CATExplore("data.london.gov.uk")
    return explorer

@pytest.mark.parametrize("endpoint", [
    "package_search",
])
def test_fetch_ckan_sample_endpoint_active(cat_explore, endpoint):
    """Check that the live CKAN endpoint answers and yields a sample record.

    This test performs a real HTTP request, so it needs network access.
    """
    # Only the network call sits inside the try block, so an assertion failure
    # below can never be mistaken for a connectivity problem.
    try:
        result = cat_explore.fetch_ckan_sample(endpoint)
    except requests.exceptions.RequestException as e:
        pytest.fail(f"Endpoint {endpoint} is not active: {str(e)}")

    # Verify the returned sample instead of asserting a constant.
    assert isinstance(result, dict), (
        f"Endpoint {endpoint} returned an unexpected sample: {result!r}"
    )

0 comments on commit ec1cc85

Please sign in to comment.