Skip to content

Commit

Permalink
feat(cat session): simplified cat session initialisation and improved…
Browse files Browse the repository at this point in the history
… error messages [2024-12-30]
  • Loading branch information
CHRISCARLON committed Dec 30, 2024
1 parent de0045a commit 772aedc
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 69 deletions.
42 changes: 41 additions & 1 deletion HerdingCats/errors/cats_errors.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,47 @@
from typing import Optional

class CatSessionError(Exception):
    """
    Custom exception class for CatSession errors.

    Used when there are issues with establishing or maintaining catalogue
    sessions. The rendered message is wrapped in ANSI colour codes and,
    when available, lists the failing URL and the underlying error.
    """

    # ANSI escape sequences used to colour the rendered message.
    RED = '\033[91m'
    YELLOW = '\033[93m'
    RESET = '\033[0m'

    def __init__(
        self,
        message: str,
        url: Optional[str] = None,
        original_error: Optional[Exception] = None
    ) -> None:
        """
        Initialize the CatSession error with enhanced error information.

        Args:
            message: The main error message
            url: The URL that caused the error (if applicable)
            original_error: The original exception that was caught (if any)
        """
        self.message = message
        self.url = url
        self.original_error = original_error

        error_msg = f"{self.RED}[CatSession Error] 🐈: {message}{self.RESET}"

        if url:
            error_msg += f"\n{self.YELLOW}Failed URL: {url}{self.RESET}"

        # Compare against None so an exception that happens to be falsy
        # (e.g. one defining __len__) is still reported.
        if original_error is not None:
            error_msg += (
                f"\n{self.YELLOW}Original error: "
                f"{str(original_error)}{self.RESET}"
            )

        super().__init__(error_msg)

    def __reduce__(self):
        """
        Rebuild copies from the raw constructor arguments.

        The default exception reduce protocol re-invokes the class with
        ``self.args``, which here holds the already decorated text; that
        would wrap the message in colour codes a second time on
        copy/pickle round trips.
        """
        return (
            type(self),
            (self.message, self.url, self.original_error),
            vars(self) or None,
        )

    def __str__(self) -> str:
        """Return the fully formatted, colourised error message."""
        return self.args[0]

class CatExplorerError(Exception):
    """Exception type for catalogue explorer errors; adds nothing to Exception."""
Expand Down
102 changes: 34 additions & 68 deletions HerdingCats/session/cat_session.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import requests

from typing import Union
from loguru import logger
from urllib.parse import urlparse
Expand All @@ -16,99 +15,62 @@ class CatalogueType(Enum):
# START A SESSION WITH A DATA CATALOGUE
class CatSession:
def __init__(
    self, catalogue: Union[CkanDataCatalogues, OpenDataSoftDataCatalogues, FrenchGouvCatalogue]
) -> None:
    """
    Initialise a session with a predefined catalogue.

    Resolves the catalogue to a domain and catalogue type, opens a
    requests session, builds the base URL and validates it with a GET
    request.

    Args:
        catalogue: A predefined catalogue from one of the supported enum types
            (CkanDataCatalogues, OpenDataSoftDataCatalogues, or FrenchGouvCatalogue)

    Raises:
        ValueError: If catalogue is not one of the supported enum types.
        CatSessionError: If the resulting base URL is invalid or unreachable.
    """
    self.domain, self.catalogue_type = self._process_catalogue(catalogue)
    self.session = requests.Session()

    # Only a real scheme prefix counts as "already a URL"; a bare host that
    # merely begins with the letters "http" (e.g. "httpbin.org") still needs
    # the https:// prefix.
    has_scheme = self.domain.startswith(("http://", "https://"))
    self.base_url = self.domain if has_scheme else f"https://{self.domain}"

    try:
        self._validate_url()
    except CatSessionError:
        # The caller never receives this instance on failure, so release
        # the session here rather than leaking it.
        self.session.close()
        raise

@staticmethod
def _process_catalogue(
    catalogue: Union[CkanDataCatalogues, OpenDataSoftDataCatalogues, FrenchGouvCatalogue]
) -> tuple[str, CatalogueType]:
    """
    Work out the domain and catalogue type of a predefined catalogue.

    Args:
        catalogue: A member of one of the supported catalogue enums

    Returns:
        tuple[str, CatalogueType]: (domain, catalogue_type), where domain is
        the network location of the enum value, or its path when the value
        carries no network location

    Raises:
        ValueError: If catalogue is not a member of a supported enum
    """
    # Classify by enum family, checked in the same order as before.
    if isinstance(catalogue, FrenchGouvCatalogue):
        kind = CatalogueType.GOUV_FR
    elif isinstance(catalogue, CkanDataCatalogues):
        kind = CatalogueType.CKAN
    elif isinstance(catalogue, OpenDataSoftDataCatalogues):
        kind = CatalogueType.OPENDATA_SOFT
    else:
        raise ValueError(
            "Catalogue must be one of: CkanDataCatalogues, OpenDataSoftDataCatalogues, or FrenchGouvCatalogue"
        )

    url_parts = urlparse(catalogue.value)
    # Values without a scheme parse with an empty netloc; fall back to path.
    host = url_parts.netloc or url_parts.path
    return host, kind

def _validate_url(self) -> None:
    """
    Validate the base URL by issuing a GET request against it.

    The request uses a 10 second timeout, and error status codes are
    treated as failures.

    Raises:
        CatSessionError: If the request fails or returns an error status
            code. The underlying requests exception is attached both as
            ``original_error`` and as the explicit cause.
    """
    try:
        response = self.session.get(self.base_url, timeout=10)
        response.raise_for_status()
    except requests.RequestException as e:
        logger.error(f"Failed to connect to {self.base_url}: {str(e)}")
        # Chain explicitly so tracebacks show the request failure as the
        # direct cause rather than as an error during handling.
        raise CatSessionError(
            message="Invalid or unreachable URL",
            url=self.base_url,
            original_error=e
        ) from e

def start_session(self) -> None:
Expand All @@ -119,7 +81,11 @@ def start_session(self) -> None:
logger.success(f"Session started successfully with {self.domain}")
except requests.RequestException as e:
logger.error(f"Failed to start session: {e}")
raise CatSessionError(f"Failed to start session: {str(e)}")
raise CatSessionError(
message="Failed to start session",
url=self.base_url,
original_error=e
)

def close_session(self) -> None:
"""Close the session."""
Expand All @@ -137,4 +103,4 @@ def __exit__(self, exc_type, exc_val, exc_tb):

def get_catalogue_type(self) -> CatalogueType:
"""Return the catalog type (CKAN, OpenDataSoft, or French Government)"""
return self.catalogue_type
return self.catalogue_type

0 comments on commit 772aedc

Please sign in to comment.