From 2eadf757456cfe79584695ba0cf58463c912725c Mon Sep 17 00:00:00 2001 From: Chris Carlon Date: Thu, 26 Dec 2024 21:18:42 +0000 Subject: [PATCH] feat!(french data loader): started french gouv catalogue data loader class [2024-12-26] BREAKING CHANGE: started french gouv catalogue data loader class --- HerdingCats/__init__.py | 5 +- HerdingCats/data_loader/data_loader.py | 590 +++++++++++++++---------- HerdingCats/endpoints/api_endpoints.py | 1 + HerdingCats/explorer/cat_explore.py | 44 +- README.md | 31 +- 5 files changed, 421 insertions(+), 250 deletions(-) diff --git a/HerdingCats/__init__.py b/HerdingCats/__init__.py index 2815cbc..a4f119a 100644 --- a/HerdingCats/__init__.py +++ b/HerdingCats/__init__.py @@ -1,4 +1,4 @@ -from .data_loader.data_loader import CkanCatResourceLoader, OpenDataSoftResourceLoader +from .data_loader.data_loader import CkanCatResourceLoader, OpenDataSoftResourceLoader, FrenchGouvResourceLoader from .explorer.cat_explore import CkanCatExplorer, OpenDataSoftCatExplorer, FrenchGouvCatExplorer from .session.cat_session import CatSession from .errors.cats_errors import CatSessionError, CatExplorerError, OpenDataSoftExplorerError @@ -17,7 +17,8 @@ "OpenDataSoftResourceLoader", "OpenDataSoftExplorerError", "FrenchGouvCatExplorer", - "FrenchGouvCatalogue" + "FrenchGouvCatalogue", + "FrenchGouvResourceLoader" ] __version__ = "0.1.6" diff --git a/HerdingCats/data_loader/data_loader.py b/HerdingCats/data_loader/data_loader.py index 23094cd..6a1b048 100644 --- a/HerdingCats/data_loader/data_loader.py +++ b/HerdingCats/data_loader/data_loader.py @@ -20,7 +20,7 @@ # START TO WRANGLE / ANALYSE -# LOAD DATA RESOURCES INTO STORAGE +# LOAD CKAN DATA RESOURCES INTO STORAGE class CkanCatResourceLoader: """A class to load data resources into various formats and storage systems.""" @@ -296,272 +296,400 @@ def aws_s3_data_loader(self, resource_data: List, bucket_name: str, logger.error(f"AWS S3 upload error: {e}") raise - +# START TO WRANGLE / ANALYSE +# LOAD OPEN DATA SOFT DATA RESOURCES INTO STORAGE class OpenDataSoftResourceLoader: + """A class to load OpenDataSoft resources into various formats and storage systems.""" + + SUPPORTED_FORMATS = { + "spreadsheet": ["xls", "xlsx"], + "csv": ["csv"], + "parquet": ["parquet"], + "geopackage": ["gpkg", "geopackage"] + } + def __init__(self) -> None: - pass + self._validate_dependencies() - def polars_data_loader( - self, resource_data: Optional[List[Dict]], format_type: Literal["parquet"], api_key: Optional[str] = None - ) -> pl.DataFrame: - """ - Load data from a resource URL into a Polars DataFrame. 
- Args: - resource_data: List of dictionaries containing resource information - format_type: Expected format type (currently only supports 'parquet') - api_key: Optional API key for authentication with OpenDataSoft - Returns: - Polars DataFrame - Raises: - OpenDataSoftExplorerError: If resource data is missing or download fails - - # Example usage - import HerdingCats as hc - - def main(): - with hc.CatSession(hc.OpenDataSoftDataCatalogues.UK_POWER_NETWORKS) as session: - explore = hc.OpenDataSoftCatExplorer(session) - data_loader = hc.OpenDataSoftResourceLoader() - - data = explore.show_dataset_export_options_dict("ukpn-smart-meter-installation-volumes") - pl_df = data_loader.polars_data_loader(data, "parquet", "api_key") - print(pl_df.head(10)) - - if __name__ == "__main__": - main() + def _validate_dependencies(self): + """Validate that all required dependencies are available.""" + required_modules = { + 'pandas': pd, + 'polars': pl, + 'duckdb': duckdb, + 'boto3': boto3, + 'pyarrow': pa + } + missing = [name for name, module in required_modules.items() if module is None] + if missing: + raise ImportError(f"Missing required dependencies: {', '.join(missing)}") - """ + @staticmethod + def validate_inputs(func): + """Decorator to validate resource data containing download URLs and formats.""" + @wraps(func) + def wrapper(self, resource_data: Optional[List[Dict]], *args, **kwargs): + # Check if resource data exists and is non-empty if not resource_data: - raise OpenDataSoftExplorerError("No resource data provided") + logger.error("No resource data provided") + raise ValueError("Resource data must be provided") + + if not isinstance(resource_data, list): + logger.error("Resource data must be a list") + raise ValueError("Resource data must be a list of dictionaries") + return func(self, resource_data, *args, **kwargs) + return wrapper + + def _validate_resource_data( + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: str + ) -> str: + """Validate resource data and extract download URL.""" + if not resource_data: + raise OpenDataSoftExplorerError("No resource data provided") - headers = {'Accept': 'application/parquet'} + # Get all supported formats + all_formats = [fmt for formats in self.SUPPORTED_FORMATS.values() for fmt in formats] + + # If the provided format_type is a category, get its format + valid_formats = (self.SUPPORTED_FORMATS.get(format_type, []) + if format_type in self.SUPPORTED_FORMATS + else [format_type]) + + # Validate format type + if format_type not in self.SUPPORTED_FORMATS and format_type not in all_formats: + raise OpenDataSoftExplorerError( + f"Unsupported format: {format_type}. " + f"Supported formats: csv, parquet, xls, xlsx, geopackage" + ) + + # Find matching resource + url = next( + (r.get('download_url') for r in resource_data + if r.get('format', '').lower() in valid_formats), + None + ) + + # If format provided does not have a url provide the formats that do + if not url: + available_formats = [r['format'] for r in resource_data] + raise OpenDataSoftExplorerError( + f"No resource found with format: {format_type}. 
" + f"Available formats: {', '.join(available_formats)}" + ) + + return url + + def _fetch_data(self, url: str, api_key: Optional[str] = None) -> BytesIO: + """Fetch data from URL and return as BytesIO object.""" + try: + # Add API key to URL if provided if api_key: - headers['Authorization'] = f'apikey {api_key}' - - for resource in resource_data: - if resource.get('format', '').lower() == 'parquet': - url = resource.get('download_url') - if not url: - continue - try: - response = requests.get(url, headers=headers) - response.raise_for_status() - binary_data = BytesIO(response.content) - df = pl.read_parquet(binary_data) - - if df.height == 0 and not api_key: - raise OpenDataSoftExplorerError( - "Received empty DataFrame. This likely means an API key is required for this dataset. " - "Please provide an API key and try again. You can usually do this by creating an account with the datastore you are tyring to access" - ) - return df - - except (requests.RequestException, Exception) as e: - raise OpenDataSoftExplorerError("Failed to download resource", e) - - raise OpenDataSoftExplorerError("No parquet format resource found") + url = f"{url}?apikey={api_key}" + + response = requests.get(url) + response.raise_for_status() + return BytesIO(response.content) + except requests.RequestException as e: + raise OpenDataSoftExplorerError(f"Failed to download resource: {str(e)}", e) - def pandas_data_loader( - self, resource_data: Optional[List[Dict]], format_type: Literal["parquet"], api_key: Optional[str] = None - ) -> pd.DataFrame: - """ - Load data from a resource URL into a Polars DataFrame. - Args: - resource_data: List of dictionaries containing resource information - format_type: Expected format type (currently only supports 'parquet') - api_key: Optional API key for authentication with OpenDataSoft - Returns: - Polars DataFrame - Raises: - OpenDataSoftExplorerError: If resource data is missing or download fails - - # Example usage - import HerdingCats as hc - - def main(): - with hc.CatSession(hc.OpenDataSoftDataCatalogues.UK_POWER_NETWORKS) as session: - explore = hc.OpenDataSoftCatExplorer(session) - data_loader = hc.OpenDataSoftResourceLoader() - - data = explore.show_dataset_export_options_dict("ukpn-smart-meter-installation-volumes") - pd_df = data_loader.pandas_data_loader(data, "parquet", "api_key") - print(pd_df.head(10)) - - if __name__ == "__main__": - main() + def _verify_data(self, df: Union[pd.DataFrame, pl.DataFrame], api_key: Optional[str]) -> None: + """Verify that the DataFrame is not empty when no API key is provided.""" + is_empty = df.empty if isinstance(df, pd.DataFrame) else df.height == 0 + if is_empty and not api_key: + raise OpenDataSoftExplorerError( + "Received empty DataFrame. This likely means an API key is required. " + "Please provide an API key and try again." 
+ ) - """ - if not resource_data: - raise OpenDataSoftExplorerError("No resource data provided") + def _load_dataframe( + self, + binary_data: BytesIO, + format_type: str, + loader_type: Literal["pandas", "polars"], + sheet_name: Optional[str] = None + ) -> Union[pd.DataFrame, pl.DataFrame]: + """Load binary data into specified DataFrame type.""" + try: + match (format_type, loader_type): + case ("parquet", "pandas"): + return pd.read_parquet(binary_data) + case ("parquet", "polars"): + return pl.read_parquet(binary_data) + case ("csv", "pandas"): + return pd.read_csv(binary_data) + case ("csv", "polars"): + return pl.read_csv(binary_data) + case (("xls" | "xlsx" | "spreadsheet"), "pandas"): + return pd.read_excel(binary_data, sheet_name=sheet_name) if sheet_name else pd.read_excel(binary_data) + case (("xls" | "xlsx" | "spreadsheet"), "polars"): + return pl.read_excel(binary_data, sheet_name=sheet_name) if sheet_name else pl.read_excel(binary_data) + case (("geopackage" | "gpkg"), _): + raise ValueError("Geopackage format requires using geopandas or a specialized GIS library") + case _: + raise ValueError(f"Unsupported format {format_type} or loader type {loader_type}") + except Exception as e: + raise OpenDataSoftExplorerError(f"Failed to load {loader_type} DataFrame: {str(e)}", e) - headers = {'Accept': 'application/parquet'} - if api_key: - headers['Authorization'] = f'apikey {api_key}' - - for resource in resource_data: - if resource.get('format', '').lower() == 'parquet': - url = resource.get('download_url') - if not url: - continue - try: - response = requests.get(url, headers=headers) - response.raise_for_status() - binary_data = BytesIO(response.content) - df = pd.read_parquet(binary_data) - - if df.size == 0 and not api_key: - raise OpenDataSoftExplorerError( - "Received empty DataFrame. This likely means an API key is required for this dataset. " - "Please provide an API key and try again. You can usually do this by creating an account with the datastore you are tyring to access" - ) - return df - - except (requests.RequestException, Exception) as e: - raise OpenDataSoftExplorerError("Failed to download resource", e) - - raise OpenDataSoftExplorerError("No parquet format resource found") + @overload + def _load_to_frame( + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: str, + loader_type: Literal["pandas"], + api_key: Optional[str] = None, + sheet_name: Optional[str] = None + ) -> PandasDataFrame: ... + + @overload + def _load_to_frame( + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: str, + loader_type: Literal["polars"], + api_key: Optional[str] = None, + sheet_name: Optional[str] = None + ) -> PolarsDataFrame: ... 
+ + def _load_to_frame( + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: str, + loader_type: Literal["pandas", "polars"], + api_key: Optional[str] = None, + sheet_name: Optional[str] = None + ) -> Union[pd.DataFrame, pl.DataFrame]: + """Common method for loading data into pandas or polars DataFrame.""" + url = self._validate_resource_data(resource_data, format_type) + binary_data = self._fetch_data(url, api_key) + df = self._load_dataframe(binary_data, format_type, loader_type, sheet_name) + self._verify_data(df, api_key) + return df + + @validate_inputs + def polars_data_loader( + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: Literal["csv", "parquet", "spreadsheet", "xls", "xlsx"], + api_key: Optional[str] = None, + sheet_name: Optional[str] = None + ) -> pl.DataFrame: + """Load data from a resource URL into a Polars DataFrame.""" + return self._load_to_frame(resource_data, format_type, "polars", api_key, sheet_name) + + @validate_inputs + def pandas_data_loader( + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: Literal["csv", "parquet", "spreadsheet", "xls", "xlsx"], + api_key: Optional[str] = None, + sheet_name: Optional[str] = None + ) -> pd.DataFrame: + """Load data from a resource URL into a Pandas DataFrame.""" + return self._load_to_frame(resource_data, format_type, "pandas", api_key, sheet_name) + @validate_inputs def duckdb_data_loader( - self, - resource_data: Optional[List[Dict]], - format_type: Literal["parquet", "xlsx", "csv"], - api_key: Optional[str] = None + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: Literal["csv", "parquet", "xls", "xlsx"], + api_key: Optional[str] = None, + sheet_name: Optional[str] = None ) -> duckdb.DuckDBPyConnection: - """ - Load data from a resource URL directly into DuckDB. + """Load data from a resource URL directly into DuckDB.""" + url = self._validate_resource_data(resource_data, format_type) - Args: - resource_data: List of dictionaries containing resource information - format_type: Expected format type ('parquet', 'xlsx', or 'csv') - api_key: Optional API key for authentication with OpenDataSoft - - Returns: - DuckDB connection with loaded data + if api_key: + url = f"{url}?apikey={api_key}" - Raises: - OpenDataSoftExplorerError: If resource data is missing or download fails - """ - if not resource_data: - raise OpenDataSoftExplorerError("No resource data provided") - - # Create in-memory DuckDB connection con = duckdb.connect(':memory:') con.execute("SET force_download=true") + con.execute("INSTALL spatial") + con.execute("LOAD spatial") - for resource in resource_data: - match resource.get('format', '').lower(): - case fmt if fmt == format_type: - url = resource.get('download_url') - if not url: - continue - - try: - # Append API key to URL if provided - if api_key: - url = f"{url}?apikey={api_key}" - - # Load data based on format type - match format_type: - case "parquet": - con.execute( - "CREATE TABLE data AS SELECT * FROM read_parquet(?)", - [url] - ) - case "xlsx": - con.execute( - "CREATE TABLE data AS SELECT * FROM read_xlsx(?)", - [url] - ) - case "csv": - con.execute( - "CREATE TABLE data AS SELECT * FROM read_csv_auto(?)", - [url] - ) - - # Verify data was loaded - sample_data = con.execute("SELECT * FROM data LIMIT 10").fetchall() - if not sample_data and not api_key: - raise OpenDataSoftExplorerError( - "Received empty dataset. This likely means an API key is required. " - "Please provide an API key and try again. 
You can usually do this by " - "creating an account with the datastore you are trying to access" - ) - - return con - - except duckdb.Error as e: - raise OpenDataSoftExplorerError(f"Failed to load {format_type} resource into DuckDB", e) - + try: + # Use match statement for format handling + match format_type: + case "parquet": + con.execute("CREATE TABLE data AS SELECT * FROM read_parquet(?)", [url]) + case "csv": + con.execute("CREATE TABLE data AS SELECT * FROM read_csv(?)", [url]) + case "xls" | "xlsx" | "spreadsheet": + if sheet_name: + con.execute("CREATE TABLE data AS SELECT * FROM st_read(?, sheet_name=?)", [url, sheet_name]) + else: + con.execute("CREATE TABLE data AS SELECT * FROM st_read(?)", [url]) case _: - continue - - raise OpenDataSoftExplorerError(f"No {format_type} format resource found") + raise ValueError(f"Unsupported format type: {format_type}") + + # Verify data was loaded + sample_data = con.execute("SELECT * FROM data LIMIT 10").fetchall() + if not sample_data and not api_key: + raise OpenDataSoftExplorerError( + "Received empty dataset. This likely means an API key is required." + ) + + return con + + except duckdb.Error as e: + raise OpenDataSoftExplorerError(f"Failed to load {format_type} resource into DuckDB", e) + + + def _verify_s3_bucket(self, s3_client, bucket_name: str) -> None: + """Verify S3 bucket exists.""" + try: + s3_client.head_bucket(Bucket=bucket_name) + logger.success("Bucket Found") + except ClientError as e: + error_code = int(e.response["Error"]["Code"]) + if error_code == 404: + raise ValueError(f"Bucket '{bucket_name}' does not exist") + raise + + def _convert_to_parquet(self, binary_data: BytesIO, format_type: str) -> BytesIO: + """Convert input data to parquet format.""" + try: + match format_type: + case "csv": + df = pd.read_csv(binary_data) + case "xls" | "xlsx": + df = pd.read_excel(binary_data) + case _: + raise ValueError(f"Unsupported format type for Parquet conversion: {format_type}") + if df.empty: + raise ValueError("No data was loaded from the source file") + + # Convert to parquet + parquet_buffer = BytesIO() + table = pa.Table.from_pandas(df) + pq.write_table(table, parquet_buffer) + parquet_buffer.seek(0) + return parquet_buffer + except Exception as e: + raise OpenDataSoftExplorerError(f"Failed to convert to parquet: {str(e)}", e) + + @validate_inputs def aws_s3_data_loader( self, - resource_data: Optional[List[Dict]], + resource_data: List[Dict[str, str]], bucket_name: str, custom_name: str, - api_key: Optional[str] = None, - ) -> None: + mode: Literal["raw", "parquet"], + format_type: Literal["csv", "parquet", "xls", "xlsx"], + api_key: Optional[str] = None + ) -> str: """ - Load resource data into remote S3 storage as a parquet file. + Load resource data into remote S3 storage. 
Args: - resource_data: List of dictionaries containing resource information + resource_data: List of dictionaries containing format and download_url bucket_name: S3 bucket name custom_name: Custom prefix for the filename + mode: 'raw' to keep original format, 'parquet' to convert to parquet + format_type: Format to download ('csv', 'parquet', 'xls', 'xlsx') api_key: Optional API key for authentication - """ - if not resource_data: - raise OpenDataSoftExplorerError("No resource data provided") - if not bucket_name: - raise ValueError("No bucket name provided") - - # Create an S3 client + Returns: + str: Name of the uploaded file + """ + + # Validate inputs + if not all(isinstance(x, str) and x.strip() for x in [bucket_name, custom_name]): + raise ValueError("Bucket name and custom name must be non-empty strings") + + # Get URL for specified format + url = self._validate_resource_data(resource_data, format_type) + + # Fetch data + binary_data = self._fetch_data(url, api_key) + + # Setup S3 s3_client = boto3.client("s3") - logger.success("S3 Client Created") + self._verify_s3_bucket(s3_client, bucket_name) - # Check if the bucket exists try: - s3_client.head_bucket(Bucket=bucket_name) - logger.success("Bucket Found") - except ClientError as e: - error_code = int(e.response["Error"]["Code"]) - if error_code == 404: - logger.error(f"Bucket '{bucket_name}' does not exist.") - else: - logger.error(f"Error checking bucket '{bucket_name}': {e}") - return - - headers = {'Accept': 'application/parquet'} - if api_key: - headers['Authorization'] = f'apikey {api_key}' - - for resource in resource_data: - if resource.get('format', '').lower() == 'parquet': - url = resource.get('download_url') - if not url: - continue + match mode: + case "raw": + filename = f"{custom_name}-{uuid.uuid4()}.{format_type}" + s3_client.upload_fileobj(binary_data, bucket_name, filename) + case "parquet": + parquet_buffer = self._convert_to_parquet(binary_data, format_type) + filename = f"{custom_name}-{uuid.uuid4()}.parquet" + s3_client.upload_fileobj(parquet_buffer, bucket_name, filename) + + logger.success(f"File uploaded successfully to S3 as {filename}") + return filename + except Exception as e: + logger.error(f"AWS S3 upload error: {e}") + raise - try: - response = requests.get(url, headers=headers) - response.raise_for_status() - binary_data = BytesIO(response.content) +# START TO WRANGLE / ANALYSE +# LOAD FRENCH GOUV DATA RESOURCES INTO STORAGE +class FrenchGouvResourceLoader: + """A class to load French Gouv data resources into various formats and storage systems.""" - # Generate a unique filename - filename = f"{custom_name}-{uuid.uuid4()}.parquet" + SUPPORTED_FORMATS = { + "spreadsheet": ["xls", "xlsx"], + "csv": ["csv"], + "parquet": ["parquet"], + "geopackage": ["gpkg", "geopackage"] + } - # Upload the parquet file directly - s3_client.upload_fileobj(binary_data, bucket_name, filename) - logger.success("Parquet file uploaded successfully to S3") - return + def __init__(self) -> None: + self._validate_dependencies() - except requests.RequestException as e: - raise OpenDataSoftExplorerError("Failed to download resource", e) - except ClientError as e: - logger.error(f"Error: {e}") - return + def _validate_dependencies(self): + """Validate that all required dependencies are available.""" + required_modules = { + 'pandas': pd, + 'polars': pl, + 'duckdb': duckdb, + 'boto3': boto3, + 'pyarrow': pa + } + missing = [name for name, module in required_modules.items() if module is None] + if missing: + raise 
ImportError(f"Missing required dependencies: {', '.join(missing)}") + + def validate_resource_data( + self, + resource_data: Optional[List[Dict[str, str]]], + format_type: str + ) -> str: + """Validate resource data and extract download URL.""" + if not resource_data: + raise OpenDataSoftExplorerError("No resource data provided") - raise OpenDataSoftExplorerError("No parquet format resource found") + # Get all supported formats + all_formats = [fmt for formats in self.SUPPORTED_FORMATS.values() for fmt in formats] + + # If the provided format_type is a category, get its format + valid_formats = (self.SUPPORTED_FORMATS.get(format_type, []) + if format_type in self.SUPPORTED_FORMATS + else [format_type]) + + # Validate format type + if format_type not in self.SUPPORTED_FORMATS and format_type not in all_formats: + raise OpenDataSoftExplorerError( + f"Unsupported format: {format_type}. " + f"Supported formats: csv, parquet, xls, xlsx, geopackage" + ) + + # Find matching resource + url = next( + (r.get('resource_latest') for r in resource_data + if r.get('resource_format', '').lower() in valid_formats), + None + ) + + # If format provided does not have a url provide the formats that do + if not url: + available_formats = [r['resource_format'] for r in resource_data] + raise OpenDataSoftExplorerError( + f"No resource found with format: {format_type}. " + f"Available formats: {', '.join(available_formats)}" + ) + + return url \ No newline at end of file diff --git a/HerdingCats/endpoints/api_endpoints.py b/HerdingCats/endpoints/api_endpoints.py index ebbcd69..6364587 100644 --- a/HerdingCats/endpoints/api_endpoints.py +++ b/HerdingCats/endpoints/api_endpoints.py @@ -38,6 +38,7 @@ class OpenDataSoftDataCatalogues(Enum): ELIA_BELGIAN_ENERGY = "https://opendata.elia.be" EDF_ENERGY = "https://opendata.edf.fr" CADENT_GAS = "https://cadentgas.opendatasoft.com" + GRD_FRANCE = "https://opendata.agenceore.fr" # Add more catalogues as needed... class OpenDataSoftApiPaths: diff --git a/HerdingCats/explorer/cat_explore.py b/HerdingCats/explorer/cat_explore.py index 125c37c..ee3227a 100644 --- a/HerdingCats/explorer/cat_explore.py +++ b/HerdingCats/explorer/cat_explore.py @@ -1321,7 +1321,7 @@ def get_dataset_meta_dataframe(self, identifier: str, df_type: Literal["pandas", logger.error(f"Error fetching dataset {identifier}: {str(e)}") return pd.DataFrame() if df_type == "pandas" else pl.DataFrame() - def get_datasets_by_identifiers(self, identifiers: list) -> dict: + def get_multiple_datasets_meta(self, identifiers: list) -> dict: """ Fetches multiple datasets using a list of IDs or slugs. @@ -1352,7 +1352,7 @@ def get_datasets_by_identifiers(self, identifiers: list) -> dict: # ---------------------------- # Show available resources for a particular dataset # ---------------------------- - def get_dataset_resource(self, dataset_id: str, resource_id: str) -> dict: + def get_dataset_resource_export(self, dataset_id: str, resource_id: str) -> dict: """ Fetches metadata for a specific resource within a dataset. 
@@ -1417,6 +1417,46 @@ def get_dataset_resource_dataframe(self, dataset_id: str, resource_id: str, df_t
         except Exception as e:
             logger.error(f"Error fetching resource {resource_id}: {str(e)}")
             return pd.DataFrame() if df_type == "pandas" else pl.DataFrame()
+
+    def get_dataset_resource_meta(self, data: dict) -> List[Dict[str, Any]] | None:
+        """Extract per-resource metadata from a dataset metadata dictionary."""
+        if len(data) == 0:
+            raise ValueError("Data can't be empty")
+
+        try:
+            result = self._extract_resource_data(data)
+            return result
+        except Exception as e:
+            logger.error(f"Error fetching resource: {str(e)}")
+
+    @staticmethod
+    def _extract_resource_data(data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """
+        Extracts specific fields for a specific package and creates a list of dictionaries,
+        one for each resource, containing the specified fields.
+
+        Args:
+            data (Dict[str, Any]): The input package data dictionary.
+
+        Returns:
+            List[Dict[str, Any]]: A list of dictionaries, each containing the specified fields for a resource.
+        """
+
+        base_fields = {
+            "dataset_id": data.get("id"),
+            "slug": data.get("slug"),
+        }
+
+        resource_fields = ["created_at", "id", "format", "url", "title", "latest", "last_modified", "frequency", "extras"]
+
+        result = []
+        for resource in data.get("resources", []):
+            resource_data = base_fields.copy()
+            for field in resource_fields:
+                resource_data[f"resource_{field}"] = resource.get(field)
+            result.append(resource_data)
+
+        return result
 
     # ----------------------------
     # Show all organisation available
diff --git a/README.md b/README.md
index e41381e..b1acf07 100644
--- a/README.md
+++ b/README.md
@@ -39,20 +39,21 @@ I'll help format these tables in clean markdown:
 
 ## Supported Catalogues
 
-| Catalogue Name             | Website                          | Catalogue Backend |
-| -------------------------- | -------------------------------- | ----------------- |
-| London Datastore           | data.london.gov.uk               | CKAN              |
-| Subak Data Catalogue       | data.subak.org                   | CKAN              |
-| UK Gov Open Data           | data.gov.uk                      | CKAN              |
-| Humanitarian Data Exchange | data.humdata.org                 | CKAN              |
-| UK Power Networks          | ukpowernetworks.opendatasoft.com | Open Datasoft     |
-| Infrabel                   | opendata.infrabel.be             | Open Datasoft     |
-| Paris                      | opendata.paris.fr                | Open Datasoft     |
-| Toulouse                   | data.toulouse-metropole.fr       | Open Datasoft     |
-| Elia Belgian Energy        | opendata.elia.be                 | Open Datasoft     |
-| EDF Energy                 | opendata.edf.fr                  | Open Datasoft     |
-| Cadent Gas                 | cadentgas.opendatasoft.com       | Open Datasoft     |
-| French Gov Open Data       | data.gouv.fr                     | CKAN              |
+| Catalogue Name                                                             | Website                          | Catalogue Backend |
+| -------------------------------------------------------------------------- | -------------------------------- | ----------------- |
+| London Datastore                                                           | data.london.gov.uk               | CKAN              |
+| Subak Data Catalogue                                                       | data.subak.org                   | CKAN              |
+| UK Gov Open Data                                                           | data.gov.uk                      | CKAN              |
+| Humanitarian Data Exchange                                                 | data.humdata.org                 | CKAN              |
+| UK Power Networks                                                          | ukpowernetworks.opendatasoft.com | Open Datasoft     |
+| Infrabel                                                                   | opendata.infrabel.be             | Open Datasoft     |
+| Paris                                                                      | opendata.paris.fr                | Open Datasoft     |
+| Toulouse                                                                   | data.toulouse-metropole.fr       | Open Datasoft     |
+| Elia Belgian Energy                                                        | opendata.elia.be                 | Open Datasoft     |
+| EDF Energy                                                                 | opendata.edf.fr                  | Open Datasoft     |
+| Cadent Gas                                                                 | cadentgas.opendatasoft.com       | Open Datasoft     |
+| French Gov Open Data                                                       | data.gouv.fr                     | CKAN              |
+| Gestionnaire de Réseaux de Distribution (French equivalent of GDNs in UK) | opendata.agenceore.fr            | Open Datasoft     |
 
 ## In Development
 
@@ -63,7 +64,7 @@
 | Data Mill North | datamillnorth.org | 
TBC | Different implementation - may not work with all methods | | Canada Open Data | open.canada.ca | TBC | Different implementation needs investigation | -# Herding-Cats Quick Start!🏃‍♂️‍➡️ +## Herding-Cats Quick Start!🏃‍♂️‍➡️ ## Overview
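
This patch drops the inline usage examples from the OpenDataSoft loader docstrings, but the call pattern they illustrated still applies to the new signatures. A minimal sketch of the refactored loader, assuming the UK Power Networks catalogue and treating the dataset slug and API key as placeholders:

```python
import HerdingCats as hc

def main():
    # UK Power Networks is one of the registered OpenDataSoft catalogues
    with hc.CatSession(hc.OpenDataSoftDataCatalogues.UK_POWER_NETWORKS) as session:
        explore = hc.OpenDataSoftCatExplorer(session)
        loader = hc.OpenDataSoftResourceLoader()

        # List the export formats/URLs for a dataset, then load the parquet export
        data = explore.show_dataset_export_options_dict("ukpn-smart-meter-installation-volumes")
        pl_df = loader.polars_data_loader(data, "parquet", api_key="your-api-key")
        print(pl_df.head(10))

        # The same resource list can be loaded into an in-memory DuckDB table named "data"
        con = loader.duckdb_data_loader(data, "parquet", api_key="your-api-key")
        print(con.execute("SELECT * FROM data LIMIT 10").fetchall())

if __name__ == "__main__":
    main()
```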
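
The new FrenchGouvResourceLoader so far only validates dependencies and resolves a download URL from the flattened per-resource records produced by FrenchGouvCatExplorer.get_dataset_resource_meta. A minimal sketch of that hand-off, with placeholder identifiers and URL:

```python
import HerdingCats as hc

# Shape of the records returned by FrenchGouvCatExplorer.get_dataset_resource_meta
resource_data = [
    {
        "dataset_id": "example-dataset-id",    # placeholder values
        "slug": "example-dataset-slug",
        "resource_format": "csv",
        "resource_latest": "https://www.data.gouv.fr/fr/datasets/r/example-resource-id",
    }
]

loader = hc.FrenchGouvResourceLoader()

# validate_resource_data checks the requested format against SUPPORTED_FORMATS
# and returns the 'resource_latest' URL of the first matching resource
url = loader.validate_resource_data(resource_data, "csv")
print(url)
```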